From 83349046151acb6be52e01c6566c71a905ebf40e Mon Sep 17 00:00:00 2001
From: Segher Boessenkool <segher@kernel.crashing.org>
Date: Wed, 24 May 2017 21:35:49 +0200
Subject: [PATCH] Split off powerpcspe from rs6000 port

* config/powerpcspe: New port.  Files are copied from the rs6000
port, with "rs6000" in filenames replaced by "powerpcspe".

* config.gcc (powerpc*-*-*spe*): New.
(powerpc-*-eabispe*): Use ${cpu_type} instead of hardcoded pathnames.
(powerpc-*-rtems*spe*): New.
(powerpc*-*-linux*spe*): New.
(powerpc-wrs-vxworksspe): New.
(powerpc*-*-*, rs6000-*-*): Use ${cpu_type}.
(misc flags) [powerpc*-*-*, rs6000-*-*]: Use ${cpu_type}.
* config.host (powerpc*-*-*spe*): New.

From-SVN: r248429
---
 ChangeLog | 11 +
 gcc/ChangeLog | 5 +
 .../config/powerpcspe/powerpcspe-common.c | 333 +
 gcc/config.gcc | 49 +-
 gcc/config.host | 4 +
 gcc/config/powerpcspe/40x.md | 124 +
 gcc/config/powerpcspe/440.md | 138 +
 gcc/config/powerpcspe/476.h | 32 +
 gcc/config/powerpcspe/476.md | 143 +
 gcc/config/powerpcspe/476.opt | 24 +
 gcc/config/powerpcspe/601.md | 137 +
 gcc/config/powerpcspe/603.md | 147 +
 gcc/config/powerpcspe/6xx.md | 284 +
 gcc/config/powerpcspe/7450.md | 188 +
 gcc/config/powerpcspe/750cl.h | 30 +
 gcc/config/powerpcspe/7xx.md | 186 +
 gcc/config/powerpcspe/8540.md | 248 +
 gcc/config/powerpcspe/a2.md | 138 +
 gcc/config/powerpcspe/aix-stdint.h | 51 +
 gcc/config/powerpcspe/aix.h | 278 +
 gcc/config/powerpcspe/aix43.h | 167 +
 gcc/config/powerpcspe/aix51.h | 169 +
 gcc/config/powerpcspe/aix52.h | 179 +
 gcc/config/powerpcspe/aix53.h | 180 +
 gcc/config/powerpcspe/aix61.h | 213 +
 gcc/config/powerpcspe/aix64.opt | 55 +
 gcc/config/powerpcspe/aix71.h | 230 +
 gcc/config/powerpcspe/altivec.h | 648 +
 gcc/config/powerpcspe/altivec.md | 4028 ++
 gcc/config/powerpcspe/biarch64.h | 26 +
 gcc/config/powerpcspe/bmi2intrin.h | 169 +
 gcc/config/powerpcspe/bmiintrin.h | 187 +
 gcc/config/powerpcspe/cell.md | 423 +
 gcc/config/powerpcspe/constraints.md | 323 +
 gcc/config/powerpcspe/crypto.md | 110 +
 gcc/config/powerpcspe/darwin.h | 422 +
 gcc/config/powerpcspe/darwin.md | 480 +
 gcc/config/powerpcspe/darwin.opt | 42 +
 gcc/config/powerpcspe/darwin64.h | 32 +
 gcc/config/powerpcspe/darwin7.h | 32 +
 gcc/config/powerpcspe/darwin8.h | 31 +
 gcc/config/powerpcspe/default64.h | 31 +
 gcc/config/powerpcspe/dfp.md | 419 +
 gcc/config/powerpcspe/driver-powerpcspe.c | 539 +
 gcc/config/powerpcspe/e300c2c3.md | 193 +
 gcc/config/powerpcspe/e500.h | 45 +
 gcc/config/powerpcspe/e500mc.md | 198 +
 gcc/config/powerpcspe/e500mc64.md | 200 +
 gcc/config/powerpcspe/e5500.md | 190 +
 gcc/config/powerpcspe/e6500.md | 228 +
 gcc/config/powerpcspe/eabi.h | 41 +
 gcc/config/powerpcspe/eabialtivec.h | 27 +
 gcc/config/powerpcspe/eabisim.h | 51 +
 gcc/config/powerpcspe/eabispe.h | 26 +
 gcc/config/powerpcspe/freebsd.h | 79 +
 gcc/config/powerpcspe/freebsd64.h | 433 +
 gcc/config/powerpcspe/genopt.sh | 64 +
 gcc/config/powerpcspe/host-darwin.c | 153 +
 gcc/config/powerpcspe/host-ppc64-darwin.c | 30 +
 gcc/config/powerpcspe/htm.md | 296 +
 gcc/config/powerpcspe/htmintrin.h | 131 +
 gcc/config/powerpcspe/htmxlintrin.h | 214 +
 gcc/config/powerpcspe/linux.h | 140 +
 gcc/config/powerpcspe/linux64.h | 642 +
 gcc/config/powerpcspe/linux64.opt | 42 +
 gcc/config/powerpcspe/linuxaltivec.h | 32 +
 gcc/config/powerpcspe/linuxspe.h | 32 +
 gcc/config/powerpcspe/lynx.h | 120 +
 gcc/config/powerpcspe/milli.exp | 7 +
 gcc/config/powerpcspe/mpc.md | 112 +
 gcc/config/powerpcspe/netbsd.h | 92 +
 gcc/config/powerpcspe/option-defaults.h | 64 +
 gcc/config/powerpcspe/paired.h | 75 +
gcc/config/powerpcspe/paired.md | 505 + gcc/config/powerpcspe/power4.md | 451 + gcc/config/powerpcspe/power5.md | 351 + gcc/config/powerpcspe/power6.md | 629 + gcc/config/powerpcspe/power7.md | 366 + gcc/config/powerpcspe/power8.md | 396 + gcc/config/powerpcspe/power9.md | 489 + gcc/config/powerpcspe/powerpcspe-builtin.def | 2674 + gcc/config/powerpcspe/powerpcspe-c.c | 6581 +++ gcc/config/powerpcspe/powerpcspe-cpus.def | 264 + gcc/config/powerpcspe/powerpcspe-linux.c | 36 + gcc/config/powerpcspe/powerpcspe-modes.def | 56 + gcc/config/powerpcspe/powerpcspe-opts.h | 168 + gcc/config/powerpcspe/powerpcspe-passes.def | 27 + gcc/config/powerpcspe/powerpcspe-protos.h | 269 + gcc/config/powerpcspe/powerpcspe-tables.opt | 196 + gcc/config/powerpcspe/powerpcspe.c | 43668 ++++++++++++++++ gcc/config/powerpcspe/powerpcspe.h | 2928 ++ gcc/config/powerpcspe/powerpcspe.md | 14770 ++++++ gcc/config/powerpcspe/powerpcspe.opt | 702 + gcc/config/powerpcspe/ppc-asm.h | 381 + gcc/config/powerpcspe/ppc-auxv.h | 105 + gcc/config/powerpcspe/ppu_intrinsics.h | 727 + gcc/config/powerpcspe/predicates.md | 2124 + gcc/config/powerpcspe/rs64.md | 162 + gcc/config/powerpcspe/rtems.h | 60 + gcc/config/powerpcspe/secureplt.h | 21 + gcc/config/powerpcspe/si2vmx.h | 2048 + gcc/config/powerpcspe/singlefp.h | 40 + gcc/config/powerpcspe/spe.h | 1107 + gcc/config/powerpcspe/spe.md | 3512 ++ gcc/config/powerpcspe/spu2vmx.h | 2415 + gcc/config/powerpcspe/sync.md | 484 + gcc/config/powerpcspe/sysv4.h | 1048 + gcc/config/powerpcspe/sysv4.opt | 161 + gcc/config/powerpcspe/sysv4le.h | 35 + gcc/config/powerpcspe/t-aix43 | 39 + gcc/config/powerpcspe/t-aix52 | 26 + gcc/config/powerpcspe/t-darwin64 | 2 + gcc/config/powerpcspe/t-darwin8 | 3 + gcc/config/powerpcspe/t-fprules | 20 + gcc/config/powerpcspe/t-freebsd64 | 29 + gcc/config/powerpcspe/t-linux | 23 + gcc/config/powerpcspe/t-linux64 | 36 + gcc/config/powerpcspe/t-linux64bele | 7 + gcc/config/powerpcspe/t-linux64le | 3 + gcc/config/powerpcspe/t-linux64lebe | 7 + gcc/config/powerpcspe/t-lynx | 29 + gcc/config/powerpcspe/t-netbsd | 36 + gcc/config/powerpcspe/t-powerpcspe | 69 + gcc/config/powerpcspe/t-ppccomm | 23 + gcc/config/powerpcspe/t-ppcendian | 30 + gcc/config/powerpcspe/t-ppcgas | 32 + gcc/config/powerpcspe/t-ppcos | 8 + gcc/config/powerpcspe/t-rtems | 79 + gcc/config/powerpcspe/t-spe | 72 + gcc/config/powerpcspe/t-vxworks | 25 + gcc/config/powerpcspe/t-vxworksae | 5 + gcc/config/powerpcspe/t-vxworksmils | 10 + gcc/config/powerpcspe/t-xilinx | 28 + gcc/config/powerpcspe/titan.md | 168 + gcc/config/powerpcspe/vec_types.h | 52 + gcc/config/powerpcspe/vector.md | 1407 + gcc/config/powerpcspe/vsx.md | 4128 ++ gcc/config/powerpcspe/vxworks.h | 147 + gcc/config/powerpcspe/vxworksae.h | 28 + gcc/config/powerpcspe/vxworksmils.h | 29 + gcc/config/powerpcspe/x-aix | 5 + gcc/config/powerpcspe/x-darwin | 3 + gcc/config/powerpcspe/x-darwin64 | 3 + gcc/config/powerpcspe/x-linux-relax | 2 + gcc/config/powerpcspe/x-powerpcspe | 3 + gcc/config/powerpcspe/x86intrin.h | 43 + gcc/config/powerpcspe/xcoff.h | 316 + gcc/config/powerpcspe/xfpu.h | 26 + gcc/config/powerpcspe/xfpu.md | 140 + gcc/config/powerpcspe/xilinx.h | 47 + gcc/config/powerpcspe/xilinx.opt | 32 + 151 files changed, 112810 insertions(+), 8 deletions(-) create mode 100644 gcc/common/config/powerpcspe/powerpcspe-common.c create mode 100644 gcc/config/powerpcspe/40x.md create mode 100644 gcc/config/powerpcspe/440.md create mode 100644 gcc/config/powerpcspe/476.h create mode 100644 gcc/config/powerpcspe/476.md create mode 100644 
gcc/config/powerpcspe/476.opt create mode 100644 gcc/config/powerpcspe/601.md create mode 100644 gcc/config/powerpcspe/603.md create mode 100644 gcc/config/powerpcspe/6xx.md create mode 100644 gcc/config/powerpcspe/7450.md create mode 100644 gcc/config/powerpcspe/750cl.h create mode 100644 gcc/config/powerpcspe/7xx.md create mode 100644 gcc/config/powerpcspe/8540.md create mode 100644 gcc/config/powerpcspe/a2.md create mode 100644 gcc/config/powerpcspe/aix-stdint.h create mode 100644 gcc/config/powerpcspe/aix.h create mode 100644 gcc/config/powerpcspe/aix43.h create mode 100644 gcc/config/powerpcspe/aix51.h create mode 100644 gcc/config/powerpcspe/aix52.h create mode 100644 gcc/config/powerpcspe/aix53.h create mode 100644 gcc/config/powerpcspe/aix61.h create mode 100644 gcc/config/powerpcspe/aix64.opt create mode 100644 gcc/config/powerpcspe/aix71.h create mode 100644 gcc/config/powerpcspe/altivec.h create mode 100644 gcc/config/powerpcspe/altivec.md create mode 100644 gcc/config/powerpcspe/biarch64.h create mode 100644 gcc/config/powerpcspe/bmi2intrin.h create mode 100644 gcc/config/powerpcspe/bmiintrin.h create mode 100644 gcc/config/powerpcspe/cell.md create mode 100644 gcc/config/powerpcspe/constraints.md create mode 100644 gcc/config/powerpcspe/crypto.md create mode 100644 gcc/config/powerpcspe/darwin.h create mode 100644 gcc/config/powerpcspe/darwin.md create mode 100644 gcc/config/powerpcspe/darwin.opt create mode 100644 gcc/config/powerpcspe/darwin64.h create mode 100644 gcc/config/powerpcspe/darwin7.h create mode 100644 gcc/config/powerpcspe/darwin8.h create mode 100644 gcc/config/powerpcspe/default64.h create mode 100644 gcc/config/powerpcspe/dfp.md create mode 100644 gcc/config/powerpcspe/driver-powerpcspe.c create mode 100644 gcc/config/powerpcspe/e300c2c3.md create mode 100644 gcc/config/powerpcspe/e500.h create mode 100644 gcc/config/powerpcspe/e500mc.md create mode 100644 gcc/config/powerpcspe/e500mc64.md create mode 100644 gcc/config/powerpcspe/e5500.md create mode 100644 gcc/config/powerpcspe/e6500.md create mode 100644 gcc/config/powerpcspe/eabi.h create mode 100644 gcc/config/powerpcspe/eabialtivec.h create mode 100644 gcc/config/powerpcspe/eabisim.h create mode 100644 gcc/config/powerpcspe/eabispe.h create mode 100644 gcc/config/powerpcspe/freebsd.h create mode 100644 gcc/config/powerpcspe/freebsd64.h create mode 100755 gcc/config/powerpcspe/genopt.sh create mode 100644 gcc/config/powerpcspe/host-darwin.c create mode 100644 gcc/config/powerpcspe/host-ppc64-darwin.c create mode 100644 gcc/config/powerpcspe/htm.md create mode 100644 gcc/config/powerpcspe/htmintrin.h create mode 100644 gcc/config/powerpcspe/htmxlintrin.h create mode 100644 gcc/config/powerpcspe/linux.h create mode 100644 gcc/config/powerpcspe/linux64.h create mode 100644 gcc/config/powerpcspe/linux64.opt create mode 100644 gcc/config/powerpcspe/linuxaltivec.h create mode 100644 gcc/config/powerpcspe/linuxspe.h create mode 100644 gcc/config/powerpcspe/lynx.h create mode 100644 gcc/config/powerpcspe/milli.exp create mode 100644 gcc/config/powerpcspe/mpc.md create mode 100644 gcc/config/powerpcspe/netbsd.h create mode 100644 gcc/config/powerpcspe/option-defaults.h create mode 100644 gcc/config/powerpcspe/paired.h create mode 100644 gcc/config/powerpcspe/paired.md create mode 100644 gcc/config/powerpcspe/power4.md create mode 100644 gcc/config/powerpcspe/power5.md create mode 100644 gcc/config/powerpcspe/power6.md create mode 100644 gcc/config/powerpcspe/power7.md create mode 100644 
gcc/config/powerpcspe/power8.md create mode 100644 gcc/config/powerpcspe/power9.md create mode 100644 gcc/config/powerpcspe/powerpcspe-builtin.def create mode 100644 gcc/config/powerpcspe/powerpcspe-c.c create mode 100644 gcc/config/powerpcspe/powerpcspe-cpus.def create mode 100644 gcc/config/powerpcspe/powerpcspe-linux.c create mode 100644 gcc/config/powerpcspe/powerpcspe-modes.def create mode 100644 gcc/config/powerpcspe/powerpcspe-opts.h create mode 100644 gcc/config/powerpcspe/powerpcspe-passes.def create mode 100644 gcc/config/powerpcspe/powerpcspe-protos.h create mode 100644 gcc/config/powerpcspe/powerpcspe-tables.opt create mode 100644 gcc/config/powerpcspe/powerpcspe.c create mode 100644 gcc/config/powerpcspe/powerpcspe.h create mode 100644 gcc/config/powerpcspe/powerpcspe.md create mode 100644 gcc/config/powerpcspe/powerpcspe.opt create mode 100644 gcc/config/powerpcspe/ppc-asm.h create mode 100644 gcc/config/powerpcspe/ppc-auxv.h create mode 100644 gcc/config/powerpcspe/ppu_intrinsics.h create mode 100644 gcc/config/powerpcspe/predicates.md create mode 100644 gcc/config/powerpcspe/rs64.md create mode 100644 gcc/config/powerpcspe/rtems.h create mode 100644 gcc/config/powerpcspe/secureplt.h create mode 100644 gcc/config/powerpcspe/si2vmx.h create mode 100644 gcc/config/powerpcspe/singlefp.h create mode 100644 gcc/config/powerpcspe/spe.h create mode 100644 gcc/config/powerpcspe/spe.md create mode 100644 gcc/config/powerpcspe/spu2vmx.h create mode 100644 gcc/config/powerpcspe/sync.md create mode 100644 gcc/config/powerpcspe/sysv4.h create mode 100644 gcc/config/powerpcspe/sysv4.opt create mode 100644 gcc/config/powerpcspe/sysv4le.h create mode 100644 gcc/config/powerpcspe/t-aix43 create mode 100644 gcc/config/powerpcspe/t-aix52 create mode 100644 gcc/config/powerpcspe/t-darwin64 create mode 100644 gcc/config/powerpcspe/t-darwin8 create mode 100644 gcc/config/powerpcspe/t-fprules create mode 100644 gcc/config/powerpcspe/t-freebsd64 create mode 100644 gcc/config/powerpcspe/t-linux create mode 100644 gcc/config/powerpcspe/t-linux64 create mode 100644 gcc/config/powerpcspe/t-linux64bele create mode 100644 gcc/config/powerpcspe/t-linux64le create mode 100644 gcc/config/powerpcspe/t-linux64lebe create mode 100644 gcc/config/powerpcspe/t-lynx create mode 100644 gcc/config/powerpcspe/t-netbsd create mode 100644 gcc/config/powerpcspe/t-powerpcspe create mode 100644 gcc/config/powerpcspe/t-ppccomm create mode 100644 gcc/config/powerpcspe/t-ppcendian create mode 100644 gcc/config/powerpcspe/t-ppcgas create mode 100644 gcc/config/powerpcspe/t-ppcos create mode 100644 gcc/config/powerpcspe/t-rtems create mode 100644 gcc/config/powerpcspe/t-spe create mode 100644 gcc/config/powerpcspe/t-vxworks create mode 100644 gcc/config/powerpcspe/t-vxworksae create mode 100644 gcc/config/powerpcspe/t-vxworksmils create mode 100644 gcc/config/powerpcspe/t-xilinx create mode 100644 gcc/config/powerpcspe/titan.md create mode 100644 gcc/config/powerpcspe/vec_types.h create mode 100644 gcc/config/powerpcspe/vector.md create mode 100644 gcc/config/powerpcspe/vsx.md create mode 100644 gcc/config/powerpcspe/vxworks.h create mode 100644 gcc/config/powerpcspe/vxworksae.h create mode 100644 gcc/config/powerpcspe/vxworksmils.h create mode 100644 gcc/config/powerpcspe/x-aix create mode 100644 gcc/config/powerpcspe/x-darwin create mode 100644 gcc/config/powerpcspe/x-darwin64 create mode 100644 gcc/config/powerpcspe/x-linux-relax create mode 100644 gcc/config/powerpcspe/x-powerpcspe create mode 100644 
gcc/config/powerpcspe/x86intrin.h create mode 100644 gcc/config/powerpcspe/xcoff.h create mode 100644 gcc/config/powerpcspe/xfpu.h create mode 100644 gcc/config/powerpcspe/xfpu.md create mode 100644 gcc/config/powerpcspe/xilinx.h create mode 100644 gcc/config/powerpcspe/xilinx.opt diff --git a/ChangeLog b/ChangeLog index b75693b1ed44..e1ab7c8e6675 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2017-05-24 Segher Boessenkool <segher@kernel.crashing.org> + + * config.gcc (powerpc*-*-*spe*): New. + (powerpc-*-eabispe*): Use ${cpu_type} instead of hardcoded pathnames. + (powerpc-*-rtems*spe*): New. + (powerpc*-*-linux*spe*): New. + (powerpc-wrs-vxworksspe): New. + (powerpc*-*-*, rs6000-*-*): Use ${cpu_type}. + (misc flags) [powerpc*-*-*, rs6000-*-*): Use ${cpu_type}. + * config.host (powerpc*-*-*spe*): New. + 2017-05-19 Martin Liska <mliska@suse.cz> * configure.ac: Add --enable-werror-always just for diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1b70fa034b26..0de253144060 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2017-05-24 Segher Boessenkool <segher@kernel.crashing.org> + + * config/powerpcspe: New port. Files are copied from the rs6000 + port, with "rs6000" in filenames replaced by "powerpcspe". + 2017-05-24 Wilco Dijkstra <wdijkstr@arm.com> PR rtl-optimization/80754 diff --git a/gcc/common/config/powerpcspe/powerpcspe-common.c b/gcc/common/config/powerpcspe/powerpcspe-common.c new file mode 100644 index 000000000000..3c0106ae555d --- /dev/null +++ b/gcc/common/config/powerpcspe/powerpcspe-common.c @@ -0,0 +1,333 @@ +/* Common hooks for IBM RS/6000. + Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "diagnostic-core.h" +#include "tm.h" +#include "common/common-target.h" +#include "common/common-target-def.h" +#include "opts.h" +#include "flags.h" +#include "params.h" + +/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */ +static const struct default_options rs6000_option_optimization_table[] = + { + { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 }, + /* Enable -fsched-pressure for first pass instruction scheduling. */ + { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 }, + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + +/* Implement TARGET_OPTION_INIT_STRUCT. */ + +static void +rs6000_option_init_struct (struct gcc_options *opts) +{ + if (DEFAULT_ABI == ABI_DARWIN) + /* The Darwin libraries never set errno, so we might as well + avoid calling them when that's the only reason we would. */ + opts->x_flag_errno_math = 0; + + /* Enable section anchors by default. */ + if (!TARGET_MACHO) + opts->x_flag_section_anchors = 1; +} + +/* Implement TARGET_OPTION_DEFAULT_PARAMS. */ + +static void +rs6000_option_default_params (void) +{ + /* Double growth factor to counter reduced min jump length. 
*/ + set_default_param_value (PARAM_MAX_GROW_COPY_BB_INSNS, 16); +} + +/* If not otherwise specified by a target, make 'long double' equivalent to + 'double'. */ + +#ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE +#define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64 +#endif + +/* Implement TARGET_HANDLE_OPTION. */ + +static bool +rs6000_handle_option (struct gcc_options *opts, struct gcc_options *opts_set, + const struct cl_decoded_option *decoded, + location_t loc) +{ + enum fpu_type_t fpu_type = FPU_NONE; + char *p, *q; + size_t code = decoded->opt_index; + const char *arg = decoded->arg; + int value = decoded->value; + + switch (code) + { + case OPT_mfull_toc: + opts->x_rs6000_isa_flags &= ~OPTION_MASK_MINIMAL_TOC; + opts->x_TARGET_NO_FP_IN_TOC = 0; + opts->x_TARGET_NO_SUM_IN_TOC = 0; + opts_set->x_rs6000_isa_flags |= OPTION_MASK_MINIMAL_TOC; +#ifdef TARGET_USES_SYSV4_OPT + /* Note, V.4 no longer uses a normal TOC, so make -mfull-toc, be + just the same as -mminimal-toc. */ + opts->x_rs6000_isa_flags |= OPTION_MASK_MINIMAL_TOC; + opts_set->x_rs6000_isa_flags |= OPTION_MASK_MINIMAL_TOC; +#endif + break; + +#ifdef TARGET_USES_SYSV4_OPT + case OPT_mtoc: + /* Make -mtoc behave like -mminimal-toc. */ + opts->x_rs6000_isa_flags |= OPTION_MASK_MINIMAL_TOC; + opts_set->x_rs6000_isa_flags |= OPTION_MASK_MINIMAL_TOC; + break; +#endif + +#ifdef TARGET_USES_AIX64_OPT + case OPT_maix64: +#else + case OPT_m64: +#endif + opts->x_rs6000_isa_flags |= OPTION_MASK_POWERPC64; + opts->x_rs6000_isa_flags |= (~opts_set->x_rs6000_isa_flags + & OPTION_MASK_PPC_GFXOPT); + opts_set->x_rs6000_isa_flags |= OPTION_MASK_POWERPC64; + break; + +#ifdef TARGET_USES_AIX64_OPT + case OPT_maix32: +#else + case OPT_m32: +#endif + opts->x_rs6000_isa_flags &= ~OPTION_MASK_POWERPC64; + opts_set->x_rs6000_isa_flags |= OPTION_MASK_POWERPC64; + break; + + case OPT_mminimal_toc: + if (value == 1) + { + opts->x_TARGET_NO_FP_IN_TOC = 0; + opts->x_TARGET_NO_SUM_IN_TOC = 0; + } + break; + + case OPT_mpowerpc_gpopt: + case OPT_mpowerpc_gfxopt: + break; + + case OPT_mdebug_: + p = ASTRDUP (arg); + opts->x_rs6000_debug = 0; + + while ((q = strtok (p, ",")) != NULL) + { + unsigned mask = 0; + bool invert; + + p = NULL; + if (*q == '!') + { + invert = true; + q++; + } + else + invert = false; + + if (! strcmp (q, "all")) + mask = MASK_DEBUG_ALL; + else if (! strcmp (q, "stack")) + mask = MASK_DEBUG_STACK; + else if (! strcmp (q, "arg")) + mask = MASK_DEBUG_ARG; + else if (! strcmp (q, "reg")) + mask = MASK_DEBUG_REG; + else if (! strcmp (q, "addr")) + mask = MASK_DEBUG_ADDR; + else if (! strcmp (q, "cost")) + mask = MASK_DEBUG_COST; + else if (! strcmp (q, "target")) + mask = MASK_DEBUG_TARGET; + else if (! 
strcmp (q, "builtin")) + mask = MASK_DEBUG_BUILTIN; + else + error_at (loc, "unknown -mdebug-%s switch", q); + + if (invert) + opts->x_rs6000_debug &= ~mask; + else + opts->x_rs6000_debug |= mask; + } + break; + +#ifdef TARGET_USES_SYSV4_OPT + case OPT_mrelocatable: + if (value == 1) + { + opts->x_rs6000_isa_flags |= OPTION_MASK_MINIMAL_TOC; + opts_set->x_rs6000_isa_flags |= OPTION_MASK_MINIMAL_TOC; + opts->x_TARGET_NO_FP_IN_TOC = 1; + } + break; + + case OPT_mrelocatable_lib: + if (value == 1) + { + opts->x_rs6000_isa_flags |= (OPTION_MASK_RELOCATABLE + | OPTION_MASK_MINIMAL_TOC); + opts_set->x_rs6000_isa_flags |= (OPTION_MASK_RELOCATABLE + | OPTION_MASK_MINIMAL_TOC); + opts->x_TARGET_NO_FP_IN_TOC = 1; + } + else + { + opts->x_rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE; + opts_set->x_rs6000_isa_flags |= OPTION_MASK_RELOCATABLE; + } + break; +#endif + + case OPT_mabi_altivec: + /* Enabling the AltiVec ABI turns off the SPE ABI. */ + opts->x_rs6000_spe_abi = 0; + break; + + case OPT_mabi_spe: + opts->x_rs6000_altivec_abi = 0; + break; + + case OPT_mlong_double_: + if (value != 64 && value != 128) + { + error_at (loc, "unknown switch -mlong-double-%s", arg); + opts->x_rs6000_long_double_type_size + = RS6000_DEFAULT_LONG_DOUBLE_SIZE; + return false; + } + break; + + case OPT_msingle_float: + if (!TARGET_SINGLE_FPU) + warning_at (loc, 0, + "-msingle-float option equivalent to -mhard-float"); + /* -msingle-float implies -mno-double-float and TARGET_HARD_FLOAT. */ + opts->x_rs6000_double_float = 0; + opts->x_rs6000_isa_flags &= ~OPTION_MASK_SOFT_FLOAT; + opts_set->x_rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT; + break; + + case OPT_mdouble_float: + /* -mdouble-float implies -msingle-float and TARGET_HARD_FLOAT. */ + opts->x_rs6000_single_float = 1; + opts->x_rs6000_isa_flags &= ~OPTION_MASK_SOFT_FLOAT; + opts_set->x_rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT; + break; + + case OPT_msimple_fpu: + if (!TARGET_SINGLE_FPU) + warning_at (loc, 0, "-msimple-fpu option ignored"); + break; + + case OPT_mhard_float: + /* -mhard_float implies -msingle-float and -mdouble-float. */ + opts->x_rs6000_single_float = opts->x_rs6000_double_float = 1; + break; + + case OPT_msoft_float: + /* -msoft_float implies -mnosingle-float and -mnodouble-float. */ + opts->x_rs6000_single_float = opts->x_rs6000_double_float = 0; + break; + + case OPT_mfpu_: + fpu_type = (enum fpu_type_t) value; + if (fpu_type != FPU_NONE) + { + /* If -mfpu is not none, then turn off SOFT_FLOAT, turn on + HARD_FLOAT. */ + opts->x_rs6000_isa_flags &= ~OPTION_MASK_SOFT_FLOAT; + opts_set->x_rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT; + opts->x_rs6000_xilinx_fpu = 1; + if (fpu_type == FPU_SF_LITE || fpu_type == FPU_SF_FULL) + opts->x_rs6000_single_float = 1; + if (fpu_type == FPU_DF_LITE || fpu_type == FPU_DF_FULL) + opts->x_rs6000_single_float = opts->x_rs6000_double_float = 1; + if (fpu_type == FPU_SF_LITE || fpu_type == FPU_DF_LITE) + opts->x_rs6000_simple_fpu = 1; + } + else + { + /* -mfpu=none is equivalent to -msoft-float. */ + opts->x_rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT; + opts_set->x_rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT; + opts->x_rs6000_single_float = opts->x_rs6000_double_float = 0; + } + break; + + case OPT_mrecip: + opts->x_rs6000_recip_name = (value) ? "default" : "none"; + break; + } + return true; +} + +/* -fsplit-stack uses a field in the TCB, available with glibc-2.19. + We also allow 2.18 because alignment padding guarantees that the + space is available there too. 
*/ + +static bool +rs6000_supports_split_stack (bool report, + struct gcc_options *opts ATTRIBUTE_UNUSED) +{ +#ifndef TARGET_GLIBC_MAJOR +#define TARGET_GLIBC_MAJOR 0 +#endif +#ifndef TARGET_GLIBC_MINOR +#define TARGET_GLIBC_MINOR 0 +#endif + /* Note: Can't test DEFAULT_ABI here, it isn't set until later. */ + if (TARGET_GLIBC_MAJOR * 1000 + TARGET_GLIBC_MINOR >= 2018 + && TARGET_64BIT + && TARGET_ELF) + return true; + + if (report) + error ("%<-fsplit-stack%> currently only supported on PowerPC64 GNU/Linux with glibc-2.18 or later"); + return false; +} + +#undef TARGET_HANDLE_OPTION +#define TARGET_HANDLE_OPTION rs6000_handle_option + +#undef TARGET_OPTION_INIT_STRUCT +#define TARGET_OPTION_INIT_STRUCT rs6000_option_init_struct + +#undef TARGET_OPTION_DEFAULT_PARAMS +#define TARGET_OPTION_DEFAULT_PARAMS rs6000_option_default_params + +#undef TARGET_OPTION_OPTIMIZATION_TABLE +#define TARGET_OPTION_OPTIMIZATION_TABLE rs6000_option_optimization_table + +#undef TARGET_SUPPORTS_SPLIT_STACK +#define TARGET_SUPPORTS_SPLIT_STACK rs6000_supports_split_stack + +struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER; diff --git a/gcc/config.gcc b/gcc/config.gcc index 8ce94d01ee06..f55dcaa335bc 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -442,6 +442,16 @@ nios2-*-*) nvptx-*-*) cpu_type=nvptx ;; +powerpc*-*-*spe*) + cpu_type=powerpcspe + extra_headers="ppc-asm.h altivec.h spe.h ppu_intrinsics.h paired.h spu2vmx.h vec_types.h si2vmx.h htmintrin.h htmxlintrin.h" + case x$with_cpu in + xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500) + cpu_is_64bit=yes + ;; + esac + extra_options="${extra_options} g.opt fused-madd.opt powerpcspe/powerpcspe-tables.opt" + ;; powerpc*-*-*) cpu_type=rs6000 extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h" @@ -2372,9 +2382,9 @@ powerpc-*-netbsd*) extra_options="${extra_options} rs6000/sysv4.opt" ;; powerpc-*-eabispe*) - tm_file="${tm_file} dbxelf.h elfos.h freebsd-spec.h newlib-stdint.h rs6000/sysv4.h rs6000/eabi.h rs6000/e500.h rs6000/eabispe.h" - extra_options="${extra_options} rs6000/sysv4.opt" - tmake_file="rs6000/t-spe rs6000/t-ppccomm" + tm_file="${tm_file} dbxelf.h elfos.h freebsd-spec.h newlib-stdint.h ${cpu_type}/sysv4.h ${cpu_type}/eabi.h ${cpu_type}/e500.h ${cpu_type}/eabispe.h" + extra_options="${extra_options} ${cpu_type}/sysv4.opt" + tmake_file="${cpu_type}/t-spe ${cpu_type}/t-ppccomm" use_gcc_stdint=wrap ;; powerpc-*-eabisimaltivec*) @@ -2412,11 +2422,27 @@ powerpc-*-eabi*) tmake_file="rs6000/t-fprules rs6000/t-ppcgas rs6000/t-ppccomm" use_gcc_stdint=wrap ;; +powerpc-*-rtems*spe*) + tm_file="${tm_file} dbxelf.h elfos.h freebsd-spec.h newlib-stdint.h powerpcspe/sysv4.h powerpcspe/eabi.h powerpcspe/e500.h powerpcspe/rtems.h rtems.h" + extra_options="${extra_options} powerpcspe/sysv4.opt" + tmake_file="${tmake_file} powerpcspe/t-fprules powerpcspe/t-rtems powerpcspe/t-ppccomm" + ;; powerpc-*-rtems*) tm_file="${tm_file} dbxelf.h elfos.h freebsd-spec.h newlib-stdint.h rs6000/sysv4.h rs6000/eabi.h rs6000/e500.h rs6000/rtems.h rtems.h" extra_options="${extra_options} rs6000/sysv4.opt" tmake_file="${tmake_file} rs6000/t-fprules rs6000/t-rtems rs6000/t-ppccomm" ;; +powerpc*-*-linux*spe*) + tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h freebsd-spec.h powerpcspe/sysv4.h" + extra_options="${extra_options} powerpcspe/sysv4.opt" + tmake_file="${tmake_file} powerpcspe/t-fprules powerpcspe/t-ppccomm" + extra_objs="$extra_objs powerpcspe-linux.o" + maybe_biarch= + tm_file="${tm_file} 
powerpcspe/linux.h glibc-stdint.h" + tmake_file="${tmake_file} powerpcspe/t-ppcos powerpcspe/t-linux" + tm_file="${tm_file} powerpcspe/linuxspe.h powerpcspe/e500.h" + default_gnu_indirect_function=yes + ;; powerpc*-*-linux*) tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h freebsd-spec.h rs6000/sysv4.h" extra_options="${extra_options} rs6000/sysv4.opt" @@ -2504,6 +2530,13 @@ powerpc*-*-linux*) ;; esac ;; +powerpc-wrs-vxworksspe) + tm_file="${tm_file} elfos.h freebsd-spec.h powerpcspe/sysv4.h" + tmake_file="${tmake_file} powerpcspe/t-fprules powerpcspe/t-ppccomm powerpcspe/t-vxworks" + extra_options="${extra_options} powerpcspe/sysv4.opt" + extra_headers=ppc-asm.h + tm_file="${tm_file} vx-common.h vxworks.h powerpcspe/vxworks.h powerpcspe/e500.h" + ;; powerpc-wrs-vxworks|powerpc-wrs-vxworksae|powerpc-wrs-vxworksmils) tm_file="${tm_file} elfos.h freebsd-spec.h rs6000/sysv4.h" tmake_file="${tmake_file} rs6000/t-fprules rs6000/t-ppccomm rs6000/t-vxworks" @@ -3060,7 +3093,7 @@ i[34567]86-*-* | x86_64-*-*) tmake_file="${tmake_file} i386/t-gmm_malloc i386/t-i386" ;; powerpc*-*-* | rs6000-*-*) - tm_file="${tm_file} rs6000/option-defaults.h" + tm_file="${tm_file} ${cpu_type}/option-defaults.h" esac # Build mkoffload tool @@ -4554,10 +4587,10 @@ case ${target} in then target_cpu_default2="\\\"$with_cpu\\\"" fi - out_file=rs6000/rs6000.c - c_target_objs="${c_target_objs} rs6000-c.o" - cxx_target_objs="${cxx_target_objs} rs6000-c.o" - tmake_file="rs6000/t-rs6000 ${tmake_file}" + out_file="${cpu_type}/${cpu_type}.c" + c_target_objs="${c_target_objs} ${cpu_type}-c.o" + cxx_target_objs="${cxx_target_objs} ${cpu_type}-c.o" + tmake_file="${cpu_type}/t-${cpu_type} ${tmake_file}" ;; sh[123456ble]*-*-* | sh-*-*) diff --git a/gcc/config.host b/gcc/config.host index 5e2db5327e30..48120ae1c89c 100644 --- a/gcc/config.host +++ b/gcc/config.host @@ -144,6 +144,10 @@ case ${host} in rs6000-*-* \ | powerpc*-*-* ) case ${target} in + powerpc*-*-*spe*) + host_extra_gcc_objs="driver-powerpcspe.o" + host_xmake_file="${host_xmake_file} powerpcspe/x-powerpcspe" + ;; rs6000-*-* \ | powerpc*-*-* ) host_extra_gcc_objs="driver-rs6000.o" diff --git a/gcc/config/powerpcspe/40x.md b/gcc/config/powerpcspe/40x.md new file mode 100644 index 000000000000..d832a90c4098 --- /dev/null +++ b/gcc/config/powerpcspe/40x.md @@ -0,0 +1,124 @@ +;; Scheduling description for IBM PowerPC 403 and PowerPC 405 processors. +;; Copyright (C) 2003-2017 Free Software Foundation, Inc. +;; +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. 
+ +(define_automaton "ppc40x,ppc40xiu") +(define_cpu_unit "bpu_40x,fpu_405" "ppc40x") +(define_cpu_unit "iu_40x" "ppc40xiu") + +;; PPC401 / PPC403 / PPC405 32-bit integer only IU BPU +;; Embedded PowerPC controller +;; In-order execution +;; Max issue two insns/cycle (includes one branch) +(define_insn_reservation "ppc403-load" 2 + (and (eq_attr "type" "load,load_l,store_c,sync") + (eq_attr "cpu" "ppc403,ppc405")) + "iu_40x") + +(define_insn_reservation "ppc403-store" 2 + (and (eq_attr "type" "store") + (eq_attr "cpu" "ppc403,ppc405")) + "iu_40x") + +(define_insn_reservation "ppc403-integer" 1 + (and (ior (eq_attr "type" "integer,insert,trap,cntlz,isel") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "no"))) + (eq_attr "cpu" "ppc403,ppc405")) + "iu_40x") + +(define_insn_reservation "ppc403-two" 1 + (and (eq_attr "type" "two") + (eq_attr "cpu" "ppc403,ppc405")) + "iu_40x,iu_40x") + +(define_insn_reservation "ppc403-three" 1 + (and (eq_attr "type" "three") + (eq_attr "cpu" "ppc403,ppc405")) + "iu_40x,iu_40x,iu_40x") + +(define_insn_reservation "ppc403-compare" 3 + (and (ior (eq_attr "type" "cmp") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "yes"))) + (eq_attr "cpu" "ppc403,ppc405")) + "iu_40x,nothing,bpu_40x") + +(define_insn_reservation "ppc403-imul" 4 + (and (eq_attr "type" "mul") + (eq_attr "cpu" "ppc403")) + "iu_40x*4") + +(define_insn_reservation "ppc405-imul" 5 + (and (eq_attr "type" "mul") + (eq_attr "size" "32") + (eq_attr "cpu" "ppc405")) + "iu_40x*4") + +(define_insn_reservation "ppc405-imul2" 3 + (and (eq_attr "type" "mul") + (eq_attr "size" "16") + (eq_attr "cpu" "ppc405")) + "iu_40x*2") + +(define_insn_reservation "ppc405-imul3" 2 + (and (ior (eq_attr "type" "halfmul") + (and (eq_attr "type" "mul") + (eq_attr "size" "8"))) + (eq_attr "cpu" "ppc405")) + "iu_40x") + +(define_insn_reservation "ppc403-idiv" 33 + (and (eq_attr "type" "div") + (eq_attr "cpu" "ppc403,ppc405")) + "iu_40x*33") + +(define_insn_reservation "ppc403-mfcr" 2 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "ppc403,ppc405")) + "iu_40x") + +(define_insn_reservation "ppc403-mtcr" 3 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "ppc403,ppc405")) + "iu_40x") + +(define_insn_reservation "ppc403-mtjmpr" 4 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "ppc403,ppc405")) + "iu_40x") + +(define_insn_reservation "ppc403-mfjmpr" 2 + (and (eq_attr "type" "mfjmpr") + (eq_attr "cpu" "ppc403,ppc405")) + "iu_40x") + +(define_insn_reservation "ppc403-jmpreg" 1 + (and (eq_attr "type" "jmpreg,branch,isync") + (eq_attr "cpu" "ppc403,ppc405")) + "bpu_40x") + +(define_insn_reservation "ppc403-cr" 2 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "ppc403,ppc405")) + "bpu_40x") + +(define_insn_reservation "ppc405-float" 11 + (and (eq_attr "type" "fpload,fpstore,fpcompare,fp,fpsimple,dmul,sdiv,ddiv") + (eq_attr "cpu" "ppc405")) + "fpu_405*10") diff --git a/gcc/config/powerpcspe/440.md b/gcc/config/powerpcspe/440.md new file mode 100644 index 000000000000..80964e8bbac0 --- /dev/null +++ b/gcc/config/powerpcspe/440.md @@ -0,0 +1,138 @@ +;; Scheduling description for IBM PowerPC 440 processor. +;; Copyright (C) 2003-2017 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; PPC440 Embedded PowerPC controller +;; dual issue +;; i_pipe - complex integer / compare / branch +;; j_pipe - simple integer arithmetic +;; l_pipe - load-store +;; f_pipe - floating point arithmetic + +(define_automaton "ppc440_core,ppc440_apu") +(define_cpu_unit "ppc440_i_pipe,ppc440_j_pipe,ppc440_l_pipe" "ppc440_core") +(define_cpu_unit "ppc440_f_pipe" "ppc440_apu") +(define_cpu_unit "ppc440_issue_0,ppc440_issue_1" "ppc440_core") + +(define_reservation "ppc440_issue" "ppc440_issue_0|ppc440_issue_1") + + +(define_insn_reservation "ppc440-load" 3 + (and (eq_attr "type" "load,load_l,store_c,sync") + (eq_attr "cpu" "ppc440")) + "ppc440_issue,ppc440_l_pipe") + +(define_insn_reservation "ppc440-store" 3 + (and (eq_attr "type" "store") + (eq_attr "cpu" "ppc440")) + "ppc440_issue,ppc440_l_pipe") + +(define_insn_reservation "ppc440-fpload" 4 + (and (eq_attr "type" "fpload") + (eq_attr "cpu" "ppc440")) + "ppc440_issue,ppc440_l_pipe") + +(define_insn_reservation "ppc440-fpstore" 3 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "ppc440")) + "ppc440_issue,ppc440_l_pipe") + +(define_insn_reservation "ppc440-integer" 1 + (and (ior (eq_attr "type" "integer,insert,trap,cntlz,isel") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "no"))) + (eq_attr "cpu" "ppc440")) + "ppc440_issue,ppc440_i_pipe|ppc440_j_pipe") + +(define_insn_reservation "ppc440-two" 1 + (and (eq_attr "type" "two") + (eq_attr "cpu" "ppc440")) + "ppc440_issue_0+ppc440_issue_1,\ + ppc440_i_pipe|ppc440_j_pipe,ppc440_i_pipe|ppc440_j_pipe") + +(define_insn_reservation "ppc440-three" 1 + (and (eq_attr "type" "three") + (eq_attr "cpu" "ppc440")) + "ppc440_issue_0+ppc440_issue_1,ppc440_i_pipe|ppc440_j_pipe,\ + ppc440_i_pipe|ppc440_j_pipe,ppc440_i_pipe|ppc440_j_pipe") + +(define_insn_reservation "ppc440-imul" 3 + (and (eq_attr "type" "mul") + (eq_attr "size" "32") + (eq_attr "cpu" "ppc440")) + "ppc440_issue,ppc440_i_pipe") + +(define_insn_reservation "ppc440-imul2" 2 + (and (ior (eq_attr "type" "halfmul") + (and (eq_attr "type" "mul") + (eq_attr "size" "8,16"))) + (eq_attr "cpu" "ppc440")) + "ppc440_issue,ppc440_i_pipe") + +(define_insn_reservation "ppc440-idiv" 34 + (and (eq_attr "type" "div") + (eq_attr "cpu" "ppc440")) + "ppc440_issue,ppc440_i_pipe*33") + +(define_insn_reservation "ppc440-branch" 1 + (and (eq_attr "type" "branch,jmpreg,isync") + (eq_attr "cpu" "ppc440")) + "ppc440_issue,ppc440_i_pipe") + +(define_insn_reservation "ppc440-compare" 2 + (and (ior (eq_attr "type" "cmp,cr_logical,delayed_cr,mfcr") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "yes"))) + (eq_attr "cpu" "ppc440")) + "ppc440_issue,ppc440_i_pipe") + +(define_insn_reservation "ppc440-fpcompare" 3 ; 2 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "ppc440")) + "ppc440_issue,ppc440_f_pipe+ppc440_i_pipe") + +(define_insn_reservation "ppc440-fp" 5 + (and (eq_attr "type" "fp,fpsimple,dmul") + (eq_attr "cpu" "ppc440")) + "ppc440_issue,ppc440_f_pipe") + +(define_insn_reservation "ppc440-sdiv" 19 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppc440")) + "ppc440_issue,ppc440_f_pipe*15") + +(define_insn_reservation 
"ppc440-ddiv" 33 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppc440")) + "ppc440_issue,ppc440_f_pipe*29") + +(define_insn_reservation "ppc440-mtcr" 3 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "ppc440")) + "ppc440_issue,ppc440_i_pipe") + +(define_insn_reservation "ppc440-mtjmpr" 4 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "ppc440")) + "ppc440_issue,ppc440_i_pipe") + +(define_insn_reservation "ppc440-mfjmpr" 2 + (and (eq_attr "type" "mfjmpr") + (eq_attr "cpu" "ppc440")) + "ppc440_issue,ppc440_i_pipe") + diff --git a/gcc/config/powerpcspe/476.h b/gcc/config/powerpcspe/476.h new file mode 100644 index 000000000000..f22e399b6c5f --- /dev/null +++ b/gcc/config/powerpcspe/476.h @@ -0,0 +1,32 @@ +/* Enable IBM PowerPC 476 support. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + Contributed by Peter Bergner (bergner@vnet.ibm.com) + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#undef TARGET_LINK_STACK +#define TARGET_LINK_STACK (rs6000_link_stack) + +#undef SET_TARGET_LINK_STACK +#define SET_TARGET_LINK_STACK(X) do { TARGET_LINK_STACK = (X); } while (0) + +#undef TARGET_ASM_CODE_END +#define TARGET_ASM_CODE_END rs6000_code_end diff --git a/gcc/config/powerpcspe/476.md b/gcc/config/powerpcspe/476.md new file mode 100644 index 000000000000..aefead25adac --- /dev/null +++ b/gcc/config/powerpcspe/476.md @@ -0,0 +1,143 @@ +;; Scheduling description for IBM PowerPC 476 processor. +;; Copyright (C) 2009-2017 Free Software Foundation, Inc. +;; Contributed by Peter Bergner (bergner@vnet.ibm.com). +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. 
+ +;; PPC476 Embedded PowerPC controller +;; 3 issue (476) / 4 issue (476fp) +;; +;; i_pipe - complex integer / compare +;; lj_pipe - load-store / simple integer arithmetic +;; b_pipe - branch pipe +;; f_pipe - floating point arithmetic + +(define_automaton "ppc476_core,ppc476_apu") + +(define_cpu_unit "ppc476_i_pipe,ppc476_lj_pipe,ppc476_b_pipe" "ppc476_core") +(define_cpu_unit "ppc476_issue_fp,ppc476_f_pipe" "ppc476_apu") +(define_cpu_unit "ppc476_issue_0,ppc476_issue_1,ppc476_issue_2" "ppc476_core") + +(define_reservation "ppc476_issue" "ppc476_issue_0|ppc476_issue_1|ppc476_issue_2") +(define_reservation "ppc476_issue2" "ppc476_issue_0+ppc476_issue_1\ + |ppc476_issue_0+ppc476_issue_2\ + |ppc476_issue_1+ppc476_issue_2") +(define_reservation "ppc476_issue3" "ppc476_issue_0+ppc476_issue_1+ppc476_issue_2") + +(define_insn_reservation "ppc476-load" 4 + (and (eq_attr "type" "load,load_l,store_c,sync") + (eq_attr "cpu" "ppc476")) + "ppc476_issue,\ + ppc476_lj_pipe") + +(define_insn_reservation "ppc476-store" 4 + (and (eq_attr "type" "store") + (eq_attr "cpu" "ppc476")) + "ppc476_issue,\ + ppc476_lj_pipe") + +(define_insn_reservation "ppc476-fpload" 4 + (and (eq_attr "type" "fpload") + (eq_attr "cpu" "ppc476")) + "ppc476_issue,\ + ppc476_lj_pipe") + +(define_insn_reservation "ppc476-fpstore" 4 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "ppc476")) + "ppc476_issue,\ + ppc476_lj_pipe") + +(define_insn_reservation "ppc476-simple-integer" 1 + (and (ior (eq_attr "type" "integer,insert") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "no"))) + (eq_attr "cpu" "ppc476")) + "ppc476_issue,\ + ppc476_i_pipe|ppc476_lj_pipe") + +(define_insn_reservation "ppc476-complex-integer" 1 + (and (eq_attr "type" "cmp,cr_logical,delayed_cr,cntlz,isel,isync,sync,trap,popcnt") + (eq_attr "cpu" "ppc476")) + "ppc476_issue,\ + ppc476_i_pipe") + +(define_insn_reservation "ppc476-compare" 4 + (and (ior (eq_attr "type" "mfcr,mfcrf,mtcr,mfjmpr,mtjmpr") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "yes"))) + (eq_attr "cpu" "ppc476")) + "ppc476_issue,\ + ppc476_i_pipe") + +(define_insn_reservation "ppc476-imul" 4 + (and (eq_attr "type" "mul,halfmul") + (eq_attr "cpu" "ppc476")) + "ppc476_issue,\ + ppc476_i_pipe") + +(define_insn_reservation "ppc476-idiv" 11 + (and (eq_attr "type" "div") + (eq_attr "cpu" "ppc476")) + "ppc476_issue,\ + ppc476_i_pipe*11") + +(define_insn_reservation "ppc476-branch" 1 + (and (eq_attr "type" "branch,jmpreg") + (eq_attr "cpu" "ppc476")) + "ppc476_issue,\ + ppc476_b_pipe") + +(define_insn_reservation "ppc476-two" 2 + (and (eq_attr "type" "two") + (eq_attr "cpu" "ppc476")) + "ppc476_issue2,\ + ppc476_i_pipe|ppc476_lj_pipe,\ + ppc476_i_pipe|ppc476_lj_pipe") + +(define_insn_reservation "ppc476-three" 3 + (and (eq_attr "type" "three") + (eq_attr "cpu" "ppc476")) + "ppc476_issue3,\ + ppc476_i_pipe|ppc476_lj_pipe,\ + ppc476_i_pipe|ppc476_lj_pipe,\ + ppc476_i_pipe|ppc476_lj_pipe") + +(define_insn_reservation "ppc476-fpcompare" 6 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "ppc476")) + "ppc476_issue+ppc476_issue_fp,\ + ppc476_f_pipe+ppc476_i_pipe") + +(define_insn_reservation "ppc476-fp" 6 + (and (eq_attr "type" "fp,fpsimple,dmul") + (eq_attr "cpu" "ppc476")) + "ppc476_issue_fp,\ + ppc476_f_pipe") + +(define_insn_reservation "ppc476-sdiv" 19 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppc476")) + "ppc476_issue_fp, + ppc476_f_pipe*19") + +(define_insn_reservation "ppc476-ddiv" 33 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppc476")) + 
"ppc476_issue_fp,\ + ppc476_f_pipe*33") + diff --git a/gcc/config/powerpcspe/476.opt b/gcc/config/powerpcspe/476.opt new file mode 100644 index 000000000000..c681d2c03a52 --- /dev/null +++ b/gcc/config/powerpcspe/476.opt @@ -0,0 +1,24 @@ +; IBM PowerPC 476 options. +; +; Copyright (C) 2011-2017 Free Software Foundation, Inc. +; Contributed by Peter Bergner (bergner@vnet.ibm.com) +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. + +mpreserve-link-stack +Target Var(rs6000_link_stack) Init(-1) Save +Preserve the PowerPC 476's link stack by matching up a blr with the bcl/bl insns used for GOT accesses. diff --git a/gcc/config/powerpcspe/601.md b/gcc/config/powerpcspe/601.md new file mode 100644 index 000000000000..792212602298 --- /dev/null +++ b/gcc/config/powerpcspe/601.md @@ -0,0 +1,137 @@ +;; Scheduling description for PowerPC 601 processor. +;; Copyright (C) 2003-2017 Free Software Foundation, Inc. +;; +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. 
+ +(define_automaton "ppc601,ppc601fp") +(define_cpu_unit "iu_ppc601" "ppc601") +(define_cpu_unit "fpu_ppc601" "ppc601fp") +(define_cpu_unit "bpu_ppc601" "ppc601") + +;; PPC601 32-bit IU, FPU, BPU + +(define_insn_reservation "ppc601-load" 2 + (and (eq_attr "type" "load,load_l,store_c,sync") + (eq_attr "cpu" "ppc601")) + "iu_ppc601") + +(define_insn_reservation "ppc601-store" 2 + (and (eq_attr "type" "store") + (eq_attr "cpu" "ppc601")) + "iu_ppc601") + +(define_insn_reservation "ppc601-fpload" 3 + (and (eq_attr "type" "fpload") + (eq_attr "cpu" "ppc601")) + "iu_ppc601") + +(define_insn_reservation "ppc601-fpstore" 3 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "ppc601")) + "iu_ppc601+fpu_ppc601") + +(define_insn_reservation "ppc601-integer" 1 + (and (ior (eq_attr "type" "integer,add,insert,trap,cntlz,isel") + (and (eq_attr "type" "shift,exts") + (eq_attr "dot" "no"))) + (eq_attr "cpu" "ppc601")) + "iu_ppc601") + +(define_insn_reservation "ppc601-two" 1 + (and (eq_attr "type" "two") + (eq_attr "cpu" "ppc601")) + "iu_ppc601,iu_ppc601") + +(define_insn_reservation "ppc601-three" 1 + (and (eq_attr "type" "three") + (eq_attr "cpu" "ppc601")) + "iu_ppc601,iu_ppc601,iu_ppc601") + +(define_insn_reservation "ppc601-imul" 5 + (and (eq_attr "type" "mul") + (eq_attr "cpu" "ppc601")) + "iu_ppc601*5") + +(define_insn_reservation "ppc601-idiv" 36 + (and (eq_attr "type" "div") + (eq_attr "cpu" "ppc601")) + "iu_ppc601*36") + +; compare executes on integer unit, but feeds insns which +; execute on the branch unit. +(define_insn_reservation "ppc601-compare" 3 + (and (ior (eq_attr "type" "cmp") + (and (eq_attr "type" "shift,exts") + (eq_attr "dot" "yes"))) + (eq_attr "cpu" "ppc601")) + "iu_ppc601,nothing,bpu_ppc601") + +(define_insn_reservation "ppc601-fpcompare" 5 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "ppc601")) + "(fpu_ppc601+iu_ppc601*2),nothing*2,bpu_ppc601") + +(define_insn_reservation "ppc601-fp" 4 + (and (eq_attr "type" "fp,fpsimple") + (eq_attr "cpu" "ppc601")) + "fpu_ppc601") + +(define_insn_reservation "ppc601-dmul" 5 + (and (eq_attr "type" "dmul") + (eq_attr "cpu" "ppc601")) + "fpu_ppc601*2") + +(define_insn_reservation "ppc601-sdiv" 17 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppc601")) + "fpu_ppc601*17") + +(define_insn_reservation "ppc601-ddiv" 31 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppc601")) + "fpu_ppc601*31") + +(define_insn_reservation "ppc601-mfcr" 2 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "ppc601")) + "iu_ppc601,bpu_ppc601") + +(define_insn_reservation "ppc601-mtcr" 4 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "ppc601")) + "iu_ppc601,bpu_ppc601") + +(define_insn_reservation "ppc601-crlogical" 4 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "ppc601")) + "bpu_ppc601") + +(define_insn_reservation "ppc601-mtjmpr" 4 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "ppc601")) + "iu_ppc601,bpu_ppc601") + +(define_insn_reservation "ppc601-mfjmpr" 2 + (and (eq_attr "type" "mfjmpr") + (eq_attr "cpu" "ppc601")) + "iu_ppc601,bpu_ppc601") + +(define_insn_reservation "ppc601-branch" 1 + (and (eq_attr "type" "jmpreg,branch,isync") + (eq_attr "cpu" "ppc601")) + "bpu_ppc601") + diff --git a/gcc/config/powerpcspe/603.md b/gcc/config/powerpcspe/603.md new file mode 100644 index 000000000000..853746eb9125 --- /dev/null +++ b/gcc/config/powerpcspe/603.md @@ -0,0 +1,147 @@ +;; Scheduling description for PowerPC 603 processor. +;; Copyright (C) 2003-2017 Free Software Foundation, Inc. +;; +;; This file is part of GCC. 
+ +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "ppc603,ppc603fp") +(define_cpu_unit "iu_603" "ppc603") +(define_cpu_unit "fpu_603" "ppc603fp") +(define_cpu_unit "lsu_603,bpu_603,sru_603" "ppc603") + +;; PPC603/PPC603e 32-bit IU, LSU, FPU, BPU, SRU +;; Max issue 3 insns/clock cycle (includes 1 branch) + +;; Branches go straight to the BPU. All other insns are handled +;; by a dispatch unit which can issue a max of 2 insns per cycle. + +;; The PPC603e user's manual recommends that to reduce branch mispredictions, +;; the insn that sets CR bits should be separated from the branch insn +;; that evaluates them; separation by more than 9 insns ensures that the CR +;; bits will be immediately available for execution. +;; This could be artificially achieved by exaggerating the latency of +;; compare insns but at the expense of a poorer schedule. + +;; CR insns get executed in the SRU. Not modelled. + +(define_insn_reservation "ppc603-load" 2 + (and (eq_attr "type" "load,load_l") + (eq_attr "cpu" "ppc603")) + "lsu_603") + +(define_insn_reservation "ppc603-store" 2 + (and (eq_attr "type" "store,fpstore") + (eq_attr "cpu" "ppc603")) + "lsu_603*2") + +(define_insn_reservation "ppc603-fpload" 2 + (and (eq_attr "type" "fpload") + (eq_attr "cpu" "ppc603")) + "lsu_603") + +(define_insn_reservation "ppc603-storec" 8 + (and (eq_attr "type" "store_c") + (eq_attr "cpu" "ppc603")) + "lsu_603") + +(define_insn_reservation "ppc603-integer" 1 + (and (ior (eq_attr "type" "integer,insert,trap,cntlz,isel") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "no"))) + (eq_attr "cpu" "ppc603")) + "iu_603") + +(define_insn_reservation "ppc603-two" 1 + (and (eq_attr "type" "two") + (eq_attr "cpu" "ppc603")) + "iu_603,iu_603") + +(define_insn_reservation "ppc603-three" 1 + (and (eq_attr "type" "three") + (eq_attr "cpu" "ppc603")) + "iu_603,iu_603,iu_603") + +; This takes 2 or 3 cycles +(define_insn_reservation "ppc603-imul" 3 + (and (eq_attr "type" "mul") + (eq_attr "size" "32") + (eq_attr "cpu" "ppc603")) + "iu_603*2") + +(define_insn_reservation "ppc603-imul2" 2 + (and (eq_attr "type" "mul") + (eq_attr "size" "8,16") + (eq_attr "cpu" "ppc603")) + "iu_603*2") + +(define_insn_reservation "ppc603-idiv" 37 + (and (eq_attr "type" "div") + (eq_attr "cpu" "ppc603")) + "iu_603*37") + +(define_insn_reservation "ppc603-compare" 3 + (and (ior (eq_attr "type" "cmp") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "yes"))) + (eq_attr "cpu" "ppc603")) + "iu_603,nothing,bpu_603") + +(define_insn_reservation "ppc603-fpcompare" 3 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "ppc603")) + "(fpu_603+iu_603*2),bpu_603") + +(define_insn_reservation "ppc603-fp" 3 + (and (eq_attr "type" "fp,fpsimple") + (eq_attr "cpu" "ppc603")) + "fpu_603") + +(define_insn_reservation "ppc603-dmul" 4 + (and (eq_attr "type" "dmul") + (eq_attr "cpu" "ppc603")) + "fpu_603*2") + +; Divides are not pipelined 
+(define_insn_reservation "ppc603-sdiv" 18 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppc603")) + "fpu_603*18") + +(define_insn_reservation "ppc603-ddiv" 33 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppc603")) + "fpu_603*33") + +(define_insn_reservation "ppc603-crlogical" 2 + (and (eq_attr "type" "cr_logical,delayed_cr,mfcr,mtcr") + (eq_attr "cpu" "ppc603")) + "sru_603") + +(define_insn_reservation "ppc603-mtjmpr" 4 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "ppc603")) + "sru_603") + +(define_insn_reservation "ppc603-mfjmpr" 2 + (and (eq_attr "type" "mfjmpr,isync,sync") + (eq_attr "cpu" "ppc603")) + "sru_603") + +(define_insn_reservation "ppc603-jmpreg" 1 + (and (eq_attr "type" "jmpreg,branch") + (eq_attr "cpu" "ppc603")) + "bpu_603") + diff --git a/gcc/config/powerpcspe/6xx.md b/gcc/config/powerpcspe/6xx.md new file mode 100644 index 000000000000..64e5ebb7d6f2 --- /dev/null +++ b/gcc/config/powerpcspe/6xx.md @@ -0,0 +1,284 @@ +;; Scheduling description for PowerPC 604, PowerPC 604e, PowerPC 620, +;; and PowerPC 630 processors. +;; Copyright (C) 2003-2017 Free Software Foundation, Inc. +;; +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "ppc6xx,ppc6xxfp,ppc6xxfp2") +(define_cpu_unit "iu1_6xx,iu2_6xx,mciu_6xx" "ppc6xx") +(define_cpu_unit "fpu_6xx" "ppc6xxfp") +(define_cpu_unit "fpu1_6xx,fpu2_6xx" "ppc6xxfp2") +(define_cpu_unit "lsu_6xx,bpu_6xx,cru_6xx" "ppc6xx") + +;; PPC604 32-bit 2xSCIU, MCIU, LSU, FPU, BPU +;; PPC604e 32-bit 2xSCIU, MCIU, LSU, FPU, BPU, CRU +;; MCIU used for imul/idiv and moves from/to spr +;; LSU 2 stage pipelined +;; FPU 3 stage pipelined +;; Max issue 4 insns/clock cycle + +;; PPC604e is PPC604 with larger caches and a CRU. In the 604 +;; the CR logical operations are handled in the BPU. +;; In the 604e, the CRU shares bus with BPU so only one condition +;; register or branch insn can be issued per clock. Not modelled. + +;; PPC620 64-bit 2xSCIU, MCIU, LSU, FPU, BPU, CRU +;; PPC630 64-bit 2xSCIU, MCIU, LSU, 2xFPU, BPU, CRU +;; Max issue 4 insns/clock cycle +;; Out-of-order execution, in-order completion + +;; No following instruction can dispatch in the same cycle as a branch +;; instruction. Not modelled. This is no problem if RCSP is not +;; enabled since the scheduler stops a schedule when it gets to a branch. + +;; Four insns can be dispatched per cycle. 
+ +(define_insn_reservation "ppc604-load" 2 + (and (eq_attr "type" "load") + (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630")) + "lsu_6xx") + +(define_insn_reservation "ppc604-fpload" 3 + (and (eq_attr "type" "fpload") + (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630")) + "lsu_6xx") + +(define_insn_reservation "ppc604-store" 3 + (and (eq_attr "type" "store,fpstore") + (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630")) + "lsu_6xx") + +(define_insn_reservation "ppc604-llsc" 3 + (and (eq_attr "type" "load_l,store_c") + (eq_attr "cpu" "ppc604,ppc604e")) + "lsu_6xx") + +(define_insn_reservation "ppc630-llsc" 4 + (and (eq_attr "type" "load_l,store_c") + (eq_attr "cpu" "ppc620,ppc630")) + "lsu_6xx") + +(define_insn_reservation "ppc604-integer" 1 + (and (ior (eq_attr "type" "integer,insert,trap,cntlz,isel") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "no"))) + (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630")) + "iu1_6xx|iu2_6xx") + +(define_insn_reservation "ppc604-two" 1 + (and (eq_attr "type" "two") + (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630")) + "iu1_6xx|iu2_6xx,iu1_6xx|iu2_6xx") + +(define_insn_reservation "ppc604-three" 1 + (and (eq_attr "type" "three") + (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630")) + "iu1_6xx|iu2_6xx,iu1_6xx|iu2_6xx,iu1_6xx|iu2_6xx") + +(define_insn_reservation "ppc604-imul" 4 + (and (eq_attr "type" "mul") + (eq_attr "cpu" "ppc604")) + "mciu_6xx*2") + +(define_insn_reservation "ppc604e-imul" 2 + (and (eq_attr "type" "mul") + (eq_attr "cpu" "ppc604e")) + "mciu_6xx") + +(define_insn_reservation "ppc620-imul" 5 + (and (eq_attr "type" "mul") + (eq_attr "size" "32") + (eq_attr "cpu" "ppc620,ppc630")) + "mciu_6xx*3") + +(define_insn_reservation "ppc620-imul2" 4 + (and (eq_attr "type" "mul") + (eq_attr "size" "16") + (eq_attr "cpu" "ppc620,ppc630")) + "mciu_6xx*3") + +(define_insn_reservation "ppc620-imul3" 3 + (and (eq_attr "type" "mul") + (eq_attr "size" "8") + (eq_attr "cpu" "ppc620,ppc630")) + "mciu_6xx*3") + +(define_insn_reservation "ppc620-lmul" 7 + (and (eq_attr "type" "mul") + (eq_attr "size" "64") + (eq_attr "cpu" "ppc620,ppc630")) + "mciu_6xx*5") + +(define_insn_reservation "ppc604-idiv" 20 + (and (eq_attr "type" "div") + (eq_attr "cpu" "ppc604,ppc604e")) + "mciu_6xx*19") + +(define_insn_reservation "ppc620-idiv" 37 + (and (eq_attr "type" "div") + (eq_attr "size" "32") + (eq_attr "cpu" "ppc620")) + "mciu_6xx*36") + +(define_insn_reservation "ppc630-idiv" 21 + (and (eq_attr "type" "div") + (eq_attr "size" "32") + (eq_attr "cpu" "ppc630")) + "mciu_6xx*20") + +(define_insn_reservation "ppc620-ldiv" 37 + (and (eq_attr "type" "div") + (eq_attr "size" "64") + (eq_attr "cpu" "ppc620,ppc630")) + "mciu_6xx*36") + +(define_insn_reservation "ppc604-compare" 3 + (and (ior (eq_attr "type" "cmp") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "yes"))) + (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630")) + "(iu1_6xx|iu2_6xx)") + +; FPU PPC604{,e},PPC620 +(define_insn_reservation "ppc604-fpcompare" 5 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "ppc604,ppc604e,ppc620")) + "fpu_6xx") + +(define_insn_reservation "ppc604-fp" 3 + (and (eq_attr "type" "fp,fpsimple") + (eq_attr "cpu" "ppc604,ppc604e,ppc620")) + "fpu_6xx") + +(define_insn_reservation "ppc604-dmul" 3 + (and (eq_attr "type" "dmul") + (eq_attr "cpu" "ppc604,ppc604e,ppc620")) + "fpu_6xx") + +; Divides are not pipelined +(define_insn_reservation "ppc604-sdiv" 18 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppc604,ppc604e,ppc620")) + "fpu_6xx*18") + +(define_insn_reservation 
"ppc604-ddiv" 32 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppc604,ppc604e,ppc620")) + "fpu_6xx*32") + +(define_insn_reservation "ppc620-ssqrt" 31 + (and (eq_attr "type" "ssqrt") + (eq_attr "cpu" "ppc620")) + "fpu_6xx*31") + +(define_insn_reservation "ppc620-dsqrt" 31 + (and (eq_attr "type" "dsqrt") + (eq_attr "cpu" "ppc620")) + "fpu_6xx*31") + + +; 2xFPU PPC630 +(define_insn_reservation "ppc630-fpcompare" 5 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "ppc630")) + "fpu1_6xx|fpu2_6xx") + +(define_insn_reservation "ppc630-fp" 3 + (and (eq_attr "type" "fp,dmul") + (eq_attr "cpu" "ppc630")) + "fpu1_6xx|fpu2_6xx") + +(define_insn_reservation "ppc630-sdiv" 17 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppc630")) + "fpu1_6xx*17|fpu2_6xx*17") + +(define_insn_reservation "ppc630-ddiv" 21 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppc630")) + "fpu1_6xx*21|fpu2_6xx*21") + +(define_insn_reservation "ppc630-ssqrt" 18 + (and (eq_attr "type" "ssqrt") + (eq_attr "cpu" "ppc630")) + "fpu1_6xx*18|fpu2_6xx*18") + +(define_insn_reservation "ppc630-dsqrt" 25 + (and (eq_attr "type" "dsqrt") + (eq_attr "cpu" "ppc630")) + "fpu1_6xx*25|fpu2_6xx*25") + +(define_insn_reservation "ppc604-mfcr" 3 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630")) + "mciu_6xx") + +(define_insn_reservation "ppc604-mtcr" 2 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630")) + "iu1_6xx|iu2_6xx") + +(define_insn_reservation "ppc604-crlogical" 2 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "ppc604")) + "bpu_6xx") + +(define_insn_reservation "ppc604e-crlogical" 2 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "ppc604e,ppc620,ppc630")) + "cru_6xx") + +(define_insn_reservation "ppc604-mtjmpr" 2 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630")) + "mciu_6xx") + +(define_insn_reservation "ppc604-mfjmpr" 3 + (and (eq_attr "type" "mfjmpr") + (eq_attr "cpu" "ppc604,ppc604e,ppc620")) + "mciu_6xx") + +(define_insn_reservation "ppc630-mfjmpr" 2 + (and (eq_attr "type" "mfjmpr") + (eq_attr "cpu" "ppc630")) + "mciu_6xx") + +(define_insn_reservation "ppc604-jmpreg" 1 + (and (eq_attr "type" "jmpreg,branch") + (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630")) + "bpu_6xx") + +(define_insn_reservation "ppc604-isync" 0 + (and (eq_attr "type" "isync") + (eq_attr "cpu" "ppc604,ppc604e")) + "bpu_6xx") + +(define_insn_reservation "ppc630-isync" 6 + (and (eq_attr "type" "isync") + (eq_attr "cpu" "ppc620,ppc630")) + "bpu_6xx") + +(define_insn_reservation "ppc604-sync" 35 + (and (eq_attr "type" "sync") + (eq_attr "cpu" "ppc604,ppc604e")) + "lsu_6xx") + +(define_insn_reservation "ppc630-sync" 26 + (and (eq_attr "type" "sync") + (eq_attr "cpu" "ppc620,ppc630")) + "lsu_6xx") + diff --git a/gcc/config/powerpcspe/7450.md b/gcc/config/powerpcspe/7450.md new file mode 100644 index 000000000000..8617708fd8bd --- /dev/null +++ b/gcc/config/powerpcspe/7450.md @@ -0,0 +1,188 @@ +;; Scheduling description for Motorola PowerPC 7450 processor. +;; Copyright (C) 2003-2017 Free Software Foundation, Inc. +;; +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. 
+ +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "ppc7450,ppc7450mciu,ppc7450fp,ppc7450vec") +(define_cpu_unit "iu1_7450,iu2_7450,iu3_7450" "ppc7450") +(define_cpu_unit "mciu_7450" "ppc7450mciu") +(define_cpu_unit "fpu_7450" "ppc7450fp") +(define_cpu_unit "lsu_7450,bpu_7450" "ppc7450") +(define_cpu_unit "du1_7450,du2_7450,du3_7450" "ppc7450") +(define_cpu_unit "vecsmpl_7450,veccmplx_7450,vecflt_7450,vecperm_7450" "ppc7450vec") +(define_cpu_unit "vdu1_7450,vdu2_7450" "ppc7450vec") + + +;; PPC7450 32-bit 3xIU, MCIU, LSU, SRU, FPU, BPU, 4xVEC +;; IU1,IU2,IU3 can perform all integer operations +;; MCIU performs imul and idiv, cr logical, SPR moves +;; LSU 2 stage pipelined +;; FPU 3 stage pipelined +;; It also has 4 vector units, one for each type of vector instruction. +;; However, we can only dispatch 2 instructions per cycle. +;; Max issue 3 insns/clock cycle (includes 1 branch) +;; In-order execution + +;; Branches go straight to the BPU. All other insns are handled +;; by a dispatch unit which can issue a max of 3 insns per cycle. +(define_reservation "ppc7450_du" "du1_7450|du2_7450|du3_7450") +(define_reservation "ppc7450_vec_du" "vdu1_7450|vdu2_7450") + +(define_insn_reservation "ppc7450-load" 3 + (and (eq_attr "type" "load,vecload") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,lsu_7450") + +(define_insn_reservation "ppc7450-store" 3 + (and (eq_attr "type" "store,vecstore") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,lsu_7450") + +(define_insn_reservation "ppc7450-fpload" 4 + (and (eq_attr "type" "fpload") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,lsu_7450") + +(define_insn_reservation "ppc7450-fpstore" 3 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,lsu_7450*3") + +(define_insn_reservation "ppc7450-llsc" 3 + (and (eq_attr "type" "load_l,store_c") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,lsu_7450") + +(define_insn_reservation "ppc7450-sync" 35 + (and (eq_attr "type" "sync") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,lsu_7450") + +(define_insn_reservation "ppc7450-integer" 1 + (and (ior (eq_attr "type" "integer,insert,trap,cntlz,isel") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "no"))) + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,iu1_7450|iu2_7450|iu3_7450") + +(define_insn_reservation "ppc7450-two" 1 + (and (eq_attr "type" "two") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,iu1_7450|iu2_7450|iu3_7450,iu1_7450|iu2_7450|iu3_7450") + +(define_insn_reservation "ppc7450-three" 1 + (and (eq_attr "type" "three") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,iu1_7450|iu2_7450|iu3_7450,\ + iu1_7450|iu2_7450|iu3_7450,iu1_7450|iu2_7450|iu3_7450") + +(define_insn_reservation "ppc7450-imul" 4 + (and (eq_attr "type" "mul") + (eq_attr "size" "32") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,mciu_7450*2") + +(define_insn_reservation "ppc7450-imul2" 3 + (and (eq_attr "type" "mul") + (eq_attr "size" "8,16") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,mciu_7450") + +(define_insn_reservation "ppc7450-idiv" 23 + (and (eq_attr "type" "div") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,mciu_7450*23") + +(define_insn_reservation "ppc7450-compare" 2 + (and (ior (eq_attr "type" "cmp") + (and (eq_attr 
"type" "add,logical,shift,exts") + (eq_attr "dot" "yes"))) + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,(iu1_7450|iu2_7450|iu3_7450)") + +(define_insn_reservation "ppc7450-fpcompare" 5 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,fpu_7450") + +(define_insn_reservation "ppc7450-fp" 5 + (and (eq_attr "type" "fp,fpsimple,dmul") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,fpu_7450") + +; Divides are not pipelined +(define_insn_reservation "ppc7450-sdiv" 21 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,fpu_7450*21") + +(define_insn_reservation "ppc7450-ddiv" 35 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,fpu_7450*35") + +(define_insn_reservation "ppc7450-mfcr" 2 + (and (eq_attr "type" "mfcr,mtcr") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,mciu_7450") + +(define_insn_reservation "ppc7450-crlogical" 1 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,mciu_7450") + +(define_insn_reservation "ppc7450-mtjmpr" 2 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "ppc7450")) + "nothing,mciu_7450*2") + +(define_insn_reservation "ppc7450-mfjmpr" 3 + (and (eq_attr "type" "mfjmpr") + (eq_attr "cpu" "ppc7450")) + "nothing,mciu_7450*2") + +(define_insn_reservation "ppc7450-jmpreg" 1 + (and (eq_attr "type" "jmpreg,branch,isync") + (eq_attr "cpu" "ppc7450")) + "nothing,bpu_7450") + +;; Altivec +(define_insn_reservation "ppc7450-vecsimple" 1 + (and (eq_attr "type" "vecsimple,veclogical,vecmove") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,ppc7450_vec_du,vecsmpl_7450") + +(define_insn_reservation "ppc7450-veccomplex" 4 + (and (eq_attr "type" "veccomplex") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,ppc7450_vec_du,veccmplx_7450") + +(define_insn_reservation "ppc7450-veccmp" 2 + (and (eq_attr "type" "veccmp,veccmpfx") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,ppc7450_vec_du,veccmplx_7450") + +(define_insn_reservation "ppc7450-vecfloat" 4 + (and (eq_attr "type" "vecfloat") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,ppc7450_vec_du,vecflt_7450") + +(define_insn_reservation "ppc7450-vecperm" 2 + (and (eq_attr "type" "vecperm") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,ppc7450_vec_du,vecperm_7450") + diff --git a/gcc/config/powerpcspe/750cl.h b/gcc/config/powerpcspe/750cl.h new file mode 100644 index 000000000000..b31f05fafd10 --- /dev/null +++ b/gcc/config/powerpcspe/750cl.h @@ -0,0 +1,30 @@ +/* Enable 750cl paired single support. + Copyright (C) 2007-2017 Free Software Foundation, Inc. + Contributed by Revital Eres (eres@il.ibm.com) + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#undef TARGET_PAIRED_FLOAT +#define TARGET_PAIRED_FLOAT rs6000_paired_float + +#undef ASM_CPU_SPEC +#define ASM_CPU_SPEC "-m750cl" + diff --git a/gcc/config/powerpcspe/7xx.md b/gcc/config/powerpcspe/7xx.md new file mode 100644 index 000000000000..4001bf9405d1 --- /dev/null +++ b/gcc/config/powerpcspe/7xx.md @@ -0,0 +1,186 @@ +;; Scheduling description for Motorola PowerPC 750 and PowerPC 7400 processors. +;; Copyright (C) 2003-2017 Free Software Foundation, Inc. +;; +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "ppc7xx,ppc7xxfp") +(define_cpu_unit "iu1_7xx,iu2_7xx" "ppc7xx") +(define_cpu_unit "fpu_7xx" "ppc7xxfp") +(define_cpu_unit "lsu_7xx,bpu_7xx,sru_7xx" "ppc7xx") +(define_cpu_unit "du1_7xx,du2_7xx" "ppc7xx") +(define_cpu_unit "veccmplx_7xx,vecperm_7xx,vdu_7xx" "ppc7xx") + +;; PPC740/PPC750/PPC7400 32-bit 2xIU, LSU, SRU, FPU, BPU +;; IU1 can perform all integer operations +;; IU2 can perform all integer operations except imul and idiv +;; LSU 2 stage pipelined +;; FPU 3 stage pipelined +;; Max issue 3 insns/clock cycle (includes 1 branch) +;; In-order execution + + +;; The PPC750 user's manual recommends that to reduce branch mispredictions, +;; the insn that sets CR bits should be separated from the branch insn +;; that evaluates them. There is no advantage have more than 10 cycles +;; of separation. +;; This could be artificially achieved by exaggerating the latency of +;; compare insns but at the expense of a poorer schedule. + +;; Branches go straight to the BPU. All other insns are handled +;; by a dispatch unit which can issue a max of 2 insns per cycle. 
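+
+;; Notation in the reservation strings below (standard DFA scheduler
+;; syntax, summarised here): "|" picks either of two units, "," advances
+;; to the next cycle, and "unit*n" keeps a unit busy for n consecutive
+;; cycles.  For instance "ppc750_du,fpu_7xx*17" (the single-precision
+;; divide below) takes a dispatch slot for one cycle and then occupies
+;; the FPU, non-pipelined, for 17 cycles.
+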
+(define_reservation "ppc750_du" "du1_7xx|du2_7xx") +(define_reservation "ppc7400_vec_du" "vdu_7xx") + +(define_insn_reservation "ppc750-load" 2 + (and (eq_attr "type" "load,fpload,vecload,load_l") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,lsu_7xx") + +(define_insn_reservation "ppc750-store" 2 + (and (eq_attr "type" "store,fpstore,vecstore") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,lsu_7xx") + +(define_insn_reservation "ppc750-storec" 8 + (and (eq_attr "type" "store_c") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,lsu_7xx") + +(define_insn_reservation "ppc750-integer" 1 + (and (ior (eq_attr "type" "integer,insert,trap,cntlz,isel") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "no"))) + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,iu1_7xx|iu2_7xx") + +(define_insn_reservation "ppc750-two" 1 + (and (eq_attr "type" "two") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,iu1_7xx|iu2_7xx,iu1_7xx|iu2_7xx") + +(define_insn_reservation "ppc750-three" 1 + (and (eq_attr "type" "three") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,iu1_7xx|iu2_7xx,iu1_7xx|iu2_7xx,iu1_7xx|iu2_7xx") + +(define_insn_reservation "ppc750-imul" 4 + (and (eq_attr "type" "mul") + (eq_attr "size" "32") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,iu1_7xx*4") + +(define_insn_reservation "ppc750-imul2" 3 + (and (eq_attr "type" "mul") + (eq_attr "size" "16") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,iu1_7xx*2") + +(define_insn_reservation "ppc750-imul3" 2 + (and (eq_attr "type" "mul") + (eq_attr "size" "8") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,iu1_7xx") + +(define_insn_reservation "ppc750-idiv" 19 + (and (eq_attr "type" "div") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,iu1_7xx*19") + +(define_insn_reservation "ppc750-compare" 2 + (and (ior (eq_attr "type" "cmp") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "yes"))) + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,(iu1_7xx|iu2_7xx)") + +(define_insn_reservation "ppc750-fpcompare" 2 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,fpu_7xx") + +(define_insn_reservation "ppc750-fp" 3 + (and (eq_attr "type" "fp,fpsimple") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,fpu_7xx") + +(define_insn_reservation "ppc750-dmul" 4 + (and (eq_attr "type" "dmul") + (eq_attr "cpu" "ppc750")) + "ppc750_du,fpu_7xx*2") + +(define_insn_reservation "ppc7400-dmul" 3 + (and (eq_attr "type" "dmul") + (eq_attr "cpu" "ppc7400")) + "ppc750_du,fpu_7xx") + +; Divides are not pipelined +(define_insn_reservation "ppc750-sdiv" 17 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,fpu_7xx*17") + +(define_insn_reservation "ppc750-ddiv" 31 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,fpu_7xx*31") + +(define_insn_reservation "ppc750-mfcr" 2 + (and (eq_attr "type" "mfcr,mtcr") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,iu1_7xx") + +(define_insn_reservation "ppc750-crlogical" 3 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "ppc750,ppc7400")) + "nothing,sru_7xx*2") + +(define_insn_reservation "ppc750-mtjmpr" 2 + (and (eq_attr "type" "mtjmpr,isync,sync") + (eq_attr "cpu" "ppc750,ppc7400")) + "nothing,sru_7xx*2") + +(define_insn_reservation "ppc750-mfjmpr" 3 + (and (eq_attr "type" "mfjmpr") + (eq_attr "cpu" "ppc750,ppc7400")) + "nothing,sru_7xx*2") + +(define_insn_reservation "ppc750-jmpreg" 1 + (and (eq_attr "type" "jmpreg,branch,isync") + (eq_attr "cpu" "ppc750,ppc7400")) + "nothing,bpu_7xx") + +;; 
Altivec +(define_insn_reservation "ppc7400-vecsimple" 1 + (and (eq_attr "type" "vecsimple,veclogical,vecmove,veccmp,veccmpfx") + (eq_attr "cpu" "ppc7400")) + "ppc750_du,ppc7400_vec_du,veccmplx_7xx") + +(define_insn_reservation "ppc7400-veccomplex" 4 + (and (eq_attr "type" "veccomplex") + (eq_attr "cpu" "ppc7400")) + "ppc750_du,ppc7400_vec_du,veccmplx_7xx") + +(define_insn_reservation "ppc7400-vecfloat" 4 + (and (eq_attr "type" "vecfloat") + (eq_attr "cpu" "ppc7400")) + "ppc750_du,ppc7400_vec_du,veccmplx_7xx") + +(define_insn_reservation "ppc7400-vecperm" 2 + (and (eq_attr "type" "vecperm") + (eq_attr "cpu" "ppc7400")) + "ppc750_du,ppc7400_vec_du,vecperm_7xx") + diff --git a/gcc/config/powerpcspe/8540.md b/gcc/config/powerpcspe/8540.md new file mode 100644 index 000000000000..fae369de0c34 --- /dev/null +++ b/gcc/config/powerpcspe/8540.md @@ -0,0 +1,248 @@ +;; Pipeline description for Motorola PowerPC 8540 processor. +;; Copyright (C) 2003-2017 Free Software Foundation, Inc. +;; +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "ppc8540_most,ppc8540_long,ppc8540_retire") +(define_cpu_unit "ppc8540_decode_0,ppc8540_decode_1" "ppc8540_most") + +;; We don't simulate general issue queue (GIC). If we have SU insn +;; and then SU1 insn, they cannot be issued on the same cycle +;; (although SU1 insn and then SU insn can be issued) because the SU +;; insn will go to SU1 from GIC0 entry. Fortunately, the first cycle +;; multipass insn scheduling will find the situation and issue the SU1 +;; insn and then the SU insn. +(define_cpu_unit "ppc8540_issue_0,ppc8540_issue_1" "ppc8540_most") + +;; We could describe completion buffers slots in combination with the +;; retirement units and the order of completion but the result +;; automaton would behave in the same way because we cannot describe +;; real latency time with taking in order completion into account. +;; Actually we could define the real latency time by querying reserved +;; automaton units but the current scheduler uses latency time before +;; issuing insns and making any reservations. +;; +;; So our description is aimed to achieve a insn schedule in which the +;; insns would not wait in the completion buffer. +(define_cpu_unit "ppc8540_retire_0,ppc8540_retire_1" "ppc8540_retire") + +;; Branch unit: +(define_cpu_unit "ppc8540_bu" "ppc8540_most") + +;; SU: +(define_cpu_unit "ppc8540_su0_stage0,ppc8540_su1_stage0" "ppc8540_most") + +;; We could describe here MU subunits for float multiply, float add +;; etc. But the result automaton would behave the same way as the +;; described one pipeline below because MU can start only one insn +;; per cycle. Actually we could simplify the automaton more not +;; describing stages 1-3, the result automata would be the same. 
+(define_cpu_unit "ppc8540_mu_stage0,ppc8540_mu_stage1" "ppc8540_most") +(define_cpu_unit "ppc8540_mu_stage2,ppc8540_mu_stage3" "ppc8540_most") + +;; The following unit is used to describe non-pipelined division. +(define_cpu_unit "ppc8540_mu_div" "ppc8540_long") + +;; Here we simplified LSU unit description not describing the stages. +(define_cpu_unit "ppc8540_lsu" "ppc8540_most") + +;; The following units are used to make automata deterministic +(define_cpu_unit "present_ppc8540_decode_0" "ppc8540_most") +(define_cpu_unit "present_ppc8540_issue_0" "ppc8540_most") +(define_cpu_unit "present_ppc8540_retire_0" "ppc8540_retire") +(define_cpu_unit "present_ppc8540_su0_stage0" "ppc8540_most") + +;; The following sets to make automata deterministic when option ndfa is used. +(presence_set "present_ppc8540_decode_0" "ppc8540_decode_0") +(presence_set "present_ppc8540_issue_0" "ppc8540_issue_0") +(presence_set "present_ppc8540_retire_0" "ppc8540_retire_0") +(presence_set "present_ppc8540_su0_stage0" "ppc8540_su0_stage0") + +;; Some useful abbreviations. +(define_reservation "ppc8540_decode" + "ppc8540_decode_0|ppc8540_decode_1+present_ppc8540_decode_0") +(define_reservation "ppc8540_issue" + "ppc8540_issue_0|ppc8540_issue_1+present_ppc8540_issue_0") +(define_reservation "ppc8540_retire" + "ppc8540_retire_0|ppc8540_retire_1+present_ppc8540_retire_0") +(define_reservation "ppc8540_su_stage0" + "ppc8540_su0_stage0|ppc8540_su1_stage0+present_ppc8540_su0_stage0") + +;; Simple SU insns +(define_insn_reservation "ppc8540_su" 1 + (and (eq_attr "type" "integer,add,logical,insert,cmp,\ + shift,trap,cntlz,exts,isel") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_su_stage0+ppc8540_retire") + +(define_insn_reservation "ppc8540_two" 1 + (and (eq_attr "type" "two") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_su_stage0+ppc8540_retire,\ + ppc8540_issue+ppc8540_su_stage0+ppc8540_retire") + +(define_insn_reservation "ppc8540_three" 1 + (and (eq_attr "type" "three") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_su_stage0+ppc8540_retire,\ + ppc8540_issue+ppc8540_su_stage0+ppc8540_retire,\ + ppc8540_issue+ppc8540_su_stage0+ppc8540_retire") + +;; Branch. Actually this latency time is not used by the scheduler. +(define_insn_reservation "ppc8540_branch" 1 + (and (eq_attr "type" "jmpreg,branch,isync") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_bu,ppc8540_retire") + +;; Multiply +(define_insn_reservation "ppc8540_multiply" 4 + (and (eq_attr "type" "mul") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_mu_stage0,ppc8540_mu_stage1,\ + ppc8540_mu_stage2,ppc8540_mu_stage3+ppc8540_retire") + +;; Divide. We use the average latency time here. We omit reserving a +;; retire unit because of the result automata will be huge. We ignore +;; reservation of miu_stage3 here because we use the average latency +;; time. 
+(define_insn_reservation "ppc8540_divide" 14 + (and (eq_attr "type" "div") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_mu_stage0+ppc8540_mu_div,\ + ppc8540_mu_div*13") + +;; CR logical +(define_insn_reservation "ppc8540_cr_logical" 1 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_bu,ppc8540_retire") + +;; Mfcr +(define_insn_reservation "ppc8540_mfcr" 1 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_su1_stage0+ppc8540_retire") + +;; Mtcrf +(define_insn_reservation "ppc8540_mtcrf" 1 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_su1_stage0+ppc8540_retire") + +;; Mtjmpr +(define_insn_reservation "ppc8540_mtjmpr" 1 + (and (eq_attr "type" "mtjmpr,mfjmpr") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_su_stage0+ppc8540_retire") + +;; Loads +(define_insn_reservation "ppc8540_load" 3 + (and (eq_attr "type" "load,load_l,sync") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_lsu,nothing,ppc8540_retire") + +;; Stores. +(define_insn_reservation "ppc8540_store" 3 + (and (eq_attr "type" "store,store_c") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_lsu,nothing,ppc8540_retire") + +;; Simple FP +(define_insn_reservation "ppc8540_simple_float" 1 + (and (eq_attr "type" "fpsimple") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_su_stage0+ppc8540_retire") + +;; FP +(define_insn_reservation "ppc8540_float" 4 + (and (eq_attr "type" "fp") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_mu_stage0,ppc8540_mu_stage1,\ + ppc8540_mu_stage2,ppc8540_mu_stage3+ppc8540_retire") + +;; float divides. We omit reserving a retire unit and miu_stage3 +;; because of the result automata will be huge. 
+(define_insn_reservation "ppc8540_float_vector_divide" 29 + (and (eq_attr "type" "vecfdiv") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_mu_stage0+ppc8540_mu_div,\ + ppc8540_mu_div*28") + +;; Brinc +(define_insn_reservation "ppc8540_brinc" 1 + (and (eq_attr "type" "brinc") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_su_stage0+ppc8540_retire") + +;; Simple vector +(define_insn_reservation "ppc8540_simple_vector" 1 + (and (eq_attr "type" "vecsimple,veclogical,vecmove") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_su1_stage0+ppc8540_retire") + +;; Simple vector compare +(define_insn_reservation "ppc8540_simple_vector_compare" 1 + (and (eq_attr "type" "veccmpsimple") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_su_stage0+ppc8540_retire") + +;; Vector compare +(define_insn_reservation "ppc8540_vector_compare" 1 + (and (eq_attr "type" "veccmp,veccmpfx") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_su1_stage0+ppc8540_retire") + +;; evsplatfi evsplati +(define_insn_reservation "ppc8540_vector_perm" 1 + (and (eq_attr "type" "vecperm") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_su1_stage0+ppc8540_retire") + +;; Vector float +(define_insn_reservation "ppc8540_float_vector" 4 + (and (eq_attr "type" "vecfloat") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_mu_stage0,ppc8540_mu_stage1,\ + ppc8540_mu_stage2,ppc8540_mu_stage3+ppc8540_retire") + +;; Vector divides: Use the average. We omit reserving a retire unit +;; because of the result automata will be huge. We ignore reservation +;; of miu_stage3 here because we use the average latency time. +(define_insn_reservation "ppc8540_vector_divide" 14 + (and (eq_attr "type" "vecdiv") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_mu_stage0+ppc8540_mu_div,\ + ppc8540_mu_div*13") + +;; Complex vector. +(define_insn_reservation "ppc8540_complex_vector" 4 + (and (eq_attr "type" "veccomplex") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_mu_stage0,ppc8540_mu_stage1,\ + ppc8540_mu_stage2,ppc8540_mu_stage3+ppc8540_retire") + +;; Vector load +(define_insn_reservation "ppc8540_vector_load" 3 + (and (eq_attr "type" "vecload") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_lsu,nothing,ppc8540_retire") + +;; Vector store +(define_insn_reservation "ppc8540_vector_store" 3 + (and (eq_attr "type" "vecstore") + (eq_attr "cpu" "ppc8540,ppc8548")) + "ppc8540_decode,ppc8540_issue+ppc8540_lsu,nothing,ppc8540_retire") diff --git a/gcc/config/powerpcspe/a2.md b/gcc/config/powerpcspe/a2.md new file mode 100644 index 000000000000..d8dd3268f71d --- /dev/null +++ b/gcc/config/powerpcspe/a2.md @@ -0,0 +1,138 @@ +;; Scheduling description for PowerPC A2 processors. +;; Copyright (C) 2009-2017 Free Software Foundation, Inc. +;; Contributed by Ben Elliston (bje@au.ibm.com) + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. 
+ +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "ppca2") + +;; CPU units + +;; The multiplier pipeline. +(define_cpu_unit "mult" "ppca2") + +;; The auxiliary processor unit (FP/vector unit). +(define_cpu_unit "axu" "ppca2") + +;; D.4.6 +;; Some peculiarities for certain SPRs + +(define_insn_reservation "ppca2-mfcr" 1 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "ppca2")) + "nothing") + +(define_insn_reservation "ppca2-mfjmpr" 5 + (and (eq_attr "type" "mfjmpr") + (eq_attr "cpu" "ppca2")) + "nothing") + +(define_insn_reservation "ppca2-mtjmpr" 5 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "ppca2")) + "nothing") + +;; D.4.8 +(define_insn_reservation "ppca2-imul" 1 + (and (eq_attr "type" "mul") + (eq_attr "size" "8,16,32") + (eq_attr "cpu" "ppca2")) + "nothing") + +;; FIXME: latency and multiplier reservation for 64-bit multiply? +(define_insn_reservation "ppca2-lmul" 6 + (and (eq_attr "type" "mul") + (eq_attr "size" "64") + (eq_attr "cpu" "ppca2")) + "mult*3") + +;; D.4.9 +(define_insn_reservation "ppca2-idiv" 32 + (and (eq_attr "type" "div") + (eq_attr "size" "32") + (eq_attr "cpu" "ppca2")) + "mult*32") + +(define_insn_reservation "ppca2-ldiv" 65 + (and (eq_attr "type" "div") + (eq_attr "size" "64") + (eq_attr "cpu" "ppca2")) + "mult*65") + +;; D.4.13 +(define_insn_reservation "ppca2-load" 5 + (and (eq_attr "type" "load") + (eq_attr "cpu" "ppca2")) + "nothing") + +;; D.8.1 +(define_insn_reservation "ppca2-fp" 6 + (and (eq_attr "type" "fp,fpsimple") + (eq_attr "cpu" "ppca2")) + "axu") + +;; D.8.4 +(define_insn_reservation "ppca2-fp-load" 6 + (and (eq_attr "type" "fpload") + (eq_attr "cpu" "ppca2")) + "axu") + +;; D.8.5 +(define_insn_reservation "ppca2-fp-store" 2 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "ppca2")) + "axu") + +;; D.8.6 +(define_insn_reservation "ppca2-fpcompare" 5 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "ppca2")) + "axu") + +;; D.8.7 +;; +;; Instructions from the same thread succeeding the floating-point +;; divide cannot be executed until the floating-point divide has +;; completed. Since there is nothing else we can do, this thread will +;; just have to stall. + +(define_insn_reservation "ppca2-ddiv" 72 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppca2")) + "axu") + +(define_insn_reservation "ppca2-sdiv" 59 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppca2")) + "axu") + +;; D.8.8 +;; +;; Instructions from the same thread succeeding the floating-point +;; divide cannot be executed until the floating-point divide has +;; completed. Since there is nothing else we can do, this thread will +;; just have to stall. + +(define_insn_reservation "ppca2-dsqrt" 69 + (and (eq_attr "type" "dsqrt") + (eq_attr "cpu" "ppca2")) + "axu") + +(define_insn_reservation "ppca2-ssqrt" 65 + (and (eq_attr "type" "ssqrt") + (eq_attr "cpu" "ppca2")) + "axu") diff --git a/gcc/config/powerpcspe/aix-stdint.h b/gcc/config/powerpcspe/aix-stdint.h new file mode 100644 index 000000000000..77cc699daaf8 --- /dev/null +++ b/gcc/config/powerpcspe/aix-stdint.h @@ -0,0 +1,51 @@ +/* Definitions for <stdint.h> types on systems using AIX. + Copyright (C) 2009-2017 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. 
+ +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#define SIG_ATOMIC_TYPE "int" + +#define INT8_TYPE "signed char" +#define INT16_TYPE "short int" +#define INT32_TYPE "int" +#define INT64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int") +#define UINT8_TYPE "unsigned char" +#define UINT16_TYPE "short unsigned int" +#define UINT32_TYPE "unsigned int" +#define UINT64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int") + +#define INT_LEAST8_TYPE "signed char" +#define INT_LEAST16_TYPE "short int" +#define INT_LEAST32_TYPE "int" +#define INT_LEAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int") +#define UINT_LEAST8_TYPE "unsigned char" +#define UINT_LEAST16_TYPE "short unsigned int" +#define UINT_LEAST32_TYPE "unsigned int" +#define UINT_LEAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int") + +#define INT_FAST8_TYPE "signed char" +#define INT_FAST16_TYPE "short int" +#define INT_FAST32_TYPE "int" +#define INT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int") +#define UINT_FAST8_TYPE "unsigned char" +#define UINT_FAST16_TYPE "short unsigned int" +#define UINT_FAST32_TYPE "unsigned int" +#define UINT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int") + +#define INTPTR_TYPE "long int" +#define UINTPTR_TYPE "long unsigned int" + diff --git a/gcc/config/powerpcspe/aix.h b/gcc/config/powerpcspe/aix.h new file mode 100644 index 000000000000..b90e2b065195 --- /dev/null +++ b/gcc/config/powerpcspe/aix.h @@ -0,0 +1,278 @@ +/* Definitions of target machine for GNU compiler, + for IBM RS/6000 POWER running AIX. + Copyright (C) 2000-2017 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* Yes! We are AIX! */ +#define DEFAULT_ABI ABI_AIX +#undef TARGET_AIX +#define TARGET_AIX 1 + +/* Linux64.h wants to redefine TARGET_AIX based on -m64, but it can't be used + in the #if conditional in options-default.h, so provide another macro. */ +#undef TARGET_AIX_OS +#define TARGET_AIX_OS 1 + +/* AIX always has a TOC. */ +#define TARGET_NO_TOC 0 +#define TARGET_TOC 1 +#define FIXED_R2 1 + +/* AIX allows r13 to be used in 32-bit mode. */ +#define FIXED_R13 0 + +/* 32-bit and 64-bit AIX stack boundary is 128. */ +#undef STACK_BOUNDARY +#define STACK_BOUNDARY 128 + +/* Offset within stack frame to start allocating local variables at. + If FRAME_GROWS_DOWNWARD, this is the offset to the END of the + first local allocated. Otherwise, it is the offset to the BEGINNING + of the first local allocated. 
+ + On the RS/6000, the frame pointer is the same as the stack pointer, + except for dynamic allocations. So we start after the fixed area and + outgoing parameter area. + + If the function uses dynamic stack space (CALLS_ALLOCA is set), that + space needs to be aligned to STACK_BOUNDARY, i.e. the sum of the + sizes of the fixed area and the parameter area must be a multiple of + STACK_BOUNDARY. */ + +#undef STARTING_FRAME_OFFSET +#define STARTING_FRAME_OFFSET \ + (FRAME_GROWS_DOWNWARD \ + ? 0 \ + : (cfun->calls_alloca \ + ? RS6000_ALIGN (crtl->outgoing_args_size + RS6000_SAVE_AREA, 16) \ + : (RS6000_ALIGN (crtl->outgoing_args_size, 16) + RS6000_SAVE_AREA))) + +/* Offset from the stack pointer register to an item dynamically + allocated on the stack, e.g., by `alloca'. + + The default value for this macro is `STACK_POINTER_OFFSET' plus the + length of the outgoing arguments. The default is correct for most + machines. See `function.c' for details. + + This value must be a multiple of STACK_BOUNDARY (hard coded in + `emit-rtl.c'). */ +#undef STACK_DYNAMIC_OFFSET +#define STACK_DYNAMIC_OFFSET(FUNDECL) \ + RS6000_ALIGN (crtl->outgoing_args_size + STACK_POINTER_OFFSET, 16) + +#undef TARGET_IEEEQUAD +#define TARGET_IEEEQUAD 0 + +/* The AIX linker will discard static constructors in object files before + collect has a chance to see them, so scan the object files directly. */ +#define COLLECT_EXPORT_LIST + +/* On AIX, initialisers specified with -binitfini are called in breadth-first + order. + e.g. if a.out depends on lib1.so, the init function for a.out is called before + the init function for lib1.so. + + To ensure global C++ constructors in linked libraries are run before global + C++ constructors from the current module, there is additional symbol scanning + logic in collect2. + + The global initialiser/finaliser functions are named __GLOBAL_AIXI_{libname} + and __GLOBAL_AIXD_{libname} and are exported from each shared library. + + collect2 will detect these symbols when they exist in shared libraries that + the current program is being linked against. All such initiliser functions + will be called prior to the constructors of the current program, and + finaliser functions called after destructors. + + Reference counting generated by collect2 will ensure that constructors are + only invoked once in the case of multiple dependencies on a library. + + -binitfini is still used in parallel to this solution. + This handles the case where a library is loaded through dlopen(), and also + handles the option -blazy. +*/ +#define COLLECT_SHARED_INIT_FUNC(STREAM, FUNC) \ + fprintf ((STREAM), "void %s() {\n\t%s();\n}\n", aix_shared_initname, (FUNC)) +#define COLLECT_SHARED_FINI_FUNC(STREAM, FUNC) \ + fprintf ((STREAM), "void %s() {\n\t%s();\n}\n", aix_shared_fininame, (FUNC)) + +#if HAVE_AS_REF +/* Issue assembly directives that create a reference to the given DWARF table + identifier label from the current function section. This is defined to + ensure we drag frame tables associated with needed function bodies in + a link with garbage collection activated. */ +#define ASM_OUTPUT_DWARF_TABLE_REF rs6000_aix_asm_output_dwarf_table_ref +#endif + +/* This is the only version of nm that collect2 can work with. */ +#define REAL_NM_FILE_NAME "/usr/ucb/nm" + +#define USER_LABEL_PREFIX "" + +/* Don't turn -B into -L if the argument specifies a relative file name. */ +#define RELATIVE_PREFIX_NOT_LINKDIR + +/* Because of the above, we must have gcc search itself to find libgcc.a. 
*/ +#define LINK_LIBGCC_SPECIAL_1 + +/* Names to predefine in the preprocessor for this target machine. */ +#define TARGET_OS_AIX_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("_IBMR2"); \ + builtin_define ("_POWER"); \ + builtin_define ("__unix__"); \ + builtin_define ("_AIX"); \ + builtin_define ("_AIX32"); \ + builtin_define ("_AIX41"); \ + builtin_define ("_LONG_LONG"); \ + if (TARGET_LONG_DOUBLE_128) \ + builtin_define ("__LONGDOUBLE128"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=aix"); \ + if (TARGET_64BIT) \ + { \ + builtin_define ("__PPC__"); \ + builtin_define ("__PPC64__"); \ + builtin_define ("__powerpc__"); \ + builtin_define ("__powerpc64__"); \ + builtin_assert ("cpu=powerpc64"); \ + builtin_assert ("machine=powerpc64"); \ + } \ + else \ + { \ + builtin_define ("__PPC__"); \ + builtin_define ("__powerpc__"); \ + builtin_assert ("cpu=powerpc"); \ + builtin_assert ("machine=powerpc"); \ + } \ + } \ + while (0) + +/* Define appropriate architecture macros for preprocessor depending on + target switches. */ + +#define CPP_SPEC "%{posix: -D_POSIX_SOURCE}\ + %{ansi: -D_ANSI_C_SOURCE}" + +#define CC1_SPEC "%(cc1_cpu)" + +#undef ASM_DEFAULT_SPEC +#define ASM_DEFAULT_SPEC "" + +/* Tell the assembler to assume that all undefined names are external. + + Don't do this until the fixed IBM assembler is more generally available. + When this becomes permanently defined, the ASM_OUTPUT_EXTERNAL, + ASM_OUTPUT_EXTERNAL_LIBCALL, and RS6000_OUTPUT_BASENAME macros will no + longer be needed. Also, the extern declaration of mcount in + rs6000_xcoff_file_start will no longer be needed. */ + +/* #define ASM_SPEC "-u %(asm_cpu)" */ + +/* Default location of syscalls.exp under AIX */ +#define LINK_SYSCALLS_SPEC "-bI:%R/lib/syscalls.exp" + +/* Default location of libg.exp under AIX */ +#define LINK_LIBG_SPEC "-bexport:%R/usr/lib/libg.exp" + +/* Define the options for the binder: Start text at 512, align all segments + to 512 bytes, and warn if there is text relocation. + + The -bhalt:4 option supposedly changes the level at which ld will abort, + but it also suppresses warnings about multiply defined symbols and is + used by the AIX cc command. So we use it here. + + -bnodelcsect undoes a poor choice of default relating to multiply-defined + csects. See AIX documentation for more information about this. + + -bM:SRE tells the linker that the output file is Shared REusable. Note + that to actually build a shared library you will also need to specify an + export list with the -Wl,-bE option. */ + +#define LINK_SPEC "-T512 -H512 %{!r:-btextro} -bhalt:4 -bnodelcsect\ +%{static:-bnso %(link_syscalls) } \ +%{!shared:%{g*: %(link_libg) }} %{shared:-bM:SRE}" + +/* Profiled library versions are used by linking with special directories. */ +#define LIB_SPEC "%{pg:-L%R/lib/profiled -L%R/usr/lib/profiled}\ +%{p:-L%R/lib/profiled -L%R/usr/lib/profiled} %{!shared:%{g*:-lg}} -lc" + +/* Static linking with shared libstdc++ requires libsupc++ as well. */ +#define LIBSTDCXX_STATIC "supc++" + +/* This now supports a natural alignment mode. */ +/* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints. */ +#define ADJUST_FIELD_ALIGN(FIELD, TYPE, COMPUTED) \ + ((TARGET_ALIGN_NATURAL == 0 \ + && TYPE_MODE (strip_array_types (TYPE)) == DFmode) \ + ? MIN ((COMPUTED), 32) \ + : (COMPUTED)) + +/* AIX increases natural record alignment to doubleword if the first + field is an FP double while the FP fields remain word aligned. 
*/ +#define ROUND_TYPE_ALIGN(STRUCT, COMPUTED, SPECIFIED) \ + ((TREE_CODE (STRUCT) == RECORD_TYPE \ + || TREE_CODE (STRUCT) == UNION_TYPE \ + || TREE_CODE (STRUCT) == QUAL_UNION_TYPE) \ + && TARGET_ALIGN_NATURAL == 0 \ + ? rs6000_special_round_type_align (STRUCT, COMPUTED, SPECIFIED) \ + : MAX ((COMPUTED), (SPECIFIED))) + +/* The AIX ABI isn't explicit on whether aggregates smaller than a + word/doubleword should be padded upward or downward. One could + reasonably assume that they follow the normal rules for structure + layout treating the parameter area as any other block of memory, + then map the reg param area to registers, i.e., pad upward, which + is the way IBM Compilers for AIX behave. + Setting both of the following defines results in this behavior. */ +#define AGGREGATE_PADDING_FIXED 1 +#define AGGREGATES_PAD_UPWARD_ALWAYS 1 + +/* Specify padding for the last element of a block move between + registers and memory. FIRST is nonzero if this is the only + element. */ +#define BLOCK_REG_PADDING(MODE, TYPE, FIRST) \ + (!(FIRST) ? upward : FUNCTION_ARG_PADDING (MODE, TYPE)) + +/* Indicate that jump tables go in the text section. */ + +#define JUMP_TABLES_IN_TEXT_SECTION 1 + +/* Define any extra SPECS that the compiler needs to generate. */ +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "link_syscalls", LINK_SYSCALLS_SPEC }, \ + { "link_libg", LINK_LIBG_SPEC } + +#define PROFILE_HOOK(LABEL) output_profile_hook (LABEL) + +/* No version of AIX fully supports AltiVec or 64-bit instructions in + 32-bit mode. */ +#define OS_MISSING_POWERPC64 1 +#define OS_MISSING_ALTIVEC 1 + +/* WINT_TYPE */ +#define WINT_TYPE "int" + +/* Static stack checking is supported by means of probes. */ +#define STACK_CHECK_STATIC_BUILTIN 1 + +/* Use standard DWARF numbering for DWARF debugging information. */ +#define RS6000_USE_DWARF_NUMBERING + diff --git a/gcc/config/powerpcspe/aix43.h b/gcc/config/powerpcspe/aix43.h new file mode 100644 index 000000000000..d61956d3b287 --- /dev/null +++ b/gcc/config/powerpcspe/aix43.h @@ -0,0 +1,167 @@ +/* Definitions of target machine for GNU compiler, + for IBM RS/6000 POWER running AIX version 4.3. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + Contributed by David Edelsohn (edelsohn@gnu.org). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* The macro SUBTARGET_OVERRIDE_OPTIONS is provided for subtargets, to + get control in TARGET_OPTION_OVERRIDE. */ + +#define SUBTARGET_OVERRIDE_OPTIONS \ +do { \ + if (TARGET_64BIT && ! TARGET_POWERPC64) \ + { \ + rs6000_isa_flags |= OPTION_MASK_POWERPC64; \ + warning (0, "-maix64 requires PowerPC64 architecture remain enabled"); \ + } \ + if (TARGET_SOFT_FLOAT && TARGET_LONG_DOUBLE_128) \ + { \ + rs6000_long_double_type_size = 64; \ + if (global_options_set.x_rs6000_long_double_type_size) \ + warning (0, "soft-float and long-double-128 are incompatible"); \ + } \ + if (TARGET_POWERPC64 && ! 
TARGET_64BIT) \ + { \ + error ("-maix64 required: 64-bit computation with 32-bit addressing not yet supported"); \ + } \ +} while (0); + +#undef ASM_SPEC +#define ASM_SPEC "-u %{maix64:-a64 %{!mcpu*:-mppc64}} %(asm_cpu)" + +/* Common ASM definitions used by ASM_SPEC amongst the various targets + for handling -mcpu=xxx switches. */ +#undef ASM_CPU_SPEC +#define ASM_CPU_SPEC \ +"%{!mcpu*: %{!maix64: \ + %{!mpowerpc64: %(asm_default)} \ + %{mpowerpc64: -mppc64}}} \ +%{mcpu=power3: -m620} \ +%{mcpu=power4: -m620} \ +%{mcpu=powerpc: -mppc} \ +%{mcpu=rs64a: -mppc} \ +%{mcpu=601: -m601} \ +%{mcpu=602: -mppc} \ +%{mcpu=603: -m603} \ +%{mcpu=603e: -m603} \ +%{mcpu=604: -m604} \ +%{mcpu=604e: -m604} \ +%{mcpu=620: -m620} \ +%{mcpu=630: -m620}" + +#undef ASM_DEFAULT_SPEC +#define ASM_DEFAULT_SPEC "-mppc" + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("_AIX43"); \ + TARGET_OS_AIX_CPP_BUILTINS (); \ + } \ + while (0) + +#undef CPP_SPEC +#define CPP_SPEC "%{posix: -D_POSIX_SOURCE}\ + %{ansi: -D_ANSI_C_SOURCE}\ + %{maix64: -D__64BIT__}\ + %{mpe: -I%R/usr/lpp/ppe.poe/include}\ + %{pthread: -D_THREAD_SAFE}" + +/* The GNU C++ standard library requires that these macros be + defined. */ +#undef CPLUSPLUS_CPP_SPEC +#define CPLUSPLUS_CPP_SPEC \ + "-D_ALL_SOURCE \ + %{maix64: -D__64BIT__} \ + %{mpe: -I%R/usr/lpp/ppe.poe/include} \ + %{pthread: -D_THREAD_SAFE}" + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT 0 + +#undef PROCESSOR_DEFAULT +#define PROCESSOR_DEFAULT PROCESSOR_PPC604e + +/* AIX does not support Altivec. */ +#undef TARGET_ALTIVEC +#define TARGET_ALTIVEC 0 +#undef TARGET_ALTIVEC_ABI +#define TARGET_ALTIVEC_ABI 0 +#undef TARGET_EXTRA_BUILTINS +#define TARGET_EXTRA_BUILTINS 0 + + +/* Define this macro as a C expression for the initializer of an + array of string to tell the driver program which options are + defaults for this target and thus do not need to be handled + specially when using `MULTILIB_OPTIONS'. + + Do not define this macro if `MULTILIB_OPTIONS' is not defined in + the target makefile fragment or if none of the options listed in + `MULTILIB_OPTIONS' are set by default. *Note Target Fragment::. */ + +#undef MULTILIB_DEFAULTS +#define MULTILIB_DEFAULTS { "mcpu=common" } + +#undef LIB_SPEC +#define LIB_SPEC "%{pg:-L%R/lib/profiled -L%R/usr/lib/profiled}\ + %{p:-L%R/lib/profiled -L%R/usr/lib/profiled}\ + %{!maix64:%{!shared:%{g*:-lg}}}\ + %{mpe:-L%R/usr/lpp/ppe.poe/lib -lmpi -lvtd}\ + %{pthread:-L%R/usr/lib/threads -lpthreads -lc_r %R/usr/lib/libc.a}\ + %{!pthread:-lc}" + +#undef LINK_SPEC +#define LINK_SPEC "-bpT:0x10000000 -bpD:0x20000000 %{!r:-btextro} -bnodelcsect\ + %{static:-bnso %(link_syscalls) } %{shared:-bM:SRE %{!e:-bnoentry}}\ + %{!maix64:%{!shared:%{g*: %(link_libg) }}} %{maix64:-b64}\ + %{mpe:-binitfini:poe_remote_main}" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "%{!shared:\ + %{maix64:%{pg:gcrt0_64%O%s}%{!pg:%{p:mcrt0_64%O%s}%{!p:crt0_64%O%s}}}\ + %{!maix64:\ + %{pthread:%{pg:gcrt0_r%O%s}%{!pg:%{p:mcrt0_r%O%s}%{!p:crt0_r%O%s}}}\ + %{!pthread:%{pg:gcrt0%O%s}%{!pg:%{p:mcrt0%O%s}%{!p:crt0%O%s}}}}}" + +/* AIX 4.3 typedefs ptrdiff_t as "long" while earlier releases used "int". */ + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "long int" + +/* AIX 4.2 and above provides initialization and finalization function + support from linker command line. 
*/ +#undef HAS_INIT_SECTION +#define HAS_INIT_SECTION + +#undef LD_INIT_SWITCH +#define LD_INIT_SWITCH "-binitfini" + +/* The IBM AIX 4.x assembler doesn't support forward references in + .set directives. We handle this by deferring the output of .set + directives to the end of the compilation unit. */ +#define TARGET_DEFERRED_OUTPUT_DEFS(DECL,TARGET) true + +/* This target uses the aix64.opt file. */ +#define TARGET_USES_AIX64_OPT 1 + +#define TARGET_AIX_VERSION 43 + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/powerpcspe/aix51.h b/gcc/config/powerpcspe/aix51.h new file mode 100644 index 000000000000..af7e38e1775e --- /dev/null +++ b/gcc/config/powerpcspe/aix51.h @@ -0,0 +1,169 @@ +/* Definitions of target machine for GNU compiler, + for IBM RS/6000 POWER running AIX V5. + Copyright (C) 2001-2017 Free Software Foundation, Inc. + Contributed by David Edelsohn (edelsohn@gnu.org). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* The macro SUBTARGET_OVERRIDE_OPTIONS is provided for subtargets, to + get control in TARGET_OPTION_OVERRIDE. */ + +#define SUBTARGET_OVERRIDE_OPTIONS \ +do { \ + if (TARGET_64BIT && ! TARGET_POWERPC64) \ + { \ + rs6000_isa_flags |= OPTION_MASK_POWERPC64; \ + warning (0, "-maix64 requires PowerPC64 architecture remain enabled"); \ + } \ + if (TARGET_POWERPC64 && ! TARGET_64BIT) \ + { \ + error ("-maix64 required: 64-bit computation with 32-bit addressing not yet supported"); \ + } \ +} while (0); + +#undef ASM_SPEC +#define ASM_SPEC "-u %{maix64:-a64 %{!mcpu*:-mppc64}} %(asm_cpu)" + +/* Common ASM definitions used by ASM_SPEC amongst the various targets + for handling -mcpu=xxx switches. */ +#undef ASM_CPU_SPEC +#define ASM_CPU_SPEC \ +"%{!mcpu*: %{!maix64: \ + %{!mpowerpc64: %(asm_default)} \ + %{mpowerpc64: -mppc64}}} \ +%{mcpu=power3: -m620} \ +%{mcpu=power4: -m620} \ +%{mcpu=powerpc: -mppc} \ +%{mcpu=rs64a: -mppc} \ +%{mcpu=601: -m601} \ +%{mcpu=602: -mppc} \ +%{mcpu=603: -m603} \ +%{mcpu=603e: -m603} \ +%{mcpu=604: -m604} \ +%{mcpu=604e: -m604} \ +%{mcpu=620: -m620} \ +%{mcpu=630: -m620} \ +%{mcpu=970: -m620} \ +%{mcpu=G5: -m620}" + +#undef ASM_DEFAULT_SPEC +#define ASM_DEFAULT_SPEC "-mppc" + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("_AIX43"); \ + builtin_define ("_AIX51"); \ + TARGET_OS_AIX_CPP_BUILTINS (); \ + } \ + while (0) + +#undef CPP_SPEC +#define CPP_SPEC "%{posix: -D_POSIX_SOURCE} \ + %{ansi: -D_ANSI_C_SOURCE} \ + %{maix64: -D__64BIT__} \ + %{mpe: -I%R/usr/lpp/ppe.poe/include} \ + %{pthread: -D_THREAD_SAFE}" + +/* The GNU C++ standard library requires that these macros be + defined. 
*/ +#undef CPLUSPLUS_CPP_SPEC +#define CPLUSPLUS_CPP_SPEC \ + "-D_ALL_SOURCE \ + %{maix64: -D__64BIT__} \ + %{mpe: -I%R/usr/lpp/ppe.poe/include} \ + %{pthread: -D_THREAD_SAFE}" + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT 0 + +#undef PROCESSOR_DEFAULT +#define PROCESSOR_DEFAULT PROCESSOR_PPC604e + +/* AIX does not support Altivec. */ +#undef TARGET_ALTIVEC +#define TARGET_ALTIVEC 0 +#undef TARGET_ALTIVEC_ABI +#define TARGET_ALTIVEC_ABI 0 +#undef TARGET_EXTRA_BUILTINS +#define TARGET_EXTRA_BUILTINS 0 + + +/* Define this macro as a C expression for the initializer of an + array of string to tell the driver program which options are + defaults for this target and thus do not need to be handled + specially when using `MULTILIB_OPTIONS'. + + Do not define this macro if `MULTILIB_OPTIONS' is not defined in + the target makefile fragment or if none of the options listed in + `MULTILIB_OPTIONS' are set by default. *Note Target Fragment::. */ + +#undef MULTILIB_DEFAULTS +#define MULTILIB_DEFAULTS { "mcpu=common" } + +#undef LIB_SPEC +#define LIB_SPEC "%{pg:-L%R/lib/profiled -L%R/usr/lib/profiled}\ + %{p:-L%R/lib/profiled -L%R/usr/lib/profiled}\ + %{!maix64:%{!shared:%{g*:-lg}}}\ + %{mpe:-L%R/usr/lpp/ppe.poe/lib -lmpi -lvtd}\ + %{pthread:-lpthreads} -lc" + +#undef LINK_SPEC +#define LINK_SPEC "-bpT:0x10000000 -bpD:0x20000000 %{!r:-btextro}\ + %{static:-bnso %(link_syscalls) } %{shared:-bM:SRE %{!e:-bnoentry}}\ + %{!maix64:%{!shared:%{g*: %(link_libg) }}} %{maix64:-b64}\ + %{mpe:-binitfini:poe_remote_main}" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "%{!shared:\ + %{maix64:%{pg:gcrt0_64%O%s}%{!pg:%{p:mcrt0_64%O%s}%{!p:crt0_64%O%s}}}\ + %{!maix64:\ + %{pthread:%{pg:gcrt0_r%O%s}%{!pg:%{p:mcrt0_r%O%s}%{!p:crt0_r%O%s}}}\ + %{!pthread:%{pg:gcrt0%O%s}%{!pg:%{p:mcrt0%O%s}%{!p:crt0%O%s}}}}}" + +/* AIX V5 typedefs ptrdiff_t as "long" while earlier releases used "int". */ + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "long int" + +/* Type used for wchar_t, as a string used in a declaration. */ +#undef WCHAR_TYPE +#define WCHAR_TYPE (!TARGET_64BIT ? "short unsigned int" : "unsigned int") + +/* Width of wchar_t in bits. */ +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE (!TARGET_64BIT ? 16 : 32) + +/* AIX 4.2 and above provides initialization and finalization function + support from linker command line. */ +#undef HAS_INIT_SECTION +#define HAS_INIT_SECTION + +#undef LD_INIT_SWITCH +#define LD_INIT_SWITCH "-binitfini" + +/* This target uses the aix64.opt file. */ +#define TARGET_USES_AIX64_OPT 1 + +/* This target defines SUPPORTS_WEAK and TARGET_ASM_NAMED_SECTION, + but does not have crtbegin/end. */ + +#define TARGET_AIX_VERSION 51 + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc/config/powerpcspe/aix52.h b/gcc/config/powerpcspe/aix52.h new file mode 100644 index 000000000000..35d2286e5b3d --- /dev/null +++ b/gcc/config/powerpcspe/aix52.h @@ -0,0 +1,179 @@ +/* Definitions of target machine for GNU compiler, + for IBM RS/6000 POWER running AIX V5.2. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by David Edelsohn (edelsohn@gnu.org). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. 
+ + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* The macro SUBTARGET_OVERRIDE_OPTIONS is provided for subtargets, to + get control in TARGET_OPTION_OVERRIDE. */ + +#define SUBTARGET_OVERRIDE_OPTIONS \ +do { \ + if (TARGET_64BIT && ! TARGET_POWERPC64) \ + { \ + rs6000_isa_flags |= OPTION_MASK_POWERPC64; \ + warning (0, "-maix64 requires PowerPC64 architecture remain enabled"); \ + } \ + if (TARGET_SOFT_FLOAT && TARGET_LONG_DOUBLE_128) \ + { \ + rs6000_long_double_type_size = 64; \ + if (global_options_set.x_rs6000_long_double_type_size) \ + warning (0, "soft-float and long-double-128 are incompatible"); \ + } \ + if (TARGET_POWERPC64 && ! TARGET_64BIT) \ + { \ + error ("-maix64 required: 64-bit computation with 32-bit addressing not yet supported"); \ + } \ +} while (0); + +#undef ASM_SPEC +#define ASM_SPEC "-u %{maix64:-a64 %{!mcpu*:-mppc64}} %(asm_cpu)" + +/* Common ASM definitions used by ASM_SPEC amongst the various targets + for handling -mcpu=xxx switches. */ +#undef ASM_CPU_SPEC +#define ASM_CPU_SPEC \ +"%{!mcpu*: %{!maix64: \ + %{mpowerpc64: -mppc64} \ + %{!mpowerpc64: %(asm_default)}}} \ +%{mcpu=power3: -m620} \ +%{mcpu=power4: -m620} \ +%{mcpu=power5: -m620} \ +%{mcpu=power5+: -m620} \ +%{mcpu=power6: -m620} \ +%{mcpu=power6x: -m620} \ +%{mcpu=powerpc: -mppc} \ +%{mcpu=rs64a: -mppc} \ +%{mcpu=603: -m603} \ +%{mcpu=603e: -m603} \ +%{mcpu=604: -m604} \ +%{mcpu=604e: -m604} \ +%{mcpu=620: -m620} \ +%{mcpu=630: -m620} \ +%{mcpu=970: -m620} \ +%{mcpu=G5: -m620}" + +#undef ASM_DEFAULT_SPEC +#define ASM_DEFAULT_SPEC "-mppc" + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("_AIX43"); \ + builtin_define ("_AIX51"); \ + builtin_define ("_AIX52"); \ + TARGET_OS_AIX_CPP_BUILTINS (); \ + } \ + while (0) + +#undef CPP_SPEC +#define CPP_SPEC "%{posix: -D_POSIX_SOURCE} \ + %{ansi: -D_ANSI_C_SOURCE} \ + %{maix64: -D__64BIT__} \ + %{mpe: -I%R/usr/lpp/ppe.poe/include} \ + %{pthread: -D_THREAD_SAFE}" + +/* The GNU C++ standard library requires that these macros be + defined. Synchronize with libstdc++ os_defines.h. */ +#undef CPLUSPLUS_CPP_SPEC +#define CPLUSPLUS_CPP_SPEC \ + "-D_ALL_SOURCE \ + %{maix64: -D__64BIT__} \ + %{mpe: -I%R/usr/lpp/ppe.poe/include} \ + %{pthread: -D_THREAD_SAFE}" + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT 0 + +#undef PROCESSOR_DEFAULT +#define PROCESSOR_DEFAULT PROCESSOR_POWER4 +#undef PROCESSOR_DEFAULT64 +#define PROCESSOR_DEFAULT64 PROCESSOR_POWER4 + +/* AIX does not support Altivec. */ +#undef TARGET_ALTIVEC +#define TARGET_ALTIVEC 0 +#undef TARGET_ALTIVEC_ABI +#define TARGET_ALTIVEC_ABI 0 +#undef TARGET_EXTRA_BUILTINS +#define TARGET_EXTRA_BUILTINS 0 + +/* Define this macro as a C expression for the initializer of an + array of string to tell the driver program which options are + defaults for this target and thus do not need to be handled + specially when using `MULTILIB_OPTIONS'. + + Do not define this macro if `MULTILIB_OPTIONS' is not defined in + the target makefile fragment or if none of the options listed in + `MULTILIB_OPTIONS' are set by default. *Note Target Fragment::. 
*/ + +#undef MULTILIB_DEFAULTS + +#undef LIB_SPEC +#define LIB_SPEC "%{pg:-L%R/lib/profiled -L%R/usr/lib/profiled}\ + %{p:-L%R/lib/profiled -L%R/usr/lib/profiled}\ + %{!maix64:%{!shared:%{g*:-lg}}}\ + %{mpe:-L%R/usr/lpp/ppe.poe/lib -lmpi -lvtd}\ + %{pthread:-lpthreads} -lc" + +#undef LINK_SPEC +#define LINK_SPEC "-bpT:0x10000000 -bpD:0x20000000 %{!r:-btextro}\ + %{static:-bnso %(link_syscalls) } %{shared:-bM:SRE %{!e:-bnoentry}}\ + %{!maix64:%{!shared:%{g*: %(link_libg) }}} %{maix64:-b64}\ + %{mpe:-binitfini:poe_remote_main}" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "%{!shared:\ + %{maix64:%{pg:gcrt0_64%O%s}%{!pg:%{p:mcrt0_64%O%s}%{!p:crt0_64%O%s}}}\ + %{!maix64:\ + %{pthread:%{pg:gcrt0_r%O%s}%{!pg:%{p:mcrt0_r%O%s}%{!p:crt0_r%O%s}}}\ + %{!pthread:%{pg:gcrt0%O%s}%{!pg:%{p:mcrt0%O%s}%{!p:crt0%O%s}}}}}" + +/* AIX V5 typedefs ptrdiff_t as "long" while earlier releases used "int". */ + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "long int" + +/* Type used for wchar_t, as a string used in a declaration. */ +#undef WCHAR_TYPE +#define WCHAR_TYPE (!TARGET_64BIT ? "short unsigned int" : "unsigned int") + +/* Width of wchar_t in bits. */ +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE (!TARGET_64BIT ? 16 : 32) + +/* AIX 4.2 and above provides initialization and finalization function + support from linker command line. */ +#undef HAS_INIT_SECTION +#define HAS_INIT_SECTION + +#undef LD_INIT_SWITCH +#define LD_INIT_SWITCH "-binitfini" + +#ifndef _AIX52 +extern long long int atoll(const char *); +#endif + +/* This target uses the aix64.opt file. */ +#define TARGET_USES_AIX64_OPT 1 + +/* This target defines SUPPORTS_WEAK and TARGET_ASM_NAMED_SECTION, + but does not have crtbegin/end. */ + +#define TARGET_AIX_VERSION 52 diff --git a/gcc/config/powerpcspe/aix53.h b/gcc/config/powerpcspe/aix53.h new file mode 100644 index 000000000000..a1fbd834d479 --- /dev/null +++ b/gcc/config/powerpcspe/aix53.h @@ -0,0 +1,180 @@ +/* Definitions of target machine for GNU compiler, + for IBM RS/6000 POWER running AIX V5.3. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by David Edelsohn (edelsohn@gnu.org). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* The macro SUBTARGET_OVERRIDE_OPTIONS is provided for subtargets, to + get control in TARGET_OPTION_OVERRIDE. */ + +#define SUBTARGET_OVERRIDE_OPTIONS \ +do { \ + if (TARGET_64BIT && ! TARGET_POWERPC64) \ + { \ + rs6000_isa_flags |= OPTION_MASK_POWERPC64; \ + warning (0, "-maix64 requires PowerPC64 architecture remain enabled"); \ + } \ + if (TARGET_SOFT_FLOAT && TARGET_LONG_DOUBLE_128) \ + { \ + rs6000_long_double_type_size = 64; \ + if (global_options_set.x_rs6000_long_double_type_size) \ + warning (0, "soft-float and long-double-128 are incompatible"); \ + } \ + if (TARGET_POWERPC64 && ! 
TARGET_64BIT) \ + { \ + error ("-maix64 required: 64-bit computation with 32-bit addressing not yet supported"); \ + } \ +} while (0); + +#undef ASM_SPEC +#define ASM_SPEC "-u %{maix64:-a64 %{!mcpu*:-mppc64}} %(asm_cpu)" + +/* Common ASM definitions used by ASM_SPEC amongst the various targets for + handling -mcpu=xxx switches. There is a parallel list in driver-rs6000.c to + provide the default assembler options if the user uses -mcpu=native, so if + you make changes here, make them there also. */ +#undef ASM_CPU_SPEC +#define ASM_CPU_SPEC \ +"%{!mcpu*: %{!maix64: \ + %{mpowerpc64: -mppc64} \ + %{maltivec: -m970} \ + %{!maltivec: %{!mpowerpc64: %(asm_default)}}}} \ +%{mcpu=native: %(asm_cpu_native)} \ +%{mcpu=power3: -m620} \ +%{mcpu=power4: -mpwr4} \ +%{mcpu=power5: -mpwr5} \ +%{mcpu=power5+: -mpwr5x} \ +%{mcpu=power6: -mpwr6} \ +%{mcpu=power6x: -mpwr6} \ +%{mcpu=power7: -mpwr7} \ +%{mcpu=power8: -mpwr8} \ +%{mcpu=power9: -mpwr9} \ +%{mcpu=powerpc: -mppc} \ +%{mcpu=rs64a: -mppc} \ +%{mcpu=603: -m603} \ +%{mcpu=603e: -m603} \ +%{mcpu=604: -m604} \ +%{mcpu=604e: -m604} \ +%{mcpu=620: -m620} \ +%{mcpu=630: -m620} \ +%{mcpu=970: -m970} \ +%{mcpu=G5: -m970}" + +#undef ASM_DEFAULT_SPEC +#define ASM_DEFAULT_SPEC "-mppc" + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("_AIX43"); \ + builtin_define ("_AIX51"); \ + builtin_define ("_AIX52"); \ + builtin_define ("_AIX53"); \ + TARGET_OS_AIX_CPP_BUILTINS (); \ + } \ + while (0) + +#undef CPP_SPEC +#define CPP_SPEC "%{posix: -D_POSIX_SOURCE} \ + %{ansi: -D_ANSI_C_SOURCE} \ + %{maix64: -D__64BIT__} \ + %{mpe: -I%R/usr/lpp/ppe.poe/include} \ + %{pthread: -D_THREAD_SAFE}" + +/* The GNU C++ standard library requires that these macros be + defined. Synchronize with libstdc++ os_defines.h. */ +#undef CPLUSPLUS_CPP_SPEC +#define CPLUSPLUS_CPP_SPEC \ + "-D_ALL_SOURCE \ + %{maix64: -D__64BIT__} \ + %{mpe: -I%R/usr/lpp/ppe.poe/include} \ + %{pthread: -D_THREAD_SAFE}" + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT 0 + +#undef PROCESSOR_DEFAULT +#define PROCESSOR_DEFAULT PROCESSOR_POWER5 +#undef PROCESSOR_DEFAULT64 +#define PROCESSOR_DEFAULT64 PROCESSOR_POWER5 + +/* Define this macro as a C expression for the initializer of an + array of string to tell the driver program which options are + defaults for this target and thus do not need to be handled + specially when using `MULTILIB_OPTIONS'. + + Do not define this macro if `MULTILIB_OPTIONS' is not defined in + the target makefile fragment or if none of the options listed in + `MULTILIB_OPTIONS' are set by default. *Note Target Fragment::. 
*/ + +#undef MULTILIB_DEFAULTS + +#undef LIB_SPEC +#define LIB_SPEC "%{pg:-L%R/lib/profiled -L%R/usr/lib/profiled}\ + %{p:-L%R/lib/profiled -L%R/usr/lib/profiled}\ + %{!maix64:%{!shared:%{g*:-lg}}}\ + %{fprofile-arcs|fprofile-generate*|coverage:-lpthreads}\ + %{mpe:-L%R/usr/lpp/ppe.poe/lib -lmpi -lvtd}\ + %{pthread:-lpthreads} -lc" + +#undef LINK_SPEC +#define LINK_SPEC "-bpT:0x10000000 -bpD:0x20000000 %{!r:-btextro}\ + %{static:-bnso %(link_syscalls) } %{shared:-bM:SRE %{!e:-bnoentry}}\ + %{!maix64:%{!shared:%{g*: %(link_libg) }}} %{maix64:-b64}\ + %{mpe:-binitfini:poe_remote_main}" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "%{!shared:\ + %{maix64:%{pg:gcrt0_64%O%s}%{!pg:%{p:mcrt0_64%O%s}%{!p:crt0_64%O%s}}}\ + %{!maix64:\ + %{pthread:%{pg:gcrt0_r%O%s}%{!pg:%{p:mcrt0_r%O%s}%{!p:crt0_r%O%s}}}\ + %{!pthread:%{pg:gcrt0%O%s}%{!pg:%{p:mcrt0%O%s}%{!p:crt0%O%s}}}}}" + +/* AIX V5 typedefs ptrdiff_t as "long" while earlier releases used "int". */ + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "long int" + +/* Type used for wchar_t, as a string used in a declaration. */ +#undef WCHAR_TYPE +#define WCHAR_TYPE (!TARGET_64BIT ? "short unsigned int" : "unsigned int") + +/* Width of wchar_t in bits. */ +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE (!TARGET_64BIT ? 16 : 32) + +/* AIX 4.2 and above provides initialization and finalization function + support from linker command line. */ +#undef HAS_INIT_SECTION +#define HAS_INIT_SECTION + +#undef LD_INIT_SWITCH +#define LD_INIT_SWITCH "-binitfini" + +#ifndef _AIX52 +extern long long int atoll(const char *); +#endif + +/* This target uses the aix64.opt file. */ +#define TARGET_USES_AIX64_OPT 1 + +/* This target defines SUPPORTS_WEAK and TARGET_ASM_NAMED_SECTION, + but does not have crtbegin/end. */ + +#define TARGET_AIX_VERSION 53 diff --git a/gcc/config/powerpcspe/aix61.h b/gcc/config/powerpcspe/aix61.h new file mode 100644 index 000000000000..0b6150570876 --- /dev/null +++ b/gcc/config/powerpcspe/aix61.h @@ -0,0 +1,213 @@ +/* Definitions of target machine for GNU compiler, + for IBM RS/6000 POWER running AIX V6.1. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by David Edelsohn (edelsohn@gnu.org). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* The macro SUBTARGET_OVERRIDE_OPTIONS is provided for subtargets, to + get control in TARGET_OPTION_OVERRIDE. */ + +#define SUBTARGET_OVERRIDE_OPTIONS \ +do { \ + if (TARGET_64BIT && ! TARGET_POWERPC64) \ + { \ + rs6000_isa_flags |= OPTION_MASK_POWERPC64; \ + warning (0, "-maix64 requires PowerPC64 architecture remain enabled"); \ + } \ + if (TARGET_SOFT_FLOAT && TARGET_LONG_DOUBLE_128) \ + { \ + rs6000_long_double_type_size = 64; \ + if (global_options_set.x_rs6000_long_double_type_size) \ + warning (0, "soft-float and long-double-128 are incompatible"); \ + } \ + if (TARGET_POWERPC64 && ! 
TARGET_64BIT) \ + { \ + error ("-maix64 required: 64-bit computation with 32-bit addressing not yet supported"); \ + } \ + if ((rs6000_isa_flags_explicit \ + & OPTION_MASK_MINIMAL_TOC) != 0) \ + { \ + if (global_options_set.x_rs6000_current_cmodel \ + && rs6000_current_cmodel != CMODEL_SMALL) \ + error ("-mcmodel incompatible with other toc options"); \ + SET_CMODEL (CMODEL_SMALL); \ + } \ + if (rs6000_current_cmodel != CMODEL_SMALL) \ + { \ + TARGET_NO_FP_IN_TOC = 0; \ + TARGET_NO_SUM_IN_TOC = 0; \ + } \ + if (rs6000_current_cmodel == CMODEL_MEDIUM) \ + { \ + rs6000_current_cmodel = CMODEL_LARGE; \ + } \ +} while (0); + +#undef ASM_SPEC +#define ASM_SPEC "-u %{maix64:-a64 %{!mcpu*:-mppc64}} %(asm_cpu)" + +/* Common ASM definitions used by ASM_SPEC amongst the various targets for + handling -mcpu=xxx switches. There is a parallel list in driver-rs6000.c to + provide the default assembler options if the user uses -mcpu=native, so if + you make changes here, make them there also. */ +#undef ASM_CPU_SPEC +#define ASM_CPU_SPEC \ +"%{!mcpu*: %{!maix64: \ + %{mpowerpc64: -mppc64} \ + %{maltivec: -m970} \ + %{!maltivec: %{!mpowerpc64: %(asm_default)}}}} \ +%{mcpu=native: %(asm_cpu_native)} \ +%{mcpu=power3: -m620} \ +%{mcpu=power4: -mpwr4} \ +%{mcpu=power5: -mpwr5} \ +%{mcpu=power5+: -mpwr5x} \ +%{mcpu=power6: -mpwr6} \ +%{mcpu=power6x: -mpwr6} \ +%{mcpu=power7: -mpwr7} \ +%{mcpu=power8: -mpwr8} \ +%{mcpu=power9: -mpwr9} \ +%{mcpu=powerpc: -mppc} \ +%{mcpu=rs64a: -mppc} \ +%{mcpu=603: -m603} \ +%{mcpu=603e: -m603} \ +%{mcpu=604: -m604} \ +%{mcpu=604e: -m604} \ +%{mcpu=620: -m620} \ +%{mcpu=630: -m620} \ +%{mcpu=970: -m970} \ +%{mcpu=G5: -m970} \ +%{mvsx: %{!mcpu*: -mpwr6}} \ +-many" + +#undef ASM_DEFAULT_SPEC +#define ASM_DEFAULT_SPEC "-mpwr4" + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("_AIX43"); \ + builtin_define ("_AIX51"); \ + builtin_define ("_AIX52"); \ + builtin_define ("_AIX53"); \ + builtin_define ("_AIX61"); \ + TARGET_OS_AIX_CPP_BUILTINS (); \ + } \ + while (0) + +#undef CPP_SPEC +#define CPP_SPEC "%{posix: -D_POSIX_SOURCE} \ + %{ansi: -D_ANSI_C_SOURCE} \ + %{maix64: -D__64BIT__} \ + %{mpe: -I%R/usr/lpp/ppe.poe/include} \ + %{pthread: -D_THREAD_SAFE}" + +/* The GNU C++ standard library requires that these macros be + defined. Synchronize with libstdc++ os_defines.h. */ +#undef CPLUSPLUS_CPP_SPEC +#define CPLUSPLUS_CPP_SPEC \ + "-D_ALL_SOURCE -D__COMPATMATH__ \ + %{maix64: -D__64BIT__} \ + %{mpe: -I%R/usr/lpp/ppe.poe/include} \ + %{pthread: -D_THREAD_SAFE}" + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_PPC_GPOPT | MASK_PPC_GFXOPT | MASK_MFCRF) + +#undef PROCESSOR_DEFAULT +#define PROCESSOR_DEFAULT PROCESSOR_POWER7 +#undef PROCESSOR_DEFAULT64 +#define PROCESSOR_DEFAULT64 PROCESSOR_POWER7 + +/* AIX 6.1 kernel and assembler have necessary support for Altivec and VSX. */ +#undef OS_MISSING_ALTIVEC + +/* Define this macro as a C expression for the initializer of an + array of string to tell the driver program which options are + defaults for this target and thus do not need to be handled + specially when using `MULTILIB_OPTIONS'. + + Do not define this macro if `MULTILIB_OPTIONS' is not defined in + the target makefile fragment or if none of the options listed in + `MULTILIB_OPTIONS' are set by default. *Note Target Fragment::. 
*/ + +#undef MULTILIB_DEFAULTS + +#undef LIB_SPEC +#define LIB_SPEC "%{pg:-L%R/lib/profiled -L%R/usr/lib/profiled}\ + %{p:-L%R/lib/profiled -L%R/usr/lib/profiled}\ + %{!maix64:%{!shared:%{g*:-lg}}}\ + %{fprofile-arcs|fprofile-generate*|coverage:-lpthreads}\ + %{mpe:-L%R/usr/lpp/ppe.poe/lib -lmpi -lvtd}\ + %{pthread:-lpthreads} -lc" + +#undef LINK_SPEC +#define LINK_SPEC "-bpT:0x10000000 -bpD:0x20000000 %{!r:-btextro}\ + %{static:-bnso %(link_syscalls) } %{shared:-bM:SRE %{!e:-bnoentry}}\ + %{!maix64:%{!shared:%{g*: %(link_libg) }}} %{maix64:-b64}\ + %{mpe:-binitfini:poe_remote_main}" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "%{!shared:\ + %{maix64:%{pg:gcrt0_64%O%s}%{!pg:%{p:mcrt0_64%O%s}%{!p:crt0_64%O%s}}}\ + %{!maix64:\ + %{pthread:%{pg:gcrt0_r%O%s}%{!pg:%{p:mcrt0_r%O%s}%{!p:crt0_r%O%s}}}\ + %{!pthread:%{pg:gcrt0%O%s}%{!pg:%{p:mcrt0%O%s}%{!p:crt0%O%s}}}}}\ + %{shared:crtcxa_s%O%s;:crtcxa%O%s} crtdbase%O%s" + +/* AIX V5 typedefs ptrdiff_t as "long" while earlier releases used "int". */ + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "long int" + +/* Type used for wchar_t, as a string used in a declaration. */ +#undef WCHAR_TYPE +#define WCHAR_TYPE (!TARGET_64BIT ? "short unsigned int" : "unsigned int") + +/* Width of wchar_t in bits. */ +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE (!TARGET_64BIT ? 16 : 32) + +/* AIX 4.2 and above provides initialization and finalization function + support from linker command line. */ +#undef HAS_INIT_SECTION +#define HAS_INIT_SECTION + +#undef LD_INIT_SWITCH +#define LD_INIT_SWITCH "-binitfini" + +#ifndef _AIX52 +extern long long int atoll(const char *); +#endif + +/* This target uses the aix64.opt file. */ +#define TARGET_USES_AIX64_OPT 1 + +/* Large TOC Support */ +#ifdef HAVE_LD_LARGE_TOC +#undef TARGET_CMODEL +#define TARGET_CMODEL rs6000_current_cmodel +#define SET_CMODEL(opt) rs6000_current_cmodel = opt +#else +#define SET_CMODEL(opt) do {} while (0) +#endif + +/* This target defines SUPPORTS_WEAK and TARGET_ASM_NAMED_SECTION, + but does not have crtbegin/end. */ + +#define TARGET_AIX_VERSION 61 diff --git a/gcc/config/powerpcspe/aix64.opt b/gcc/config/powerpcspe/aix64.opt new file mode 100644 index 000000000000..ca8d1da6b0e2 --- /dev/null +++ b/gcc/config/powerpcspe/aix64.opt @@ -0,0 +1,55 @@ +; Options for the 64-bit flavor of AIX. +; +; Copyright (C) 2005-2017 Free Software Foundation, Inc. +; Contributed by Aldy Hernandez <aldy@quesejoda.com>. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. + +maix64 +Target Report RejectNegative Negative(maix32) Mask(64BIT) Var(rs6000_isa_flags) +Compile for 64-bit pointers. + +maix32 +Target Report RejectNegative Negative(maix64) InverseMask(64BIT) Var(rs6000_isa_flags) +Compile for 32-bit pointers. + +mcmodel= +Target RejectNegative Joined Enum(rs6000_cmodel) Var(rs6000_current_cmodel) +Select code model. 
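(Illustrative aside, not part of the patch: the aix61.h fragment above arranges several predefines — TARGET_OS_CPP_BUILTINS defines _AIX43 through _AIX61, CPP_SPEC adds __64BIT__ under -maix64 and _THREAD_SAFE under -pthread — and the maix64/maix32 options below control the 64BIT mask. A minimal user-level sketch of consuming those predefines, using only macros shown in the specs above; the program itself is hypothetical.)

/* Hypothetical user program; compiles anywhere, the AIX branches only
   fire when built by a compiler using the aix61.h specs above.  */
#include <stdio.h>

int
main (void)
{
#if defined (_AIX61) && defined (__64BIT__)
  /* -maix64 build: CPP_SPEC added __64BIT__, WCHAR_TYPE_SIZE is 32.  */
  printf ("64-bit AIX 6.1+ build, sizeof (long) = %zu\n", sizeof (long));
#elif defined (_AIX61)
  /* Default 32-bit build: WCHAR_TYPE is "short unsigned int" (16 bits).  */
  printf ("32-bit AIX 6.1+ build\n");
#else
  printf ("not an AIX 6.1+ target\n");
#endif
#ifdef _THREAD_SAFE
  /* Added by the %{pthread: -D_THREAD_SAFE} clause of CPP_SPEC.  */
  printf ("compiled with -pthread\n");
#endif
  return 0;
}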
+ +Enum +Name(rs6000_cmodel) Type(enum rs6000_cmodel) +Known code models (for use with the -mcmodel= option): + +EnumValue +Enum(rs6000_cmodel) String(small) Value(CMODEL_SMALL) + +EnumValue +Enum(rs6000_cmodel) String(medium) Value(CMODEL_MEDIUM) + +EnumValue +Enum(rs6000_cmodel) String(large) Value(CMODEL_LARGE) + +mpe +Target Report RejectNegative Var(internal_nothing_1) Save +Support message passing with the Parallel Environment. + +posix +Driver + +pthread +Driver diff --git a/gcc/config/powerpcspe/aix71.h b/gcc/config/powerpcspe/aix71.h new file mode 100644 index 000000000000..4b986d6a818d --- /dev/null +++ b/gcc/config/powerpcspe/aix71.h @@ -0,0 +1,230 @@ +/* Definitions of target machine for GNU compiler, + for IBM RS/6000 POWER running AIX V7.1. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by David Edelsohn (edelsohn@gnu.org). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* The macro SUBTARGET_OVERRIDE_OPTIONS is provided for subtargets, to + get control in TARGET_OPTION_OVERRIDE. */ + +#define SUBTARGET_OVERRIDE_OPTIONS \ +do { \ + if (TARGET_64BIT && ! TARGET_POWERPC64) \ + { \ + rs6000_isa_flags |= OPTION_MASK_POWERPC64; \ + warning (0, "-maix64 requires PowerPC64 architecture remain enabled"); \ + } \ + if (TARGET_SOFT_FLOAT && TARGET_LONG_DOUBLE_128) \ + { \ + rs6000_long_double_type_size = 64; \ + if (global_options_set.x_rs6000_long_double_type_size) \ + warning (0, "soft-float and long-double-128 are incompatible"); \ + } \ + if (TARGET_POWERPC64 && ! TARGET_64BIT) \ + { \ + error ("-maix64 required: 64-bit computation with 32-bit addressing not yet supported"); \ + } \ + if ((rs6000_isa_flags_explicit \ + & OPTION_MASK_MINIMAL_TOC) != 0) \ + { \ + if (global_options_set.x_rs6000_current_cmodel \ + && rs6000_current_cmodel != CMODEL_SMALL) \ + error ("-mcmodel incompatible with other toc options"); \ + SET_CMODEL (CMODEL_SMALL); \ + } \ + if (rs6000_current_cmodel != CMODEL_SMALL) \ + { \ + TARGET_NO_FP_IN_TOC = 0; \ + TARGET_NO_SUM_IN_TOC = 0; \ + } \ + if (rs6000_current_cmodel == CMODEL_MEDIUM) \ + { \ + rs6000_current_cmodel = CMODEL_LARGE; \ + } \ +} while (0); + +#undef ASM_SPEC +#define ASM_SPEC "-u %{maix64:-a64 %{!mcpu*:-mppc64}} %(asm_cpu)" + +/* Common ASM definitions used by ASM_SPEC amongst the various targets for + handling -mcpu=xxx switches. There is a parallel list in driver-rs6000.c to + provide the default assembler options if the user uses -mcpu=native, so if + you make changes here, make them there also. 
*/ +#undef ASM_CPU_SPEC +#define ASM_CPU_SPEC \ +"%{!mcpu*: %{!maix64: \ + %{mpowerpc64: -mppc64} \ + %{maltivec: -m970} \ + %{!maltivec: %{!mpowerpc64: %(asm_default)}}}} \ +%{mcpu=native: %(asm_cpu_native)} \ +%{mcpu=power3: -m620} \ +%{mcpu=power4: -mpwr4} \ +%{mcpu=power5: -mpwr5} \ +%{mcpu=power5+: -mpwr5x} \ +%{mcpu=power6: -mpwr6} \ +%{mcpu=power6x: -mpwr6} \ +%{mcpu=power7: -mpwr7} \ +%{mcpu=power8: -mpwr8} \ +%{mcpu=power9: -mpwr9} \ +%{mcpu=powerpc: -mppc} \ +%{mcpu=rs64a: -mppc} \ +%{mcpu=603: -m603} \ +%{mcpu=603e: -m603} \ +%{mcpu=604: -m604} \ +%{mcpu=604e: -m604} \ +%{mcpu=620: -m620} \ +%{mcpu=630: -m620} \ +%{mcpu=970: -m970} \ +%{mcpu=G5: -m970} \ +%{mvsx: %{!mcpu*: -mpwr6}} \ +-many" + +#undef ASM_DEFAULT_SPEC +#define ASM_DEFAULT_SPEC "-mpwr4" + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("_AIX43"); \ + builtin_define ("_AIX51"); \ + builtin_define ("_AIX52"); \ + builtin_define ("_AIX53"); \ + builtin_define ("_AIX61"); \ + builtin_define ("_AIX71"); \ + TARGET_OS_AIX_CPP_BUILTINS (); \ + } \ + while (0) + +#undef CPP_SPEC +#define CPP_SPEC "%{posix: -D_POSIX_SOURCE} \ + %{ansi: -D_ANSI_C_SOURCE} \ + %{maix64: -D__64BIT__} \ + %{mpe: -I%R/usr/lpp/ppe.poe/include} \ + %{pthread: -D_THREAD_SAFE}" + +/* The GNU C++ standard library requires that these macros be + defined. Synchronize with libstdc++ os_defines.h. */ +#undef CPLUSPLUS_CPP_SPEC +#define CPLUSPLUS_CPP_SPEC \ + "-D_ALL_SOURCE -D__COMPATMATH__ \ + %{maix64: -D__64BIT__} \ + %{mpe: -I%R/usr/lpp/ppe.poe/include} \ + %{pthread: -D_THREAD_SAFE}" + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_PPC_GPOPT | MASK_PPC_GFXOPT | MASK_MFCRF) + +#undef PROCESSOR_DEFAULT +#define PROCESSOR_DEFAULT PROCESSOR_POWER7 +#undef PROCESSOR_DEFAULT64 +#define PROCESSOR_DEFAULT64 PROCESSOR_POWER7 + +/* AIX 7.1 kernel and assembler have necessary support for Altivec and VSX. */ +#undef OS_MISSING_ALTIVEC + +/* Define this macro as a C expression for the initializer of an + array of string to tell the driver program which options are + defaults for this target and thus do not need to be handled + specially when using `MULTILIB_OPTIONS'. + + Do not define this macro if `MULTILIB_OPTIONS' is not defined in + the target makefile fragment or if none of the options listed in + `MULTILIB_OPTIONS' are set by default. *Note Target Fragment::. */ + +#undef MULTILIB_DEFAULTS + +#undef LIB_SPEC +#define LIB_SPEC "%{pg:-L%R/lib/profiled -L%R/usr/lib/profiled}\ + %{p:-L%R/lib/profiled -L%R/usr/lib/profiled}\ + %{!maix64:%{!shared:%{g*:-lg}}}\ + %{fprofile-arcs|fprofile-generate*|coverage:-lpthreads}\ + %{mpe:-L%R/usr/lpp/ppe.poe/lib -lmpi -lvtd}\ + %{pthread:-lpthreads} -lc" + +#undef LINK_SPEC +#define LINK_SPEC "-bpT:0x10000000 -bpD:0x20000000 %{!r:-btextro}\ + %{static:-bnso %(link_syscalls) } %{shared:-bM:SRE %{!e:-bnoentry}}\ + %{!maix64:%{!shared:%{g*: %(link_libg) }}} %{maix64:-b64}\ + %{mpe:-binitfini:poe_remote_main}" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "%{!shared:\ + %{maix64:%{pg:gcrt0_64%O%s;:%{p:mcrt0_64%O%s;:crt0_64%O%s}};:\ + %{pthread:%{pg:gcrt0_r%O%s;:%{p:mcrt0_r%O%s;:crt0_r%O%s}};:\ + %{pg:gcrt0%O%s;:%{p:mcrt0%O%s;:crt0%O%s}}}}}\ + %{shared:crtcxa_s%O%s;:crtcxa%O%s} crtdbase%O%s" + +/* AIX V5 typedefs ptrdiff_t as "long" while earlier releases used "int". */ + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "long int" + +/* Type used for wchar_t, as a string used in a declaration. */ +#undef WCHAR_TYPE +#define WCHAR_TYPE (!TARGET_64BIT ? 
"short unsigned int" : "unsigned int") + +/* Width of wchar_t in bits. */ +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE (!TARGET_64BIT ? 16 : 32) + +/* AIX 4.2 and above provides initialization and finalization function + support from linker command line. */ +#undef HAS_INIT_SECTION +#define HAS_INIT_SECTION + +#undef LD_INIT_SWITCH +#define LD_INIT_SWITCH "-binitfini" + +#ifndef _AIX52 +extern long long int atoll(const char *); +#endif + +/* This target uses the aix64.opt file. */ +#define TARGET_USES_AIX64_OPT 1 + +/* Large TOC Support */ +#ifdef HAVE_LD_LARGE_TOC +#undef TARGET_CMODEL +#define TARGET_CMODEL rs6000_current_cmodel +#define SET_CMODEL(opt) rs6000_current_cmodel = opt +#else +#define SET_CMODEL(opt) do {} while (0) +#endif + +/* This target defines SUPPORTS_WEAK and TARGET_ASM_NAMED_SECTION, + but does not have crtbegin/end. */ + +#define TARGET_AIX_VERSION 71 + +/* AIX 7.1 supports DWARF3 debugging, but XCOFF remains the default. */ +#define DWARF2_DEBUGGING_INFO 1 +#define PREFERRED_DEBUGGING_TYPE XCOFF_DEBUG +#define DEBUG_INFO_SECTION "0x10000" +#define DEBUG_LINE_SECTION "0x20000" +#define DEBUG_PUBNAMES_SECTION "0x30000" +#define DEBUG_PUBTYPES_SECTION "0x40000" +#define DEBUG_ARANGES_SECTION "0x50000" +#define DEBUG_ABBREV_SECTION "0x60000" +#define DEBUG_STR_SECTION "0x70000" +#define DEBUG_RANGES_SECTION "0x80000" +#define DEBUG_LOC_SECTION "0x90000" +#define DEBUG_FRAME_SECTION "0xA0000" +#define DEBUG_MACINFO_SECTION "0xB0000" +#define DEBUG_MACRO_SECTION "0xB0000" + diff --git a/gcc/config/powerpcspe/altivec.h b/gcc/config/powerpcspe/altivec.h new file mode 100644 index 000000000000..c92bccef150a --- /dev/null +++ b/gcc/config/powerpcspe/altivec.h @@ -0,0 +1,648 @@ +/* PowerPC AltiVec include file. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez (aldyh@redhat.com). + Rewritten by Paolo Bonzini (bonzini@gnu.org). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Implemented to conform to the specification included in the AltiVec + Technology Programming Interface Manual (ALTIVECPIM/D 6/1999 Rev 0). */ + +#ifndef _ALTIVEC_H +#define _ALTIVEC_H 1 + +#if !defined(__VEC__) || !defined(__ALTIVEC__) +#error Use the "-maltivec" flag to enable PowerPC AltiVec support +#endif + +/* If __APPLE_ALTIVEC__ is defined, the compiler supports 'vector', + 'pixel' and 'bool' as context-sensitive AltiVec keywords (in + non-AltiVec contexts, they revert to their original meanings, + if any), so we do not need to define them as macros. */ + +#if !defined(__APPLE_ALTIVEC__) +/* You are allowed to undef these for C++ compatibility. 
*/ +#define vector __vector +#define pixel __pixel +#define bool __bool +#endif + +/* Condition register codes for AltiVec predicates. */ + +#define __CR6_EQ 0 +#define __CR6_EQ_REV 1 +#define __CR6_LT 2 +#define __CR6_LT_REV 3 + +/* Synonyms. */ +#define vec_vaddcuw vec_addc +#define vec_vand vec_and +#define vec_vandc vec_andc +#define vec_vrfip vec_ceil +#define vec_vcmpbfp vec_cmpb +#define vec_vcmpgefp vec_cmpge +#define vec_vctsxs vec_cts +#define vec_vctuxs vec_ctu +#define vec_vexptefp vec_expte +#define vec_vrfim vec_floor +#define vec_lvx vec_ld +#define vec_lvxl vec_ldl +#define vec_vlogefp vec_loge +#define vec_vmaddfp vec_madd +#define vec_vmhaddshs vec_madds +#define vec_vmladduhm vec_mladd +#define vec_vmhraddshs vec_mradds +#define vec_vnmsubfp vec_nmsub +#define vec_vnor vec_nor +#define vec_vor vec_or +#define vec_vpkpx vec_packpx +#define vec_vperm vec_perm +#define vec_vrefp vec_re +#define vec_vrfin vec_round +#define vec_vrsqrtefp vec_rsqrte +#define vec_vsel vec_sel +#define vec_vsldoi vec_sld +#define vec_vsl vec_sll +#define vec_vslo vec_slo +#define vec_vspltisb vec_splat_s8 +#define vec_vspltish vec_splat_s16 +#define vec_vspltisw vec_splat_s32 +#define vec_vsr vec_srl +#define vec_vsro vec_sro +#define vec_stvx vec_st +#define vec_stvxl vec_stl +#define vec_vsubcuw vec_subc +#define vec_vsum2sws vec_sum2s +#define vec_vsumsws vec_sums +#define vec_vrfiz vec_trunc +#define vec_vxor vec_xor + +/* Functions that are resolved by the backend to one of the + typed builtins. */ +#define vec_vaddfp __builtin_vec_vaddfp +#define vec_addc __builtin_vec_addc +#define vec_adde __builtin_vec_adde +#define vec_addec __builtin_vec_addec +#define vec_vaddsws __builtin_vec_vaddsws +#define vec_vaddshs __builtin_vec_vaddshs +#define vec_vaddsbs __builtin_vec_vaddsbs +#define vec_vavgsw __builtin_vec_vavgsw +#define vec_vavguw __builtin_vec_vavguw +#define vec_vavgsh __builtin_vec_vavgsh +#define vec_vavguh __builtin_vec_vavguh +#define vec_vavgsb __builtin_vec_vavgsb +#define vec_vavgub __builtin_vec_vavgub +#define vec_ceil __builtin_vec_ceil +#define vec_cmpb __builtin_vec_cmpb +#define vec_vcmpeqfp __builtin_vec_vcmpeqfp +#define vec_cmpge __builtin_vec_cmpge +#define vec_vcmpgtfp __builtin_vec_vcmpgtfp +#define vec_vcmpgtsw __builtin_vec_vcmpgtsw +#define vec_vcmpgtuw __builtin_vec_vcmpgtuw +#define vec_vcmpgtsh __builtin_vec_vcmpgtsh +#define vec_vcmpgtuh __builtin_vec_vcmpgtuh +#define vec_vcmpgtsb __builtin_vec_vcmpgtsb +#define vec_vcmpgtub __builtin_vec_vcmpgtub +#define vec_vcfsx __builtin_vec_vcfsx +#define vec_vcfux __builtin_vec_vcfux +#define vec_cts __builtin_vec_cts +#define vec_ctu __builtin_vec_ctu +#define vec_cpsgn __builtin_vec_copysign +#define vec_double __builtin_vec_double +#define vec_expte __builtin_vec_expte +#define vec_floor __builtin_vec_floor +#define vec_loge __builtin_vec_loge +#define vec_madd __builtin_vec_madd +#define vec_madds __builtin_vec_madds +#define vec_mtvscr __builtin_vec_mtvscr +#define vec_vmaxfp __builtin_vec_vmaxfp +#define vec_vmaxsw __builtin_vec_vmaxsw +#define vec_vmaxsh __builtin_vec_vmaxsh +#define vec_vmaxsb __builtin_vec_vmaxsb +#define vec_vminfp __builtin_vec_vminfp +#define vec_vminsw __builtin_vec_vminsw +#define vec_vminsh __builtin_vec_vminsh +#define vec_vminsb __builtin_vec_vminsb +#define vec_mradds __builtin_vec_mradds +#define vec_vmsumshm __builtin_vec_vmsumshm +#define vec_vmsumuhm __builtin_vec_vmsumuhm +#define vec_vmsummbm __builtin_vec_vmsummbm +#define vec_vmsumubm __builtin_vec_vmsumubm +#define 
vec_vmsumshs __builtin_vec_vmsumshs +#define vec_vmsumuhs __builtin_vec_vmsumuhs +#define vec_vmulesb __builtin_vec_vmulesb +#define vec_vmulesh __builtin_vec_vmulesh +#define vec_vmuleuh __builtin_vec_vmuleuh +#define vec_vmuleub __builtin_vec_vmuleub +#define vec_vmulosh __builtin_vec_vmulosh +#define vec_vmulouh __builtin_vec_vmulouh +#define vec_vmulosb __builtin_vec_vmulosb +#define vec_vmuloub __builtin_vec_vmuloub +#define vec_nmsub __builtin_vec_nmsub +#define vec_packpx __builtin_vec_packpx +#define vec_vpkswss __builtin_vec_vpkswss +#define vec_vpkuwus __builtin_vec_vpkuwus +#define vec_vpkshss __builtin_vec_vpkshss +#define vec_vpkuhus __builtin_vec_vpkuhus +#define vec_vpkswus __builtin_vec_vpkswus +#define vec_vpkshus __builtin_vec_vpkshus +#define vec_re __builtin_vec_re +#define vec_round __builtin_vec_round +#define vec_recipdiv __builtin_vec_recipdiv +#define vec_rlmi __builtin_vec_rlmi +#define vec_vrlnm __builtin_vec_rlnm +#define vec_rlnm(a,b,c) (__builtin_vec_rlnm((a),((b)<<8)|(c))) +#define vec_rsqrt __builtin_vec_rsqrt +#define vec_rsqrte __builtin_vec_rsqrte +#define vec_vsubfp __builtin_vec_vsubfp +#define vec_subc __builtin_vec_subc +#define vec_vsubsws __builtin_vec_vsubsws +#define vec_vsubshs __builtin_vec_vsubshs +#define vec_vsubsbs __builtin_vec_vsubsbs +#define vec_sum4s __builtin_vec_sum4s +#define vec_vsum4shs __builtin_vec_vsum4shs +#define vec_vsum4sbs __builtin_vec_vsum4sbs +#define vec_vsum4ubs __builtin_vec_vsum4ubs +#define vec_sum2s __builtin_vec_sum2s +#define vec_sums __builtin_vec_sums +#define vec_trunc __builtin_vec_trunc +#define vec_vupkhpx __builtin_vec_vupkhpx +#define vec_vupkhsh __builtin_vec_vupkhsh +#define vec_vupkhsb __builtin_vec_vupkhsb +#define vec_vupklpx __builtin_vec_vupklpx +#define vec_vupklsh __builtin_vec_vupklsh +#define vec_vupklsb __builtin_vec_vupklsb +#define vec_abs __builtin_vec_abs +#define vec_nabs __builtin_vec_nabs +#define vec_abss __builtin_vec_abss +#define vec_add __builtin_vec_add +#define vec_adds __builtin_vec_adds +#define vec_and __builtin_vec_and +#define vec_andc __builtin_vec_andc +#define vec_avg __builtin_vec_avg +#define vec_cmpeq __builtin_vec_cmpeq +#define vec_cmpne __builtin_vec_cmpne +#define vec_cmpgt __builtin_vec_cmpgt +#define vec_ctf __builtin_vec_ctf +#define vec_dst __builtin_vec_dst +#define vec_dstst __builtin_vec_dstst +#define vec_dststt __builtin_vec_dststt +#define vec_dstt __builtin_vec_dstt +#define vec_ld __builtin_vec_ld +#define vec_lde __builtin_vec_lde +#define vec_ldl __builtin_vec_ldl +#define vec_lvebx __builtin_vec_lvebx +#define vec_lvehx __builtin_vec_lvehx +#define vec_lvewx __builtin_vec_lvewx +#define vec_neg __builtin_vec_neg +#define vec_pmsum_be __builtin_vec_vpmsum +#define vec_shasigma_be __builtin_crypto_vshasigma +/* Cell only intrinsics. 
*/ +#ifdef __PPU__ +#define vec_lvlx __builtin_vec_lvlx +#define vec_lvlxl __builtin_vec_lvlxl +#define vec_lvrx __builtin_vec_lvrx +#define vec_lvrxl __builtin_vec_lvrxl +#endif +#define vec_lvsl __builtin_vec_lvsl +#define vec_lvsr __builtin_vec_lvsr +#define vec_max __builtin_vec_max +#define vec_mergee __builtin_vec_vmrgew +#define vec_mergeh __builtin_vec_mergeh +#define vec_mergel __builtin_vec_mergel +#define vec_mergeo __builtin_vec_vmrgow +#define vec_min __builtin_vec_min +#define vec_mladd __builtin_vec_mladd +#define vec_msum __builtin_vec_msum +#define vec_msums __builtin_vec_msums +#define vec_mul __builtin_vec_mul +#define vec_mule __builtin_vec_mule +#define vec_mulo __builtin_vec_mulo +#define vec_nor __builtin_vec_nor +#define vec_or __builtin_vec_or +#define vec_pack __builtin_vec_pack +#define vec_packs __builtin_vec_packs +#define vec_packsu __builtin_vec_packsu +#define vec_perm __builtin_vec_perm +#define vec_rl __builtin_vec_rl +#define vec_sel __builtin_vec_sel +#define vec_sl __builtin_vec_sl +#define vec_sld __builtin_vec_sld +#define vec_sldw __builtin_vsx_xxsldwi +#define vec_sll __builtin_vec_sll +#define vec_slo __builtin_vec_slo +#define vec_splat __builtin_vec_splat +#define vec_sr __builtin_vec_sr +#define vec_sra __builtin_vec_sra +#define vec_srl __builtin_vec_srl +#define vec_sro __builtin_vec_sro +#define vec_st __builtin_vec_st +#define vec_ste __builtin_vec_ste +#define vec_stl __builtin_vec_stl +#define vec_stvebx __builtin_vec_stvebx +#define vec_stvehx __builtin_vec_stvehx +#define vec_stvewx __builtin_vec_stvewx +/* Cell only intrinsics. */ +#ifdef __PPU__ +#define vec_stvlx __builtin_vec_stvlx +#define vec_stvlxl __builtin_vec_stvlxl +#define vec_stvrx __builtin_vec_stvrx +#define vec_stvrxl __builtin_vec_stvrxl +#endif +#define vec_sub __builtin_vec_sub +#define vec_subs __builtin_vec_subs +#define vec_sum __builtin_vec_sum +#define vec_unpackh __builtin_vec_unpackh +#define vec_unpackl __builtin_vec_unpackl +#define vec_vaddubm __builtin_vec_vaddubm +#define vec_vaddubs __builtin_vec_vaddubs +#define vec_vadduhm __builtin_vec_vadduhm +#define vec_vadduhs __builtin_vec_vadduhs +#define vec_vadduwm __builtin_vec_vadduwm +#define vec_vadduws __builtin_vec_vadduws +#define vec_vcmpequb __builtin_vec_vcmpequb +#define vec_vcmpequh __builtin_vec_vcmpequh +#define vec_vcmpequw __builtin_vec_vcmpequw +#define vec_vmaxub __builtin_vec_vmaxub +#define vec_vmaxuh __builtin_vec_vmaxuh +#define vec_vmaxuw __builtin_vec_vmaxuw +#define vec_vminub __builtin_vec_vminub +#define vec_vminuh __builtin_vec_vminuh +#define vec_vminuw __builtin_vec_vminuw +#define vec_vmrghb __builtin_vec_vmrghb +#define vec_vmrghh __builtin_vec_vmrghh +#define vec_vmrghw __builtin_vec_vmrghw +#define vec_vmrglb __builtin_vec_vmrglb +#define vec_vmrglh __builtin_vec_vmrglh +#define vec_vmrglw __builtin_vec_vmrglw +#define vec_vpkuhum __builtin_vec_vpkuhum +#define vec_vpkuwum __builtin_vec_vpkuwum +#define vec_vrlb __builtin_vec_vrlb +#define vec_vrlh __builtin_vec_vrlh +#define vec_vrlw __builtin_vec_vrlw +#define vec_vslb __builtin_vec_vslb +#define vec_vslh __builtin_vec_vslh +#define vec_vslw __builtin_vec_vslw +#define vec_vspltb __builtin_vec_vspltb +#define vec_vsplth __builtin_vec_vsplth +#define vec_vspltw __builtin_vec_vspltw +#define vec_vsrab __builtin_vec_vsrab +#define vec_vsrah __builtin_vec_vsrah +#define vec_vsraw __builtin_vec_vsraw +#define vec_vsrb __builtin_vec_vsrb +#define vec_vsrh __builtin_vec_vsrh +#define vec_vsrw __builtin_vec_vsrw +#define vec_vsububs 
__builtin_vec_vsububs +#define vec_vsububm __builtin_vec_vsububm +#define vec_vsubuhm __builtin_vec_vsubuhm +#define vec_vsubuhs __builtin_vec_vsubuhs +#define vec_vsubuwm __builtin_vec_vsubuwm +#define vec_vsubuws __builtin_vec_vsubuws +#define vec_xor __builtin_vec_xor + +#define vec_extract __builtin_vec_extract +#define vec_insert __builtin_vec_insert +#define vec_splats __builtin_vec_splats +#define vec_promote __builtin_vec_promote + +#ifdef __VSX__ +/* VSX additions */ +#define vec_div __builtin_vec_div +#define vec_mul __builtin_vec_mul +#define vec_msub __builtin_vec_msub +#define vec_nmadd __builtin_vec_nmadd +#define vec_nearbyint __builtin_vec_nearbyint +#define vec_rint __builtin_vec_rint +#define vec_sqrt __builtin_vec_sqrt +#define vec_vsx_ld __builtin_vec_vsx_ld +#define vec_vsx_st __builtin_vec_vsx_st +#define vec_xl __builtin_vec_vsx_ld +#define vec_xst __builtin_vec_vsx_st + +/* Note, xxsldi and xxpermdi were added as __builtin_vsx_<xxx> functions + instead of __builtin_vec_<xxx> */ +#define vec_xxsldwi __builtin_vsx_xxsldwi +#define vec_xxpermdi __builtin_vsx_xxpermdi +#endif + +#ifdef _ARCH_PWR8 +/* Vector additions added in ISA 2.07. */ +#define vec_eqv __builtin_vec_eqv +#define vec_nand __builtin_vec_nand +#define vec_orc __builtin_vec_orc +#define vec_vaddcuq __builtin_vec_vaddcuq +#define vec_vaddudm __builtin_vec_vaddudm +#define vec_vadduqm __builtin_vec_vadduqm +#define vec_vbpermq __builtin_vec_vbpermq +#define vec_bperm __builtin_vec_vbperm_api +#define vec_vclz __builtin_vec_vclz +#define vec_cntlz __builtin_vec_vclz +#define vec_vclzb __builtin_vec_vclzb +#define vec_vclzd __builtin_vec_vclzd +#define vec_vclzh __builtin_vec_vclzh +#define vec_vclzw __builtin_vec_vclzw +#define vec_vaddecuq __builtin_vec_vaddecuq +#define vec_vaddeuqm __builtin_vec_vaddeuqm +#define vec_vsubecuq __builtin_vec_vsubecuq +#define vec_vsubeuqm __builtin_vec_vsubeuqm +#define vec_vgbbd __builtin_vec_vgbbd +#define vec_gb __builtin_vec_vgbbd +#define vec_vmaxsd __builtin_vec_vmaxsd +#define vec_vmaxud __builtin_vec_vmaxud +#define vec_vminsd __builtin_vec_vminsd +#define vec_vminud __builtin_vec_vminud +#define vec_vmrgew __builtin_vec_vmrgew +#define vec_vmrgow __builtin_vec_vmrgow +#define vec_vpksdss __builtin_vec_vpksdss +#define vec_vpksdus __builtin_vec_vpksdus +#define vec_vpkudum __builtin_vec_vpkudum +#define vec_vpkudus __builtin_vec_vpkudus +#define vec_vpopcnt __builtin_vec_vpopcnt +#define vec_vpopcntb __builtin_vec_vpopcntb +#define vec_vpopcntd __builtin_vec_vpopcntd +#define vec_vpopcnth __builtin_vec_vpopcnth +#define vec_vpopcntw __builtin_vec_vpopcntw +#define vec_popcnt __builtin_vec_vpopcntu +#define vec_popcntb __builtin_vec_vpopcntub +#define vec_popcnth __builtin_vec_vpopcntuh +#define vec_popcntw __builtin_vec_vpopcntuw +#define vec_popcntd __builtin_vec_vpopcntud +#define vec_vrld __builtin_vec_vrld +#define vec_vsld __builtin_vec_vsld +#define vec_vsrad __builtin_vec_vsrad +#define vec_vsrd __builtin_vec_vsrd +#define vec_vsubcuq __builtin_vec_vsubcuq +#define vec_vsubudm __builtin_vec_vsubudm +#define vec_vsubuqm __builtin_vec_vsubuqm +#define vec_vupkhsw __builtin_vec_vupkhsw +#define vec_vupklsw __builtin_vec_vupklsw +#endif + +#ifdef __POWER9_VECTOR__ +/* Vector additions added in ISA 3.0. 
*/ +#define vec_vctz __builtin_vec_vctz +#define vec_cnttz __builtin_vec_vctz +#define vec_vctzb __builtin_vec_vctzb +#define vec_vctzd __builtin_vec_vctzd +#define vec_vctzh __builtin_vec_vctzh +#define vec_vctzw __builtin_vec_vctzw +#define vec_vextract4b __builtin_vec_vextract4b +#define vec_vinsert4b __builtin_vec_vinsert4b +#define vec_vprtyb __builtin_vec_vprtyb +#define vec_vprtybd __builtin_vec_vprtybd +#define vec_vprtybw __builtin_vec_vprtybw + +#ifdef _ARCH_PPC64 +#define vec_vprtybq __builtin_vec_vprtybq +#endif + +#define vec_absd __builtin_vec_vadu +#define vec_absdb __builtin_vec_vadub +#define vec_absdh __builtin_vec_vaduh +#define vec_absdw __builtin_vec_vaduw + +#define vec_slv __builtin_vec_vslv +#define vec_srv __builtin_vec_vsrv + +#define vec_extract_exp __builtin_vec_extract_exp +#define vec_extract_sig __builtin_vec_extract_sig +#define vec_insert_exp __builtin_vec_insert_exp +#define vec_test_data_class __builtin_vec_test_data_class + +#define scalar_extract_exp __builtin_vec_scalar_extract_exp +#define scalar_extract_sig __builtin_vec_scalar_extract_sig +#define scalar_insert_exp __builtin_vec_scalar_insert_exp +#define scalar_test_data_class __builtin_vec_scalar_test_data_class +#define scalar_test_neg __builtin_vec_scalar_test_neg + +#define scalar_cmp_exp_gt __builtin_vec_scalar_cmp_exp_gt +#define scalar_cmp_exp_lt __builtin_vec_scalar_cmp_exp_lt +#define scalar_cmp_exp_eq __builtin_vec_scalar_cmp_exp_eq +#define scalar_cmp_exp_unordered __builtin_vec_scalar_cmp_exp_unordered + +#ifdef _ARCH_PPC64 +#define vec_xl_len __builtin_vec_lxvl +#define vec_xst_len __builtin_vec_stxvl +#endif + +#define vec_cmpnez __builtin_vec_vcmpnez + +#define vec_cntlz_lsbb __builtin_vec_vclzlsbb +#define vec_cnttz_lsbb __builtin_vec_vctzlsbb + +#define vec_xlx __builtin_vec_vextulx +#define vec_xrx __builtin_vec_vexturx + +#define vec_revb __builtin_vec_revb +#endif + +/* Predicates. + For C++, we use templates in order to allow non-parenthesized arguments. + For C, instead, we use macros since non-parenthesized arguments were + not allowed even in older GCC implementation of AltiVec. + + In the future, we may add more magic to the back-end, so that no + one- or two-argument macros are used. */ + +#ifdef __cplusplus__ +#define __altivec_unary_pred(NAME, CALL) \ +template <class T> int NAME (T a1) { return CALL; } + +#define __altivec_scalar_pred(NAME, CALL) \ +template <class T, class U> int NAME (T a1, U a2) { return CALL; } + +/* Given the vec_step of a type, return the corresponding bool type. */ +template <int STEP> class __altivec_bool_ret { }; +template <> class __altivec_bool_ret <4> { + typedef __vector __bool int __ret; +}; +template <> class __altivec_bool_ret <8> { + typedef __vector __bool short __ret; +}; +template <> class __altivec_bool_ret <16> { + typedef __vector __bool char __ret; +}; + +/* Be very liberal in the pairs we accept. Mistakes such as passing + a `vector char' and `vector short' will be caught by the middle-end, + while any attempt to detect them here would produce hard to understand + error messages involving the implementation details of AltiVec. 
*/ +#define __altivec_binary_pred(NAME, CALL) \ +template <class T, class U> \ +typename __altivec_bool_ret <vec_step (T)>::__ret \ +NAME (T a1, U a2) \ +{ \ + return CALL; \ +} + +__altivec_binary_pred(vec_cmplt, + __builtin_vec_cmpgt (a2, a1)) +__altivec_binary_pred(vec_cmple, + __builtin_vec_cmpge (a2, a1)) + +__altivec_scalar_pred(vec_all_in, + __builtin_altivec_vcmpbfp_p (__CR6_EQ, a1, a2)) +__altivec_scalar_pred(vec_any_out, + __builtin_altivec_vcmpbfp_p (__CR6_EQ_REV, a1, a2)) + +__altivec_unary_pred(vec_all_nan, + __builtin_altivec_vcmpeq_p (__CR6_EQ, a1, a1)) +__altivec_unary_pred(vec_any_nan, + __builtin_altivec_vcmpeq_p (__CR6_LT_REV, a1, a1)) + +__altivec_unary_pred(vec_all_numeric, + __builtin_altivec_vcmpeq_p (__CR6_LT, a1, a1)) +__altivec_unary_pred(vec_any_numeric, + __builtin_altivec_vcmpeq_p (__CR6_EQ_REV, a1, a1)) + +__altivec_scalar_pred(vec_all_eq, + __builtin_vec_vcmpeq_p (__CR6_LT, a1, a2)) + +#ifndef __POWER9_VECTOR__ +__altivec_scalar_pred(vec_all_ne, + __builtin_vec_vcmpeq_p (__CR6_EQ, a1, a2)) +__altivec_scalar_pred(vec_any_eq, + __builtin_vec_vcmpeq_p (__CR6_EQ_REV, a1, a2)) +#else +__altivec_scalar_pred(vec_all_nez, + __builtin_vec_vcmpnez_p (__CR6_LT, a1, a2)) +__altivec_scalar_pred(vec_any_eqz, + __builtin_vec_vcmpnez_p (__CR6_LT_REV, a1, a2)) +__altivec_scalar_pred(vec_all_ne, + __builtin_vec_vcmpne_p (a1, a2)) +__altivec_scalar_pred(vec_any_eq, + __builtin_vec_vcmpae_p (a1, a2)) +#endif + +__altivec_scalar_pred(vec_any_ne, + __builtin_vec_vcmpeq_p (__CR6_LT_REV, a1, a2)) + +__altivec_scalar_pred(vec_all_gt, + __builtin_vec_vcmpgt_p (__CR6_LT, a1, a2)) +__altivec_scalar_pred(vec_all_lt, + __builtin_vec_vcmpgt_p (__CR6_LT, a2, a1)) +__altivec_scalar_pred(vec_any_gt, + __builtin_vec_vcmpgt_p (__CR6_EQ_REV, a1, a2)) +__altivec_scalar_pred(vec_any_lt, + __builtin_vec_vcmpgt_p (__CR6_EQ_REV, a2, a1)) + +__altivec_scalar_pred(vec_all_ngt, + __builtin_altivec_vcmpgt_p (__CR6_EQ, a1, a2)) +__altivec_scalar_pred(vec_all_nlt, + __builtin_altivec_vcmpgt_p (__CR6_EQ, a2, a1)) +__altivec_scalar_pred(vec_any_ngt, + __builtin_altivec_vcmpgt_p (__CR6_LT_REV, a1, a2)) +__altivec_scalar_pred(vec_any_nlt, + __builtin_altivec_vcmpgt_p (__CR6_LT_REV, a2, a1)) + +/* __builtin_vec_vcmpge_p is vcmpgefp for floating-point vector types, + while for integer types it is converted to __builtin_vec_vcmpgt_p, + with inverted args and condition code. 
*/ +__altivec_scalar_pred(vec_all_le, + __builtin_vec_vcmpge_p (__CR6_LT, a2, a1)) +__altivec_scalar_pred(vec_all_ge, + __builtin_vec_vcmpge_p (__CR6_LT, a1, a2)) +__altivec_scalar_pred(vec_any_le, + __builtin_vec_vcmpge_p (__CR6_EQ_REV, a2, a1)) +__altivec_scalar_pred(vec_any_ge, + __builtin_vec_vcmpge_p (__CR6_EQ_REV, a1, a2)) + +__altivec_scalar_pred(vec_all_nge, + __builtin_altivec_vcmpge_p (__CR6_EQ, a1, a2)) +__altivec_scalar_pred(vec_all_nle, + __builtin_altivec_vcmpge_p (__CR6_EQ, a2, a1)) +__altivec_scalar_pred(vec_any_nge, + __builtin_altivec_vcmpge_p (__CR6_LT_REV, a1, a2)) +__altivec_scalar_pred(vec_any_nle, + __builtin_altivec_vcmpge_p (__CR6_LT_REV, a2, a1)) + +#undef __altivec_scalar_pred +#undef __altivec_unary_pred +#undef __altivec_binary_pred +#else +#define vec_cmplt(a1, a2) __builtin_vec_cmpgt ((a2), (a1)) +#define vec_cmple(a1, a2) __builtin_vec_cmpge ((a2), (a1)) + +#define vec_all_in(a1, a2) __builtin_altivec_vcmpbfp_p (__CR6_EQ, (a1), (a2)) +#define vec_any_out(a1, a2) __builtin_altivec_vcmpbfp_p (__CR6_EQ_REV, (a1), (a2)) + +#define vec_all_nan(a1) __builtin_vec_vcmpeq_p (__CR6_EQ, (a1), (a1)) +#define vec_any_nan(a1) __builtin_vec_vcmpeq_p (__CR6_LT_REV, (a1), (a1)) + +#define vec_all_numeric(a1) __builtin_vec_vcmpeq_p (__CR6_LT, (a1), (a1)) +#define vec_any_numeric(a1) __builtin_vec_vcmpeq_p (__CR6_EQ_REV, (a1), (a1)) + +#define vec_all_eq(a1, a2) __builtin_vec_vcmpeq_p (__CR6_LT, (a1), (a2)) + +#ifdef __POWER9_VECTOR__ +#define vec_all_nez(a1, a2) __builtin_vec_vcmpnez_p (__CR6_LT, (a1), (a2)) +#define vec_any_eqz(a1, a2) __builtin_vec_vcmpnez_p (__CR6_LT_REV, (a1), (a2)) +#define vec_all_ne(a1, a2) __builtin_vec_vcmpne_p ((a1), (a2)) +#define vec_any_eq(a1, a2) __builtin_vec_vcmpae_p ((a1), (a2)) +#else +#define vec_all_ne(a1, a2) __builtin_vec_vcmpeq_p (__CR6_EQ, (a1), (a2)) +#define vec_any_eq(a1, a2) __builtin_vec_vcmpeq_p (__CR6_EQ_REV, (a1), (a2)) +#endif + +#define vec_any_ne(a1, a2) __builtin_vec_vcmpeq_p (__CR6_LT_REV, (a1), (a2)) + +#define vec_all_gt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_LT, (a1), (a2)) +#define vec_all_lt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_LT, (a2), (a1)) +#define vec_any_gt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_EQ_REV, (a1), (a2)) +#define vec_any_lt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_EQ_REV, (a2), (a1)) + +#define vec_all_ngt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_EQ, (a1), (a2)) +#define vec_all_nlt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_EQ, (a2), (a1)) +#define vec_any_ngt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_LT_REV, (a1), (a2)) +#define vec_any_nlt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_LT_REV, (a2), (a1)) + +/* __builtin_vec_vcmpge_p is vcmpgefp for floating-point vector types, + while for integer types it is converted to __builtin_vec_vcmpgt_p, + with inverted args and condition code. */ +#define vec_all_le(a1, a2) __builtin_vec_vcmpge_p (__CR6_LT, (a2), (a1)) +#define vec_all_ge(a1, a2) __builtin_vec_vcmpge_p (__CR6_LT, (a1), (a2)) +#define vec_any_le(a1, a2) __builtin_vec_vcmpge_p (__CR6_EQ_REV, (a2), (a1)) +#define vec_any_ge(a1, a2) __builtin_vec_vcmpge_p (__CR6_EQ_REV, (a1), (a2)) + +#define vec_all_nge(a1, a2) __builtin_vec_vcmpge_p (__CR6_EQ, (a1), (a2)) +#define vec_all_nle(a1, a2) __builtin_vec_vcmpge_p (__CR6_EQ, (a2), (a1)) +#define vec_any_nge(a1, a2) __builtin_vec_vcmpge_p (__CR6_LT_REV, (a1), (a2)) +#define vec_any_nle(a1, a2) __builtin_vec_vcmpge_p (__CR6_LT_REV, (a2), (a1)) +#endif + +/* These do not accept vectors, so they do not have a __builtin_vec_* + counterpart. 
*/ +#define vec_dss(x) __builtin_altivec_dss((x)) +#define vec_dssall() __builtin_altivec_dssall () +#define vec_mfvscr() ((__vector unsigned short) __builtin_altivec_mfvscr ()) +#define vec_splat_s8(x) __builtin_altivec_vspltisb ((x)) +#define vec_splat_s16(x) __builtin_altivec_vspltish ((x)) +#define vec_splat_s32(x) __builtin_altivec_vspltisw ((x)) +#define vec_splat_u8(x) ((__vector unsigned char) vec_splat_s8 ((x))) +#define vec_splat_u16(x) ((__vector unsigned short) vec_splat_s16 ((x))) +#define vec_splat_u32(x) ((__vector unsigned int) vec_splat_s32 ((x))) + +/* This also accepts a type for its parameter, so it is not enough + to #define vec_step to __builtin_vec_step. */ +#define vec_step(x) __builtin_vec_step (* (__typeof__ (x) *) 0) + +#endif /* _ALTIVEC_H */ diff --git a/gcc/config/powerpcspe/altivec.md b/gcc/config/powerpcspe/altivec.md new file mode 100644 index 000000000000..649f1810d168 --- /dev/null +++ b/gcc/config/powerpcspe/altivec.md @@ -0,0 +1,4028 @@ +;; AltiVec patterns. +;; Copyright (C) 2002-2017 Free Software Foundation, Inc. +;; Contributed by Aldy Hernandez (aldy@quesejoda.com) + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. 
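(Illustrative aside, not part of the patch: before the altivec.md patterns begin, here is a minimal sketch of the altivec.h interface copied in above. It assumes a PowerPC target compiled with -maltivec; otherwise the #error at the top of altivec.h fires.)

/* Hypothetical example using only names defined in the header above.  */
#include <altivec.h>
#include <stdio.h>

int
main (void)
{
  /* vec_splat_s32 maps to __builtin_altivec_vspltisw; the argument must
     be a 5-bit signed literal (-16..15).  */
  vector signed int a = vec_splat_s32 (3);
  vector signed int b = vec_splat_s32 (4);

  /* vec_add resolves through __builtin_vec_add for this element type.  */
  vector signed int sum = vec_add (a, b);

  /* vec_all_eq is one of the predicates built from __builtin_vec_vcmpeq_p
     and the __CR6_* condition codes defined above.  */
  if (vec_all_eq (sum, vec_splat_s32 (7)))
    printf ("all lanes are 7; vec_step is %d\n", vec_step (sum));

  return 0;
}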
+ +(define_c_enum "unspec" + [UNSPEC_VCMPBFP + UNSPEC_VMSUMU + UNSPEC_VMSUMM + UNSPEC_VMSUMSHM + UNSPEC_VMSUMUHS + UNSPEC_VMSUMSHS + UNSPEC_VMHADDSHS + UNSPEC_VMHRADDSHS + UNSPEC_VADDCUW + UNSPEC_VADDU + UNSPEC_VADDS + UNSPEC_VAVGU + UNSPEC_VAVGS + UNSPEC_VMULEUB + UNSPEC_VMULESB + UNSPEC_VMULEUH + UNSPEC_VMULESH + UNSPEC_VMULOUB + UNSPEC_VMULOSB + UNSPEC_VMULOUH + UNSPEC_VMULOSH + UNSPEC_VPKPX + UNSPEC_VPACK_SIGN_SIGN_SAT + UNSPEC_VPACK_SIGN_UNS_SAT + UNSPEC_VPACK_UNS_UNS_SAT + UNSPEC_VPACK_UNS_UNS_MOD + UNSPEC_VPACK_UNS_UNS_MOD_DIRECT + UNSPEC_VSLV4SI + UNSPEC_VSLO + UNSPEC_VSR + UNSPEC_VSRO + UNSPEC_VSUBCUW + UNSPEC_VSUBU + UNSPEC_VSUBS + UNSPEC_VSUM4UBS + UNSPEC_VSUM4S + UNSPEC_VSUM2SWS + UNSPEC_VSUMSWS + UNSPEC_VPERM + UNSPEC_VPERMR + UNSPEC_VPERM_UNS + UNSPEC_VRFIN + UNSPEC_VCFUX + UNSPEC_VCFSX + UNSPEC_VCTUXS + UNSPEC_VCTSXS + UNSPEC_VLOGEFP + UNSPEC_VEXPTEFP + UNSPEC_VSLDOI + UNSPEC_VUNPACK_HI_SIGN + UNSPEC_VUNPACK_LO_SIGN + UNSPEC_VUNPACK_HI_SIGN_DIRECT + UNSPEC_VUNPACK_LO_SIGN_DIRECT + UNSPEC_VUPKHPX + UNSPEC_VUPKLPX + UNSPEC_DARN + UNSPEC_DARN_32 + UNSPEC_DARN_RAW + UNSPEC_DST + UNSPEC_DSTT + UNSPEC_DSTST + UNSPEC_DSTSTT + UNSPEC_LVSL + UNSPEC_LVSR + UNSPEC_LVE + UNSPEC_STVX + UNSPEC_STVXL + UNSPEC_STVE + UNSPEC_SET_VSCR + UNSPEC_GET_VRSAVE + UNSPEC_LVX + UNSPEC_REDUC_PLUS + UNSPEC_VECSH + UNSPEC_EXTEVEN_V4SI + UNSPEC_EXTEVEN_V8HI + UNSPEC_EXTEVEN_V16QI + UNSPEC_EXTEVEN_V4SF + UNSPEC_EXTODD_V4SI + UNSPEC_EXTODD_V8HI + UNSPEC_EXTODD_V16QI + UNSPEC_EXTODD_V4SF + UNSPEC_INTERHI_V4SI + UNSPEC_INTERHI_V8HI + UNSPEC_INTERHI_V16QI + UNSPEC_INTERLO_V4SI + UNSPEC_INTERLO_V8HI + UNSPEC_INTERLO_V16QI + UNSPEC_LVLX + UNSPEC_LVLXL + UNSPEC_LVRX + UNSPEC_LVRXL + UNSPEC_STVLX + UNSPEC_STVLXL + UNSPEC_STVRX + UNSPEC_STVRXL + UNSPEC_VADU + UNSPEC_VSLV + UNSPEC_VSRV + UNSPEC_VMULWHUB + UNSPEC_VMULWLUB + UNSPEC_VMULWHSB + UNSPEC_VMULWLSB + UNSPEC_VMULWHUH + UNSPEC_VMULWLUH + UNSPEC_VMULWHSH + UNSPEC_VMULWLSH + UNSPEC_VUPKHUB + UNSPEC_VUPKHUH + UNSPEC_VUPKLUB + UNSPEC_VUPKLUH + UNSPEC_VPERMSI + UNSPEC_VPERMHI + UNSPEC_INTERHI + UNSPEC_INTERLO + UNSPEC_VUPKHS_V4SF + UNSPEC_VUPKLS_V4SF + UNSPEC_VUPKHU_V4SF + UNSPEC_VUPKLU_V4SF + UNSPEC_VGBBD + UNSPEC_VMRGH_DIRECT + UNSPEC_VMRGL_DIRECT + UNSPEC_VSPLT_DIRECT + UNSPEC_VMRGEW_DIRECT + UNSPEC_VSUMSWS_DIRECT + UNSPEC_VADDCUQ + UNSPEC_VADDEUQM + UNSPEC_VADDECUQ + UNSPEC_VSUBCUQ + UNSPEC_VSUBEUQM + UNSPEC_VSUBECUQ + UNSPEC_VBPERMQ + UNSPEC_VBPERMD + UNSPEC_BCDADD + UNSPEC_BCDSUB + UNSPEC_BCD_OVERFLOW + UNSPEC_CMPRB + UNSPEC_CMPRB2 + UNSPEC_CMPEQB + UNSPEC_VRLMI + UNSPEC_VRLNM +]) + +(define_c_enum "unspecv" + [UNSPECV_SET_VRSAVE + UNSPECV_MTVSCR + UNSPECV_MFVSCR + UNSPECV_DSSALL + UNSPECV_DSS + ]) + +;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops +(define_mode_iterator VI2 [V4SI V8HI V16QI V2DI]) +;; Short vec int modes +(define_mode_iterator VIshort [V8HI V16QI]) +;; Longer vec int modes for rotate/mask ops +(define_mode_iterator VIlong [V2DI V4SI]) +;; Vec float modes +(define_mode_iterator VF [V4SF]) +;; Vec modes, pity mode iterators are not composable +(define_mode_iterator V [V4SI V8HI V16QI V4SF]) +;; Vec modes for move/logical/permute ops, include vector types for move not +;; otherwise handled by altivec (v2df, v2di, ti) +(define_mode_iterator VM [V4SI + V8HI + V16QI + V4SF + V2DF + V2DI + V1TI + TI + (KF "FLOAT128_VECTOR_P (KFmode)") + (TF "FLOAT128_VECTOR_P (TFmode)")]) + +;; Like VM, except don't do TImode +(define_mode_iterator VM2 [V4SI + V8HI + V16QI + V4SF + V2DF + V2DI + V1TI + (KF "FLOAT128_VECTOR_P (KFmode)") + (TF 
"FLOAT128_VECTOR_P (TFmode)")]) + +;; Specific iterator for parity which does not have a byte/half-word form, but +;; does have a quad word form +(define_mode_iterator VParity [V4SI + V2DI + V1TI + (TI "TARGET_VSX_TIMODE")]) + +(define_mode_attr VI_char [(V2DI "d") (V4SI "w") (V8HI "h") (V16QI "b")]) +(define_mode_attr VI_scalar [(V2DI "DI") (V4SI "SI") (V8HI "HI") (V16QI "QI")]) +(define_mode_attr VI_unit [(V16QI "VECTOR_UNIT_ALTIVEC_P (V16QImode)") + (V8HI "VECTOR_UNIT_ALTIVEC_P (V8HImode)") + (V4SI "VECTOR_UNIT_ALTIVEC_P (V4SImode)") + (V2DI "VECTOR_UNIT_P8_VECTOR_P (V2DImode)") + (V1TI "VECTOR_UNIT_ALTIVEC_P (V1TImode)")]) + +;; Vector pack/unpack +(define_mode_iterator VP [V2DI V4SI V8HI]) +(define_mode_attr VP_small [(V2DI "V4SI") (V4SI "V8HI") (V8HI "V16QI")]) +(define_mode_attr VP_small_lc [(V2DI "v4si") (V4SI "v8hi") (V8HI "v16qi")]) +(define_mode_attr VU_char [(V2DI "w") (V4SI "h") (V8HI "b")]) + +;; Vector negate +(define_mode_iterator VNEG [V4SI V2DI]) + +;; Vector move instructions. +(define_insn "*altivec_mov<mode>" + [(set (match_operand:VM2 0 "nonimmediate_operand" "=Z,v,v,?Y,?*r,?*r,v,v,?*r") + (match_operand:VM2 1 "input_operand" "v,Z,v,*r,Y,*r,j,W,W"))] + "VECTOR_MEM_ALTIVEC_P (<MODE>mode) + && (register_operand (operands[0], <MODE>mode) + || register_operand (operands[1], <MODE>mode))" +{ + switch (which_alternative) + { + case 0: return "stvx %1,%y0"; + case 1: return "lvx %0,%y1"; + case 2: return "vor %0,%1,%1"; + case 3: return "#"; + case 4: return "#"; + case 5: return "#"; + case 6: return "vxor %0,%0,%0"; + case 7: return output_vec_const_move (operands); + case 8: return "#"; + default: gcc_unreachable (); + } +} + [(set_attr "type" "vecstore,vecload,veclogical,store,load,*,veclogical,*,*") + (set_attr "length" "4,4,4,20,20,20,4,8,32")]) + +;; Unlike other altivec moves, allow the GPRs, since a normal use of TImode +;; is for unions. 
However for plain data movement, slightly favor the vector +;; loads +(define_insn "*altivec_movti" + [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,v,v,?Y,?r,?r,v,v") + (match_operand:TI 1 "input_operand" "v,Z,v,r,Y,r,j,W"))] + "VECTOR_MEM_ALTIVEC_P (TImode) + && (register_operand (operands[0], TImode) + || register_operand (operands[1], TImode))" +{ + switch (which_alternative) + { + case 0: return "stvx %1,%y0"; + case 1: return "lvx %0,%y1"; + case 2: return "vor %0,%1,%1"; + case 3: return "#"; + case 4: return "#"; + case 5: return "#"; + case 6: return "vxor %0,%0,%0"; + case 7: return output_vec_const_move (operands); + default: gcc_unreachable (); + } +} + [(set_attr "type" "vecstore,vecload,veclogical,store,load,*,veclogical,*")]) + +;; Load up a vector with the most significant bit set by loading up -1 and +;; doing a shift left +(define_split + [(set (match_operand:VM 0 "altivec_register_operand" "") + (match_operand:VM 1 "easy_vector_constant_msb" ""))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + machine_mode mode = GET_MODE (operands[0]); + rtvec v; + int i, num_elements; + + if (mode == V4SFmode) + { + mode = V4SImode; + dest = gen_lowpart (V4SImode, dest); + } + + num_elements = GET_MODE_NUNITS (mode); + v = rtvec_alloc (num_elements); + for (i = 0; i < num_elements; i++) + RTVEC_ELT (v, i) = constm1_rtx; + + emit_insn (gen_vec_initv4si (dest, gen_rtx_PARALLEL (mode, v))); + emit_insn (gen_rtx_SET (dest, gen_rtx_ASHIFT (mode, dest, dest))); + DONE; +}) + +(define_split + [(set (match_operand:VM 0 "altivec_register_operand" "") + (match_operand:VM 1 "easy_vector_constant_add_self" ""))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && reload_completed" + [(set (match_dup 0) (match_dup 3)) + (set (match_dup 0) (match_dup 4))] +{ + rtx dup = gen_easy_altivec_constant (operands[1]); + rtx const_vec; + machine_mode op_mode = <MODE>mode; + + /* Divide the operand of the resulting VEC_DUPLICATE, and use + simplify_rtx to make a CONST_VECTOR. */ + XEXP (dup, 0) = simplify_const_binary_operation (ASHIFTRT, QImode, + XEXP (dup, 0), const1_rtx); + const_vec = simplify_rtx (dup); + + if (op_mode == V4SFmode) + { + op_mode = V4SImode; + operands[0] = gen_lowpart (op_mode, operands[0]); + } + if (GET_MODE (const_vec) == op_mode) + operands[3] = const_vec; + else + operands[3] = gen_lowpart (op_mode, const_vec); + operands[4] = gen_rtx_PLUS (op_mode, operands[0], operands[0]); +}) + +(define_split + [(set (match_operand:VM 0 "altivec_register_operand" "") + (match_operand:VM 1 "easy_vector_constant_vsldoi" ""))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && can_create_pseudo_p ()" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5)) + (set (match_dup 0) + (unspec:VM [(match_dup 2) + (match_dup 4) + (match_dup 6)] + UNSPEC_VSLDOI))] +{ + rtx op1 = operands[1]; + int elt = (BYTES_BIG_ENDIAN) ? 
0 : GET_MODE_NUNITS (<MODE>mode) - 1; + HOST_WIDE_INT val = const_vector_elt_as_int (op1, elt); + rtx rtx_val = GEN_INT (val); + int shift = vspltis_shifted (op1); + int nunits = GET_MODE_NUNITS (<MODE>mode); + int i; + + gcc_assert (shift != 0); + operands[2] = gen_reg_rtx (<MODE>mode); + operands[3] = gen_rtx_CONST_VECTOR (<MODE>mode, rtvec_alloc (nunits)); + operands[4] = gen_reg_rtx (<MODE>mode); + + if (shift < 0) + { + operands[5] = CONSTM1_RTX (<MODE>mode); + operands[6] = GEN_INT (-shift); + } + else + { + operands[5] = CONST0_RTX (<MODE>mode); + operands[6] = GEN_INT (shift); + } + + /* Populate the constant vectors. */ + for (i = 0; i < nunits; i++) + XVECEXP (operands[3], 0, i) = rtx_val; +}) + +(define_insn "get_vrsave_internal" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(reg:SI VRSAVE_REGNO)] UNSPEC_GET_VRSAVE))] + "TARGET_ALTIVEC" +{ + if (TARGET_MACHO) + return "mfspr %0,256"; + else + return "mfvrsave %0"; +} + [(set_attr "type" "*")]) + +(define_insn "*set_vrsave_internal" + [(match_parallel 0 "vrsave_operation" + [(set (reg:SI VRSAVE_REGNO) + (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r") + (reg:SI VRSAVE_REGNO)] UNSPECV_SET_VRSAVE))])] + "TARGET_ALTIVEC" +{ + if (TARGET_MACHO) + return "mtspr 256,%1"; + else + return "mtvrsave %1"; +} + [(set_attr "type" "*")]) + +(define_insn "*save_world" + [(match_parallel 0 "save_world_operation" + [(clobber (reg:SI LR_REGNO)) + (use (match_operand:SI 1 "call_operand" "s"))])] + "TARGET_MACHO && (DEFAULT_ABI == ABI_DARWIN) && TARGET_32BIT" + "bl %z1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*restore_world" + [(match_parallel 0 "restore_world_operation" + [(return) + (use (reg:SI LR_REGNO)) + (use (match_operand:SI 1 "call_operand" "s")) + (clobber (match_operand:SI 2 "gpc_reg_operand" "=r"))])] + "TARGET_MACHO && (DEFAULT_ABI == ABI_DARWIN) && TARGET_32BIT" + "b %z1") + +;; The save_vregs and restore_vregs patterns don't use memory_operand +;; because (plus (reg) (const_int)) is not a valid vector address. +;; This way is more compact than describing exactly what happens in +;; the out-of-line functions, ie. loading the constant into r11/r12 +;; then using indexed addressing, and requires less editing of rtl +;; to describe the operation to dwarf2out_frame_debug_expr. 
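The comment above describes the contract of the out-of-line vector save/restore helpers: the caller has already loaded the frame constant into r11 or r12, and the helper then stores or reloads each V4SI register with indexed addressing at consecutive 16-byte offsets. A minimal scalar sketch of that layout, for illustration only (the function name and signature are made up, this is not the libgcc routine itself):

    #include <stdint.h>
    #include <string.h>

    typedef struct { uint32_t w[4]; } v4si_image;   /* 16-byte register image */

    /* Model of one out-of-line save: 'base' plays the role of r11/r12, and
       each vector register image lands at base + offset, base + offset + 16,
       ... -- the (mem (plus base const)) sets the parallels below describe.  */
    static void
    save_vregs_model (char *base, int offset, const v4si_image *vr, int nregs)
    {
      for (int i = 0; i < nregs; i++)
        memcpy (base + offset + 16 * i, &vr[i], sizeof (v4si_image));
    }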
+(define_insn "*save_vregs_<mode>_r11" + [(match_parallel 0 "any_parallel_operand" + [(clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (clobber (reg:P 11)) + (use (reg:P 0)) + (set (mem:V4SI (plus:P (match_operand:P 2 "gpc_reg_operand" "b") + (match_operand:P 3 "short_cint_operand" "I"))) + (match_operand:V4SI 4 "altivec_register_operand" "v"))])] + "TARGET_ALTIVEC" + "bl %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*save_vregs_<mode>_r12" + [(match_parallel 0 "any_parallel_operand" + [(clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (clobber (reg:P 12)) + (use (reg:P 0)) + (set (mem:V4SI (plus:P (match_operand:P 2 "gpc_reg_operand" "b") + (match_operand:P 3 "short_cint_operand" "I"))) + (match_operand:V4SI 4 "altivec_register_operand" "v"))])] + "TARGET_ALTIVEC" + "bl %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*restore_vregs_<mode>_r11" + [(match_parallel 0 "any_parallel_operand" + [(clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (clobber (reg:P 11)) + (use (reg:P 0)) + (set (match_operand:V4SI 2 "altivec_register_operand" "=v") + (mem:V4SI (plus:P (match_operand:P 3 "gpc_reg_operand" "b") + (match_operand:P 4 "short_cint_operand" "I"))))])] + "TARGET_ALTIVEC" + "bl %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*restore_vregs_<mode>_r12" + [(match_parallel 0 "any_parallel_operand" + [(clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (clobber (reg:P 12)) + (use (reg:P 0)) + (set (match_operand:V4SI 2 "altivec_register_operand" "=v") + (mem:V4SI (plus:P (match_operand:P 3 "gpc_reg_operand" "b") + (match_operand:P 4 "short_cint_operand" "I"))))])] + "TARGET_ALTIVEC" + "bl %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +;; Simple binary operations. 
+ +;; add +(define_insn "add<mode>3" + [(set (match_operand:VI2 0 "register_operand" "=v") + (plus:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" + "vaddu<VI_char>m %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "*altivec_addv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (plus:V4SF (match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v")))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" + "vaddfp %0,%1,%2" + [(set_attr "type" "vecfloat")]) + +(define_insn "altivec_vaddcuw" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VADDCUW))] + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" + "vaddcuw %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "altivec_vaddu<VI_char>s" + [(set (match_operand:VI 0 "register_operand" "=v") + (unspec:VI [(match_operand:VI 1 "register_operand" "v") + (match_operand:VI 2 "register_operand" "v")] + UNSPEC_VADDU)) + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + "<VI_unit>" + "vaddu<VI_char>s %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "altivec_vadds<VI_char>s" + [(set (match_operand:VI 0 "register_operand" "=v") + (unspec:VI [(match_operand:VI 1 "register_operand" "v") + (match_operand:VI 2 "register_operand" "v")] + UNSPEC_VADDS)) + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + "vadds<VI_char>s %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +;; sub +(define_insn "sub<mode>3" + [(set (match_operand:VI2 0 "register_operand" "=v") + (minus:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" + "vsubu<VI_char>m %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "*altivec_subv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (minus:V4SF (match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v")))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" + "vsubfp %0,%1,%2" + [(set_attr "type" "vecfloat")]) + +(define_insn "altivec_vsubcuw" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VSUBCUW))] + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" + "vsubcuw %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "altivec_vsubu<VI_char>s" + [(set (match_operand:VI 0 "register_operand" "=v") + (unspec:VI [(match_operand:VI 1 "register_operand" "v") + (match_operand:VI 2 "register_operand" "v")] + UNSPEC_VSUBU)) + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + "vsubu<VI_char>s %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "altivec_vsubs<VI_char>s" + [(set (match_operand:VI 0 "register_operand" "=v") + (unspec:VI [(match_operand:VI 1 "register_operand" "v") + (match_operand:VI 2 "register_operand" "v")] + UNSPEC_VSUBS)) + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + "vsubs<VI_char>s %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +;; +(define_insn "altivec_vavgu<VI_char>" + [(set (match_operand:VI 0 "register_operand" "=v") + (unspec:VI [(match_operand:VI 1 "register_operand" "v") + (match_operand:VI 2 "register_operand" "v")] + UNSPEC_VAVGU))] + "TARGET_ALTIVEC" + 
"vavgu<VI_char> %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "altivec_vavgs<VI_char>" + [(set (match_operand:VI 0 "register_operand" "=v") + (unspec:VI [(match_operand:VI 1 "register_operand" "v") + (match_operand:VI 2 "register_operand" "v")] + UNSPEC_VAVGS))] + "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + "vavgs<VI_char> %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "altivec_vcmpbfp" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v")] + UNSPEC_VCMPBFP))] + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" + "vcmpbfp %0,%1,%2" + [(set_attr "type" "veccmp")]) + +(define_insn "*altivec_eq<mode>" + [(set (match_operand:VI2 0 "altivec_register_operand" "=v") + (eq:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") + (match_operand:VI2 2 "altivec_register_operand" "v")))] + "<VI_unit>" + "vcmpequ<VI_char> %0,%1,%2" + [(set_attr "type" "veccmpfx")]) + +(define_insn "*altivec_gt<mode>" + [(set (match_operand:VI2 0 "altivec_register_operand" "=v") + (gt:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") + (match_operand:VI2 2 "altivec_register_operand" "v")))] + "<VI_unit>" + "vcmpgts<VI_char> %0,%1,%2" + [(set_attr "type" "veccmpfx")]) + +(define_insn "*altivec_gtu<mode>" + [(set (match_operand:VI2 0 "altivec_register_operand" "=v") + (gtu:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") + (match_operand:VI2 2 "altivec_register_operand" "v")))] + "<VI_unit>" + "vcmpgtu<VI_char> %0,%1,%2" + [(set_attr "type" "veccmpfx")]) + +(define_insn "*altivec_eqv4sf" + [(set (match_operand:V4SF 0 "altivec_register_operand" "=v") + (eq:V4SF (match_operand:V4SF 1 "altivec_register_operand" "v") + (match_operand:V4SF 2 "altivec_register_operand" "v")))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" + "vcmpeqfp %0,%1,%2" + [(set_attr "type" "veccmp")]) + +(define_insn "*altivec_gtv4sf" + [(set (match_operand:V4SF 0 "altivec_register_operand" "=v") + (gt:V4SF (match_operand:V4SF 1 "altivec_register_operand" "v") + (match_operand:V4SF 2 "altivec_register_operand" "v")))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" + "vcmpgtfp %0,%1,%2" + [(set_attr "type" "veccmp")]) + +(define_insn "*altivec_gev4sf" + [(set (match_operand:V4SF 0 "altivec_register_operand" "=v") + (ge:V4SF (match_operand:V4SF 1 "altivec_register_operand" "v") + (match_operand:V4SF 2 "altivec_register_operand" "v")))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" + "vcmpgefp %0,%1,%2" + [(set_attr "type" "veccmp")]) + +(define_insn "*altivec_vsel<mode>" + [(set (match_operand:VM 0 "altivec_register_operand" "=v") + (if_then_else:VM + (ne:CC (match_operand:VM 1 "altivec_register_operand" "v") + (match_operand:VM 4 "zero_constant" "")) + (match_operand:VM 2 "altivec_register_operand" "v") + (match_operand:VM 3 "altivec_register_operand" "v")))] + "VECTOR_MEM_ALTIVEC_P (<MODE>mode)" + "vsel %0,%3,%2,%1" + [(set_attr "type" "vecmove")]) + +(define_insn "*altivec_vsel<mode>_uns" + [(set (match_operand:VM 0 "altivec_register_operand" "=v") + (if_then_else:VM + (ne:CCUNS (match_operand:VM 1 "altivec_register_operand" "v") + (match_operand:VM 4 "zero_constant" "")) + (match_operand:VM 2 "altivec_register_operand" "v") + (match_operand:VM 3 "altivec_register_operand" "v")))] + "VECTOR_MEM_ALTIVEC_P (<MODE>mode)" + "vsel %0,%3,%2,%1" + [(set_attr "type" "vecmove")]) + +;; Fused multiply add. 
+ +(define_insn "*altivec_fmav4sf4" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (fma:V4SF (match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v") + (match_operand:V4SF 3 "register_operand" "v")))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" + "vmaddfp %0,%1,%2,%3" + [(set_attr "type" "vecfloat")]) + +;; We do multiply as a fused multiply-add with an add of a -0.0 vector. + +(define_expand "altivec_mulv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "") + (fma:V4SF (match_operand:V4SF 1 "register_operand" "") + (match_operand:V4SF 2 "register_operand" "") + (match_dup 3)))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" +{ + rtx neg0; + + /* Generate [-0.0, -0.0, -0.0, -0.0]. */ + neg0 = gen_reg_rtx (V4SImode); + emit_insn (gen_altivec_vspltisw (neg0, constm1_rtx)); + emit_insn (gen_vashlv4si3 (neg0, neg0, neg0)); + + operands[3] = gen_lowpart (V4SFmode, neg0); +}) + +;; 32-bit integer multiplication +;; A_high = Operand_0 & 0xFFFF0000 >> 16 +;; A_low = Operand_0 & 0xFFFF +;; B_high = Operand_1 & 0xFFFF0000 >> 16 +;; B_low = Operand_1 & 0xFFFF +;; result = A_low * B_low + (A_high * B_low + B_high * A_low) << 16 + +;; (define_insn "mulv4si3" +;; [(set (match_operand:V4SI 0 "register_operand" "=v") +;; (mult:V4SI (match_operand:V4SI 1 "register_operand" "v") +;; (match_operand:V4SI 2 "register_operand" "v")))] +(define_insn "mulv4si3_p8" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (mult:V4SI (match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")))] + "TARGET_P8_VECTOR" + "vmuluwm %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +(define_expand "mulv4si3" + [(use (match_operand:V4SI 0 "register_operand" "")) + (use (match_operand:V4SI 1 "register_operand" "")) + (use (match_operand:V4SI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + rtx zero; + rtx swap; + rtx small_swap; + rtx sixteen; + rtx one; + rtx two; + rtx low_product; + rtx high_product; + + if (TARGET_P8_VECTOR) + { + emit_insn (gen_mulv4si3_p8 (operands[0], operands[1], operands[2])); + DONE; + } + + zero = gen_reg_rtx (V4SImode); + emit_insn (gen_altivec_vspltisw (zero, const0_rtx)); + + sixteen = gen_reg_rtx (V4SImode); + emit_insn (gen_altivec_vspltisw (sixteen, gen_rtx_CONST_INT (V4SImode, -16))); + + swap = gen_reg_rtx (V4SImode); + emit_insn (gen_vrotlv4si3 (swap, operands[2], sixteen)); + + one = gen_reg_rtx (V8HImode); + convert_move (one, operands[1], 0); + + two = gen_reg_rtx (V8HImode); + convert_move (two, operands[2], 0); + + small_swap = gen_reg_rtx (V8HImode); + convert_move (small_swap, swap, 0); + + low_product = gen_reg_rtx (V4SImode); + emit_insn (gen_altivec_vmulouh (low_product, one, two)); + + high_product = gen_reg_rtx (V4SImode); + emit_insn (gen_altivec_vmsumuhm (high_product, one, small_swap, zero)); + + emit_insn (gen_vashlv4si3 (high_product, high_product, sixteen)); + + emit_insn (gen_addv4si3 (operands[0], high_product, low_product)); + + DONE; +}) + +(define_expand "mulv8hi3" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:V8HI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + rtx zero = gen_reg_rtx (V8HImode); + + emit_insn (gen_altivec_vspltish (zero, const0_rtx)); + emit_insn (gen_altivec_vmladduhm(operands[0], operands[1], operands[2], zero)); + + DONE; +}) + +;; Fused multiply subtract +(define_insn "*altivec_vnmsubfp" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (neg:V4SF + (fma:V4SF (match_operand:V4SF 1 
"register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v") + (neg:V4SF + (match_operand:V4SF 3 "register_operand" "v")))))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" + "vnmsubfp %0,%1,%2,%3" + [(set_attr "type" "vecfloat")]) + +(define_insn "altivec_vmsumu<VI_char>m" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:VIshort 1 "register_operand" "v") + (match_operand:VIshort 2 "register_operand" "v") + (match_operand:V4SI 3 "register_operand" "v")] + UNSPEC_VMSUMU))] + "TARGET_ALTIVEC" + "vmsumu<VI_char>m %0,%1,%2,%3" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmsumm<VI_char>m" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:VIshort 1 "register_operand" "v") + (match_operand:VIshort 2 "register_operand" "v") + (match_operand:V4SI 3 "register_operand" "v")] + UNSPEC_VMSUMM))] + "TARGET_ALTIVEC" + "vmsumm<VI_char>m %0,%1,%2,%3" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmsumshm" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v") + (match_operand:V4SI 3 "register_operand" "v")] + UNSPEC_VMSUMSHM))] + "TARGET_ALTIVEC" + "vmsumshm %0,%1,%2,%3" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmsumuhs" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v") + (match_operand:V4SI 3 "register_operand" "v")] + UNSPEC_VMSUMUHS)) + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + "TARGET_ALTIVEC" + "vmsumuhs %0,%1,%2,%3" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmsumshs" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v") + (match_operand:V4SI 3 "register_operand" "v")] + UNSPEC_VMSUMSHS)) + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + "TARGET_ALTIVEC" + "vmsumshs %0,%1,%2,%3" + [(set_attr "type" "veccomplex")]) + +;; max + +(define_insn "umax<mode>3" + [(set (match_operand:VI2 0 "register_operand" "=v") + (umax:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" + "vmaxu<VI_char> %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "smax<mode>3" + [(set (match_operand:VI2 0 "register_operand" "=v") + (smax:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" + "vmaxs<VI_char> %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "*altivec_smaxv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (smax:V4SF (match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v")))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" + "vmaxfp %0,%1,%2" + [(set_attr "type" "veccmp")]) + +(define_insn "umin<mode>3" + [(set (match_operand:VI2 0 "register_operand" "=v") + (umin:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" + "vminu<VI_char> %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "smin<mode>3" + [(set (match_operand:VI2 0 "register_operand" "=v") + (smin:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" + "vmins<VI_char> %0,%1,%2" + [(set_attr "type" "vecsimple")]) + 
+(define_insn "*altivec_sminv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (smin:V4SF (match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v")))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" + "vminfp %0,%1,%2" + [(set_attr "type" "veccmp")]) + +(define_insn "altivec_vmhaddshs" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v") + (match_operand:V8HI 3 "register_operand" "v")] + UNSPEC_VMHADDSHS)) + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + "TARGET_ALTIVEC" + "vmhaddshs %0,%1,%2,%3" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmhraddshs" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v") + (match_operand:V8HI 3 "register_operand" "v")] + UNSPEC_VMHRADDSHS)) + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + "TARGET_ALTIVEC" + "vmhraddshs %0,%1,%2,%3" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmladduhm" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (plus:V8HI (mult:V8HI (match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")) + (match_operand:V8HI 3 "register_operand" "v")))] + "TARGET_ALTIVEC" + "vmladduhm %0,%1,%2,%3" + [(set_attr "type" "veccomplex")]) + +(define_expand "altivec_vmrghb" + [(use (match_operand:V16QI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "register_operand" "")) + (use (match_operand:V16QI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + v = gen_rtvec (16, GEN_INT (8), GEN_INT (24), GEN_INT (9), GEN_INT (25), + GEN_INT (10), GEN_INT (26), GEN_INT (11), GEN_INT (27), + GEN_INT (12), GEN_INT (28), GEN_INT (13), GEN_INT (29), + GEN_INT (14), GEN_INT (30), GEN_INT (15), GEN_INT (31)); + x = gen_rtx_VEC_CONCAT (V32QImode, operands[2], operands[1]); + } + else + { + v = gen_rtvec (16, GEN_INT (0), GEN_INT (16), GEN_INT (1), GEN_INT (17), + GEN_INT (2), GEN_INT (18), GEN_INT (3), GEN_INT (19), + GEN_INT (4), GEN_INT (20), GEN_INT (5), GEN_INT (21), + GEN_INT (6), GEN_INT (22), GEN_INT (7), GEN_INT (23)); + x = gen_rtx_VEC_CONCAT (V32QImode, operands[1], operands[2]); + } + + x = gen_rtx_VEC_SELECT (V16QImode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vmrghb_internal" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (vec_select:V16QI + (vec_concat:V32QI + (match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")) + (parallel [(const_int 0) (const_int 16) + (const_int 1) (const_int 17) + (const_int 2) (const_int 18) + (const_int 3) (const_int 19) + (const_int 4) (const_int 20) + (const_int 5) (const_int 21) + (const_int 6) (const_int 22) + (const_int 7) (const_int 23)])))] + "TARGET_ALTIVEC" +{ + if (BYTES_BIG_ENDIAN) + return "vmrghb %0,%1,%2"; + else + return "vmrglb %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vmrghb_direct" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VMRGH_DIRECT))] + "TARGET_ALTIVEC" + "vmrghb %0,%1,%2" + [(set_attr "type" 
"vecperm")]) + +(define_expand "altivec_vmrghh" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:V8HI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + v = gen_rtvec (8, GEN_INT (4), GEN_INT (12), GEN_INT (5), GEN_INT (13), + GEN_INT (6), GEN_INT (14), GEN_INT (7), GEN_INT (15)); + x = gen_rtx_VEC_CONCAT (V16HImode, operands[2], operands[1]); + } + else + { + v = gen_rtvec (8, GEN_INT (0), GEN_INT (8), GEN_INT (1), GEN_INT (9), + GEN_INT (2), GEN_INT (10), GEN_INT (3), GEN_INT (11)); + x = gen_rtx_VEC_CONCAT (V16HImode, operands[1], operands[2]); + } + + x = gen_rtx_VEC_SELECT (V8HImode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vmrghh_internal" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (vec_select:V8HI + (vec_concat:V16HI + (match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")) + (parallel [(const_int 0) (const_int 8) + (const_int 1) (const_int 9) + (const_int 2) (const_int 10) + (const_int 3) (const_int 11)])))] + "TARGET_ALTIVEC" +{ + if (BYTES_BIG_ENDIAN) + return "vmrghh %0,%1,%2"; + else + return "vmrglh %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vmrghh_direct" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")] + UNSPEC_VMRGH_DIRECT))] + "TARGET_ALTIVEC" + "vmrghh %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_expand "altivec_vmrghw" + [(use (match_operand:V4SI 0 "register_operand" "")) + (use (match_operand:V4SI 1 "register_operand" "")) + (use (match_operand:V4SI 2 "register_operand" ""))] + "VECTOR_MEM_ALTIVEC_P (V4SImode)" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. 
*/ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + v = gen_rtvec (4, GEN_INT (2), GEN_INT (6), GEN_INT (3), GEN_INT (7)); + x = gen_rtx_VEC_CONCAT (V8SImode, operands[2], operands[1]); + } + else + { + v = gen_rtvec (4, GEN_INT (0), GEN_INT (4), GEN_INT (1), GEN_INT (5)); + x = gen_rtx_VEC_CONCAT (V8SImode, operands[1], operands[2]); + } + + x = gen_rtx_VEC_SELECT (V4SImode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vmrghw_internal" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] + "VECTOR_MEM_ALTIVEC_P (V4SImode)" +{ + if (BYTES_BIG_ENDIAN) + return "vmrghw %0,%1,%2"; + else + return "vmrglw %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vmrghw_direct" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VMRGH_DIRECT))] + "TARGET_ALTIVEC" + "vmrghw %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_insn "*altivec_vmrghsf" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] + "VECTOR_MEM_ALTIVEC_P (V4SFmode)" +{ + if (BYTES_BIG_ENDIAN) + return "vmrghw %0,%1,%2"; + else + return "vmrglw %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_expand "altivec_vmrglb" + [(use (match_operand:V16QI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "register_operand" "")) + (use (match_operand:V16QI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. 
*/ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + v = gen_rtvec (16, GEN_INT (0), GEN_INT (16), GEN_INT (1), GEN_INT (17), + GEN_INT (2), GEN_INT (18), GEN_INT (3), GEN_INT (19), + GEN_INT (4), GEN_INT (20), GEN_INT (5), GEN_INT (21), + GEN_INT (6), GEN_INT (22), GEN_INT (7), GEN_INT (23)); + x = gen_rtx_VEC_CONCAT (V32QImode, operands[2], operands[1]); + } + else + { + v = gen_rtvec (16, GEN_INT (8), GEN_INT (24), GEN_INT (9), GEN_INT (25), + GEN_INT (10), GEN_INT (26), GEN_INT (11), GEN_INT (27), + GEN_INT (12), GEN_INT (28), GEN_INT (13), GEN_INT (29), + GEN_INT (14), GEN_INT (30), GEN_INT (15), GEN_INT (31)); + x = gen_rtx_VEC_CONCAT (V32QImode, operands[1], operands[2]); + } + + x = gen_rtx_VEC_SELECT (V16QImode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vmrglb_internal" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (vec_select:V16QI + (vec_concat:V32QI + (match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")) + (parallel [(const_int 8) (const_int 24) + (const_int 9) (const_int 25) + (const_int 10) (const_int 26) + (const_int 11) (const_int 27) + (const_int 12) (const_int 28) + (const_int 13) (const_int 29) + (const_int 14) (const_int 30) + (const_int 15) (const_int 31)])))] + "TARGET_ALTIVEC" +{ + if (BYTES_BIG_ENDIAN) + return "vmrglb %0,%1,%2"; + else + return "vmrghb %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vmrglb_direct" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VMRGL_DIRECT))] + "TARGET_ALTIVEC" + "vmrglb %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_expand "altivec_vmrglh" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:V8HI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. 
*/ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + v = gen_rtvec (8, GEN_INT (0), GEN_INT (8), GEN_INT (1), GEN_INT (9), + GEN_INT (2), GEN_INT (10), GEN_INT (3), GEN_INT (11)); + x = gen_rtx_VEC_CONCAT (V16HImode, operands[2], operands[1]); + } + else + { + v = gen_rtvec (8, GEN_INT (4), GEN_INT (12), GEN_INT (5), GEN_INT (13), + GEN_INT (6), GEN_INT (14), GEN_INT (7), GEN_INT (15)); + x = gen_rtx_VEC_CONCAT (V16HImode, operands[1], operands[2]); + } + + x = gen_rtx_VEC_SELECT (V8HImode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vmrglh_internal" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (vec_select:V8HI + (vec_concat:V16HI + (match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")) + (parallel [(const_int 4) (const_int 12) + (const_int 5) (const_int 13) + (const_int 6) (const_int 14) + (const_int 7) (const_int 15)])))] + "TARGET_ALTIVEC" +{ + if (BYTES_BIG_ENDIAN) + return "vmrglh %0,%1,%2"; + else + return "vmrghh %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vmrglh_direct" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")] + UNSPEC_VMRGL_DIRECT))] + "TARGET_ALTIVEC" + "vmrglh %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_expand "altivec_vmrglw" + [(use (match_operand:V4SI 0 "register_operand" "")) + (use (match_operand:V4SI 1 "register_operand" "")) + (use (match_operand:V4SI 2 "register_operand" ""))] + "VECTOR_MEM_ALTIVEC_P (V4SImode)" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + v = gen_rtvec (4, GEN_INT (0), GEN_INT (4), GEN_INT (1), GEN_INT (5)); + x = gen_rtx_VEC_CONCAT (V8SImode, operands[2], operands[1]); + } + else + { + v = gen_rtvec (4, GEN_INT (2), GEN_INT (6), GEN_INT (3), GEN_INT (7)); + x = gen_rtx_VEC_CONCAT (V8SImode, operands[1], operands[2]); + } + + x = gen_rtx_VEC_SELECT (V4SImode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vmrglw_internal" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] + "VECTOR_MEM_ALTIVEC_P (V4SImode)" +{ + if (BYTES_BIG_ENDIAN) + return "vmrglw %0,%1,%2"; + else + return "vmrghw %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vmrglw_direct" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VMRGL_DIRECT))] + "TARGET_ALTIVEC" + "vmrglw %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_insn "*altivec_vmrglsf" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] + "VECTOR_MEM_ALTIVEC_P (V4SFmode)" +{ + if (BYTES_BIG_ENDIAN) + return "vmrglw %0,%1,%2"; + else + return "vmrghw %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +;; Power8 vector merge even/odd +(define_insn "p8_vmrgew" + [(set (match_operand:V4SI 0 "register_operand" "=v") + 
(vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")) + (parallel [(const_int 0) (const_int 4) + (const_int 2) (const_int 6)])))] + "TARGET_P8_VECTOR" +{ + if (BYTES_BIG_ENDIAN) + return "vmrgew %0,%1,%2"; + else + return "vmrgow %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "p8_vmrgow" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")) + (parallel [(const_int 1) (const_int 5) + (const_int 3) (const_int 7)])))] + "TARGET_P8_VECTOR" +{ + if (BYTES_BIG_ENDIAN) + return "vmrgow %0,%1,%2"; + else + return "vmrgew %0,%2,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "p8_vmrgew_v4sf_direct" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v")] + UNSPEC_VMRGEW_DIRECT))] + "TARGET_P8_VECTOR" + "vmrgew %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_expand "vec_widen_umult_even_v16qi" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "register_operand" "")) + (use (match_operand:V16QI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmuleub (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmuloub (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_smult_even_v16qi" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "register_operand" "")) + (use (match_operand:V16QI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmulesb (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulosb (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_umult_even_v8hi" + [(use (match_operand:V4SI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:V8HI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmuleuh (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulouh (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_smult_even_v8hi" + [(use (match_operand:V4SI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:V8HI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmulesh (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulosh (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_umult_odd_v16qi" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "register_operand" "")) + (use (match_operand:V16QI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmuloub (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmuleub (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_smult_odd_v16qi" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "register_operand" "")) + (use (match_operand:V16QI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmulosb (operands[0], 
operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulesb (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_umult_odd_v8hi" + [(use (match_operand:V4SI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:V8HI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmulouh (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmuleuh (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vec_widen_smult_odd_v8hi" + [(use (match_operand:V4SI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:V8HI 2 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_vmulosh (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulesh (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "altivec_vmuleub" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VMULEUB))] + "TARGET_ALTIVEC" + "vmuleub %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmuloub" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VMULOUB))] + "TARGET_ALTIVEC" + "vmuloub %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmulesb" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VMULESB))] + "TARGET_ALTIVEC" + "vmulesb %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmulosb" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VMULOSB))] + "TARGET_ALTIVEC" + "vmulosb %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmuleuh" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")] + UNSPEC_VMULEUH))] + "TARGET_ALTIVEC" + "vmuleuh %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmulouh" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")] + UNSPEC_VMULOUH))] + "TARGET_ALTIVEC" + "vmulouh %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmulesh" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")] + UNSPEC_VMULESH))] + "TARGET_ALTIVEC" + "vmulesh %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmulosh" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")] + UNSPEC_VMULOSH))] + "TARGET_ALTIVEC" + "vmulosh %0,%1,%2" + [(set_attr "type" "veccomplex")]) + + +;; Vector pack/unpack +(define_insn "altivec_vpkpx" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 
"register_operand" "v")] + UNSPEC_VPKPX))] + "TARGET_ALTIVEC" + "* + { + if (VECTOR_ELT_ORDER_BIG) + return \"vpkpx %0,%1,%2\"; + else + return \"vpkpx %0,%2,%1\"; + }" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vpks<VI_char>ss" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_SIGN_SIGN_SAT))] + "<VI_unit>" + "* + { + if (VECTOR_ELT_ORDER_BIG) + return \"vpks<VI_char>ss %0,%1,%2\"; + else + return \"vpks<VI_char>ss %0,%2,%1\"; + }" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vpks<VI_char>us" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_SIGN_UNS_SAT))] + "<VI_unit>" + "* + { + if (VECTOR_ELT_ORDER_BIG) + return \"vpks<VI_char>us %0,%1,%2\"; + else + return \"vpks<VI_char>us %0,%2,%1\"; + }" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vpku<VI_char>us" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_SAT))] + "<VI_unit>" + "* + { + if (VECTOR_ELT_ORDER_BIG) + return \"vpku<VI_char>us %0,%1,%2\"; + else + return \"vpku<VI_char>us %0,%2,%1\"; + }" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vpku<VI_char>um" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_MOD))] + "<VI_unit>" + "* + { + if (VECTOR_ELT_ORDER_BIG) + return \"vpku<VI_char>um %0,%1,%2\"; + else + return \"vpku<VI_char>um %0,%2,%1\"; + }" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vpku<VI_char>um_direct" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_MOD_DIRECT))] + "<VI_unit>" + "* + { + if (BYTES_BIG_ENDIAN) + return \"vpku<VI_char>um %0,%1,%2\"; + else + return \"vpku<VI_char>um %0,%2,%1\"; + }" + [(set_attr "type" "vecperm")]) + +(define_insn "*altivec_vrl<VI_char>" + [(set (match_operand:VI2 0 "register_operand" "=v") + (rotate:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" + "vrl<VI_char> %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "altivec_vrl<VI_char>mi" + [(set (match_operand:VIlong 0 "register_operand" "=v") + (unspec:VIlong [(match_operand:VIlong 1 "register_operand" "0") + (match_operand:VIlong 2 "register_operand" "v") + (match_operand:VIlong 3 "register_operand" "v")] + UNSPEC_VRLMI))] + "TARGET_P9_VECTOR" + "vrl<VI_char>mi %0,%2,%3" + [(set_attr "type" "veclogical")]) + +(define_insn "altivec_vrl<VI_char>nm" + [(set (match_operand:VIlong 0 "register_operand" "=v") + (unspec:VIlong [(match_operand:VIlong 1 "register_operand" "v") + (match_operand:VIlong 2 "register_operand" "v")] + UNSPEC_VRLNM))] + "TARGET_P9_VECTOR" + "vrl<VI_char>nm %0,%1,%2" + [(set_attr "type" "veclogical")]) + +(define_insn "altivec_vsl" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VSLV4SI))] + "TARGET_ALTIVEC" + "vsl %0,%1,%2" + [(set_attr "type" 
"vecperm")]) + +(define_insn "altivec_vslo" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VSLO))] + "TARGET_ALTIVEC" + "vslo %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_insn "vslv" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VSLV))] + "TARGET_P9_VECTOR" + "vslv %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "vsrv" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VSRV))] + "TARGET_P9_VECTOR" + "vsrv %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "*altivec_vsl<VI_char>" + [(set (match_operand:VI2 0 "register_operand" "=v") + (ashift:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" + "vsl<VI_char> %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "*altivec_vsr<VI_char>" + [(set (match_operand:VI2 0 "register_operand" "=v") + (lshiftrt:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" + "vsr<VI_char> %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "*altivec_vsra<VI_char>" + [(set (match_operand:VI2 0 "register_operand" "=v") + (ashiftrt:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" + "vsra<VI_char> %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "altivec_vsr" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VSR))] + "TARGET_ALTIVEC" + "vsr %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vsro" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VSRO))] + "TARGET_ALTIVEC" + "vsro %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vsum4ubs" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VSUM4UBS)) + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + "TARGET_ALTIVEC" + "vsum4ubs %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vsum4s<VI_char>s" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:VIshort 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VSUM4S)) + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + "TARGET_ALTIVEC" + "vsum4s<VI_char>s %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +;; FIXME: For the following two patterns, the scratch should only be +;; allocated for !VECTOR_ELT_ORDER_BIG, and the instructions should +;; be emitted separately. 
+(define_insn "altivec_vsum2sws" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VSUM2SWS)) + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR)) + (clobber (match_scratch:V4SI 3 "=v"))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + return "vsum2sws %0,%1,%2"; + else + return "vsldoi %3,%2,%2,12\n\tvsum2sws %3,%1,%3\n\tvsldoi %0,%3,%3,4"; +} + [(set_attr "type" "veccomplex") + (set (attr "length") + (if_then_else + (match_test "VECTOR_ELT_ORDER_BIG") + (const_string "4") + (const_string "12")))]) + +(define_insn "altivec_vsumsws" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VSUMSWS)) + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR)) + (clobber (match_scratch:V4SI 3 "=v"))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + return "vsumsws %0,%1,%2"; + else + return "vspltw %3,%2,0\n\tvsumsws %3,%1,%3\n\tvsldoi %0,%3,%3,12"; +} + [(set_attr "type" "veccomplex") + (set (attr "length") + (if_then_else + (match_test "(VECTOR_ELT_ORDER_BIG)") + (const_string "4") + (const_string "12")))]) + +(define_insn "altivec_vsumsws_direct" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VSUMSWS_DIRECT)) + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + "TARGET_ALTIVEC" + "vsumsws %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +(define_expand "altivec_vspltb" + [(use (match_operand:V16QI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "register_operand" "")) + (use (match_operand:QI 2 "u5bit_cint_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. We have to reflect + the actual selected index for the splat in the RTL. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + operands[2] = GEN_INT (15 - INTVAL (operands[2])); + + v = gen_rtvec (1, operands[2]); + x = gen_rtx_VEC_SELECT (QImode, operands[1], gen_rtx_PARALLEL (VOIDmode, v)); + x = gen_rtx_VEC_DUPLICATE (V16QImode, x); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vspltb_internal" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (vec_duplicate:V16QI + (vec_select:QI (match_operand:V16QI 1 "register_operand" "v") + (parallel + [(match_operand:QI 2 "u5bit_cint_operand" "")]))))] + "TARGET_ALTIVEC" +{ + /* For true LE, this adjusts the selected index. For LE with + -maltivec=be, this reverses what was done in the define_expand + because the instruction already has big-endian bias. 
*/ + if (!BYTES_BIG_ENDIAN) + operands[2] = GEN_INT (15 - INTVAL (operands[2])); + + return "vspltb %0,%1,%2"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vspltb_direct" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:QI 2 "u5bit_cint_operand" "i")] + UNSPEC_VSPLT_DIRECT))] + "TARGET_ALTIVEC" + "vspltb %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_expand "altivec_vsplth" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:QI 2 "u5bit_cint_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. We have to reflect + the actual selected index for the splat in the RTL. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + operands[2] = GEN_INT (7 - INTVAL (operands[2])); + + v = gen_rtvec (1, operands[2]); + x = gen_rtx_VEC_SELECT (HImode, operands[1], gen_rtx_PARALLEL (VOIDmode, v)); + x = gen_rtx_VEC_DUPLICATE (V8HImode, x); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vsplth_internal" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (vec_duplicate:V8HI + (vec_select:HI (match_operand:V8HI 1 "register_operand" "v") + (parallel + [(match_operand:QI 2 "u5bit_cint_operand" "")]))))] + "TARGET_ALTIVEC" +{ + /* For true LE, this adjusts the selected index. For LE with + -maltivec=be, this reverses what was done in the define_expand + because the instruction already has big-endian bias. */ + if (!BYTES_BIG_ENDIAN) + operands[2] = GEN_INT (7 - INTVAL (operands[2])); + + return "vsplth %0,%1,%2"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vsplth_direct" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:QI 2 "u5bit_cint_operand" "i")] + UNSPEC_VSPLT_DIRECT))] + "TARGET_ALTIVEC" + "vsplth %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_expand "altivec_vspltw" + [(use (match_operand:V4SI 0 "register_operand" "")) + (use (match_operand:V4SI 1 "register_operand" "")) + (use (match_operand:QI 2 "u5bit_cint_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. We have to reflect + the actual selected index for the splat in the RTL. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + operands[2] = GEN_INT (3 - INTVAL (operands[2])); + + v = gen_rtvec (1, operands[2]); + x = gen_rtx_VEC_SELECT (SImode, operands[1], gen_rtx_PARALLEL (VOIDmode, v)); + x = gen_rtx_VEC_DUPLICATE (V4SImode, x); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vspltw_internal" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_duplicate:V4SI + (vec_select:SI (match_operand:V4SI 1 "register_operand" "v") + (parallel + [(match_operand:QI 2 "u5bit_cint_operand" "i")]))))] + "TARGET_ALTIVEC" +{ + /* For true LE, this adjusts the selected index. For LE with + -maltivec=be, this reverses what was done in the define_expand + because the instruction already has big-endian bias. 
*/ + if (!BYTES_BIG_ENDIAN) + operands[2] = GEN_INT (3 - INTVAL (operands[2])); + + return "vspltw %0,%1,%2"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vspltw_direct" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:QI 2 "u5bit_cint_operand" "i")] + UNSPEC_VSPLT_DIRECT))] + "TARGET_ALTIVEC" + "vspltw %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_expand "altivec_vspltsf" + [(use (match_operand:V4SF 0 "register_operand" "")) + (use (match_operand:V4SF 1 "register_operand" "")) + (use (match_operand:QI 2 "u5bit_cint_operand" ""))] + "TARGET_ALTIVEC" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. We have to reflect + the actual selected index for the splat in the RTL. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + operands[2] = GEN_INT (3 - INTVAL (operands[2])); + + v = gen_rtvec (1, operands[2]); + x = gen_rtx_VEC_SELECT (SFmode, operands[1], gen_rtx_PARALLEL (VOIDmode, v)); + x = gen_rtx_VEC_DUPLICATE (V4SFmode, x); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +(define_insn "*altivec_vspltsf_internal" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (vec_duplicate:V4SF + (vec_select:SF (match_operand:V4SF 1 "register_operand" "v") + (parallel + [(match_operand:QI 2 "u5bit_cint_operand" "i")]))))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" +{ + /* For true LE, this adjusts the selected index. For LE with + -maltivec=be, this reverses what was done in the define_expand + because the instruction already has big-endian bias. */ + if (!BYTES_BIG_ENDIAN) + operands[2] = GEN_INT (3 - INTVAL (operands[2])); + + return "vspltw %0,%1,%2"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vspltis<VI_char>" + [(set (match_operand:VI 0 "register_operand" "=v") + (vec_duplicate:VI + (match_operand:QI 1 "s5bit_cint_operand" "i")))] + "TARGET_ALTIVEC" + "vspltis<VI_char> %0,%1" + [(set_attr "type" "vecperm")]) + +(define_insn "*altivec_vrfiz" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (fix:V4SF (match_operand:V4SF 1 "register_operand" "v")))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" + "vrfiz %0,%1" + [(set_attr "type" "vecfloat")]) + +(define_expand "altivec_vperm_<mode>" + [(set (match_operand:VM 0 "register_operand" "") + (unspec:VM [(match_operand:VM 1 "register_operand" "") + (match_operand:VM 2 "register_operand" "") + (match_operand:V16QI 3 "register_operand" "")] + UNSPEC_VPERM))] + "TARGET_ALTIVEC" +{ + if (!VECTOR_ELT_ORDER_BIG) + { + altivec_expand_vec_perm_le (operands); + DONE; + } +}) + +;; Slightly prefer vperm, since the target does not overlap the source +(define_insn "*altivec_vperm_<mode>_internal" + [(set (match_operand:VM 0 "register_operand" "=v,?wo") + (unspec:VM [(match_operand:VM 1 "register_operand" "v,wo") + (match_operand:VM 2 "register_operand" "v,0") + (match_operand:V16QI 3 "register_operand" "v,wo")] + UNSPEC_VPERM))] + "TARGET_ALTIVEC" + "@ + vperm %0,%1,%2,%3 + xxperm %x0,%x1,%x3" + [(set_attr "type" "vecperm") + (set_attr "length" "4")]) + +(define_insn "altivec_vperm_v8hiv16qi" + [(set (match_operand:V16QI 0 "register_operand" "=v,?wo") + (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v,wo") + (match_operand:V8HI 2 "register_operand" "v,0") + (match_operand:V16QI 3 "register_operand" "v,wo")] + UNSPEC_VPERM))] + "TARGET_ALTIVEC" + "@ + vperm %0,%1,%2,%3 + xxperm %x0,%x1,%x3" + [(set_attr "type" "vecperm") + (set_attr "length" "4")]) + +(define_expand "altivec_vperm_<mode>_uns" + [(set 
(match_operand:VM 0 "register_operand" "") + (unspec:VM [(match_operand:VM 1 "register_operand" "") + (match_operand:VM 2 "register_operand" "") + (match_operand:V16QI 3 "register_operand" "")] + UNSPEC_VPERM_UNS))] + "TARGET_ALTIVEC" +{ + if (!VECTOR_ELT_ORDER_BIG) + { + altivec_expand_vec_perm_le (operands); + DONE; + } +}) + +(define_insn "*altivec_vperm_<mode>_uns_internal" + [(set (match_operand:VM 0 "register_operand" "=v,?wo") + (unspec:VM [(match_operand:VM 1 "register_operand" "v,wo") + (match_operand:VM 2 "register_operand" "v,0") + (match_operand:V16QI 3 "register_operand" "v,wo")] + UNSPEC_VPERM_UNS))] + "TARGET_ALTIVEC" + "@ + vperm %0,%1,%2,%3 + xxperm %x0,%x1,%x3" + [(set_attr "type" "vecperm") + (set_attr "length" "4")]) + +(define_expand "vec_permv16qi" + [(set (match_operand:V16QI 0 "register_operand" "") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "") + (match_operand:V16QI 2 "register_operand" "") + (match_operand:V16QI 3 "register_operand" "")] + UNSPEC_VPERM))] + "TARGET_ALTIVEC" +{ + if (!BYTES_BIG_ENDIAN) { + altivec_expand_vec_perm_le (operands); + DONE; + } +}) + +(define_expand "vec_perm_constv16qi" + [(match_operand:V16QI 0 "register_operand" "") + (match_operand:V16QI 1 "register_operand" "") + (match_operand:V16QI 2 "register_operand" "") + (match_operand:V16QI 3 "" "")] + "TARGET_ALTIVEC" +{ + if (altivec_expand_vec_perm_const (operands)) + DONE; + else + FAIL; +}) + +(define_insn "*altivec_vpermr_<mode>_internal" + [(set (match_operand:VM 0 "register_operand" "=v,?wo") + (unspec:VM [(match_operand:VM 1 "register_operand" "v,wo") + (match_operand:VM 2 "register_operand" "v,0") + (match_operand:V16QI 3 "register_operand" "v,wo")] + UNSPEC_VPERMR))] + "TARGET_P9_VECTOR" + "@ + vpermr %0,%2,%1,%3 + xxpermr %x0,%x1,%x3" + [(set_attr "type" "vecperm") + (set_attr "length" "4")]) + +(define_insn "altivec_vrfip" ; ceil + [(set (match_operand:V4SF 0 "register_operand" "=v") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] + UNSPEC_FRIP))] + "TARGET_ALTIVEC" + "vrfip %0,%1" + [(set_attr "type" "vecfloat")]) + +(define_insn "altivec_vrfin" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] + UNSPEC_VRFIN))] + "TARGET_ALTIVEC" + "vrfin %0,%1" + [(set_attr "type" "vecfloat")]) + +(define_insn "*altivec_vrfim" ; floor + [(set (match_operand:V4SF 0 "register_operand" "=v") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] + UNSPEC_FRIM))] + "TARGET_ALTIVEC" + "vrfim %0,%1" + [(set_attr "type" "vecfloat")]) + +(define_insn "altivec_vcfux" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (unspec:V4SF [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:QI 2 "immediate_operand" "i")] + UNSPEC_VCFUX))] + "TARGET_ALTIVEC" + "vcfux %0,%1,%2" + [(set_attr "type" "vecfloat")]) + +(define_insn "altivec_vcfsx" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (unspec:V4SF [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:QI 2 "immediate_operand" "i")] + UNSPEC_VCFSX))] + "TARGET_ALTIVEC" + "vcfsx %0,%1,%2" + [(set_attr "type" "vecfloat")]) + +(define_insn "altivec_vctuxs" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "v") + (match_operand:QI 2 "immediate_operand" "i")] + UNSPEC_VCTUXS)) + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + "TARGET_ALTIVEC" + "vctuxs %0,%1,%2" + [(set_attr "type" "vecfloat")]) + +(define_insn "altivec_vctsxs" + [(set 
(match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "v") + (match_operand:QI 2 "immediate_operand" "i")] + UNSPEC_VCTSXS)) + (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] + "TARGET_ALTIVEC" + "vctsxs %0,%1,%2" + [(set_attr "type" "vecfloat")]) + +(define_insn "altivec_vlogefp" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] + UNSPEC_VLOGEFP))] + "TARGET_ALTIVEC" + "vlogefp %0,%1" + [(set_attr "type" "vecfloat")]) + +(define_insn "altivec_vexptefp" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] + UNSPEC_VEXPTEFP))] + "TARGET_ALTIVEC" + "vexptefp %0,%1" + [(set_attr "type" "vecfloat")]) + +(define_insn "*altivec_vrsqrtefp" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] + UNSPEC_RSQRT))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" + "vrsqrtefp %0,%1" + [(set_attr "type" "vecfloat")]) + +(define_insn "altivec_vrefp" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] + UNSPEC_FRES))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" + "vrefp %0,%1" + [(set_attr "type" "vecfloat")]) + +(define_expand "altivec_copysign_v4sf3" + [(use (match_operand:V4SF 0 "register_operand" "")) + (use (match_operand:V4SF 1 "register_operand" "")) + (use (match_operand:V4SF 2 "register_operand" ""))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" + " +{ + rtx mask = gen_reg_rtx (V4SImode); + rtvec v = rtvec_alloc (4); + unsigned HOST_WIDE_INT mask_val = ((unsigned HOST_WIDE_INT)1) << 31; + + RTVEC_ELT (v, 0) = GEN_INT (mask_val); + RTVEC_ELT (v, 1) = GEN_INT (mask_val); + RTVEC_ELT (v, 2) = GEN_INT (mask_val); + RTVEC_ELT (v, 3) = GEN_INT (mask_val); + + emit_insn (gen_vec_initv4si (mask, gen_rtx_PARALLEL (V4SImode, v))); + emit_insn (gen_vector_select_v4sf (operands[0], operands[1], operands[2], + gen_lowpart (V4SFmode, mask))); + DONE; +}") + +(define_insn "altivec_vsldoi_<mode>" + [(set (match_operand:VM 0 "register_operand" "=v") + (unspec:VM [(match_operand:VM 1 "register_operand" "v") + (match_operand:VM 2 "register_operand" "v") + (match_operand:QI 3 "immediate_operand" "i")] + UNSPEC_VSLDOI))] + "TARGET_ALTIVEC" + "vsldoi %0,%1,%2,%3" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vupkhs<VU_char>" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_HI_SIGN))] + "<VI_unit>" +{ + if (VECTOR_ELT_ORDER_BIG) + return "vupkhs<VU_char> %0,%1"; + else + return "vupkls<VU_char> %0,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "*altivec_vupkhs<VU_char>_direct" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_HI_SIGN_DIRECT))] + "<VI_unit>" + "vupkhs<VU_char> %0,%1" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vupkls<VU_char>" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_LO_SIGN))] + "<VI_unit>" +{ + if (VECTOR_ELT_ORDER_BIG) + return "vupkls<VU_char> %0,%1"; + else + return "vupkhs<VU_char> %0,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "*altivec_vupkls<VU_char>_direct" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] 
+ UNSPEC_VUNPACK_LO_SIGN_DIRECT))] + "<VI_unit>" + "vupkls<VU_char> %0,%1" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vupkhpx" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] + UNSPEC_VUPKHPX))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + return "vupkhpx %0,%1"; + else + return "vupklpx %0,%1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vupklpx" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] + UNSPEC_VUPKLPX))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + return "vupklpx %0,%1"; + else + return "vupkhpx %0,%1"; +} + [(set_attr "type" "vecperm")]) + +;; Compare vectors producing a vector result and a predicate, setting CR6 to +;; indicate a combined status +(define_insn "*altivec_vcmpequ<VI_char>_p" + [(set (reg:CC CR6_REGNO) + (unspec:CC [(eq:CC (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v"))] + UNSPEC_PREDICATE)) + (set (match_operand:VI2 0 "register_operand" "=v") + (eq:VI2 (match_dup 1) + (match_dup 2)))] + "<VI_unit>" + "vcmpequ<VI_char>. %0,%1,%2" + [(set_attr "type" "veccmpfx")]) + +(define_insn "*altivec_vcmpgts<VI_char>_p" + [(set (reg:CC CR6_REGNO) + (unspec:CC [(gt:CC (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v"))] + UNSPEC_PREDICATE)) + (set (match_operand:VI2 0 "register_operand" "=v") + (gt:VI2 (match_dup 1) + (match_dup 2)))] + "<VI_unit>" + "vcmpgts<VI_char>. %0,%1,%2" + [(set_attr "type" "veccmpfx")]) + +(define_insn "*altivec_vcmpgtu<VI_char>_p" + [(set (reg:CC CR6_REGNO) + (unspec:CC [(gtu:CC (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v"))] + UNSPEC_PREDICATE)) + (set (match_operand:VI2 0 "register_operand" "=v") + (gtu:VI2 (match_dup 1) + (match_dup 2)))] + "<VI_unit>" + "vcmpgtu<VI_char>. %0,%1,%2" + [(set_attr "type" "veccmpfx")]) + +(define_insn "*altivec_vcmpeqfp_p" + [(set (reg:CC CR6_REGNO) + (unspec:CC [(eq:CC (match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v"))] + UNSPEC_PREDICATE)) + (set (match_operand:V4SF 0 "register_operand" "=v") + (eq:V4SF (match_dup 1) + (match_dup 2)))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" + "vcmpeqfp. %0,%1,%2" + [(set_attr "type" "veccmp")]) + +(define_insn "*altivec_vcmpgtfp_p" + [(set (reg:CC CR6_REGNO) + (unspec:CC [(gt:CC (match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v"))] + UNSPEC_PREDICATE)) + (set (match_operand:V4SF 0 "register_operand" "=v") + (gt:V4SF (match_dup 1) + (match_dup 2)))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" + "vcmpgtfp. %0,%1,%2" + [(set_attr "type" "veccmp")]) + +(define_insn "*altivec_vcmpgefp_p" + [(set (reg:CC CR6_REGNO) + (unspec:CC [(ge:CC (match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v"))] + UNSPEC_PREDICATE)) + (set (match_operand:V4SF 0 "register_operand" "=v") + (ge:V4SF (match_dup 1) + (match_dup 2)))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" + "vcmpgefp. %0,%1,%2" + [(set_attr "type" "veccmp")]) + +(define_insn "altivec_vcmpbfp_p" + [(set (reg:CC CR6_REGNO) + (unspec:CC [(match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v")] + UNSPEC_VCMPBFP)) + (set (match_operand:V4SF 0 "register_operand" "=v") + (unspec:V4SF [(match_dup 1) + (match_dup 2)] + UNSPEC_VCMPBFP))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)" + "vcmpbfp. 
%0,%1,%2" + [(set_attr "type" "veccmp")]) + +(define_insn "altivec_mtvscr" + [(set (reg:SI VSCR_REGNO) + (unspec_volatile:SI + [(match_operand:V4SI 0 "register_operand" "v")] UNSPECV_MTVSCR))] + "TARGET_ALTIVEC" + "mtvscr %0" + [(set_attr "type" "vecsimple")]) + +(define_insn "altivec_mfvscr" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec_volatile:V8HI [(reg:SI VSCR_REGNO)] UNSPECV_MFVSCR))] + "TARGET_ALTIVEC" + "mfvscr %0" + [(set_attr "type" "vecsimple")]) + +(define_insn "altivec_dssall" + [(unspec_volatile [(const_int 0)] UNSPECV_DSSALL)] + "TARGET_ALTIVEC" + "dssall" + [(set_attr "type" "vecsimple")]) + +(define_insn "altivec_dss" + [(unspec_volatile [(match_operand:QI 0 "immediate_operand" "i")] + UNSPECV_DSS)] + "TARGET_ALTIVEC" + "dss %0" + [(set_attr "type" "vecsimple")]) + +(define_insn "altivec_dst" + [(unspec [(match_operand 0 "register_operand" "b") + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "immediate_operand" "i")] UNSPEC_DST)] + "TARGET_ALTIVEC && GET_MODE (operands[0]) == Pmode" + "dst %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "altivec_dstt" + [(unspec [(match_operand 0 "register_operand" "b") + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "immediate_operand" "i")] UNSPEC_DSTT)] + "TARGET_ALTIVEC && GET_MODE (operands[0]) == Pmode" + "dstt %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "altivec_dstst" + [(unspec [(match_operand 0 "register_operand" "b") + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "immediate_operand" "i")] UNSPEC_DSTST)] + "TARGET_ALTIVEC && GET_MODE (operands[0]) == Pmode" + "dstst %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "altivec_dststt" + [(unspec [(match_operand 0 "register_operand" "b") + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "immediate_operand" "i")] UNSPEC_DSTSTT)] + "TARGET_ALTIVEC && GET_MODE (operands[0]) == Pmode" + "dststt %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_expand "altivec_lvsl" + [(use (match_operand:V16QI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "memory_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_lvsl_direct (operands[0], operands[1])); + else + { + int i; + rtx mask, perm[16], constv, vperm; + mask = gen_reg_rtx (V16QImode); + emit_insn (gen_altivec_lvsl_direct (mask, operands[1])); + for (i = 0; i < 16; ++i) + perm[i] = GEN_INT (i); + constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)); + constv = force_reg (V16QImode, constv); + vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv), + UNSPEC_VPERM); + emit_insn (gen_rtx_SET (operands[0], vperm)); + } + DONE; +}) + +(define_insn "altivec_lvsl_direct" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")] + UNSPEC_LVSL))] + "TARGET_ALTIVEC" + "lvsl %0,%y1" + [(set_attr "type" "vecload")]) + +(define_expand "altivec_lvsr" + [(use (match_operand:V16QI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "memory_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_lvsr_direct (operands[0], operands[1])); + else + { + int i; + rtx mask, perm[16], constv, vperm; + mask = gen_reg_rtx (V16QImode); + emit_insn (gen_altivec_lvsr_direct (mask, operands[1])); + for (i = 0; i < 16; ++i) + perm[i] = GEN_INT (i); + constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)); + constv = force_reg (V16QImode, constv); + vperm 
= gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv), + UNSPEC_VPERM); + emit_insn (gen_rtx_SET (operands[0], vperm)); + } + DONE; +}) + +(define_insn "altivec_lvsr_direct" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")] + UNSPEC_LVSR))] + "TARGET_ALTIVEC" + "lvsr %0,%y1" + [(set_attr "type" "vecload")]) + +(define_expand "build_vector_mask_for_load" + [(set (match_operand:V16QI 0 "register_operand" "") + (unspec:V16QI [(match_operand 1 "memory_operand" "")] UNSPEC_LVSR))] + "TARGET_ALTIVEC" + " +{ + rtx addr; + rtx temp; + + gcc_assert (GET_CODE (operands[1]) == MEM); + + addr = XEXP (operands[1], 0); + temp = gen_reg_rtx (GET_MODE (addr)); + emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (GET_MODE (addr), addr))); + emit_insn (gen_altivec_lvsr (operands[0], + replace_equiv_address (operands[1], temp))); + DONE; +}") + +;; Parallel some of the LVE* and STV*'s with unspecs because some have +;; identical rtl but different instructions-- and gcc gets confused. + +(define_expand "altivec_lve<VI_char>x" + [(parallel + [(set (match_operand:VI 0 "register_operand" "=v") + (match_operand:VI 1 "memory_operand" "Z")) + (unspec [(const_int 0)] UNSPEC_LVE)])] + "TARGET_ALTIVEC" +{ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + altivec_expand_lvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_LVE); + DONE; + } +}) + +(define_insn "*altivec_lve<VI_char>x_internal" + [(parallel + [(set (match_operand:VI 0 "register_operand" "=v") + (match_operand:VI 1 "memory_operand" "Z")) + (unspec [(const_int 0)] UNSPEC_LVE)])] + "TARGET_ALTIVEC" + "lve<VI_char>x %0,%y1" + [(set_attr "type" "vecload")]) + +(define_insn "*altivec_lvesfx" + [(parallel + [(set (match_operand:V4SF 0 "register_operand" "=v") + (match_operand:V4SF 1 "memory_operand" "Z")) + (unspec [(const_int 0)] UNSPEC_LVE)])] + "TARGET_ALTIVEC" + "lvewx %0,%y1" + [(set_attr "type" "vecload")]) + +(define_expand "altivec_lvxl_<mode>" + [(parallel + [(set (match_operand:VM2 0 "register_operand" "=v") + (match_operand:VM2 1 "memory_operand" "Z")) + (unspec [(const_int 0)] UNSPEC_SET_VSCR)])] + "TARGET_ALTIVEC" +{ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + altivec_expand_lvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_SET_VSCR); + DONE; + } +}) + +(define_insn "*altivec_lvxl_<mode>_internal" + [(parallel + [(set (match_operand:VM2 0 "register_operand" "=v") + (match_operand:VM2 1 "memory_operand" "Z")) + (unspec [(const_int 0)] UNSPEC_SET_VSCR)])] + "TARGET_ALTIVEC" + "lvxl %0,%y1" + [(set_attr "type" "vecload")]) + +; This version of lvx is used only in cases where we need to force an lvx +; over any other load, and we don't care about losing CSE opportunities. +; Its primary use is for prologue register saves. +(define_insn "altivec_lvx_<mode>_internal" + [(parallel + [(set (match_operand:VM2 0 "register_operand" "=v") + (match_operand:VM2 1 "memory_operand" "Z")) + (unspec [(const_int 0)] UNSPEC_LVX)])] + "TARGET_ALTIVEC" + "lvx %0,%y1" + [(set_attr "type" "vecload")]) + +; The next two patterns embody what lvx should usually look like. 
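Both of the patterns that follow describe the address arithmetic the lvx instruction itself performs: the base-plus-index sum has its low four bits discarded, which the RTL encodes as an AND with -16. A minimal C sketch of that effective-address computation, purely illustrative and with invented names rather than anything taken from this port:

#include <stdint.h>

/* lvx/stvx access the 16-byte-aligned quadword containing the computed
   EA; the (and ... (const_int -16)) in the patterns models exactly this
   truncation of the low four bits.  */
static inline uint64_t
lvx_effective_address (uint64_t rb, uint64_t rx)
{
  return (rb + rx) & ~(uint64_t) 15;
}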
+(define_insn "altivec_lvx_<mode>_2op" + [(set (match_operand:VM2 0 "register_operand" "=v") + (mem:VM2 (and:DI (plus:DI (match_operand:DI 1 "register_operand" "b") + (match_operand:DI 2 "register_operand" "r")) + (const_int -16))))] + "TARGET_ALTIVEC && TARGET_64BIT" + "lvx %0,%1,%2" + [(set_attr "type" "vecload")]) + +(define_insn "altivec_lvx_<mode>_1op" + [(set (match_operand:VM2 0 "register_operand" "=v") + (mem:VM2 (and:DI (match_operand:DI 1 "register_operand" "r") + (const_int -16))))] + "TARGET_ALTIVEC && TARGET_64BIT" + "lvx %0,0,%1" + [(set_attr "type" "vecload")]) + +; 32-bit versions of the above. +(define_insn "altivec_lvx_<mode>_2op_si" + [(set (match_operand:VM2 0 "register_operand" "=v") + (mem:VM2 (and:SI (plus:SI (match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "register_operand" "r")) + (const_int -16))))] + "TARGET_ALTIVEC && TARGET_32BIT" + "lvx %0,%1,%2" + [(set_attr "type" "vecload")]) + +(define_insn "altivec_lvx_<mode>_1op_si" + [(set (match_operand:VM2 0 "register_operand" "=v") + (mem:VM2 (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int -16))))] + "TARGET_ALTIVEC && TARGET_32BIT" + "lvx %0,0,%1" + [(set_attr "type" "vecload")]) + +; This version of stvx is used only in cases where we need to force an stvx +; over any other store, and we don't care about losing CSE opportunities. +; Its primary use is for epilogue register restores. +(define_insn "altivec_stvx_<mode>_internal" + [(parallel + [(set (match_operand:VM2 0 "memory_operand" "=Z") + (match_operand:VM2 1 "register_operand" "v")) + (unspec [(const_int 0)] UNSPEC_STVX)])] + "TARGET_ALTIVEC" + "stvx %1,%y0" + [(set_attr "type" "vecstore")]) + +; The next two patterns embody what stvx should usually look like. +(define_insn "altivec_stvx_<mode>_2op" + [(set (mem:VM2 (and:DI (plus:DI (match_operand:DI 1 "register_operand" "b") + (match_operand:DI 2 "register_operand" "r")) + (const_int -16))) + (match_operand:VM2 0 "register_operand" "v"))] + "TARGET_ALTIVEC && TARGET_64BIT" + "stvx %0,%1,%2" + [(set_attr "type" "vecstore")]) + +(define_insn "altivec_stvx_<mode>_1op" + [(set (mem:VM2 (and:DI (match_operand:DI 1 "register_operand" "r") + (const_int -16))) + (match_operand:VM2 0 "register_operand" "v"))] + "TARGET_ALTIVEC && TARGET_64BIT" + "stvx %0,0,%1" + [(set_attr "type" "vecstore")]) + +; 32-bit versions of the above. 
+(define_insn "altivec_stvx_<mode>_2op_si" + [(set (mem:VM2 (and:SI (plus:SI (match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "register_operand" "r")) + (const_int -16))) + (match_operand:VM2 0 "register_operand" "v"))] + "TARGET_ALTIVEC && TARGET_32BIT" + "stvx %0,%1,%2" + [(set_attr "type" "vecstore")]) + +(define_insn "altivec_stvx_<mode>_1op_si" + [(set (mem:VM2 (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int -16))) + (match_operand:VM2 0 "register_operand" "v"))] + "TARGET_ALTIVEC && TARGET_32BIT" + "stvx %0,0,%1" + [(set_attr "type" "vecstore")]) + +(define_expand "altivec_stvxl_<mode>" + [(parallel + [(set (match_operand:VM2 0 "memory_operand" "=Z") + (match_operand:VM2 1 "register_operand" "v")) + (unspec [(const_int 0)] UNSPEC_STVXL)])] + "TARGET_ALTIVEC" +{ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + altivec_expand_stvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_STVXL); + DONE; + } +}) + +(define_insn "*altivec_stvxl_<mode>_internal" + [(parallel + [(set (match_operand:VM2 0 "memory_operand" "=Z") + (match_operand:VM2 1 "register_operand" "v")) + (unspec [(const_int 0)] UNSPEC_STVXL)])] + "TARGET_ALTIVEC" + "stvxl %1,%y0" + [(set_attr "type" "vecstore")]) + +(define_expand "altivec_stve<VI_char>x" + [(set (match_operand:<VI_scalar> 0 "memory_operand" "=Z") + (unspec:<VI_scalar> [(match_operand:VI 1 "register_operand" "v")] UNSPEC_STVE))] + "TARGET_ALTIVEC" +{ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + altivec_expand_stvex_be (operands[0], operands[1], <MODE>mode, UNSPEC_STVE); + DONE; + } +}) + +(define_insn "*altivec_stve<VI_char>x_internal" + [(set (match_operand:<VI_scalar> 0 "memory_operand" "=Z") + (unspec:<VI_scalar> [(match_operand:VI 1 "register_operand" "v")] UNSPEC_STVE))] + "TARGET_ALTIVEC" + "stve<VI_char>x %1,%y0" + [(set_attr "type" "vecstore")]) + +(define_insn "*altivec_stvesfx" + [(set (match_operand:SF 0 "memory_operand" "=Z") + (unspec:SF [(match_operand:V4SF 1 "register_operand" "v")] UNSPEC_STVE))] + "TARGET_ALTIVEC" + "stvewx %1,%y0" + [(set_attr "type" "vecstore")]) + +;; Generate +;; xxlxor/vxor SCRATCH0,SCRATCH0,SCRATCH0 +;; vsubu?m SCRATCH2,SCRATCH1,%1 +;; vmaxs? %0,%1,SCRATCH2" +(define_expand "abs<mode>2" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) + (minus:VI2 (match_dup 2) + (match_operand:VI2 1 "register_operand" "v"))) + (set (match_operand:VI2 0 "register_operand" "=v") + (smax:VI2 (match_dup 1) (match_dup 4)))] + "<VI_unit>" +{ + int i, n_elt = GET_MODE_NUNITS (<MODE>mode); + rtvec v = rtvec_alloc (n_elt); + + /* Create an all 0 constant. */ + for (i = 0; i < n_elt; ++i) + RTVEC_ELT (v, i) = const0_rtx; + + operands[2] = gen_reg_rtx (<MODE>mode); + operands[3] = gen_rtx_CONST_VECTOR (<MODE>mode, v); + operands[4] = gen_reg_rtx (<MODE>mode); +}) + +;; Generate +;; vspltisw SCRATCH1,0 +;; vsubu?m SCRATCH2,SCRATCH1,%1 +;; vmins? %0,%1,SCRATCH2" +(define_expand "nabs<mode>2" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) + (minus:VI2 (match_dup 2) + (match_operand:VI2 1 "register_operand" "v"))) + (set (match_operand:VI2 0 "register_operand" "=v") + (smin:VI2 (match_dup 1) (match_dup 4)))] + "<VI_unit>" +{ + int i; + int n_elt = GET_MODE_NUNITS (<MODE>mode); + + rtvec v = rtvec_alloc (n_elt); + + /* Create an all 0 constant. 
*/ + for (i = 0; i < n_elt; ++i) + RTVEC_ELT (v, i) = const0_rtx; + + operands[2] = gen_reg_rtx (<MODE>mode); + operands[3] = gen_rtx_CONST_VECTOR (<MODE>mode, v); + operands[4] = gen_reg_rtx (<MODE>mode); +}) + +;; Generate +;; vspltisw SCRATCH1,-1 +;; vslw SCRATCH2,SCRATCH1,SCRATCH1 +;; vandc %0,%1,SCRATCH2 +(define_expand "altivec_absv4sf2" + [(set (match_dup 2) + (vec_duplicate:V4SI (const_int -1))) + (set (match_dup 3) + (ashift:V4SI (match_dup 2) (match_dup 2))) + (set (match_operand:V4SF 0 "register_operand" "=v") + (and:V4SF (not:V4SF (subreg:V4SF (match_dup 3) 0)) + (match_operand:V4SF 1 "register_operand" "v")))] + "TARGET_ALTIVEC" +{ + operands[2] = gen_reg_rtx (V4SImode); + operands[3] = gen_reg_rtx (V4SImode); +}) + +;; Generate +;; vspltis? SCRATCH0,0 +;; vsubs?s SCRATCH2,SCRATCH1,%1 +;; vmaxs? %0,%1,SCRATCH2" +(define_expand "altivec_abss_<mode>" + [(set (match_dup 2) (vec_duplicate:VI (const_int 0))) + (parallel [(set (match_dup 3) + (unspec:VI [(match_dup 2) + (match_operand:VI 1 "register_operand" "v")] + UNSPEC_VSUBS)) + (set (reg:SI VSCR_REGNO) + (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]) + (set (match_operand:VI 0 "register_operand" "=v") + (smax:VI (match_dup 1) (match_dup 3)))] + "TARGET_ALTIVEC" +{ + operands[2] = gen_reg_rtx (GET_MODE (operands[0])); + operands[3] = gen_reg_rtx (GET_MODE (operands[0])); +}) + +(define_expand "reduc_plus_scal_<mode>" + [(set (match_operand:<VI_scalar> 0 "register_operand" "=v") + (unspec:VIshort [(match_operand:VIshort 1 "register_operand" "v")] + UNSPEC_REDUC_PLUS))] + "TARGET_ALTIVEC" +{ + rtx vzero = gen_reg_rtx (V4SImode); + rtx vtmp1 = gen_reg_rtx (V4SImode); + rtx vtmp2 = gen_reg_rtx (<MODE>mode); + rtx dest = gen_lowpart (V4SImode, vtmp2); + int elt = VECTOR_ELT_ORDER_BIG ? GET_MODE_NUNITS (<MODE>mode) - 1 : 0; + + emit_insn (gen_altivec_vspltisw (vzero, const0_rtx)); + emit_insn (gen_altivec_vsum4s<VI_char>s (vtmp1, operands[1], vzero)); + emit_insn (gen_altivec_vsumsws_direct (dest, vtmp1, vzero)); + rs6000_expand_vector_extract (operands[0], vtmp2, GEN_INT (elt)); + DONE; +}) + +(define_insn "*p9_neg<mode>2" + [(set (match_operand:VNEG 0 "altivec_register_operand" "=v") + (neg:VNEG (match_operand:VNEG 1 "altivec_register_operand" "v")))] + "TARGET_P9_VECTOR" + "vneg<VI_char> %0,%1" + [(set_attr "type" "vecsimple")]) + +(define_expand "neg<mode>2" + [(set (match_operand:VI2 0 "register_operand" "") + (neg:VI2 (match_operand:VI2 1 "register_operand" "")))] + "<VI_unit>" +{ + if (!TARGET_P9_VECTOR || (<MODE>mode != V4SImode && <MODE>mode != V2DImode)) + { + rtx vzero; + + vzero = gen_reg_rtx (GET_MODE (operands[0])); + emit_move_insn (vzero, CONST0_RTX (<MODE>mode)); + emit_insn (gen_sub<mode>3 (operands[0], vzero, operands[1])); + DONE; + } +}) + +(define_expand "udot_prod<mode>" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (plus:V4SI (match_operand:V4SI 3 "register_operand" "v") + (unspec:V4SI [(match_operand:VIshort 1 "register_operand" "v") + (match_operand:VIshort 2 "register_operand" "v")] + UNSPEC_VMSUMU)))] + "TARGET_ALTIVEC" + " +{ + emit_insn (gen_altivec_vmsumu<VI_char>m (operands[0], operands[1], operands[2], operands[3])); + DONE; +}") + +(define_expand "sdot_prodv8hi" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (plus:V4SI (match_operand:V4SI 3 "register_operand" "v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")] + UNSPEC_VMSUMSHM)))] + "TARGET_ALTIVEC" + " +{ + emit_insn (gen_altivec_vmsumshm (operands[0], operands[1], 
operands[2], operands[3])); + DONE; +}") + +(define_expand "widen_usum<mode>3" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (plus:V4SI (match_operand:V4SI 2 "register_operand" "v") + (unspec:V4SI [(match_operand:VIshort 1 "register_operand" "v")] + UNSPEC_VMSUMU)))] + "TARGET_ALTIVEC" + " +{ + rtx vones = gen_reg_rtx (GET_MODE (operands[1])); + + emit_insn (gen_altivec_vspltis<VI_char> (vones, const1_rtx)); + emit_insn (gen_altivec_vmsumu<VI_char>m (operands[0], operands[1], vones, operands[2])); + DONE; +}") + +(define_expand "widen_ssumv16qi3" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (plus:V4SI (match_operand:V4SI 2 "register_operand" "v") + (unspec:V4SI [(match_operand:V16QI 1 "register_operand" "v")] + UNSPEC_VMSUMM)))] + "TARGET_ALTIVEC" + " +{ + rtx vones = gen_reg_rtx (V16QImode); + + emit_insn (gen_altivec_vspltisb (vones, const1_rtx)); + emit_insn (gen_altivec_vmsummbm (operands[0], operands[1], vones, operands[2])); + DONE; +}") + +(define_expand "widen_ssumv8hi3" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (plus:V4SI (match_operand:V4SI 2 "register_operand" "v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] + UNSPEC_VMSUMSHM)))] + "TARGET_ALTIVEC" + " +{ + rtx vones = gen_reg_rtx (V8HImode); + + emit_insn (gen_altivec_vspltish (vones, const1_rtx)); + emit_insn (gen_altivec_vmsumshm (operands[0], operands[1], vones, operands[2])); + DONE; +}") + +(define_expand "vec_unpacks_hi_<VP_small_lc>" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_HI_SIGN_DIRECT))] + "<VI_unit>" + "") + +(define_expand "vec_unpacks_lo_<VP_small_lc>" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_LO_SIGN_DIRECT))] + "<VI_unit>" + "") + +(define_insn "vperm_v8hiv4si" + [(set (match_operand:V4SI 0 "register_operand" "=v,?wo") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v,wo") + (match_operand:V4SI 2 "register_operand" "v,0") + (match_operand:V16QI 3 "register_operand" "v,wo")] + UNSPEC_VPERMSI))] + "TARGET_ALTIVEC" + "@ + vperm %0,%1,%2,%3 + xxperm %x0,%x1,%x3" + [(set_attr "type" "vecperm") + (set_attr "length" "4")]) + +(define_insn "vperm_v16qiv8hi" + [(set (match_operand:V8HI 0 "register_operand" "=v,?wo") + (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v,wo") + (match_operand:V8HI 2 "register_operand" "v,0") + (match_operand:V16QI 3 "register_operand" "v,wo")] + UNSPEC_VPERMHI))] + "TARGET_ALTIVEC" + "@ + vperm %0,%1,%2,%3 + xxperm %x0,%x1,%x3" + [(set_attr "type" "vecperm") + (set_attr "length" "4")]) + + +(define_expand "vec_unpacku_hi_v16qi" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] + UNSPEC_VUPKHUB))] + "TARGET_ALTIVEC" + " +{ + rtx vzero = gen_reg_rtx (V8HImode); + rtx mask = gen_reg_rtx (V16QImode); + rtvec v = rtvec_alloc (16); + bool be = BYTES_BIG_ENDIAN; + + emit_insn (gen_altivec_vspltish (vzero, const0_rtx)); + + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 7); + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 0 : 16); + RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 16 : 6); + RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 1 : 16); + RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 5); + RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 2 : 16); + RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 
16 : 4); + RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 3 : 16); + RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 3); + RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 4 : 16); + RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 16 : 2); + RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 5 : 16); + RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 1); + RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 6 : 16); + RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 : 0); + RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 7 : 16); + + emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); + emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, mask)); + DONE; +}") + +(define_expand "vec_unpacku_hi_v8hi" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] + UNSPEC_VUPKHUH))] + "TARGET_ALTIVEC" + " +{ + rtx vzero = gen_reg_rtx (V4SImode); + rtx mask = gen_reg_rtx (V16QImode); + rtvec v = rtvec_alloc (16); + bool be = BYTES_BIG_ENDIAN; + + emit_insn (gen_altivec_vspltisw (vzero, const0_rtx)); + + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 7); + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 17 : 6); + RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 0 : 17); + RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 1 : 16); + RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 5); + RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 17 : 4); + RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 2 : 17); + RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 3 : 16); + RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 3); + RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 17 : 2); + RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 4 : 17); + RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 5 : 16); + RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 1); + RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 17 : 0); + RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 6 : 17); + RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 7 : 16); + + emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); + emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask)); + DONE; +}") + +(define_expand "vec_unpacku_lo_v16qi" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] + UNSPEC_VUPKLUB))] + "TARGET_ALTIVEC" + " +{ + rtx vzero = gen_reg_rtx (V8HImode); + rtx mask = gen_reg_rtx (V16QImode); + rtvec v = rtvec_alloc (16); + bool be = BYTES_BIG_ENDIAN; + + emit_insn (gen_altivec_vspltish (vzero, const0_rtx)); + + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 15); + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 8 : 16); + RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 16 : 14); + RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 9 : 16); + RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 13); + RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 10 : 16); + RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 16 : 12); + RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 11 : 16); + RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 11); + RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 12 : 16); + RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 16 : 10); + RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 13 : 16); + RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 
16 : 9); + RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 14 : 16); + RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 : 8); + RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16); + + emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); + emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, mask)); + DONE; +}") + +(define_expand "vec_unpacku_lo_v8hi" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] + UNSPEC_VUPKLUH))] + "TARGET_ALTIVEC" + " +{ + rtx vzero = gen_reg_rtx (V4SImode); + rtx mask = gen_reg_rtx (V16QImode); + rtvec v = rtvec_alloc (16); + bool be = BYTES_BIG_ENDIAN; + + emit_insn (gen_altivec_vspltisw (vzero, const0_rtx)); + + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 15); + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 17 : 14); + RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 8 : 17); + RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 9 : 16); + RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 13); + RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 17 : 12); + RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 10 : 17); + RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 11 : 16); + RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 11); + RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 17 : 10); + RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 12 : 17); + RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 13 : 16); + RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 9); + RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 17 : 8); + RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 14 : 17); + RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16); + + emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); + emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask)); + DONE; +}") + +(define_expand "vec_widen_umult_hi_v16qi" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VMULWHUB))] + "TARGET_ALTIVEC" + " +{ + rtx ve = gen_reg_rtx (V8HImode); + rtx vo = gen_reg_rtx (V8HImode); + + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve)); + } + DONE; +}") + +(define_expand "vec_widen_umult_lo_v16qi" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VMULWLUB))] + "TARGET_ALTIVEC" + " +{ + rtx ve = gen_reg_rtx (V8HImode); + rtx vo = gen_reg_rtx (V8HImode); + + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve)); + } + DONE; +}") + +(define_expand "vec_widen_smult_hi_v16qi" + [(set 
(match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VMULWHSB))] + "TARGET_ALTIVEC" + " +{ + rtx ve = gen_reg_rtx (V8HImode); + rtx vo = gen_reg_rtx (V8HImode); + + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve)); + } + DONE; +}") + +(define_expand "vec_widen_smult_lo_v16qi" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VMULWLSB))] + "TARGET_ALTIVEC" + " +{ + rtx ve = gen_reg_rtx (V8HImode); + rtx vo = gen_reg_rtx (V8HImode); + + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve)); + } + DONE; +}") + +(define_expand "vec_widen_umult_hi_v8hi" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")] + UNSPEC_VMULWHUH))] + "TARGET_ALTIVEC" + " +{ + rtx ve = gen_reg_rtx (V4SImode); + rtx vo = gen_reg_rtx (V4SImode); + + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghw_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghw_direct (operands[0], vo, ve)); + } + DONE; +}") + +(define_expand "vec_widen_umult_lo_v8hi" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")] + UNSPEC_VMULWLUH))] + "TARGET_ALTIVEC" + " +{ + rtx ve = gen_reg_rtx (V4SImode); + rtx vo = gen_reg_rtx (V4SImode); + + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglw_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglw_direct (operands[0], vo, ve)); + } + DONE; +}") + +(define_expand "vec_widen_smult_hi_v8hi" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")] + UNSPEC_VMULWHSH))] + "TARGET_ALTIVEC" + " +{ + rtx ve = gen_reg_rtx (V4SImode); + rtx vo = gen_reg_rtx (V4SImode); + + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosh (vo, operands[1], 
operands[2])); + emit_insn (gen_altivec_vmrghw_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghw_direct (operands[0], vo, ve)); + } + DONE; +}") + +(define_expand "vec_widen_smult_lo_v8hi" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")] + UNSPEC_VMULWLSH))] + "TARGET_ALTIVEC" + " +{ + rtx ve = gen_reg_rtx (V4SImode); + rtx vo = gen_reg_rtx (V4SImode); + + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglw_direct (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglw_direct (operands[0], vo, ve)); + } + DONE; +}") + +(define_expand "vec_pack_trunc_<mode>" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_MOD))] + "<VI_unit>" + "") + +(define_expand "mulv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (mult:V16QI (match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")))] + "TARGET_ALTIVEC" + " +{ + rtx even = gen_reg_rtx (V8HImode); + rtx odd = gen_reg_rtx (V8HImode); + rtx mask = gen_reg_rtx (V16QImode); + rtvec v = rtvec_alloc (16); + int i; + + for (i = 0; i < 8; ++i) { + RTVEC_ELT (v, 2 * i) + = gen_rtx_CONST_INT (QImode, BYTES_BIG_ENDIAN ? 2 * i + 1 : 31 - 2 * i); + RTVEC_ELT (v, 2 * i + 1) + = gen_rtx_CONST_INT (QImode, BYTES_BIG_ENDIAN ? 2 * i + 17 : 15 - 2 * i); + } + + emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); + emit_insn (gen_altivec_vmulesb (even, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosb (odd, operands[1], operands[2])); + emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], even, odd, mask)); + DONE; +}") + +(define_expand "altivec_negv4sf2" + [(use (match_operand:V4SF 0 "register_operand" "")) + (use (match_operand:V4SF 1 "register_operand" ""))] + "TARGET_ALTIVEC" + " +{ + rtx neg0; + + /* Generate [-0.0, -0.0, -0.0, -0.0]. */ + neg0 = gen_reg_rtx (V4SImode); + emit_insn (gen_altivec_vspltisw (neg0, constm1_rtx)); + emit_insn (gen_vashlv4si3 (neg0, neg0, neg0)); + + /* XOR */ + emit_insn (gen_xorv4sf3 (operands[0], + gen_lowpart (V4SFmode, neg0), operands[1])); + + DONE; +}") + +;; Vector SIMD PEM v2.06c defines LVLX, LVLXL, LVRX, LVRXL, +;; STVLX, STVLXL, STVVRX, STVRXL are available only on Cell. 
+(define_insn "altivec_lvlx" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:BLK 1 "memory_operand" "Z")] + UNSPEC_LVLX))] + "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL" + "lvlx %0,%y1" + [(set_attr "type" "vecload")]) + +(define_insn "altivec_lvlxl" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:BLK 1 "memory_operand" "Z")] + UNSPEC_LVLXL))] + "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL" + "lvlxl %0,%y1" + [(set_attr "type" "vecload")]) + +(define_insn "altivec_lvrx" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:BLK 1 "memory_operand" "Z")] + UNSPEC_LVRX))] + "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL" + "lvrx %0,%y1" + [(set_attr "type" "vecload")]) + +(define_insn "altivec_lvrxl" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:BLK 1 "memory_operand" "Z")] + UNSPEC_LVRXL))] + "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL" + "lvrxl %0,%y1" + [(set_attr "type" "vecload")]) + +(define_insn "altivec_stvlx" + [(parallel + [(set (match_operand:V16QI 0 "memory_operand" "=Z") + (match_operand:V16QI 1 "register_operand" "v")) + (unspec [(const_int 0)] UNSPEC_STVLX)])] + "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL" + "stvlx %1,%y0" + [(set_attr "type" "vecstore")]) + +(define_insn "altivec_stvlxl" + [(parallel + [(set (match_operand:V16QI 0 "memory_operand" "=Z") + (match_operand:V16QI 1 "register_operand" "v")) + (unspec [(const_int 0)] UNSPEC_STVLXL)])] + "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL" + "stvlxl %1,%y0" + [(set_attr "type" "vecstore")]) + +(define_insn "altivec_stvrx" + [(parallel + [(set (match_operand:V16QI 0 "memory_operand" "=Z") + (match_operand:V16QI 1 "register_operand" "v")) + (unspec [(const_int 0)] UNSPEC_STVRX)])] + "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL" + "stvrx %1,%y0" + [(set_attr "type" "vecstore")]) + +(define_insn "altivec_stvrxl" + [(parallel + [(set (match_operand:V16QI 0 "memory_operand" "=Z") + (match_operand:V16QI 1 "register_operand" "v")) + (unspec [(const_int 0)] UNSPEC_STVRXL)])] + "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL" + "stvrxl %1,%y0" + [(set_attr "type" "vecstore")]) + +(define_expand "vec_unpacks_float_hi_v8hi" + [(set (match_operand:V4SF 0 "register_operand" "") + (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "")] + UNSPEC_VUPKHS_V4SF))] + "TARGET_ALTIVEC" + " +{ + rtx tmp = gen_reg_rtx (V4SImode); + + emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1])); + emit_insn (gen_altivec_vcfsx (operands[0], tmp, const0_rtx)); + DONE; +}") + +(define_expand "vec_unpacks_float_lo_v8hi" + [(set (match_operand:V4SF 0 "register_operand" "") + (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "")] + UNSPEC_VUPKLS_V4SF))] + "TARGET_ALTIVEC" + " +{ + rtx tmp = gen_reg_rtx (V4SImode); + + emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1])); + emit_insn (gen_altivec_vcfsx (operands[0], tmp, const0_rtx)); + DONE; +}") + +(define_expand "vec_unpacku_float_hi_v8hi" + [(set (match_operand:V4SF 0 "register_operand" "") + (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "")] + UNSPEC_VUPKHU_V4SF))] + "TARGET_ALTIVEC" + " +{ + rtx tmp = gen_reg_rtx (V4SImode); + + emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1])); + emit_insn (gen_altivec_vcfux (operands[0], tmp, const0_rtx)); + DONE; +}") + +(define_expand "vec_unpacku_float_lo_v8hi" + [(set (match_operand:V4SF 0 "register_operand" "") + (unspec:V4SF [(match_operand:V8HI 1 "register_operand" 
"")] + UNSPEC_VUPKLU_V4SF))] + "TARGET_ALTIVEC" + " +{ + rtx tmp = gen_reg_rtx (V4SImode); + + emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1])); + emit_insn (gen_altivec_vcfux (operands[0], tmp, const0_rtx)); + DONE; +}") + + +;; Power8/power9 vector instructions encoded as Altivec instructions + +;; Vector count leading zeros +(define_insn "*p8v_clz<mode>2" + [(set (match_operand:VI2 0 "register_operand" "=v") + (clz:VI2 (match_operand:VI2 1 "register_operand" "v")))] + "TARGET_P8_VECTOR" + "vclz<wd> %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; Vector absolute difference unsigned +(define_expand "vadu<mode>3" + [(set (match_operand:VI 0 "register_operand") + (unspec:VI [(match_operand:VI 1 "register_operand") + (match_operand:VI 2 "register_operand")] + UNSPEC_VADU))] + "TARGET_P9_VECTOR") + +;; Vector absolute difference unsigned +(define_insn "*p9_vadu<mode>3" + [(set (match_operand:VI 0 "register_operand" "=v") + (unspec:VI [(match_operand:VI 1 "register_operand" "v") + (match_operand:VI 2 "register_operand" "v")] + UNSPEC_VADU))] + "TARGET_P9_VECTOR" + "vabsdu<wd> %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +;; Vector count trailing zeros +(define_insn "*p9v_ctz<mode>2" + [(set (match_operand:VI2 0 "register_operand" "=v") + (ctz:VI2 (match_operand:VI2 1 "register_operand" "v")))] + "TARGET_P9_VECTOR" + "vctz<wd> %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; Vector population count +(define_insn "*p8v_popcount<mode>2" + [(set (match_operand:VI2 0 "register_operand" "=v") + (popcount:VI2 (match_operand:VI2 1 "register_operand" "v")))] + "TARGET_P8_VECTOR" + "vpopcnt<wd> %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; Vector parity +(define_insn "*p9v_parity<mode>2" + [(set (match_operand:VParity 0 "register_operand" "=v") + (parity:VParity (match_operand:VParity 1 "register_operand" "v")))] + "TARGET_P9_VECTOR" + "vprtyb<wd> %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; Vector Gather Bits by Bytes by Doubleword +(define_insn "p8v_vgbbd" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")] + UNSPEC_VGBBD))] + "TARGET_P8_VECTOR" + "vgbbd %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + + +;; 128-bit binary integer arithmetic +;; We have a special container type (V1TImode) to allow operations using the +;; ISA 2.07 128-bit binary support to target the VMX/altivec registers without +;; having to worry about the register allocator deciding GPRs are better. 
+ +(define_insn "altivec_vadduqm" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (plus:V1TI (match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v")))] + "TARGET_VADDUQM" + "vadduqm %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vaddcuq" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v")] + UNSPEC_VADDCUQ))] + "TARGET_VADDUQM" + "vaddcuq %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vsubuqm" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (minus:V1TI (match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v")))] + "TARGET_VADDUQM" + "vsubuqm %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vsubcuq" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v")] + UNSPEC_VSUBCUQ))] + "TARGET_VADDUQM" + "vsubcuq %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vaddeuqm" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v") + (match_operand:V1TI 3 "register_operand" "v")] + UNSPEC_VADDEUQM))] + "TARGET_VADDUQM" + "vaddeuqm %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vaddecuq" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v") + (match_operand:V1TI 3 "register_operand" "v")] + UNSPEC_VADDECUQ))] + "TARGET_VADDUQM" + "vaddecuq %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vsubeuqm" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v") + (match_operand:V1TI 3 "register_operand" "v")] + UNSPEC_VSUBEUQM))] + "TARGET_VADDUQM" + "vsubeuqm %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vsubecuq" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v") + (match_operand:V1TI 3 "register_operand" "v")] + UNSPEC_VSUBECUQ))] + "TARGET_VADDUQM" + "vsubecuq %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; We use V2DI as the output type to simplify converting the permute +;; bits into an integer +(define_insn "altivec_vbpermq" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VBPERMQ))] + "TARGET_P8_VECTOR" + "vbpermq %0,%1,%2" + [(set_attr "type" "vecperm")]) + +; One of the vector API interfaces requires returning vector unsigned char. 
+(define_insn "altivec_vbpermq2" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VBPERMQ))] + "TARGET_P8_VECTOR" + "vbpermq %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vbpermd" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VBPERMD))] + "TARGET_P9_VECTOR" + "vbpermd %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +;; Decimal Integer operations +(define_int_iterator UNSPEC_BCD_ADD_SUB [UNSPEC_BCDADD UNSPEC_BCDSUB]) + +(define_int_attr bcd_add_sub [(UNSPEC_BCDADD "add") + (UNSPEC_BCDSUB "sub")]) + +(define_code_iterator BCD_TEST [eq lt gt unordered]) + +(define_insn "bcd<bcd_add_sub>" + [(set (match_operand:V1TI 0 "gpc_reg_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "gpc_reg_operand" "v") + (match_operand:V1TI 2 "gpc_reg_operand" "v") + (match_operand:QI 3 "const_0_to_1_operand" "n")] + UNSPEC_BCD_ADD_SUB)) + (clobber (reg:CCFP CR6_REGNO))] + "TARGET_P8_VECTOR" + "bcd<bcd_add_sub>. %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; Use a floating point type (V2DFmode) for the compare to set CR6 so that we +;; can use the unordered test for BCD nans and add/subtracts that overflow. An +;; UNORDERED test on an integer type (like V1TImode) is not defined. The type +;; probably should be one that can go in the VMX (Altivec) registers, so we +;; can't use DDmode or DFmode. +(define_insn "*bcd<bcd_add_sub>_test" + [(set (reg:CCFP CR6_REGNO) + (compare:CCFP + (unspec:V2DF [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v") + (match_operand:QI 3 "const_0_to_1_operand" "i")] + UNSPEC_BCD_ADD_SUB) + (match_operand:V2DF 4 "zero_constant" "j"))) + (clobber (match_scratch:V1TI 0 "=v"))] + "TARGET_P8_VECTOR" + "bcd<bcd_add_sub>. %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "*bcd<bcd_add_sub>_test2" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v") + (match_operand:QI 3 "const_0_to_1_operand" "i")] + UNSPEC_BCD_ADD_SUB)) + (set (reg:CCFP CR6_REGNO) + (compare:CCFP + (unspec:V2DF [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_BCD_ADD_SUB) + (match_operand:V2DF 4 "zero_constant" "j")))] + "TARGET_P8_VECTOR" + "bcd<bcd_add_sub>. %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "darn_32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(const_int 0)] UNSPEC_DARN_32))] + "TARGET_P9_MISC" + "darn %0,0" + [(set_attr "type" "integer")]) + +(define_insn "darn_raw" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_DARN_RAW))] + "TARGET_P9_MISC && TARGET_64BIT" + "darn %0,2" + [(set_attr "type" "integer")]) + +(define_insn "darn" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_DARN))] + "TARGET_P9_MISC && TARGET_64BIT" + "darn %0,1" + [(set_attr "type" "integer")]) + +;; Test byte within range. 
+;; +;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx +;; represents a byte whose value is ignored in this context and +;; vv, the least significant byte, holds the byte value that is to +;; be tested for membership within the range specified by operand 2. +;; The bytes of operand 2 are organized as xx:xx:hi:lo. +;; +;; Return in target register operand 0 a value of 1 if lo <= vv and +;; vv <= hi. Otherwise, set register operand 0 to 0. +;; +;; Though the instructions to which this expansion maps operate on +;; 64-bit registers, the current implementation only operates on +;; SI-mode operands as the high-order bits provide no information +;; that is not already available in the low-order bits. To avoid the +;; costs of data widening operations, future enhancements might allow +;; DI mode for operand 0 and/or might allow operand 1 to be QI mode. +(define_expand "cmprb" + [(set (match_dup 3) + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPRB)) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (if_then_else:SI (lt (match_dup 3) + (const_int 0)) + (const_int -1) + (if_then_else (gt (match_dup 3) + (const_int 0)) + (const_int 1) + (const_int 0))))] + "TARGET_P9_MISC" +{ + operands[3] = gen_reg_rtx (CCmode); +}) + +;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx +;; represents a byte whose value is ignored in this context and +;; vv, the least significant byte, holds the byte value that is to +;; be tested for membership within the range specified by operand 2. +;; The bytes of operand 2 are organized as xx:xx:hi:lo. +;; +;; Set bit 1 (the GT bit, 0x4) of CR register operand 0 to 1 if +;; lo <= vv and vv <= hi. Otherwise, set the GT bit to 0. The other +;; 3 bits of the target CR register are all set to 0. +(define_insn "*cmprb_internal" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPRB))] + "TARGET_P9_MISC" + "cmprb %0,0,%1,%2" + [(set_attr "type" "logical")]) + +;; Set operand 0 register to -1 if the LT bit (0x8) of condition +;; register operand 1 is on. Otherwise, set operand 0 register to 1 +;; if the GT bit (0x4) of condition register operand 1 is on. +;; Otherwise, set operand 0 to 0. Note that the result stored into +;; register operand 0 is non-zero iff either the LT or GT bits are on +;; within condition register operand 1. +(define_insn "setb_signed" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (if_then_else:SI (lt (match_operand:CC 1 "cc_reg_operand" "y") + (const_int 0)) + (const_int -1) + (if_then_else (gt (match_dup 1) + (const_int 0)) + (const_int 1) + (const_int 0))))] + "TARGET_P9_MISC" + "setb %0,%1" + [(set_attr "type" "logical")]) + +(define_insn "setb_unsigned" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (if_then_else:SI (ltu (match_operand:CCUNS 1 "cc_reg_operand" "y") + (const_int 0)) + (const_int -1) + (if_then_else (gtu (match_dup 1) + (const_int 0)) + (const_int 1) + (const_int 0))))] + "TARGET_P9_MISC" + "setb %0,%1" + [(set_attr "type" "logical")]) + +;; Test byte within two ranges. +;; +;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx +;; represents a byte whose value is ignored in this context and +;; vv, the least significant byte, holds the byte value that is to +;; be tested for membership within the range specified by operand 2. +;; The bytes of operand 2 are organized as hi_1:lo_1:hi_2:lo_2. 
+;; +;; Return in target register operand 0 a value of 1 if (lo_1 <= vv and +;; vv <= hi_1) or if (lo_2 <= vv and vv <= hi_2). Otherwise, set register +;; operand 0 to 0. +;; +;; Though the instructions to which this expansion maps operate on +;; 64-bit registers, the current implementation only operates on +;; SI-mode operands as the high-order bits provide no information +;; that is not already available in the low-order bits. To avoid the +;; costs of data widening operations, future enhancements might allow +;; DI mode for operand 0 and/or might allow operand 1 to be QI mode. +(define_expand "cmprb2" + [(set (match_dup 3) + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPRB2)) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (if_then_else:SI (lt (match_dup 3) + (const_int 0)) + (const_int -1) + (if_then_else (gt (match_dup 3) + (const_int 0)) + (const_int 1) + (const_int 0))))] + "TARGET_P9_MISC" +{ + operands[3] = gen_reg_rtx (CCmode); +}) + +;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx +;; represents a byte whose value is ignored in this context and +;; vv, the least significant byte, holds the byte value that is to +;; be tested for membership within the ranges specified by operand 2. +;; The bytes of operand 2 are organized as hi_1:lo_1:hi_2:lo_2. +;; +;; Set bit 1 (the GT bit, 0x4) of CR register operand 0 to 1 if +;; (lo_1 <= vv and vv <= hi_1) or if (lo_2 <= vv and vv <= hi_2). +;; Otherwise, set the GT bit to 0. The other 3 bits of the target +;; CR register are all set to 0. +(define_insn "*cmprb2_internal" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPRB2))] + "TARGET_P9_MISC" + "cmprb %0,1,%1,%2" + [(set_attr "type" "logical")]) + +;; Test byte membership within set of 8 bytes. +;; +;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx +;; represents a byte whose value is ignored in this context and +;; vv, the least significant byte, holds the byte value that is to +;; be tested for membership within the set specified by operand 2. +;; The bytes of operand 2 are organized as e0:e1:e2:e3:e4:e5:e6:e7. +;; +;; Return in target register operand 0 a value of 1 if vv equals one +;; of the values e0, e1, e2, e3, e4, e5, e6, or e7. Otherwise, set +;; register operand 0 to 0. Note that the 8 byte values held within +;; operand 2 need not be unique. +;; +;; Though the instructions to which this expansion maps operate on +;; 64-bit registers, the current implementation requires that operands +;; 0 and 1 have mode SI as the high-order bits provide no information +;; that is not already available in the low-order bits. To avoid the +;; costs of data widening operations, future enhancements might allow +;; DI mode for operand 0 and/or might allow operand 1 to be QI mode. 
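As a concrete companion to the prose above, here is a small C reference model of the test (illustrative only; the function name is invented and nothing below is taken from the port itself). It returns the value the cmpeqb expansion that follows leaves in operand 0: 1 when the low byte of operand 1 matches one of the eight bytes of operand 2, 0 otherwise; the -1 arm of the if_then_else can never be taken because the internal pattern only ever sets the GT bit.

#include <stdint.h>

/* vv is the least significant byte of ra; rb packs the candidate
   bytes e0..e7.  cmprb/cmprb2 map their CR result to -1/1/0 the same
   way; only the underlying byte test differs.  */
static inline int
cmpeqb_model (uint32_t ra, uint64_t rb)
{
  uint8_t vv = ra & 0xff;
  for (int i = 0; i < 8; i++)
    if (((rb >> (8 * i)) & 0xff) == vv)
      return 1;
  return 0;
}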
+(define_expand "cmpeqb" + [(set (match_dup 3) + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:DI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPEQB)) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (if_then_else:SI (lt (match_dup 3) + (const_int 0)) + (const_int -1) + (if_then_else (gt (match_dup 3) + (const_int 0)) + (const_int 1) + (const_int 0))))] + "TARGET_P9_MISC && TARGET_64BIT" +{ + operands[3] = gen_reg_rtx (CCmode); +}) + +;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx +;; represents a byte whose value is ignored in this context and +;; vv, the least significant byte, holds the byte value that is to +;; be tested for membership within the set specified by operand 2. +;; The bytes of operand 2 are organized as e0:e1:e2:e3:e4:e5:e6:e7. +;; +;; Set bit 1 (the GT bit, 0x4) of CR register operand 0 to 1 if vv +;; equals one of the values e0, e1, e2, e3, e4, e5, e6, or e7. Otherwise, +;; set the GT bit to zero. The other 3 bits of the target CR register +;; are all set to 0. +(define_insn "*cmpeqb_internal" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:DI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPEQB))] + "TARGET_P9_MISC && TARGET_64BIT" + "cmpeqb %0,%1,%2" + [(set_attr "type" "logical")]) + +(define_expand "bcd<bcd_add_sub>_<code>" + [(parallel [(set (reg:CCFP CR6_REGNO) + (compare:CCFP + (unspec:V2DF [(match_operand:V1TI 1 "register_operand" "") + (match_operand:V1TI 2 "register_operand" "") + (match_operand:QI 3 "const_0_to_1_operand" "")] + UNSPEC_BCD_ADD_SUB) + (match_dup 4))) + (clobber (match_scratch:V1TI 5 ""))]) + (set (match_operand:SI 0 "register_operand" "") + (BCD_TEST:SI (reg:CCFP CR6_REGNO) + (const_int 0)))] + "TARGET_P8_VECTOR" +{ + operands[4] = CONST0_RTX (V2DFmode); +}) + +;; Peephole2 pattern to combine a bcdadd/bcdsub that calculates the value and +;; the bcdadd/bcdsub that tests the value. The combiner won't work since +;; CR6 is a hard coded register. Unfortunately, all of the Altivec predicate +;; support is hard coded to use the fixed register CR6 instead of creating +;; a register class for CR6. + +(define_peephole2 + [(parallel [(set (match_operand:V1TI 0 "register_operand" "") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "") + (match_operand:V1TI 2 "register_operand" "") + (match_operand:QI 3 "const_0_to_1_operand" "")] + UNSPEC_BCD_ADD_SUB)) + (clobber (reg:CCFP CR6_REGNO))]) + (parallel [(set (reg:CCFP CR6_REGNO) + (compare:CCFP + (unspec:V2DF [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_BCD_ADD_SUB) + (match_operand:V2DF 4 "zero_constant" ""))) + (clobber (match_operand:V1TI 5 "register_operand" ""))])] + "TARGET_P8_VECTOR" + [(parallel [(set (match_dup 0) + (unspec:V1TI [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_BCD_ADD_SUB)) + (set (reg:CCFP CR6_REGNO) + (compare:CCFP + (unspec:V2DF [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_BCD_ADD_SUB) + (match_dup 4)))])]) diff --git a/gcc/config/powerpcspe/biarch64.h b/gcc/config/powerpcspe/biarch64.h new file mode 100644 index 000000000000..8a11ca8e17d3 --- /dev/null +++ b/gcc/config/powerpcspe/biarch64.h @@ -0,0 +1,26 @@ +/* Definitions of target machine for GNU compiler, for 32/64 bit powerpc. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* Specify this in a cover file to provide bi-architecture (32/64) support. */ +#define RS6000_BI_ARCH 1 diff --git a/gcc/config/powerpcspe/bmi2intrin.h b/gcc/config/powerpcspe/bmi2intrin.h new file mode 100644 index 000000000000..fc634c1ed607 --- /dev/null +++ b/gcc/config/powerpcspe/bmi2intrin.h @@ -0,0 +1,169 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This header is distributed to simplify porting x86_64 code that + makes explicit use of Intel intrinsics to powerpc64le. + It is the user's responsibility to determine if the results are + acceptable and make additional changes as necessary. + Note that much code that uses Intel intrinsics can be rewritten in + standard C or GNU C extensions, which are more portable and better + optimized across multiple targets. */ + +#if !defined _X86INTRIN_H_INCLUDED +# error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead." 
+#endif + +#ifndef _BMI2INTRIN_H_INCLUDED +#define _BMI2INTRIN_H_INCLUDED + +extern __inline unsigned int +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_bzhi_u32 (unsigned int __X, unsigned int __Y) +{ + return ((__X << (32 - __Y)) >> (32 - __Y)); +} + +extern __inline unsigned int +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P) +{ + unsigned long long __res = (unsigned long long) __X * __Y; + *__P = (unsigned int) (__res >> 32); + return (unsigned int) __res; +} + +#ifdef __PPC64__ +extern __inline unsigned long long +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_bzhi_u64 (unsigned long long __X, unsigned long long __Y) +{ + return ((__X << (64 - __Y)) >> (64 - __Y)); +} + +/* __int128 requires base 64-bit. */ +extern __inline unsigned long long +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mulx_u64 (unsigned long long __X, unsigned long long __Y, + unsigned long long *__P) +{ + unsigned __int128 __res = (unsigned __int128) __X * __Y; + *__P = (unsigned long long) (__res >> 64); + return (unsigned long long) __res; +} + +#ifdef _ARCH_PWR7 +/* popcount and bpermd require power7 minimum. */ +extern __inline unsigned long long +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_pdep_u64 (unsigned long long __X, unsigned long long __M) +{ + unsigned long result = 0x0UL; + const unsigned long mask = 0x8000000000000000UL; + unsigned long m = __M; + unsigned long c, t; + unsigned long p; + + /* The pop-count of the mask gives the number of the bits from + source to process. This is also needed to shift bits from the + source into the correct position for the result. */ + p = 64 - __builtin_popcountl (__M); + + /* The loop is for the number of '1' bits in the mask and clearing + each mask bit as it is processed. */ + while (m != 0) + { + c = __builtin_clzl (m); + t = __X << (p - c); + m ^= (mask >> c); + result |= (t & (mask >> c)); + p++; + } + return (result); +} + +extern __inline unsigned long long +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_pext_u64 (unsigned long long __X, unsigned long long __M) +{ + unsigned long p = 0x4040404040404040UL; // initial bit permute control + const unsigned long mask = 0x8000000000000000UL; + unsigned long m = __M; + unsigned long c; + unsigned long result; + + /* if the mask is constant and selects 8 bits or less we can use + the Power8 Bit permute instruction. */ + if (__builtin_constant_p (__M) && (__builtin_popcountl (__M) <= 8)) + { + /* Also if the pext mask is constant, then the popcount is + constant, we can evaluate the following loop at compile + time and use a constant bit permute vector. */ + for (long i = 0; i < __builtin_popcountl (__M); i++) + { + c = __builtin_clzl (m); + p = (p << 8) | c; + m ^= (mask >> c); + } + result = __builtin_bpermd (p, __X); + } + else + { + p = 64 - __builtin_popcountl (__M); + result = 0; + /* We could a use a for loop here, but that combined with + -funroll-loops can expand to a lot of code. The while + loop avoids unrolling and the compiler commons the xor + from clearing the mask bit with the (m != 0) test. The + result is a more compact loop setup and body. 
*/ + while (m != 0) + { + unsigned long t; + c = __builtin_clzl (m); + t = (__X & (mask >> c)) >> (p - c); + m ^= (mask >> c); + result |= (t); + p++; + } + } + return (result); +} + +/* these 32-bit implementations depend on 64-bit pdep/pext + which depend on _ARCH_PWR7. */ +extern __inline unsigned int +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_pdep_u32 (unsigned int __X, unsigned int __Y) +{ + return _pdep_u64 (__X, __Y); +} + +extern __inline unsigned int +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_pext_u32 (unsigned int __X, unsigned int __Y) +{ + return _pext_u64 (__X, __Y); +} +#endif /* _ARCH_PWR7 */ +#endif /* __PPC64__ */ + +#endif /* _BMI2INTRIN_H_INCLUDED */ diff --git a/gcc/config/powerpcspe/bmiintrin.h b/gcc/config/powerpcspe/bmiintrin.h new file mode 100644 index 000000000000..28671ca0d327 --- /dev/null +++ b/gcc/config/powerpcspe/bmiintrin.h @@ -0,0 +1,187 @@ +/* Copyright (C) 2010-2017 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This header is distributed to simplify porting x86_64 code that + makes explicit use of Intel intrinsics to powerpc64le. + It is the user's responsibility to determine if the results are + acceptable and make additional changes as necessary. + Note that much code that uses Intel intrinsics can be rewritten in + standard C or GNU C extensions, which are more portable and better + optimized across multiple targets. */ + +#if !defined _X86INTRIN_H_INCLUDED +# error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead." 
+#endif + +#ifndef _BMIINTRIN_H_INCLUDED +#define _BMIINTRIN_H_INCLUDED + +extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__tzcnt_u16 (unsigned short __X) +{ + return __builtin_ctz (__X); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__andn_u32 (unsigned int __X, unsigned int __Y) +{ + return (~__X & __Y); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_bextr_u32 (unsigned int __X, unsigned int __P, unsigned int __L) +{ + return ((__X << (32 - (__L + __P))) >> (32 - __L)); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__bextr_u32 (unsigned int __X, unsigned int __Y) +{ + unsigned int __P, __L; + __P = __Y & 0xFF; + __L = (__Y >> 8) & 0xFF; + return (_bextr_u32 (__X, __P, __L)); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blsi_u32 (unsigned int __X) +{ + return (__X & -__X); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_blsi_u32 (unsigned int __X) +{ + return __blsi_u32 (__X); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blsmsk_u32 (unsigned int __X) +{ + return (__X ^ (__X - 1)); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_blsmsk_u32 (unsigned int __X) +{ + return __blsmsk_u32 (__X); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blsr_u32 (unsigned int __X) +{ + return (__X & (__X - 1)); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_blsr_u32 (unsigned int __X) +{ + return __blsr_u32 (__X); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__tzcnt_u32 (unsigned int __X) +{ + return __builtin_ctz (__X); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_tzcnt_u32 (unsigned int __X) +{ + return __builtin_ctz (__X); +} + +/* use the 64-bit shift, rotate, and count leading zeros instructions + for long long. 
*/ +#ifdef __PPC64__ +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__andn_u64 (unsigned long long __X, unsigned long long __Y) +{ + return (~__X & __Y); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_bextr_u64 (unsigned long long __X, unsigned int __P, unsigned int __L) +{ + return ((__X << (64 - (__L + __P))) >> (64 - __L)); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__bextr_u64 (unsigned long long __X, unsigned long long __Y) +{ + unsigned int __P, __L; + __P = __Y & 0xFF; + __L = (__Y & 0xFF00) >> 8; + return (_bextr_u64 (__X, __P, __L)); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blsi_u64 (unsigned long long __X) +{ + return __X & -__X; +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_blsi_u64 (unsigned long long __X) +{ + return __blsi_u64 (__X); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blsmsk_u64 (unsigned long long __X) +{ + return (__X ^ (__X - 1)); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_blsmsk_u64 (unsigned long long __X) +{ + return __blsmsk_u64 (__X); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blsr_u64 (unsigned long long __X) +{ + return (__X & (__X - 1)); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_blsr_u64 (unsigned long long __X) +{ + return __blsr_u64 (__X); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__tzcnt_u64 (unsigned long long __X) +{ + return __builtin_ctzll (__X); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_tzcnt_u64 (unsigned long long __X) +{ + return __builtin_ctzll (__X); +} +#endif /* __PPC64__ */ + +#endif /* _BMIINTRIN_H_INCLUDED */ diff --git a/gcc/config/powerpcspe/cell.md b/gcc/config/powerpcspe/cell.md new file mode 100644 index 000000000000..a92e82acecf9 --- /dev/null +++ b/gcc/config/powerpcspe/cell.md @@ -0,0 +1,423 @@ +;; Scheduling description for cell processor. +;; Copyright (C) 2001-2017 Free Software Foundation, Inc. +;; Contributed by Sony Computer Entertainment, Inc., + + +;; This file is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3 of the License, or (at your option) +;; any later version. + +;; This file is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; Sources: BE BOOK4 (/sfs/enc/doc/PPU_BookIV_DD3.0_latest.pdf) + +;; BE Architecture *DD3.0 and DD3.1* +;; This file simulate PPU processor unit backend of pipeline, maualP24. 
+;; manual P27, stall and flush points +;; IU, XU, VSU, dispatcher decodes and dispatch 2 insns per cycle in program +;; order, the grouped address are aligned by 8 +;; This file only simulate one thread situation +;; XU executes all fixed point insns(3 units, a simple alu, a complex unit, +;; and load/store unit) +;; VSU executes all scalar floating points insn(a float unit), +;; VMX insns(VMX unit, 4 sub units, simple, permute, complex, floating point) + +;; Dual issue combination + +;; FXU LSU BR VMX VMX +;; (sx,cx,vsu_fp,fp_arith) (perm,vsu_ls,fp_ls) +;;FXU X +;;LSU X X X +;;BR X +;;VMX(sx,cx,vsu_fp,fp_arth) X +;;VMX(perm,vsu_ls, fp_ls) X +;; X are illegal combination. + +;; Dual issue exceptions: +;;(1) nop-pipelined FXU instr in slot 0 +;;(2) non-pipelined FPU inst in slot 0 +;; CSI instr(contex-synchronizing insn) +;; Microcode insn + +;; BRU unit: bru(none register stall), bru_cr(cr register stall) +;; VSU unit: vus(vmx simple), vup(vmx permute), vuc(vmx complex), +;; vuf(vmx float), fpu(floats). fpu_div is hypothetical, it is for +;; nonpipelined simulation +;; micr insns will stall at least 7 cycles to get the first instr from ROM, +;; micro instructions are not dual issued. + +;; slot0 is older than slot1 +;; non-pipelined insn need to be in slot1 to avoid 1cycle stall + +;; There different stall point +;; IB2, only stall one thread if stall here, so try to stall here as much as +;; we can +;; condition(1) insert nop, OR and ORI instruction form +;; condition(2) flush happens, in case of: RAW, WAW, D-ERAT miss, or +;; CR0-access while stdcx, or stwcx +;; IS2 stall ;; Page91 for details +;; VQ8 stall +;; IS2 stall can be activated by VQ8 stall and trying to issue a vsu instr to +;; the vsu issue queue + +;;(define_automaton "cellxu") + +;;(define_cpu_unit "fxu_cell,lsu_cell,bru_cell,vsu1_cell,vsu2_cell" "cellxu") + +;; ndfa +(define_automaton "cellxu,cellvsu,cellbru,cell_mis") + +(define_cpu_unit "fxu_cell,lsu_cell" "cellxu") +(define_cpu_unit "bru_cell" "cellbru") +(define_cpu_unit "vsu1_cell,vsu2_cell" "cellvsu") + +(define_cpu_unit "slot0,slot1" "cell_mis") + +(absence_set "slot0" "slot1") + +(define_reservation "nonpipeline" "fxu_cell+lsu_cell+vsu1_cell+vsu2_cell") +(define_reservation "slot01" "slot0|slot1") + + +;; Load/store +;; lmw, lswi, lswx are only generated for optimize for space, MC, +;; these instr are not simulated +(define_insn_reservation "cell-load" 2 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "no") + (eq_attr "update" "no") + (eq_attr "cpu" "cell")) + "slot01,lsu_cell") + +;; ldux, ldu, lbzux, lbzu, hardware breaks it down to two instrs, +;; if with 32bytes alignment, CMC +(define_insn_reservation "cell-load-ux" 2 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "no") + (eq_attr "update" "yes") + (eq_attr "cpu" "cell")) + "slot01,fxu_cell+lsu_cell") + +;; lha, lhax, lhau, lhaux, lwa, lwax, lwaux, MC, latency unknown +;; 11/7, 11/8, 11/12 +(define_insn_reservation "cell-load-ext" 2 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "yes") + (eq_attr "cpu" "cell")) + "slot01,fxu_cell+lsu_cell") + +;;lfs,lfsx,lfd,lfdx, 1 cycle +(define_insn_reservation "cell-fpload" 1 + (and (eq_attr "type" "fpload") + (eq_attr "update" "no") + (eq_attr "cpu" "cell")) + "vsu2_cell+lsu_cell+slot01") + +;; lfsu,lfsux,lfdu,lfdux 1cycle(fpr) 2 cycle(gpr) +(define_insn_reservation "cell-fpload-update" 1 + (and (eq_attr "type" "fpload") + (eq_attr "update" "yes") + (eq_attr "cpu" "cell")) + "fxu_cell+vsu2_cell+lsu_cell+slot01") + +(define_insn_reservation 
"cell-vecload" 2 + (and (eq_attr "type" "vecload") + (eq_attr "cpu" "cell")) + "slot01,vsu2_cell+lsu_cell") + +;;st? stw(MC) +(define_insn_reservation "cell-store" 1 + (and (eq_attr "type" "store") + (eq_attr "update" "no") + (eq_attr "cpu" "cell")) + "lsu_cell+slot01") + +;;stdux, stdu, (hardware breaks into store and add) 2 for update reg +(define_insn_reservation "cell-store-update" 1 + (and (eq_attr "type" "store") + (eq_attr "update" "yes") + (eq_attr "cpu" "cell")) + "fxu_cell+lsu_cell+slot01") + +(define_insn_reservation "cell-fpstore" 1 + (and (eq_attr "type" "fpstore") + (eq_attr "update" "no") + (eq_attr "cpu" "cell")) + "vsu2_cell+lsu_cell+slot01") + +(define_insn_reservation "cell-fpstore-update" 1 + (and (eq_attr "type" "fpstore") + (eq_attr "update" "yes") + (eq_attr "cpu" "cell")) + "vsu2_cell+fxu_cell+lsu_cell+slot01") + +(define_insn_reservation "cell-vecstore" 1 + (and (eq_attr "type" "vecstore") + (eq_attr "cpu" "cell")) + "vsu2_cell+lsu_cell+slot01") + +;; Integer latency is 2 cycles +(define_insn_reservation "cell-integer" 2 + (and (ior (eq_attr "type" "integer,trap,cntlz,isel") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "no")) + (and (eq_attr "type" "insert") + (eq_attr "size" "64"))) + (eq_attr "cpu" "cell")) + "slot01,fxu_cell") + +;; Two integer latency is 4 cycles +(define_insn_reservation "cell-two" 4 + (and (eq_attr "type" "two") + (eq_attr "cpu" "cell")) + "slot01,fxu_cell,fxu_cell*2") + +;; Three integer latency is 6 cycles +(define_insn_reservation "cell-three" 6 + (and (eq_attr "type" "three") + (eq_attr "cpu" "cell")) + "slot01,fxu_cell,fxu_cell*4") + +;; rlwimi, alter cr0 +(define_insn_reservation "cell-insert" 2 + (and (eq_attr "type" "insert") + (eq_attr "size" "32") + (eq_attr "cpu" "cell")) + "slot01,fxu_cell") + +;; cmpi, cmpli, cmpla, add, addo, sub, subo, alter cr0 +(define_insn_reservation "cell-cmp" 1 + (and (eq_attr "type" "cmp") + (eq_attr "cpu" "cell")) + "fxu_cell+slot01") + +;; add, addo, sub, subo, alter cr0, rldcli, rlwinm +(define_insn_reservation "cell-fast-cmp" 2 + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "yes") + (eq_attr "cpu" "cell") + (eq_attr "cell_micro" "not")) + "slot01,fxu_cell") + +(define_insn_reservation "cell-cmp-microcoded" 9 + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "yes") + (eq_attr "cpu" "cell") + (eq_attr "cell_micro" "always")) + "slot0+slot1,fxu_cell,fxu_cell*7") + +;; mulld +(define_insn_reservation "cell-lmul" 15 + (and (eq_attr "type" "mul") + (eq_attr "dot" "no") + (eq_attr "size" "64") + (eq_attr "cpu" "cell")) + "slot1,nonpipeline,nonpipeline*13") + +;; mulld. 
is microcoded +(define_insn_reservation "cell-lmul-cmp" 22 + (and (eq_attr "type" "mul") + (eq_attr "dot" "yes") + (eq_attr "size" "64") + (eq_attr "cpu" "cell")) + "slot0+slot1,nonpipeline,nonpipeline*20") + +;; mulli, 6 cycles +(define_insn_reservation "cell-imul23" 6 + (and (eq_attr "type" "mul") + (eq_attr "size" "8,16") + (eq_attr "cpu" "cell")) + "slot1,nonpipeline,nonpipeline*4") + +;; mullw, 9 +(define_insn_reservation "cell-imul" 9 + (and (eq_attr "type" "mul") + (eq_attr "dot" "no") + (eq_attr "size" "32") + (eq_attr "cpu" "cell")) + "slot1,nonpipeline,nonpipeline*7") + +;; divide +(define_insn_reservation "cell-idiv" 32 + (and (eq_attr "type" "div") + (eq_attr "size" "32") + (eq_attr "cpu" "cell")) + "slot1,nonpipeline,nonpipeline*30") + +(define_insn_reservation "cell-ldiv" 64 + (and (eq_attr "type" "div") + (eq_attr "size" "64") + (eq_attr "cpu" "cell")) + "slot1,nonpipeline,nonpipeline*62") + +;;mflr and mfctr are pipelined +(define_insn_reservation "cell-mfjmpr" 1 + (and (eq_attr "type" "mfjmpr") + (eq_attr "cpu" "cell")) + "slot01+bru_cell") + +;;mtlr and mtctr, +;;mtspr fully pipelined +(define_insn_reservation "cell-mtjmpr" 1 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "cell")) + "bru_cell+slot01") + +;; Branches +;; b, ba, bl, bla, unconditional branch always predicts correctly n/a latency +;; bcctr, bcctrl, latency 2, actually adjust by be to 4 +(define_insn_reservation "cell-branch" 1 + (and (eq_attr "type" "branch") + (eq_attr "cpu" "cell")) + "bru_cell+slot1") + +(define_insn_reservation "cell-branchreg" 1 + (and (eq_attr "type" "jmpreg") + (eq_attr "cpu" "cell")) + "bru_cell+slot1") + +;; cr hazard +;; page 90, special cases for CR hazard, only one instr can access cr per cycle +;; if insn reads CR following a stwcx, pipeline stall till stwcx finish +(define_insn_reservation "cell-crlogical" 1 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "cell")) + "bru_cell+slot01") + +;; mfcrf and mfcr is about 34 cycles and nonpipelined +(define_insn_reservation "cell-mfcr" 34 + (and (eq_attr "type" "mfcrf,mfcr") + (eq_attr "cpu" "cell")) + "slot1,nonpipeline,nonpipeline*32") + +;; mtcrf (1 field) +(define_insn_reservation "cell-mtcrf" 1 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "cell")) + "fxu_cell+slot01") + +; Basic FP latency is 10 cycles, thoughput is 1/cycle +(define_insn_reservation "cell-fp" 10 + (and (eq_attr "type" "fp,fpsimple,dmul") + (eq_attr "cpu" "cell")) + "slot01,vsu1_cell,vsu1_cell*8") + +(define_insn_reservation "cell-fpcompare" 1 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "cell")) + "vsu1_cell+slot01") + +;; sdiv thoughput 1/74, not pipelined but only in the FPU +(define_insn_reservation "cell-sdiv" 74 + (and (eq_attr "type" "sdiv,ddiv") + (eq_attr "cpu" "cell")) + "slot1,nonpipeline,nonpipeline*72") + +;; fsqrt thoughput 1/84, not pipelined but only in the FPU +(define_insn_reservation "cell-sqrt" 84 + (and (eq_attr "type" "ssqrt,dsqrt") + (eq_attr "cpu" "cell")) + "slot1,nonpipeline,nonpipeline*82") + +; VMX +(define_insn_reservation "cell-vecsimple" 4 + (and (eq_attr "type" "vecsimple,veclogical,vecmove") + (eq_attr "cpu" "cell")) + "slot01,vsu1_cell,vsu1_cell*2") + +;; mult, div, madd +(define_insn_reservation "cell-veccomplex" 10 + (and (eq_attr "type" "veccomplex") + (eq_attr "cpu" "cell")) + "slot01,vsu1_cell,vsu1_cell*8") + +;; TODO: add support for recording instructions +(define_insn_reservation "cell-veccmp" 4 + (and (eq_attr "type" "veccmp,veccmpfx") + (eq_attr "cpu" "cell")) + 
"slot01,vsu1_cell,vsu1_cell*2") + +(define_insn_reservation "cell-vecfloat" 12 + (and (eq_attr "type" "vecfloat") + (eq_attr "cpu" "cell")) + "slot01,vsu1_cell,vsu1_cell*10") + +(define_insn_reservation "cell-vecperm" 4 + (and (eq_attr "type" "vecperm") + (eq_attr "cpu" "cell")) + "slot01,vsu2_cell,vsu2_cell*2") + +;; New for 4.2, syncs + +(define_insn_reservation "cell-sync" 11 + (and (eq_attr "type" "sync") + (eq_attr "cpu" "cell")) + "slot01,lsu_cell,lsu_cell*9") + +(define_insn_reservation "cell-isync" 11 + (and (eq_attr "type" "isync") + (eq_attr "cpu" "cell")) + "slot01,lsu_cell,lsu_cell*9") + +(define_insn_reservation "cell-load_l" 11 + (and (eq_attr "type" "load_l") + (eq_attr "cpu" "cell")) + "slot01,lsu_cell,lsu_cell*9") + +(define_insn_reservation "cell-store_c" 11 + (and (eq_attr "type" "store_c") + (eq_attr "cpu" "cell")) + "slot01,lsu_cell,lsu_cell*9") + +;; RAW register dependency + +;; addi r3, r3, 1 +;; lw r4,offset(r3) +;; there are 5 cycle deplay for r3 bypassing +;; there are 5 cycle delay for a dependent load after a load +(define_bypass 5 "cell-integer" "cell-load") +(define_bypass 5 "cell-integer" "cell-load-ext") +(define_bypass 5 "cell-load,cell-load-ext" "cell-load,cell-load-ext") + +;; there is a 6 cycle delay after a fp compare until you can use the cr. +(define_bypass 6 "cell-fpcompare" "cell-branch,cell-branchreg,cell-mfcr,cell-crlogical") + +;; VXU float RAW +(define_bypass 11 "cell-vecfloat" "cell-vecfloat") + +;; VXU and FPU +(define_bypass 6 "cell-veccomplex" "cell-vecsimple") +;;(define_bypass 6 "cell-veccompare" "cell-branch,cell-branchreg") +(define_bypass 3 "cell-vecfloat" "cell-veccomplex") +; this is not correct, +;; this is a stall in general and not dependent on result +(define_bypass 13 "cell-vecstore" "cell-fpstore") +; this is not correct, this can never be true, not dependent on result +(define_bypass 7 "cell-fp" "cell-fpload") +;; vsu1 should avoid writing to the same target register as vsu2 insn +;; within 12 cycles. + +;; WAW hazard + +;; the target of VSU estimate should not be reused within 10 dispatch groups +;; the target of VSU float should not be reused within 8 dispatch groups +;; the target of VSU complex should not be reused within 5 dispatch groups +;; FP LOAD should not reuse an FPU Arithmetic target with 6 dispatch gropus + +;; mtctr-bcctr/bcctrl, branch target ctr register shadow update at +;; ex4 stage(10 cycles) +(define_bypass 10 "cell-mtjmpr" "cell-branchreg") + +;;Things are not simulated: +;; update instruction, update address gpr are not simulated +;; vrefp, vrsqrtefp have latency(14), currently simulated as 12 cycle float +;; insns + diff --git a/gcc/config/powerpcspe/constraints.md b/gcc/config/powerpcspe/constraints.md new file mode 100644 index 000000000000..44f45d8b6761 --- /dev/null +++ b/gcc/config/powerpcspe/constraints.md @@ -0,0 +1,323 @@ +;; Constraint definitions for RS6000 +;; Copyright (C) 2006-2017 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. 
+;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; Available constraint letters: e k q t u A B C D S T + +;; Register constraints + +(define_register_constraint "f" "rs6000_constraints[RS6000_CONSTRAINT_f]" + "@internal") + +(define_register_constraint "d" "rs6000_constraints[RS6000_CONSTRAINT_d]" + "@internal") + +(define_register_constraint "b" "BASE_REGS" + "@internal") + +(define_register_constraint "h" "SPECIAL_REGS" + "@internal") + +(define_register_constraint "c" "CTR_REGS" + "@internal") + +(define_register_constraint "l" "LINK_REGS" + "@internal") + +(define_register_constraint "v" "ALTIVEC_REGS" + "@internal") + +(define_register_constraint "x" "CR0_REGS" + "@internal") + +(define_register_constraint "y" "CR_REGS" + "@internal") + +(define_register_constraint "z" "CA_REGS" + "@internal") + +;; Use w as a prefix to add VSX modes +;; any VSX register +(define_register_constraint "wa" "rs6000_constraints[RS6000_CONSTRAINT_wa]" + "Any VSX register if the -mvsx option was used or NO_REGS.") + +(define_register_constraint "wb" "rs6000_constraints[RS6000_CONSTRAINT_wb]" + "Altivec register if the -mpower9-dform option was used or NO_REGS.") + +;; NOTE: For compatibility, "wc" is reserved to represent individual CR bits. +;; It is currently used for that purpose in LLVM. + +(define_register_constraint "wd" "rs6000_constraints[RS6000_CONSTRAINT_wd]" + "VSX vector register to hold vector double data or NO_REGS.") + +(define_register_constraint "we" "rs6000_constraints[RS6000_CONSTRAINT_we]" + "VSX register if the -mpower9-vector -m64 options were used or NO_REGS.") + +(define_register_constraint "wf" "rs6000_constraints[RS6000_CONSTRAINT_wf]" + "VSX vector register to hold vector float data or NO_REGS.") + +(define_register_constraint "wg" "rs6000_constraints[RS6000_CONSTRAINT_wg]" + "If -mmfpgpr was used, a floating point register or NO_REGS.") + +(define_register_constraint "wh" "rs6000_constraints[RS6000_CONSTRAINT_wh]" + "Floating point register if direct moves are available, or NO_REGS.") + +(define_register_constraint "wi" "rs6000_constraints[RS6000_CONSTRAINT_wi]" + "FP or VSX register to hold 64-bit integers for VSX insns or NO_REGS.") + +(define_register_constraint "wj" "rs6000_constraints[RS6000_CONSTRAINT_wj]" + "FP or VSX register to hold 64-bit integers for direct moves or NO_REGS.") + +(define_register_constraint "wk" "rs6000_constraints[RS6000_CONSTRAINT_wk]" + "FP or VSX register to hold 64-bit doubles for direct moves or NO_REGS.") + +(define_register_constraint "wl" "rs6000_constraints[RS6000_CONSTRAINT_wl]" + "Floating point register if the LFIWAX instruction is enabled or NO_REGS.") + +(define_register_constraint "wm" "rs6000_constraints[RS6000_CONSTRAINT_wm]" + "VSX register if direct move instructions are enabled, or NO_REGS.") + +;; NO_REGs register constraint, used to merge mov{sd,sf}, since movsd can use +;; direct move directly, and movsf can't to move between the register sets. 
+;; There is a mode_attr that resolves to wm for SDmode and wn for SFmode +(define_register_constraint "wn" "NO_REGS" "No register (NO_REGS).") + +(define_register_constraint "wo" "rs6000_constraints[RS6000_CONSTRAINT_wo]" + "VSX register if the -mpower9-vector option was used or NO_REGS.") + +(define_register_constraint "wp" "rs6000_constraints[RS6000_CONSTRAINT_wp]" + "VSX register to use for IEEE 128-bit fp TFmode, or NO_REGS.") + +(define_register_constraint "wq" "rs6000_constraints[RS6000_CONSTRAINT_wq]" + "VSX register to use for IEEE 128-bit fp KFmode, or NO_REGS.") + +(define_register_constraint "wr" "rs6000_constraints[RS6000_CONSTRAINT_wr]" + "General purpose register if 64-bit instructions are enabled or NO_REGS.") + +(define_register_constraint "ws" "rs6000_constraints[RS6000_CONSTRAINT_ws]" + "VSX vector register to hold scalar double values or NO_REGS.") + +(define_register_constraint "wt" "rs6000_constraints[RS6000_CONSTRAINT_wt]" + "VSX vector register to hold 128 bit integer or NO_REGS.") + +(define_register_constraint "wu" "rs6000_constraints[RS6000_CONSTRAINT_wu]" + "Altivec register to use for float/32-bit int loads/stores or NO_REGS.") + +(define_register_constraint "wv" "rs6000_constraints[RS6000_CONSTRAINT_wv]" + "Altivec register to use for double loads/stores or NO_REGS.") + +(define_register_constraint "ww" "rs6000_constraints[RS6000_CONSTRAINT_ww]" + "FP or VSX register to perform float operations under -mvsx or NO_REGS.") + +(define_register_constraint "wx" "rs6000_constraints[RS6000_CONSTRAINT_wx]" + "Floating point register if the STFIWX instruction is enabled or NO_REGS.") + +(define_register_constraint "wy" "rs6000_constraints[RS6000_CONSTRAINT_wy]" + "FP or VSX register to perform ISA 2.07 float ops or NO_REGS.") + +(define_register_constraint "wz" "rs6000_constraints[RS6000_CONSTRAINT_wz]" + "Floating point register if the LFIWZX instruction is enabled or NO_REGS.") + +(define_register_constraint "wA" "rs6000_constraints[RS6000_CONSTRAINT_wA]" + "BASE_REGS if 64-bit instructions are enabled or NO_REGS.") + +;; wB needs ISA 2.07 VUPKHSW +(define_constraint "wB" + "Signed 5-bit constant integer that can be loaded into an altivec register." + (and (match_code "const_int") + (and (match_test "TARGET_P8_VECTOR") + (match_operand 0 "s5bit_cint_operand")))) + +(define_constraint "wD" + "Int constant that is the element number of the 64-bit scalar in a vector." + (and (match_code "const_int") + (match_test "TARGET_VSX && (ival == VECTOR_ELEMENT_SCALAR_64BIT)"))) + +(define_constraint "wE" + "Vector constant that can be loaded with the XXSPLTIB instruction." + (match_test "xxspltib_constant_nosplit (op, mode)")) + +;; Extended fusion store +(define_memory_constraint "wF" + "Memory operand suitable for power9 fusion load/stores" + (match_operand 0 "fusion_addis_mem_combo_load")) + +;; Fusion gpr load. 
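The "w"-prefixed register constraints above are also usable from GNU inline asm. A minimal sketch of the common case, the any-VSX-register constraint "wa", assuming compilation with -mvsx; the function name is illustrative, and "%x" is the operand modifier that prints the full VSX register number:

  /* Add two doubles in VSX registers (illustrative only).  */
  double
  vsx_add_dp (double a, double b)
  {
    double r;
    __asm__ ("xsadddp %x0,%x1,%x2" : "=wa" (r) : "wa" (a), "wa" (b));
    return r;
  }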
+(define_memory_constraint "wG" + "Memory operand suitable for TOC fusion memory references" + (match_operand 0 "toc_fusion_mem_wrapped")) + +(define_register_constraint "wH" "rs6000_constraints[RS6000_CONSTRAINT_wH]" + "Altivec register to hold 32-bit integers or NO_REGS.") + +(define_register_constraint "wI" "rs6000_constraints[RS6000_CONSTRAINT_wI]" + "FPR register to hold 32-bit integers or NO_REGS.") + +(define_register_constraint "wJ" "rs6000_constraints[RS6000_CONSTRAINT_wJ]" + "FPR register to hold 8/16-bit integers or NO_REGS.") + +(define_register_constraint "wK" "rs6000_constraints[RS6000_CONSTRAINT_wK]" + "Altivec register to hold 8/16-bit integers or NO_REGS.") + +(define_constraint "wL" + "Int constant that is the element number mfvsrld accesses in a vector." + (and (match_code "const_int") + (and (match_test "TARGET_DIRECT_MOVE_128") + (match_test "(ival == VECTOR_ELEMENT_MFVSRLD_64BIT)")))) + +;; Generate the XXORC instruction to set a register to all 1's +(define_constraint "wM" + "Match vector constant with all 1's if the XXLORC instruction is available" + (and (match_test "TARGET_P8_VECTOR") + (match_operand 0 "all_ones_constant"))) + +;; ISA 3.0 vector d-form addresses +(define_memory_constraint "wO" + "Memory operand suitable for the ISA 3.0 vector d-form instructions." + (match_operand 0 "vsx_quad_dform_memory_operand")) + +;; Lq/stq validates the address for load/store quad +(define_memory_constraint "wQ" + "Memory operand suitable for the load/store quad instructions" + (match_operand 0 "quad_memory_operand")) + +(define_constraint "wS" + "Vector constant that can be loaded with XXSPLTIB & sign extension." + (match_test "xxspltib_constant_split (op, mode)")) + +;; ISA 3.0 DS-form instruction that has the bottom 2 bits 0 and no update form. +;; Used by LXSD/STXSD/LXSSP/STXSSP. In contrast to "Y", the multiple-of-four +;; offset is enforced for 32-bit too. 
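In concrete terms, the restriction described above (and enforced through mem_operand_ds_form by the "wY" constraint defined next) is that a DS-form displacement must fit in a signed 16-bit field with its low 2 bits clear, since the instruction encodes offset/4. A sketch of that legality test for the offset part of an address; the helper name is illustrative and the exact checks performed by mem_operand_ds_form are assumed, not quoted:

  /* Offset usable by DS-form accesses such as lxsd/stxsd (sketch).  */
  static int
  ds_form_offset_ok (long offset)
  {
    return (offset & 3) == 0 && offset >= -32768 && offset <= 32764;
  }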
+(define_memory_constraint "wY" + "Offsettable memory operand, with bottom 2 bits 0" + (and (match_code "mem") + (not (match_test "update_address_mem (op, mode)")) + (match_test "mem_operand_ds_form (op, mode)"))) + +;; Altivec style load/store that ignores the bottom bits of the address +(define_memory_constraint "wZ" + "Indexed or indirect memory operand, ignoring the bottom 4 bits" + (match_operand 0 "altivec_indexed_or_indirect_operand")) + +;; Integer constraints + +(define_constraint "I" + "A signed 16-bit constant" + (and (match_code "const_int") + (match_test "((unsigned HOST_WIDE_INT) ival + 0x8000) < 0x10000"))) + +(define_constraint "J" + "high-order 16 bits nonzero" + (and (match_code "const_int") + (match_test "(ival & (~ (unsigned HOST_WIDE_INT) 0xffff0000)) == 0"))) + +(define_constraint "K" + "low-order 16 bits nonzero" + (and (match_code "const_int") + (match_test "(ival & (~ (HOST_WIDE_INT) 0xffff)) == 0"))) + +(define_constraint "L" + "signed 16-bit constant shifted left 16 bits" + (and (match_code "const_int") + (match_test "((ival & 0xffff) == 0 + && (ival >> 31 == -1 || ival >> 31 == 0))"))) + +(define_constraint "M" + "constant greater than 31" + (and (match_code "const_int") + (match_test "ival > 31"))) + +(define_constraint "N" + "positive constant that is an exact power of two" + (and (match_code "const_int") + (match_test "ival > 0 && exact_log2 (ival) >= 0"))) + +(define_constraint "O" + "constant zero" + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "P" + "constant whose negation is signed 16-bit constant" + (and (match_code "const_int") + (match_test "((- (unsigned HOST_WIDE_INT) ival) + 0x8000) < 0x10000"))) + +;; Floating-point constraints + +(define_constraint "G" + "Constant that can be copied into GPR with two insns for DF/DI + and one for SF." + (and (match_code "const_double") + (match_test "num_insns_constant (op, mode) + == (mode == SFmode ? 1 : 2)"))) + +(define_constraint "H" + "DF/DI constant that takes three insns." + (and (match_code "const_double") + (match_test "num_insns_constant (op, mode) == 3"))) + +;; Memory constraints + +(define_memory_constraint "es" + "A ``stable'' memory operand; that is, one which does not include any +automodification of the base register. Unlike @samp{m}, this constraint +can be used in @code{asm} statements that might access the operand +several times, or that might not access it at all." 
+ (and (match_code "mem") + (match_test "GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != RTX_AUTOINC"))) + +(define_memory_constraint "Q" + "Memory operand that is an offset from a register (it is usually better +to use @samp{m} or @samp{es} in @code{asm} statements)" + (and (match_code "mem") + (match_test "GET_CODE (XEXP (op, 0)) == REG"))) + +(define_memory_constraint "Y" + "memory operand for 8 byte and 16 byte gpr load/store" + (and (match_code "mem") + (match_test "mem_operand_gpr (op, mode)"))) + +(define_memory_constraint "Z" + "Memory operand that is an indexed or indirect from a register (it is +usually better to use @samp{m} or @samp{es} in @code{asm} statements)" + (match_operand 0 "indexed_or_indirect_operand")) + +;; Address constraints + +(define_address_constraint "a" + "Indexed or indirect address operand" + (match_operand 0 "indexed_or_indirect_address")) + +(define_constraint "R" + "AIX TOC entry" + (match_test "legitimate_constant_pool_address_p (op, QImode, false)")) + +;; General constraints + +(define_constraint "U" + "V.4 small data reference" + (and (match_test "DEFAULT_ABI == ABI_V4") + (match_test "small_data_operand (op, mode)"))) + +(define_constraint "W" + "vector constant that does not require memory" + (match_operand 0 "easy_vector_constant")) + +(define_constraint "j" + "Zero vector constant" + (match_test "op == const0_rtx || op == CONST0_RTX (mode)")) diff --git a/gcc/config/powerpcspe/crypto.md b/gcc/config/powerpcspe/crypto.md new file mode 100644 index 000000000000..5892f8918f2c --- /dev/null +++ b/gcc/config/powerpcspe/crypto.md @@ -0,0 +1,110 @@ +;; Cryptographic instructions added in ISA 2.07 +;; Copyright (C) 2012-2017 Free Software Foundation, Inc. +;; Contributed by Michael Meissner (meissner@linux.vnet.ibm.com) + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; NOTE: Although this file contains all the instructions from +;; section 5.11 of ISA 2.07, only those in sections 5.11.1 and +;; 5.11.2 are in Category:Vector.Crypto. Those are the only +;; ones controlled by -m[no-]crypto. + +;; FIXME: The builtin names for the instructions in this file +;; are likely to be deprecated in favor of other names to be +;; agreed upon with the XL compilers and LLVM. 
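As a usage sketch, the patterns below are normally reached through the __builtin_crypto_* built-ins (which, per the FIXME above, may eventually be renamed). A minimal example of one AES encryption round, assuming -mcrypto plus -maltivec for the vector types; the function name is illustrative:

  #include <altivec.h>

  /* One AES round; expands to the vcipher instruction.  */
  vector unsigned long long
  aes_round (vector unsigned long long state, vector unsigned long long round_key)
  {
    return __builtin_crypto_vcipher (state, round_key);
  }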
+ +(define_c_enum "unspec" + [UNSPEC_VCIPHER + UNSPEC_VNCIPHER + UNSPEC_VCIPHERLAST + UNSPEC_VNCIPHERLAST + UNSPEC_VSBOX + UNSPEC_VSHASIGMA + UNSPEC_VPERMXOR + UNSPEC_VPMSUM]) + +;; Iterator for VPMSUM/VPERMXOR +(define_mode_iterator CR_mode [V16QI V8HI V4SI V2DI]) + +(define_mode_attr CR_char [(V16QI "b") + (V8HI "h") + (V4SI "w") + (V2DI "d")]) + +;; Iterator for VSHASIGMAD/VSHASIGMAW +(define_mode_iterator CR_hash [V4SI V2DI]) + +;; Iterator for the other crypto functions +(define_int_iterator CR_code [UNSPEC_VCIPHER + UNSPEC_VNCIPHER + UNSPEC_VCIPHERLAST + UNSPEC_VNCIPHERLAST]) + +(define_int_attr CR_insn [(UNSPEC_VCIPHER "vcipher") + (UNSPEC_VNCIPHER "vncipher") + (UNSPEC_VCIPHERLAST "vcipherlast") + (UNSPEC_VNCIPHERLAST "vncipherlast")]) + +;; 2 operand crypto instructions +(define_insn "crypto_<CR_insn>" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v") + (match_operand:V2DI 2 "register_operand" "v")] + CR_code))] + "TARGET_CRYPTO" + "<CR_insn> %0,%1,%2" + [(set_attr "type" "crypto")]) + +(define_insn "crypto_vpmsum<CR_char>" + [(set (match_operand:CR_mode 0 "register_operand" "=v") + (unspec:CR_mode [(match_operand:CR_mode 1 "register_operand" "v") + (match_operand:CR_mode 2 "register_operand" "v")] + UNSPEC_VPMSUM))] + "TARGET_P8_VECTOR" + "vpmsum<CR_char> %0,%1,%2" + [(set_attr "type" "crypto")]) + +;; 3 operand crypto instructions +(define_insn "crypto_vpermxor_<mode>" + [(set (match_operand:CR_mode 0 "register_operand" "=v") + (unspec:CR_mode [(match_operand:CR_mode 1 "register_operand" "v") + (match_operand:CR_mode 2 "register_operand" "v") + (match_operand:CR_mode 3 "register_operand" "v")] + UNSPEC_VPERMXOR))] + "TARGET_P8_VECTOR" + "vpermxor %0,%1,%2,%3" + [(set_attr "type" "vecperm")]) + +;; 1 operand crypto instruction +(define_insn "crypto_vsbox" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v")] + UNSPEC_VSBOX))] + "TARGET_CRYPTO" + "vsbox %0,%1" + [(set_attr "type" "crypto")]) + +;; Hash crypto instructions +(define_insn "crypto_vshasigma<CR_char>" + [(set (match_operand:CR_hash 0 "register_operand" "=v") + (unspec:CR_hash [(match_operand:CR_hash 1 "register_operand" "v") + (match_operand:SI 2 "const_0_to_1_operand" "n") + (match_operand:SI 3 "const_0_to_15_operand" "n")] + UNSPEC_VSHASIGMA))] + "TARGET_CRYPTO" + "vshasigma<CR_char> %0,%1,%2,%3" + [(set_attr "type" "vecsimple")]) diff --git a/gcc/config/powerpcspe/darwin.h b/gcc/config/powerpcspe/darwin.h new file mode 100644 index 000000000000..61e5e836de00 --- /dev/null +++ b/gcc/config/powerpcspe/darwin.h @@ -0,0 +1,422 @@ +/* Target definitions for PowerPC running Darwin (Mac OS X). + Copyright (C) 1997-2017 Free Software Foundation, Inc. + Contributed by Apple Computer Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. 
*/ + +#undef DARWIN_PPC +#define DARWIN_PPC 1 + +/* The "Darwin ABI" is mostly like AIX, but with some key differences. */ + +#define DEFAULT_ABI ABI_DARWIN + +#ifdef IN_LIBGCC2 +#undef TARGET_64BIT +#ifdef __powerpc64__ +#define TARGET_64BIT 1 +#else +#define TARGET_64BIT 0 +#endif +#endif + +/* The object file format is Mach-O. */ + +#define TARGET_OBJECT_FORMAT OBJECT_MACHO + +/* Size of the Obj-C jump buffer. */ +#define OBJC_JBLEN ((TARGET_64BIT) ? (26*2 + 18*2 + 129 + 1) : (26 + 18*2 + 129 + 1)) + +/* We're not ever going to do TOCs. */ + +#define TARGET_TOC 0 +#define TARGET_NO_TOC 1 + +/* Override the default rs6000 definition. */ +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE (TARGET_64BIT ? "long int" : "int") + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + if (!TARGET_64BIT) builtin_define ("__ppc__"); \ + if (TARGET_64BIT) builtin_define ("__ppc64__"); \ + builtin_define ("__POWERPC__"); \ + builtin_define ("__NATURAL_ALIGNMENT__"); \ + darwin_cpp_builtins (pfile); \ + } \ + while (0) + +/* Generate branch islands stubs if this is true. */ +extern int darwin_emit_branch_islands; + +#define SUBTARGET_OVERRIDE_OPTIONS darwin_rs6000_override_options () + +#define C_COMMON_OVERRIDE_OPTIONS do { \ + /* On powerpc, __cxa_get_exception_ptr is available starting in the \ + 10.4.6 libstdc++.dylib. */ \ + if (strverscmp (darwin_macosx_version_min, "10.4.6") < 0 \ + && flag_use_cxa_get_exception_ptr == 2) \ + flag_use_cxa_get_exception_ptr = 0; \ + if (flag_mkernel) \ + flag_no_builtin = 1; \ + SUBTARGET_C_COMMON_OVERRIDE_OPTIONS; \ +} while (0) + +/* Darwin has 128-bit long double support in libc in 10.4 and later. + Default to 128-bit long doubles even on earlier platforms for ABI + consistency; arithmetic will work even if libc and libm support is + not available. */ + +#define RS6000_DEFAULT_LONG_DOUBLE_SIZE 128 + + +/* We want -fPIC by default, unless we're using -static to compile for + the kernel or some such. The "-faltivec" option should have been + called "-maltivec" all along. */ + +#define CC1_SPEC "\ + %(cc1_cpu) \ + %{g: %{!fno-eliminate-unused-debug-symbols: -feliminate-unused-debug-symbols }} \ + %{static: %{Zdynamic: %e conflicting code gen style switches are used}}\ + %{!mkernel:%{!static:%{!mdynamic-no-pic:-fPIC}}} \ + %{faltivec:-maltivec -include altivec.h} %{fno-altivec:-mno-altivec} \ + %<faltivec %<fno-altivec " \ + DARWIN_CC1_SPEC + +#define DARWIN_ARCH_SPEC "%{m64:ppc64;:ppc}" + +#define DARWIN_SUBARCH_SPEC " \ + %{m64: ppc64} \ + %{!m64: \ + %{mcpu=601:ppc601; \ + mcpu=603:ppc603; \ + mcpu=603e:ppc603; \ + mcpu=604:ppc604; \ + mcpu=604e:ppc604e; \ + mcpu=740:ppc750; \ + mcpu=750:ppc750; \ + mcpu=G3:ppc750; \ + mcpu=7400:ppc7400; \ + mcpu=G4:ppc7400; \ + mcpu=7450:ppc7450; \ + mcpu=970:ppc970; \ + mcpu=power4:ppc970; \ + mcpu=G5:ppc970; \ + :ppc}}" + +/* crt2.o is at least partially required for 10.3.x and earlier. */ +#define DARWIN_CRT2_SPEC \ + "%{!m64:%:version-compare(!> 10.4 mmacosx-version-min= crt2.o%s)}" + +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + DARWIN_EXTRA_SPECS \ + { "darwin_arch", DARWIN_ARCH_SPEC }, \ + { "darwin_crt2", DARWIN_CRT2_SPEC }, \ + { "darwin_subarch", DARWIN_SUBARCH_SPEC }, + +/* Output a .machine directive. */ +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START rs6000_darwin_file_start + +/* Make both r2 and r13 available for allocation. */ +#define FIXED_R2 0 +#define FIXED_R13 0 + +/* Base register for access to local variables of the function. 
*/ + +#undef HARD_FRAME_POINTER_REGNUM +#define HARD_FRAME_POINTER_REGNUM 30 + +#undef RS6000_PIC_OFFSET_TABLE_REGNUM +#define RS6000_PIC_OFFSET_TABLE_REGNUM 31 + +/* Pad the outgoing args area to 16 bytes instead of the usual 8. */ + +#undef STARTING_FRAME_OFFSET +#define STARTING_FRAME_OFFSET \ + (FRAME_GROWS_DOWNWARD \ + ? 0 \ + : (RS6000_ALIGN (crtl->outgoing_args_size, 16) \ + + RS6000_SAVE_AREA)) + +#undef STACK_DYNAMIC_OFFSET +#define STACK_DYNAMIC_OFFSET(FUNDECL) \ + (RS6000_ALIGN (crtl->outgoing_args_size, 16) \ + + (STACK_POINTER_OFFSET)) + +/* Darwin uses a function call if everything needs to be saved/restored. */ + +#undef WORLD_SAVE_P +#define WORLD_SAVE_P(INFO) ((INFO)->world_save_p) + +/* We don't use these on Darwin, they are just place-holders. */ +#define SAVE_FP_PREFIX "" +#define SAVE_FP_SUFFIX "" +#define RESTORE_FP_PREFIX "" +#define RESTORE_FP_SUFFIX "" + +/* The assembler wants the alternate register names, but without + leading percent sign. */ +#undef REGISTER_NAMES +#define REGISTER_NAMES \ +{ \ + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \ + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", \ + "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", \ + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ + "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \ + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \ + "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \ + "mq", "lr", "ctr", "ap", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "xer", \ + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", \ + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", \ + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", \ + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", \ + "vrsave", "vscr", \ + "spe_acc", "spefscr", \ + "sfp", \ + "tfhar", "tfiar", "texasr", \ + "rh0", "rh1", "rh2", "rh3", "rh4", "rh5", "rh6", "rh7", \ + "rh8", "rh9", "rh10", "rh11", "rh12", "rh13", "rh14", "rh15", \ + "rh16", "rh17", "rh18", "rh19", "rh20", "rh21", "rh22", "rh23", \ + "rh24", "rh25", "rh26", "rh27", "rh28", "rh29", "rh30", "rh31" \ +} + +/* This outputs NAME to FILE. */ + +#undef RS6000_OUTPUT_BASENAME +#define RS6000_OUTPUT_BASENAME(FILE, NAME) \ + assemble_name (FILE, NAME) + +/* Globalizing directive for a label. */ +#undef GLOBAL_ASM_OP +#define GLOBAL_ASM_OP "\t.globl " +#undef TARGET_ASM_GLOBALIZE_LABEL + +/* This is how to output an internal label prefix. rs6000.c uses this + when generating traceback tables. */ +/* Not really used for Darwin? */ + +#undef ASM_OUTPUT_INTERNAL_LABEL_PREFIX +#define ASM_OUTPUT_INTERNAL_LABEL_PREFIX(FILE,PREFIX) \ + fprintf (FILE, "%s", PREFIX) + +/* Override the standard rs6000 definition. */ + +#undef ASM_COMMENT_START +#define ASM_COMMENT_START ";" + +/* This is how to output an assembler line that says to advance + the location counter to a multiple of 2**LOG bytes using the + "nop" instruction as padding. */ + +#define ASM_OUTPUT_ALIGN_WITH_NOP(FILE,LOG) \ + do \ + { \ + if ((LOG) < 3) \ + { \ + ASM_OUTPUT_ALIGN (FILE,LOG); \ + } \ + else /* nop == ori r0,r0,0 */ \ + fprintf (FILE, "\t.align32 %d,0x60000000\n", (LOG)); \ + } while (0) + +#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN +/* This is supported in cctools 465 and later. The macro test + above prevents using it in earlier build environments. 
*/ +#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \ + if ((LOG) != 0) \ + { \ + if ((MAX_SKIP) == 0) \ + fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ + else \ + fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \ + } +#endif + +/* Generate insns to call the profiler. */ + +#define PROFILE_HOOK(LABEL) output_profile_hook (LABEL) + +/* Function name to call to do profiling. */ + +#define RS6000_MCOUNT "*mcount" + +/* Default processor: G4, and G5 for 64-bit. */ + +#undef PROCESSOR_DEFAULT +#define PROCESSOR_DEFAULT PROCESSOR_PPC7400 +#undef PROCESSOR_DEFAULT64 +#define PROCESSOR_DEFAULT64 PROCESSOR_POWER4 + +/* Default target flag settings. Despite the fact that STMW/LMW + serializes, it's still a big code size win to use them. Use FSEL by + default as well. */ + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_MULTIPLE | MASK_PPC_GFXOPT) + +/* Darwin always uses IBM long double, never IEEE long double. */ +#undef TARGET_IEEEQUAD +#define TARGET_IEEEQUAD 0 + +/* Since Darwin doesn't do TOCs, stub this out. */ + +#define ASM_OUTPUT_SPECIAL_POOL_ENTRY_P(X, MODE) ((void)X, (void)MODE, 0) + +/* Unlike most other PowerPC targets, chars are signed, for + consistency with other Darwin architectures. */ + +#undef DEFAULT_SIGNED_CHAR +#define DEFAULT_SIGNED_CHAR (1) + +/* Given an rtx X being reloaded into a reg required to be + in class CLASS, return the class of reg to actually use. + In general this is just CLASS; but on some machines + in some cases it is preferable to use a more restrictive class. + + On the RS/6000, we have to return NO_REGS when we want to reload a + floating-point CONST_DOUBLE to force it to be copied to memory. + + Don't allow R0 when loading the address of, or otherwise furtling with, + a SYMBOL_REF. */ + +#undef PREFERRED_RELOAD_CLASS +#define PREFERRED_RELOAD_CLASS(X,CLASS) \ + ((CONSTANT_P (X) \ + && reg_classes_intersect_p ((CLASS), FLOAT_REGS)) \ + ? NO_REGS \ + : ((GET_CODE (X) == SYMBOL_REF || GET_CODE (X) == HIGH) \ + && reg_class_subset_p (BASE_REGS, (CLASS))) \ + ? BASE_REGS \ + : (GET_MODE_CLASS (GET_MODE (X)) == MODE_INT \ + && (CLASS) == NON_SPECIAL_REGS) \ + ? GENERAL_REGS \ + : (CLASS)) + +/* Compute field alignment. + This implements the 'power' alignment rule by pegging the alignment of + items (beyond the first aggregate field) to 32 bits. The pegging is + suppressed for vector and long double items (both 128 in size). + There is a dummy use of the FIELD argument to avoid an unused variable + warning (see PR59496). */ +#define ADJUST_FIELD_ALIGN(FIELD, TYPE, COMPUTED) \ + ((void) (FIELD), \ + (TARGET_ALIGN_NATURAL \ + ? (COMPUTED) \ + : (COMPUTED) == 128 \ + ? 128 \ + : MIN ((COMPUTED), 32))) + +/* Darwin increases natural record alignment to doubleword if the first + field is an FP double while the FP fields remain word aligned. */ +#define ROUND_TYPE_ALIGN(STRUCT, COMPUTED, SPECIFIED) \ + ((TREE_CODE (STRUCT) == RECORD_TYPE \ + || TREE_CODE (STRUCT) == UNION_TYPE \ + || TREE_CODE (STRUCT) == QUAL_UNION_TYPE) \ + && TARGET_ALIGN_NATURAL == 0 \ + ? darwin_rs6000_special_round_type_align (STRUCT, COMPUTED, SPECIFIED) \ + : (TREE_CODE (STRUCT) == VECTOR_TYPE \ + && ALTIVEC_VECTOR_MODE (TYPE_MODE (STRUCT))) \ + ? MAX (MAX ((COMPUTED), (SPECIFIED)), 128) \ + : MAX ((COMPUTED), (SPECIFIED))) + +/* Specify padding for the last element of a block move between + registers and memory. FIRST is nonzero if this is the only + element. */ +#define BLOCK_REG_PADDING(MODE, TYPE, FIRST) \ + (!(FIRST) ? 
upward : FUNCTION_ARG_PADDING (MODE, TYPE)) + +#define DOUBLE_INT_ASM_OP "\t.quad\t" + +/* For binary compatibility with 2.95; Darwin C APIs use bool from + stdbool.h, which was an int-sized enum in 2.95. Users can explicitly + choose to have sizeof(bool)==1 with the -mone-byte-bool switch. */ +#define BOOL_TYPE_SIZE (darwin_one_byte_bool ? CHAR_TYPE_SIZE : INT_TYPE_SIZE) + +#undef REGISTER_TARGET_PRAGMAS +#define REGISTER_TARGET_PRAGMAS() \ + do \ + { \ + DARWIN_REGISTER_TARGET_PRAGMAS(); \ + targetm.resolve_overloaded_builtin = altivec_resolve_overloaded_builtin; \ + } \ + while (0) + +#ifdef IN_LIBGCC2 +#include <stdbool.h> +#endif + +/* True, iff we're generating fast turn around debugging code. When + true, we arrange for function prologues to start with 5 nops so + that gdb may insert code to redirect them, and for data to be + accessed indirectly. The runtime uses this indirection to forward + references for data to the original instance of that data. */ + +#define TARGET_FIX_AND_CONTINUE (darwin_fix_and_continue) + +/* This is the reserved direct dispatch address for Objective-C. */ +#define OFFS_MSGSEND_FAST 0xFFFEFF00 + +/* This is the reserved ivar address Objective-C. */ +#define OFFS_ASSIGNIVAR_FAST 0xFFFEFEC0 + +/* Old versions of Mac OS/Darwin don't have C99 functions available. */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION darwin_libc_has_function + +/* When generating kernel code or kexts, we don't use Altivec by + default, as kernel code doesn't save/restore those registers. */ +#define OS_MISSING_ALTIVEC (flag_mkernel || flag_apple_kext) + +/* Darwin has support for section anchors on powerpc*. + It is disabled for any section containing a "zero-sized item" (because these + are re-written as size=1 to be compatible with the OSX ld64). + The re-writing would interfere with the computation of anchor offsets. + Therefore, we place zero-sized items in their own sections and make such + sections unavailable to section anchoring. */ + +#undef TARGET_ASM_OUTPUT_ANCHOR +#define TARGET_ASM_OUTPUT_ANCHOR darwin_asm_output_anchor + +#undef TARGET_USE_ANCHORS_FOR_SYMBOL_P +#define TARGET_USE_ANCHORS_FOR_SYMBOL_P darwin_use_anchors_for_symbol_p + +#undef DARWIN_SECTION_ANCHORS +#define DARWIN_SECTION_ANCHORS 1 + +/* PPC Darwin has to rename some of the long double builtins. */ +#undef SUBTARGET_INIT_BUILTINS +#define SUBTARGET_INIT_BUILTINS \ +do { \ + darwin_patch_builtins (); \ + rs6000_builtin_decls[(unsigned) (RS6000_BUILTIN_CFSTRING)] \ + = darwin_init_cfstring_builtins ((unsigned) (RS6000_BUILTIN_CFSTRING)); \ +} while(0) + +/* So far, there is no rs6000_fold_builtin, if one is introduced, then + this will need to be modified similar to the x86 case. */ +#define TARGET_FOLD_BUILTIN SUBTARGET_FOLD_BUILTIN + +/* Use standard DWARF numbering for DWARF debugging information. */ +#define RS6000_USE_DWARF_NUMBERING + diff --git a/gcc/config/powerpcspe/darwin.md b/gcc/config/powerpcspe/darwin.md new file mode 100644 index 000000000000..fde67fd268dc --- /dev/null +++ b/gcc/config/powerpcspe/darwin.md @@ -0,0 +1,480 @@ +/* Machine description patterns for PowerPC running Darwin (Mac OS X). + Copyright (C) 2004-2017 Free Software Foundation, Inc. + Contributed by Apple Computer Inc. + +This file is part of GCC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. 
+ +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. */ + +(define_insn "adddi3_high" + [(set (match_operand:DI 0 "gpc_reg_operand" "=b") + (plus:DI (match_operand:DI 1 "gpc_reg_operand" "b") + (high:DI (match_operand 2 "" ""))))] + "TARGET_MACHO && TARGET_64BIT" + "addis %0,%1,ha16(%2)" + [(set_attr "length" "4")]) + +(define_insn "movdf_low_si" + [(set (match_operand:DF 0 "gpc_reg_operand" "=f,!r") + (mem:DF (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b,b") + (match_operand 2 "" ""))))] + "TARGET_MACHO && TARGET_HARD_FLOAT && TARGET_FPRS && !TARGET_64BIT" + "* +{ + switch (which_alternative) + { + case 0: + return \"lfd %0,lo16(%2)(%1)\"; + case 1: + { + if (TARGET_POWERPC64 && TARGET_32BIT) + /* Note, old assemblers didn't support relocation here. */ + return \"ld %0,lo16(%2)(%1)\"; + else + { + output_asm_insn (\"la %0,lo16(%2)(%1)\", operands); + output_asm_insn (\"lwz %L0,4(%0)\", operands); + return (\"lwz %0,0(%0)\"); + } + } + default: + gcc_unreachable (); + } +}" + [(set_attr "type" "load") + (set_attr "length" "4,12")]) + + +(define_insn "movdf_low_di" + [(set (match_operand:DF 0 "gpc_reg_operand" "=f,!r") + (mem:DF (lo_sum:DI (match_operand:DI 1 "gpc_reg_operand" "b,b") + (match_operand 2 "" ""))))] + "TARGET_MACHO && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_64BIT" + "* +{ + switch (which_alternative) + { + case 0: + return \"lfd %0,lo16(%2)(%1)\"; + case 1: + return \"ld %0,lo16(%2)(%1)\"; + default: + gcc_unreachable (); + } +}" + [(set_attr "type" "load") + (set_attr "length" "4,4")]) + +(define_insn "movdf_low_st_si" + [(set (mem:DF (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand 2 "" ""))) + (match_operand:DF 0 "gpc_reg_operand" "f"))] + "TARGET_MACHO && TARGET_HARD_FLOAT && TARGET_FPRS && ! TARGET_64BIT" + "stfd %0,lo16(%2)(%1)" + [(set_attr "type" "store") + (set_attr "length" "4")]) + +(define_insn "movdf_low_st_di" + [(set (mem:DF (lo_sum:DI (match_operand:DI 1 "gpc_reg_operand" "b") + (match_operand 2 "" ""))) + (match_operand:DF 0 "gpc_reg_operand" "f"))] + "TARGET_MACHO && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_64BIT" + "stfd %0,lo16(%2)(%1)" + [(set_attr "type" "store") + (set_attr "length" "4")]) + +(define_insn "movsf_low_si" + [(set (match_operand:SF 0 "gpc_reg_operand" "=f,!r") + (mem:SF (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b,b") + (match_operand 2 "" ""))))] + "TARGET_MACHO && TARGET_HARD_FLOAT && TARGET_FPRS && ! TARGET_64BIT" + "@ + lfs %0,lo16(%2)(%1) + lwz %0,lo16(%2)(%1)" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +(define_insn "movsf_low_di" + [(set (match_operand:SF 0 "gpc_reg_operand" "=f,!r") + (mem:SF (lo_sum:DI (match_operand:DI 1 "gpc_reg_operand" "b,b") + (match_operand 2 "" ""))))] + "TARGET_MACHO && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_64BIT" + "@ + lfs %0,lo16(%2)(%1) + lwz %0,lo16(%2)(%1)" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +(define_insn "movsf_low_st_si" + [(set (mem:SF (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b,b") + (match_operand 2 "" ""))) + (match_operand:SF 0 "gpc_reg_operand" "f,!r"))] + "TARGET_MACHO && TARGET_HARD_FLOAT && TARGET_FPRS && ! 
TARGET_64BIT" + "@ + stfs %0,lo16(%2)(%1) + stw %0,lo16(%2)(%1)" + [(set_attr "type" "store") + (set_attr "length" "4")]) + +(define_insn "movsf_low_st_di" + [(set (mem:SF (lo_sum:DI (match_operand:DI 1 "gpc_reg_operand" "b,b") + (match_operand 2 "" ""))) + (match_operand:SF 0 "gpc_reg_operand" "f,!r"))] + "TARGET_MACHO && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_64BIT" + "@ + stfs %0,lo16(%2)(%1) + stw %0,lo16(%2)(%1)" + [(set_attr "type" "store") + (set_attr "length" "4")]) + +;; 64-bit MachO load/store support +(define_insn "movdi_low" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,*!d") + (mem:DI (lo_sum:DI (match_operand:DI 1 "gpc_reg_operand" "b,b") + (match_operand 2 "" ""))))] + "TARGET_MACHO && TARGET_64BIT" + "@ + ld %0,lo16(%2)(%1) + lfd %0,lo16(%2)(%1)" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +(define_insn "movsi_low_st" + [(set (mem:SI (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand 2 "" ""))) + (match_operand:SI 0 "gpc_reg_operand" "r"))] + "TARGET_MACHO && ! TARGET_64BIT" + "stw %0,lo16(%2)(%1)" + [(set_attr "type" "store") + (set_attr "length" "4")]) + +(define_insn "movdi_low_st" + [(set (mem:DI (lo_sum:DI (match_operand:DI 1 "gpc_reg_operand" "b,b") + (match_operand 2 "" ""))) + (match_operand:DI 0 "gpc_reg_operand" "r,*!d"))] + "TARGET_MACHO && TARGET_64BIT" + "@ + std %0,lo16(%2)(%1) + stfd %0,lo16(%2)(%1)" + [(set_attr "type" "store") + (set_attr "length" "4")]) + +;; Mach-O PIC trickery. +(define_expand "macho_high" + [(set (match_operand 0 "" "") + (high (match_operand 1 "" "")))] + "TARGET_MACHO" +{ + if (TARGET_64BIT) + emit_insn (gen_macho_high_di (operands[0], operands[1])); + else + emit_insn (gen_macho_high_si (operands[0], operands[1])); + + DONE; +}) + +(define_insn "macho_high_si" + [(set (match_operand:SI 0 "gpc_reg_operand" "=b*r") + (high:SI (match_operand 1 "" "")))] + "TARGET_MACHO && ! TARGET_64BIT" + "lis %0,ha16(%1)") + + +(define_insn "macho_high_di" + [(set (match_operand:DI 0 "gpc_reg_operand" "=b*r") + (high:DI (match_operand 1 "" "")))] + "TARGET_MACHO && TARGET_64BIT" + "lis %0,ha16(%1)") + +(define_expand "macho_low" + [(set (match_operand 0 "" "") + (lo_sum (match_operand 1 "" "") + (match_operand 2 "" "")))] + "TARGET_MACHO" +{ + if (TARGET_64BIT) + emit_insn (gen_macho_low_di (operands[0], operands[1], operands[2])); + else + emit_insn (gen_macho_low_si (operands[0], operands[1], operands[2])); + + DONE; +}) + +(define_insn "macho_low_si" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand 2 "" "")))] + "TARGET_MACHO && ! 
TARGET_64BIT" + "la %0,lo16(%2)(%1)") + +(define_insn "macho_low_di" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "gpc_reg_operand" "b") + (match_operand 2 "" "")))] + "TARGET_MACHO && TARGET_64BIT" + "la %0,lo16(%2)(%1)") + +(define_split + [(set (mem:V4SI (plus:DI (match_operand:DI 0 "gpc_reg_operand" "") + (match_operand:DI 1 "short_cint_operand" ""))) + (match_operand:V4SI 2 "register_operand" "")) + (clobber (match_operand:DI 3 "gpc_reg_operand" ""))] + "TARGET_MACHO && TARGET_64BIT" + [(set (match_dup 3) (plus:DI (match_dup 0) (match_dup 1))) + (set (mem:V4SI (match_dup 3)) + (match_dup 2))] + "") + +(define_expand "load_macho_picbase" + [(set (reg:SI LR_REGNO) + (unspec [(match_operand 0 "" "")] + UNSPEC_LD_MPIC))] + "(DEFAULT_ABI == ABI_DARWIN) && flag_pic" +{ + if (TARGET_32BIT) + emit_insn (gen_load_macho_picbase_si (operands[0])); + else + emit_insn (gen_load_macho_picbase_di (operands[0])); + + DONE; +}) + +(define_insn "load_macho_picbase_si" + [(set (reg:SI LR_REGNO) + (unspec:SI [(match_operand:SI 0 "immediate_operand" "s") + (pc)] UNSPEC_LD_MPIC))] + "(DEFAULT_ABI == ABI_DARWIN) && flag_pic" +{ +#if TARGET_MACHO + machopic_should_output_picbase_label (); /* Update for new func. */ +#else + gcc_unreachable (); +#endif + return "bcl 20,31,%0\\n%0:"; +} + [(set_attr "type" "branch") + (set_attr "cannot_copy" "yes") + (set_attr "length" "4")]) + +(define_insn "load_macho_picbase_di" + [(set (reg:DI LR_REGNO) + (unspec:DI [(match_operand:DI 0 "immediate_operand" "s") + (pc)] UNSPEC_LD_MPIC))] + "(DEFAULT_ABI == ABI_DARWIN) && flag_pic && TARGET_64BIT" +{ +#if TARGET_MACHO + machopic_should_output_picbase_label (); /* Update for new func. */ +#else + gcc_unreachable (); +#endif + return "bcl 20,31,%0\\n%0:"; +} + [(set_attr "type" "branch") + (set_attr "cannot_copy" "yes") + (set_attr "length" "4")]) + +(define_expand "macho_correct_pic" + [(set (match_operand 0 "" "") + (plus (match_operand 1 "" "") + (unspec [(match_operand 2 "" "") + (match_operand 3 "" "")] + UNSPEC_MPIC_CORRECT)))] + "DEFAULT_ABI == ABI_DARWIN" +{ + if (TARGET_32BIT) + emit_insn (gen_macho_correct_pic_si (operands[0], operands[1], operands[2], + operands[3])); + else + emit_insn (gen_macho_correct_pic_di (operands[0], operands[1], operands[2], + operands[3])); + + DONE; +}) + +(define_insn "macho_correct_pic_si" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (plus:SI (match_operand:SI 1 "gpc_reg_operand" "r") + (unspec:SI [(match_operand:SI 2 "immediate_operand" "s") + (match_operand:SI 3 "immediate_operand" "s")] + UNSPEC_MPIC_CORRECT)))] + "DEFAULT_ABI == ABI_DARWIN" + "addis %0,%1,ha16(%2-%3)\n\taddi %0,%0,lo16(%2-%3)" + [(set_attr "length" "8")]) + +(define_insn "macho_correct_pic_di" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (plus:DI (match_operand:DI 1 "gpc_reg_operand" "r") + (unspec:DI [(match_operand:DI 2 "immediate_operand" "s") + (match_operand:DI 3 "immediate_operand" "s")] + 16)))] + "DEFAULT_ABI == ABI_DARWIN && TARGET_64BIT" + "addis %0,%1,ha16(%2-%3)\n\taddi %0,%0,lo16(%2-%3)" + [(set_attr "length" "8")]) + +(define_insn "*call_indirect_nonlocal_darwin64" + [(call (mem:SI (match_operand:DI 0 "register_operand" "c,*l,c,*l")) + (match_operand 1 "" "g,g,g,g")) + (use (match_operand:SI 2 "immediate_operand" "O,O,n,n")) + (clobber (reg:SI LR_REGNO))] + "DEFAULT_ABI == ABI_DARWIN && TARGET_64BIT" +{ + return "b%T0l"; +} + [(set_attr "type" "jmpreg,jmpreg,jmpreg,jmpreg") + (set_attr "length" "4,4,8,8")]) + +(define_insn 
"*call_nonlocal_darwin64" + [(call (mem:SI (match_operand:DI 0 "symbol_ref_operand" "s,s")) + (match_operand 1 "" "g,g")) + (use (match_operand:SI 2 "immediate_operand" "O,n")) + (clobber (reg:SI LR_REGNO))] + "(DEFAULT_ABI == ABI_DARWIN) + && (INTVAL (operands[2]) & CALL_LONG) == 0" +{ +#if TARGET_MACHO + return output_call(insn, operands, 0, 2); +#else + gcc_unreachable (); +#endif +} + [(set_attr "type" "branch,branch") + (set_attr "length" "4,8")]) + +(define_insn "*call_value_indirect_nonlocal_darwin64" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:DI 1 "register_operand" "c,*l,c,*l")) + (match_operand 2 "" "g,g,g,g"))) + (use (match_operand:SI 3 "immediate_operand" "O,O,n,n")) + (clobber (reg:SI LR_REGNO))] + "DEFAULT_ABI == ABI_DARWIN" +{ + return "b%T1l"; +} + [(set_attr "type" "jmpreg,jmpreg,jmpreg,jmpreg") + (set_attr "length" "4,4,8,8")]) + +(define_insn "*call_value_nonlocal_darwin64" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:DI 1 "symbol_ref_operand" "s,s")) + (match_operand 2 "" "g,g"))) + (use (match_operand:SI 3 "immediate_operand" "O,n")) + (clobber (reg:SI LR_REGNO))] + "(DEFAULT_ABI == ABI_DARWIN) + && (INTVAL (operands[3]) & CALL_LONG) == 0" +{ +#if TARGET_MACHO + return output_call(insn, operands, 1, 3); +#else + gcc_unreachable (); +#endif +} + [(set_attr "type" "branch,branch") + (set_attr "length" "4,8")]) + +(define_expand "reload_macho_picbase" + [(set (reg:SI LR_REGNO) + (unspec [(match_operand 0 "" "")] + UNSPEC_RELD_MPIC))] + "(DEFAULT_ABI == ABI_DARWIN) && flag_pic" +{ + if (TARGET_32BIT) + emit_insn (gen_reload_macho_picbase_si (operands[0])); + else + emit_insn (gen_reload_macho_picbase_di (operands[0])); + + DONE; +}) + +(define_insn "reload_macho_picbase_si" + [(set (reg:SI LR_REGNO) + (unspec:SI [(match_operand:SI 0 "immediate_operand" "s") + (pc)] UNSPEC_RELD_MPIC))] + "(DEFAULT_ABI == ABI_DARWIN) && flag_pic" +{ +#if TARGET_MACHO + if (machopic_should_output_picbase_label ()) + { + static char tmp[64]; + const char *cnam = machopic_get_function_picbase (); + snprintf (tmp, 64, "bcl 20,31,%s\\n%s:\\n%%0:", cnam, cnam); + return tmp; + } + else +#else + gcc_unreachable (); +#endif + return "bcl 20,31,%0\\n%0:"; +} + [(set_attr "type" "branch") + (set_attr "cannot_copy" "yes") + (set_attr "length" "4")]) + +(define_insn "reload_macho_picbase_di" + [(set (reg:DI LR_REGNO) + (unspec:DI [(match_operand:DI 0 "immediate_operand" "s") + (pc)] UNSPEC_RELD_MPIC))] + "(DEFAULT_ABI == ABI_DARWIN) && flag_pic && TARGET_64BIT" +{ +#if TARGET_MACHO + if (machopic_should_output_picbase_label ()) + { + static char tmp[64]; + const char *cnam = machopic_get_function_picbase (); + snprintf (tmp, 64, "bcl 20,31,%s\\n%s:\\n%%0:", cnam, cnam); + return tmp; + } + else +#else + gcc_unreachable (); +#endif + return "bcl 20,31,%0\\n%0:"; +} + [(set_attr "type" "branch") + (set_attr "cannot_copy" "yes") + (set_attr "length" "4")]) + +;; We need to restore the PIC register, at the site of nonlocal label. 
+ +(define_insn_and_split "nonlocal_goto_receiver" + [(unspec_volatile [(const_int 0)] UNSPECV_NLGR)] + "TARGET_MACHO && flag_pic" + "#" + "&& reload_completed" + [(const_int 0)] +{ +#if TARGET_MACHO + if (crtl->uses_pic_offset_table) + { + static unsigned n = 0; + rtx picrtx = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME); + rtx picreg = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM); + rtx tmplrtx; + char tmplab[20]; + + ASM_GENERATE_INTERNAL_LABEL(tmplab, "Lnlgr", ++n); + tmplrtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tmplab)); + + emit_insn (gen_reload_macho_picbase (tmplrtx)); + emit_move_insn (picreg, gen_rtx_REG (Pmode, LR_REGNO)); + emit_insn (gen_macho_correct_pic (picreg, picreg, picrtx, tmplrtx)); + } + else + /* Not using PIC reg, no reload needed. */ + emit_note (NOTE_INSN_DELETED); +#else + gcc_unreachable (); +#endif + DONE; +}) diff --git a/gcc/config/powerpcspe/darwin.opt b/gcc/config/powerpcspe/darwin.opt new file mode 100644 index 000000000000..aca1c3ee67d1 --- /dev/null +++ b/gcc/config/powerpcspe/darwin.opt @@ -0,0 +1,42 @@ +; Darwin options for PPC port. +; +; Copyright (C) 2005-2017 Free Software Foundation, Inc. +; Contributed by Aldy Hernandez <aldy@quesejoda.com>. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. + +Waltivec-long-deprecated +Driver Alias(mwarn-altivec-long) + +faltivec +Driver + +; -ffix-and-continue and -findirect-data are for compatibility for old +; compilers. +ffix-and-continue +Driver RejectNegative Alias(mfix-and-continue) + +findirect-data +Driver RejectNegative Alias(mfix-and-continue) + +m64 +Target RejectNegative Negative(m32) Mask(64BIT) Var(rs6000_isa_flags) +Generate 64-bit code. + +m32 +Target RejectNegative Negative(m64) InverseMask(64BIT) Var(rs6000_isa_flags) +Generate 32-bit code. diff --git a/gcc/config/powerpcspe/darwin64.h b/gcc/config/powerpcspe/darwin64.h new file mode 100644 index 000000000000..79d780cd71b1 --- /dev/null +++ b/gcc/config/powerpcspe/darwin64.h @@ -0,0 +1,32 @@ +/* Target definitions for PowerPC running Darwin (Mac OS X). + Copyright (C) 2006-2017 Free Software Foundation, Inc. + Contributed by Apple Computer Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. 
*/ + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_POWERPC64 | MASK_64BIT \ + | MASK_MULTIPLE | MASK_PPC_GFXOPT) + +#undef DARWIN_ARCH_SPEC +#define DARWIN_ARCH_SPEC "%{m32:ppc;:ppc64}" + +#undef DARWIN_SUBARCH_SPEC +#define DARWIN_SUBARCH_SPEC DARWIN_ARCH_SPEC + +#undef DARWIN_CRT2_SPEC +#define DARWIN_CRT2_SPEC "" diff --git a/gcc/config/powerpcspe/darwin7.h b/gcc/config/powerpcspe/darwin7.h new file mode 100644 index 000000000000..f5e9f8930308 --- /dev/null +++ b/gcc/config/powerpcspe/darwin7.h @@ -0,0 +1,32 @@ +/* Target definitions for Darwin 7.x (Mac OS X) systems. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* Machine dependent libraries. Include libmx when compiling for + Darwin 7.0 and above, but before libSystem, since the functions are + actually in libSystem but for 7.x compatibility we want them to be + looked for in libmx first. Include libmx by default because otherwise + libstdc++ isn't usable. */ + +#undef LIB_SPEC +#define LIB_SPEC "%{!static:\ + %:version-compare(!< 10.3 mmacosx-version-min= -lmx)\ + -lSystem}" + +#undef DEF_MIN_OSX_VERSION +#define DEF_MIN_OSX_VERSION "10.3.9" diff --git a/gcc/config/powerpcspe/darwin8.h b/gcc/config/powerpcspe/darwin8.h new file mode 100644 index 000000000000..bccf998de7ce --- /dev/null +++ b/gcc/config/powerpcspe/darwin8.h @@ -0,0 +1,31 @@ +/* Target definitions for Darwin 8.0 and above (Mac OS X) systems. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* Machine dependent libraries. Include libmx when compiling on + Darwin 7.0 and above, but before libSystem, since the functions are + actually in libSystem but for 7.x compatibility we want them to be + looked for in libmx first---but only do this if 7.x compatibility + is a concern, which it's not in 64-bit mode. Include + libSystemStubs when compiling on (not necessarily for) 8.0 and + above and not 64-bit long double. 
*/ + +#undef LIB_SPEC +#define LIB_SPEC "%{!static:\ + %{!mlong-double-64:%{pg:-lSystemStubs_profile;:-lSystemStubs}} \ + %{!m64:%:version-compare(>< 10.3 10.4 mmacosx-version-min= -lmx)} -lSystem}" diff --git a/gcc/config/powerpcspe/default64.h b/gcc/config/powerpcspe/default64.h new file mode 100644 index 000000000000..ebce9f25aae3 --- /dev/null +++ b/gcc/config/powerpcspe/default64.h @@ -0,0 +1,31 @@ +/* Definitions of target machine for GNU compiler, + for 64 bit powerpc linux defaulting to -m64. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#define RS6000_CPU(NAME, CPU, FLAGS) +#include "rs6000-cpus.def" +#undef RS6000_CPU + +#if (TARGET_DEFAULT & MASK_LITTLE_ENDIAN) +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (ISA_2_7_MASKS_SERVER | MASK_POWERPC64 | MASK_64BIT | MASK_LITTLE_ENDIAN) +#else +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_PPC_GFXOPT | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 | MASK_64BIT) +#endif diff --git a/gcc/config/powerpcspe/dfp.md b/gcc/config/powerpcspe/dfp.md new file mode 100644 index 000000000000..790f2e43d495 --- /dev/null +++ b/gcc/config/powerpcspe/dfp.md @@ -0,0 +1,419 @@ +;; Decimal Floating Point (DFP) patterns. +;; Copyright (C) 2007-2017 Free Software Foundation, Inc. +;; Contributed by Ben Elliston (bje@au.ibm.com) and Peter Bergner +;; (bergner@vnet.ibm.com). + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. 
+ +;; +;; UNSPEC usage +;; + +(define_c_enum "unspec" + [UNSPEC_MOVSD_LOAD + UNSPEC_MOVSD_STORE + ]) + + +(define_insn "movsd_store" + [(set (match_operand:DD 0 "nonimmediate_operand" "=m") + (unspec:DD [(match_operand:SD 1 "input_operand" "d")] + UNSPEC_MOVSD_STORE))] + "(gpc_reg_operand (operands[0], DDmode) + || gpc_reg_operand (operands[1], SDmode)) + && TARGET_HARD_FLOAT && TARGET_FPRS" + "stfd%U0%X0 %1,%0" + [(set_attr "type" "fpstore") + (set_attr "length" "4")]) + +(define_insn "movsd_load" + [(set (match_operand:SD 0 "nonimmediate_operand" "=f") + (unspec:SD [(match_operand:DD 1 "input_operand" "m")] + UNSPEC_MOVSD_LOAD))] + "(gpc_reg_operand (operands[0], SDmode) + || gpc_reg_operand (operands[1], DDmode)) + && TARGET_HARD_FLOAT && TARGET_FPRS" + "lfd%U1%X1 %0,%1" + [(set_attr "type" "fpload") + (set_attr "length" "4")]) + +;; Hardware support for decimal floating point operations. + +(define_insn "extendsddd2" + [(set (match_operand:DD 0 "gpc_reg_operand" "=d") + (float_extend:DD (match_operand:SD 1 "gpc_reg_operand" "f")))] + "TARGET_DFP" + "dctdp %0,%1" + [(set_attr "type" "dfp")]) + +(define_expand "extendsdtd2" + [(set (match_operand:TD 0 "gpc_reg_operand" "=d") + (float_extend:TD (match_operand:SD 1 "gpc_reg_operand" "d")))] + "TARGET_DFP" +{ + rtx tmp = gen_reg_rtx (DDmode); + emit_insn (gen_extendsddd2 (tmp, operands[1])); + emit_insn (gen_extendddtd2 (operands[0], tmp)); + DONE; +}) + +(define_insn "truncddsd2" + [(set (match_operand:SD 0 "gpc_reg_operand" "=f") + (float_truncate:SD (match_operand:DD 1 "gpc_reg_operand" "d")))] + "TARGET_DFP" + "drsp %0,%1" + [(set_attr "type" "dfp")]) + +(define_expand "negdd2" + [(set (match_operand:DD 0 "gpc_reg_operand" "") + (neg:DD (match_operand:DD 1 "gpc_reg_operand" "")))] + "TARGET_HARD_FLOAT && TARGET_FPRS" + "") + +(define_insn "*negdd2_fpr" + [(set (match_operand:DD 0 "gpc_reg_operand" "=d") + (neg:DD (match_operand:DD 1 "gpc_reg_operand" "d")))] + "TARGET_HARD_FLOAT && TARGET_FPRS" + "fneg %0,%1" + [(set_attr "type" "fpsimple")]) + +(define_expand "absdd2" + [(set (match_operand:DD 0 "gpc_reg_operand" "") + (abs:DD (match_operand:DD 1 "gpc_reg_operand" "")))] + "TARGET_HARD_FLOAT && TARGET_FPRS" + "") + +(define_insn "*absdd2_fpr" + [(set (match_operand:DD 0 "gpc_reg_operand" "=d") + (abs:DD (match_operand:DD 1 "gpc_reg_operand" "d")))] + "TARGET_HARD_FLOAT && TARGET_FPRS" + "fabs %0,%1" + [(set_attr "type" "fpsimple")]) + +(define_insn "*nabsdd2_fpr" + [(set (match_operand:DD 0 "gpc_reg_operand" "=d") + (neg:DD (abs:DD (match_operand:DD 1 "gpc_reg_operand" "d"))))] + "TARGET_HARD_FLOAT && TARGET_FPRS" + "fnabs %0,%1" + [(set_attr "type" "fpsimple")]) + +(define_expand "negtd2" + [(set (match_operand:TD 0 "gpc_reg_operand" "") + (neg:TD (match_operand:TD 1 "gpc_reg_operand" "")))] + "TARGET_HARD_FLOAT && TARGET_FPRS" + "") + +(define_insn "*negtd2_fpr" + [(set (match_operand:TD 0 "gpc_reg_operand" "=d,d") + (neg:TD (match_operand:TD 1 "gpc_reg_operand" "0,d")))] + "TARGET_HARD_FLOAT && TARGET_FPRS" + "@ + fneg %0,%1 + fneg %0,%1\;fmr %L0,%L1" + [(set_attr "type" "fpsimple") + (set_attr "length" "4,8")]) + +(define_expand "abstd2" + [(set (match_operand:TD 0 "gpc_reg_operand" "") + (abs:TD (match_operand:TD 1 "gpc_reg_operand" "")))] + "TARGET_HARD_FLOAT && TARGET_FPRS" + "") + +(define_insn "*abstd2_fpr" + [(set (match_operand:TD 0 "gpc_reg_operand" "=d,d") + (abs:TD (match_operand:TD 1 "gpc_reg_operand" "0,d")))] + "TARGET_HARD_FLOAT && TARGET_FPRS" + "@ + fabs %0,%1 + fabs %0,%1\;fmr %L0,%L1" + [(set_attr "type" 
"fpsimple") + (set_attr "length" "4,8")]) + +(define_insn "*nabstd2_fpr" + [(set (match_operand:TD 0 "gpc_reg_operand" "=d,d") + (neg:TD (abs:TD (match_operand:TD 1 "gpc_reg_operand" "0,d"))))] + "TARGET_HARD_FLOAT && TARGET_FPRS" + "@ + fnabs %0,%1 + fnabs %0,%1\;fmr %L0,%L1" + [(set_attr "type" "fpsimple") + (set_attr "length" "4,8")]) + +;; Hardware support for decimal floating point operations. + +(define_insn "extendddtd2" + [(set (match_operand:TD 0 "gpc_reg_operand" "=d") + (float_extend:TD (match_operand:DD 1 "gpc_reg_operand" "d")))] + "TARGET_DFP" + "dctqpq %0,%1" + [(set_attr "type" "dfp")]) + +;; The result of drdpq is an even/odd register pair with the converted +;; value in the even register and zero in the odd register. +;; FIXME: Avoid the register move by using a reload constraint to ensure +;; that the result is the first of the pair receiving the result of drdpq. + +(define_insn "trunctddd2" + [(set (match_operand:DD 0 "gpc_reg_operand" "=d") + (float_truncate:DD (match_operand:TD 1 "gpc_reg_operand" "d"))) + (clobber (match_scratch:TD 2 "=d"))] + "TARGET_DFP" + "drdpq %2,%1\;fmr %0,%2" + [(set_attr "type" "dfp") + (set_attr "length" "8")]) + +(define_insn "adddd3" + [(set (match_operand:DD 0 "gpc_reg_operand" "=d") + (plus:DD (match_operand:DD 1 "gpc_reg_operand" "%d") + (match_operand:DD 2 "gpc_reg_operand" "d")))] + "TARGET_DFP" + "dadd %0,%1,%2" + [(set_attr "type" "dfp")]) + +(define_insn "addtd3" + [(set (match_operand:TD 0 "gpc_reg_operand" "=d") + (plus:TD (match_operand:TD 1 "gpc_reg_operand" "%d") + (match_operand:TD 2 "gpc_reg_operand" "d")))] + "TARGET_DFP" + "daddq %0,%1,%2" + [(set_attr "type" "dfp")]) + +(define_insn "subdd3" + [(set (match_operand:DD 0 "gpc_reg_operand" "=d") + (minus:DD (match_operand:DD 1 "gpc_reg_operand" "d") + (match_operand:DD 2 "gpc_reg_operand" "d")))] + "TARGET_DFP" + "dsub %0,%1,%2" + [(set_attr "type" "dfp")]) + +(define_insn "subtd3" + [(set (match_operand:TD 0 "gpc_reg_operand" "=d") + (minus:TD (match_operand:TD 1 "gpc_reg_operand" "d") + (match_operand:TD 2 "gpc_reg_operand" "d")))] + "TARGET_DFP" + "dsubq %0,%1,%2" + [(set_attr "type" "dfp")]) + +(define_insn "muldd3" + [(set (match_operand:DD 0 "gpc_reg_operand" "=d") + (mult:DD (match_operand:DD 1 "gpc_reg_operand" "%d") + (match_operand:DD 2 "gpc_reg_operand" "d")))] + "TARGET_DFP" + "dmul %0,%1,%2" + [(set_attr "type" "dfp")]) + +(define_insn "multd3" + [(set (match_operand:TD 0 "gpc_reg_operand" "=d") + (mult:TD (match_operand:TD 1 "gpc_reg_operand" "%d") + (match_operand:TD 2 "gpc_reg_operand" "d")))] + "TARGET_DFP" + "dmulq %0,%1,%2" + [(set_attr "type" "dfp")]) + +(define_insn "divdd3" + [(set (match_operand:DD 0 "gpc_reg_operand" "=d") + (div:DD (match_operand:DD 1 "gpc_reg_operand" "d") + (match_operand:DD 2 "gpc_reg_operand" "d")))] + "TARGET_DFP" + "ddiv %0,%1,%2" + [(set_attr "type" "dfp")]) + +(define_insn "divtd3" + [(set (match_operand:TD 0 "gpc_reg_operand" "=d") + (div:TD (match_operand:TD 1 "gpc_reg_operand" "d") + (match_operand:TD 2 "gpc_reg_operand" "d")))] + "TARGET_DFP" + "ddivq %0,%1,%2" + [(set_attr "type" "dfp")]) + +(define_insn "*cmpdd_internal1" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (compare:CCFP (match_operand:DD 1 "gpc_reg_operand" "d") + (match_operand:DD 2 "gpc_reg_operand" "d")))] + "TARGET_DFP" + "dcmpu %0,%1,%2" + [(set_attr "type" "dfp")]) + +(define_insn "*cmptd_internal1" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (compare:CCFP (match_operand:TD 1 "gpc_reg_operand" "d") + (match_operand:TD 2 
"gpc_reg_operand" "d")))] + "TARGET_DFP" + "dcmpuq %0,%1,%2" + [(set_attr "type" "dfp")]) + +(define_insn "floatdidd2" + [(set (match_operand:DD 0 "gpc_reg_operand" "=d") + (float:DD (match_operand:DI 1 "gpc_reg_operand" "d")))] + "TARGET_DFP && TARGET_POPCNTD" + "dcffix %0,%1" + [(set_attr "type" "dfp")]) + +(define_insn "floatditd2" + [(set (match_operand:TD 0 "gpc_reg_operand" "=d") + (float:TD (match_operand:DI 1 "gpc_reg_operand" "d")))] + "TARGET_DFP" + "dcffixq %0,%1" + [(set_attr "type" "dfp")]) + +;; Convert a decimal64 to a decimal64 whose value is an integer. +;; This is the first stage of converting it to an integer type. + +(define_insn "ftruncdd2" + [(set (match_operand:DD 0 "gpc_reg_operand" "=d") + (fix:DD (match_operand:DD 1 "gpc_reg_operand" "d")))] + "TARGET_DFP" + "drintn. 0,%0,%1,1" + [(set_attr "type" "dfp")]) + +;; Convert a decimal64 whose value is an integer to an actual integer. +;; This is the second stage of converting decimal float to integer type. + +(define_insn "fixdddi2" + [(set (match_operand:DI 0 "gpc_reg_operand" "=d") + (fix:DI (match_operand:DD 1 "gpc_reg_operand" "d")))] + "TARGET_DFP" + "dctfix %0,%1" + [(set_attr "type" "dfp")]) + +;; Convert a decimal128 to a decimal128 whose value is an integer. +;; This is the first stage of converting it to an integer type. + +(define_insn "ftrunctd2" + [(set (match_operand:TD 0 "gpc_reg_operand" "=d") + (fix:TD (match_operand:TD 1 "gpc_reg_operand" "d")))] + "TARGET_DFP" + "drintnq. 0,%0,%1,1" + [(set_attr "type" "dfp")]) + +;; Convert a decimal128 whose value is an integer to an actual integer. +;; This is the second stage of converting decimal float to integer type. + +(define_insn "fixtddi2" + [(set (match_operand:DI 0 "gpc_reg_operand" "=d") + (fix:DI (match_operand:TD 1 "gpc_reg_operand" "d")))] + "TARGET_DFP" + "dctfixq %0,%1" + [(set_attr "type" "dfp")]) + + +;; Decimal builtin support + +(define_c_enum "unspec" + [UNSPEC_DDEDPD + UNSPEC_DENBCD + UNSPEC_DXEX + UNSPEC_DIEX + UNSPEC_DSCLI + UNSPEC_DTSTSFI + UNSPEC_DSCRI]) + +(define_code_iterator DFP_TEST [eq lt gt unordered]) + +(define_mode_iterator D64_D128 [DD TD]) + +(define_mode_attr dfp_suffix [(DD "") + (TD "q")]) + +(define_insn "dfp_ddedpd_<mode>" + [(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d") + (unspec:D64_D128 [(match_operand:QI 1 "const_0_to_3_operand" "i") + (match_operand:D64_D128 2 "gpc_reg_operand" "d")] + UNSPEC_DDEDPD))] + "TARGET_DFP" + "ddedpd<dfp_suffix> %1,%0,%2" + [(set_attr "type" "dfp")]) + +(define_insn "dfp_denbcd_<mode>" + [(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d") + (unspec:D64_D128 [(match_operand:QI 1 "const_0_to_1_operand" "i") + (match_operand:D64_D128 2 "gpc_reg_operand" "d")] + UNSPEC_DENBCD))] + "TARGET_DFP" + "denbcd<dfp_suffix> %1,%0,%2" + [(set_attr "type" "dfp")]) + +(define_insn "dfp_dxex_<mode>" + [(set (match_operand:DI 0 "gpc_reg_operand" "=d") + (unspec:DI [(match_operand:D64_D128 1 "gpc_reg_operand" "d")] + UNSPEC_DXEX))] + "TARGET_DFP" + "dxex<dfp_suffix> %0,%1" + [(set_attr "type" "dfp")]) + +(define_insn "dfp_diex_<mode>" + [(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d") + (unspec:D64_D128 [(match_operand:DI 1 "gpc_reg_operand" "d") + (match_operand:D64_D128 2 "gpc_reg_operand" "d")] + UNSPEC_DXEX))] + "TARGET_DFP" + "diex<dfp_suffix> %0,%1,%2" + [(set_attr "type" "dfp")]) + +(define_expand "dfptstsfi_<code>_<mode>" + [(set (match_dup 3) + (compare:CCFP + (unspec:D64_D128 + [(match_operand:SI 1 "const_int_operand" "n") + (match_operand:D64_D128 2 "gpc_reg_operand" "d")] + 
UNSPEC_DTSTSFI) + (match_dup 4))) + (set (match_operand:SI 0 "register_operand" "") + (DFP_TEST:SI (match_dup 3) + (const_int 0))) + ] + "TARGET_P9_MISC" +{ + operands[3] = gen_reg_rtx (CCFPmode); + operands[4] = const0_rtx; +}) + +(define_insn "*dfp_sgnfcnc_<mode>" + [(set (match_operand:CCFP 0 "" "=y") + (compare:CCFP + (unspec:D64_D128 [(match_operand:SI 1 "const_int_operand" "n") + (match_operand:D64_D128 2 "gpc_reg_operand" "d")] + UNSPEC_DTSTSFI) + (match_operand:SI 3 "zero_constant" "j")))] + "TARGET_P9_MISC" +{ + /* If immediate operand is greater than 63, it will behave as if + the value had been 63. The code generator does not support + immediate operand values greater than 63. */ + if (!(IN_RANGE (INTVAL (operands[1]), 0, 63))) + operands[1] = GEN_INT (63); + return "dtstsfi<dfp_suffix> %0,%1,%2"; +} + [(set_attr "type" "fp")]) + +(define_insn "dfp_dscli_<mode>" + [(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d") + (unspec:D64_D128 [(match_operand:D64_D128 1 "gpc_reg_operand" "d") + (match_operand:QI 2 "immediate_operand" "i")] + UNSPEC_DSCLI))] + "TARGET_DFP" + "dscli<dfp_suffix> %0,%1,%2" + [(set_attr "type" "dfp")]) + +(define_insn "dfp_dscri_<mode>" + [(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d") + (unspec:D64_D128 [(match_operand:D64_D128 1 "gpc_reg_operand" "d") + (match_operand:QI 2 "immediate_operand" "i")] + UNSPEC_DSCRI))] + "TARGET_DFP" + "dscri<dfp_suffix> %0,%1,%2" + [(set_attr "type" "dfp")]) diff --git a/gcc/config/powerpcspe/driver-powerpcspe.c b/gcc/config/powerpcspe/driver-powerpcspe.c new file mode 100644 index 000000000000..8215abbd6258 --- /dev/null +++ b/gcc/config/powerpcspe/driver-powerpcspe.c @@ -0,0 +1,539 @@ +/* Subroutines for the gcc driver. + Copyright (C) 2007-2017 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include <stdlib.h> + +#ifdef _AIX +# include <sys/systemcfg.h> +#endif + +#ifdef __linux__ +# include <link.h> +#endif + +#if defined (__APPLE__) || (__FreeBSD__) +# include <sys/types.h> +# include <sys/sysctl.h> +#endif + +const char *host_detect_local_cpu (int argc, const char **argv); + +#if GCC_VERSION >= 0 + +/* Returns parameters that describe L1_ASSOC associative cache of size + L1_SIZEKB with lines of size L1_LINE, and L2_SIZEKB. */ + +static char * +describe_cache (unsigned l1_sizekb, unsigned l1_line, + unsigned l1_assoc ATTRIBUTE_UNUSED, unsigned l2_sizekb) +{ + char l1size[1000], line[1000], l2size[1000]; + + /* At the moment, gcc middle-end does not use the information about the + associativity of the cache. 
*/ + + sprintf (l1size, "--param l1-cache-size=%u", l1_sizekb); + sprintf (line, "--param l1-cache-line-size=%u", l1_line); + sprintf (l2size, "--param l2-cache-size=%u", l2_sizekb); + + return concat (l1size, " ", line, " ", l2size, " ", NULL); +} + +#ifdef __APPLE__ + +/* Returns the description of caches on Darwin. */ + +static char * +detect_caches_darwin (void) +{ + unsigned l1_sizekb, l1_line, l1_assoc, l2_sizekb; + size_t len = 4; + static int l1_size_name[2] = { CTL_HW, HW_L1DCACHESIZE }; + static int l1_line_name[2] = { CTL_HW, HW_CACHELINE }; + static int l2_size_name[2] = { CTL_HW, HW_L2CACHESIZE }; + + sysctl (l1_size_name, 2, &l1_sizekb, &len, NULL, 0); + sysctl (l1_line_name, 2, &l1_line, &len, NULL, 0); + sysctl (l2_size_name, 2, &l2_sizekb, &len, NULL, 0); + l1_assoc = 0; + + return describe_cache (l1_sizekb / 1024, l1_line, l1_assoc, + l2_sizekb / 1024); +} + +static const char * +detect_processor_darwin (void) +{ + unsigned int proc; + size_t len = 4; + + sysctlbyname ("hw.cpusubtype", &proc, &len, NULL, 0); + + if (len > 0) + switch (proc) + { + case 1: + return "601"; + case 2: + return "602"; + case 3: + return "603"; + case 4: + case 5: + return "603e"; + case 6: + return "604"; + case 7: + return "604e"; + case 8: + return "620"; + case 9: + return "750"; + case 10: + return "7400"; + case 11: + return "7450"; + case 100: + return "970"; + default: + return "powerpc"; + } + + return "powerpc"; +} + +#endif /* __APPLE__ */ + +#ifdef __FreeBSD__ + +/* Returns the description of caches on FreeBSD PPC. */ + +static char * +detect_caches_freebsd (void) +{ + unsigned l1_sizekb, l1_line, l1_assoc, l2_sizekb; + size_t len = 4; + + /* Currently, as of FreeBSD-7.0, there is only the cacheline_size + available via sysctl. */ + sysctlbyname ("machdep.cacheline_size", &l1_line, &len, NULL, 0); + + l1_sizekb = 32; + l1_assoc = 0; + l2_sizekb = 512; + + return describe_cache (l1_sizekb, l1_line, l1_assoc, l2_sizekb); +} + +/* Currently returns default powerpc. */ +static const char * +detect_processor_freebsd (void) +{ + return "powerpc"; +} + +#endif /* __FreeBSD__ */ + +#ifdef __linux__ + +/* Returns AT_PLATFORM if present, otherwise generic PowerPC. */ + +static const char * +elf_platform (void) +{ + int fd; + + fd = open ("/proc/self/auxv", O_RDONLY); + + if (fd != -1) + { + char buf[1024]; + ElfW(auxv_t) *av; + ssize_t n; + + n = read (fd, buf, sizeof (buf)); + close (fd); + + if (n > 0) + { + for (av = (ElfW(auxv_t) *) buf; av->a_type != AT_NULL; ++av) + switch (av->a_type) + { + case AT_PLATFORM: + return (const char *) av->a_un.a_val; + + default: + break; + } + } + } + return NULL; +} + +/* Returns AT_DCACHEBSIZE if present, otherwise generic 32. */ + +static int +elf_dcachebsize (void) +{ + int fd; + + fd = open ("/proc/self/auxv", O_RDONLY); + + if (fd != -1) + { + char buf[1024]; + ElfW(auxv_t) *av; + ssize_t n; + + n = read (fd, buf, sizeof (buf)); + close (fd); + + if (n > 0) + { + for (av = (ElfW(auxv_t) *) buf; av->a_type != AT_NULL; ++av) + switch (av->a_type) + { + case AT_DCACHEBSIZE: + return av->a_un.a_val; + + default: + break; + } + } + } + return 32; +} + +/* Returns the description of caches on Linux. 
*/ + +static char * +detect_caches_linux (void) +{ + unsigned l1_sizekb, l1_line, l1_assoc, l2_sizekb; + const char *platform; + + platform = elf_platform (); + + if (platform != NULL) + { + l1_line = 128; + + if (platform[5] == '6') + /* POWER6 and POWER6x */ + l1_sizekb = 64; + else + l1_sizekb = 32; + } + else + { + l1_line = elf_dcachebsize (); + l1_sizekb = 32; + } + + l1_assoc = 0; + l2_sizekb = 512; + + return describe_cache (l1_sizekb, l1_line, l1_assoc, l2_sizekb); +} + +static const char * +detect_processor_linux (void) +{ + const char *platform; + + platform = elf_platform (); + + if (platform != NULL) + return platform; + else + return "powerpc"; +} + +#endif /* __linux__ */ + +#ifdef _AIX +/* Returns the description of caches on AIX. */ + +static char * +detect_caches_aix (void) +{ + unsigned l1_sizekb, l1_line, l1_assoc, l2_sizekb; + + l1_sizekb = _system_configuration.dcache_size / 1024; + l1_line = _system_configuration.dcache_line; + l1_assoc = _system_configuration.dcache_asc; + l2_sizekb = _system_configuration.L2_cache_size / 1024; + + return describe_cache (l1_sizekb, l1_line, l1_assoc, l2_sizekb); +} + + +/* Returns the processor implementation on AIX. */ + +static const char * +detect_processor_aix (void) +{ + switch (_system_configuration.implementation) + { + case 0x0008: + return "601"; + + case 0x0020: + return "603"; + + case 0x0010: + return "604"; + + case 0x0040: + return "620"; + + case 0x0080: + return "630"; + + case 0x0100: + case 0x0200: + case 0x0400: + return "rs64"; + + case 0x0800: + return "power4"; + + case 0x2000: + if (_system_configuration.version == 0x0F0000) + return "power5"; + else + return "power5+"; + + case 0x4000: + return "power6"; + + case 0x8000: + return "power7"; + + case 0x10000: + return "power8"; + + case 0x20000: + return "power9"; + + default: + return "powerpc"; + } +} +#endif /* _AIX */ + + +/* + * Array to map -mcpu=native names to the switches passed to the assembler. + * This list mirrors the specs in ASM_CPU_SPEC, and any changes made here + * should be made there as well. 
+ */ + +struct asm_name { + const char *cpu; + const char *asm_sw; +}; + +static const struct asm_name asm_names[] = { +#if defined (_AIX) + { "power3", "-m620" }, + { "power4", "-mpwr4" }, + { "power5", "-mpwr5" }, + { "power5+", "-mpwr5x" }, + { "power6", "-mpwr6" }, + { "power6x", "-mpwr6" }, + { "power7", "-mpwr7" }, + { "power8", "-mpwr8" }, + { "power9", "-mpwr9" }, + { "powerpc", "-mppc" }, + { "rs64a", "-mppc" }, + { "603", "-m603" }, + { "603e", "-m603" }, + { "604", "-m604" }, + { "604e", "-m604" }, + { "620", "-m620" }, + { "630", "-m620" }, + { "970", "-m970" }, + { "G5", "-m970" }, + { NULL, "\ +%{!maix64: \ +%{mpowerpc64: -mppc64} \ +%{maltivec: -m970} \ +%{!maltivec: %{!mpowerpc64: %(asm_default)}}}" }, + +#else + { "cell", "-mcell" }, + { "power3", "-mppc64" }, + { "power4", "-mpower4" }, + { "power5", "%(asm_cpu_power5)" }, + { "power5+", "%(asm_cpu_power5)" }, + { "power6", "%(asm_cpu_power6) -maltivec" }, + { "power6x", "%(asm_cpu_power6) -maltivec" }, + { "power7", "%(asm_cpu_power7)" }, + { "power8", "%(asm_cpu_power8)" }, + { "power9", "%(asm_cpu_power9)" }, + { "powerpc", "-mppc" }, + { "rs64a", "-mppc64" }, + { "401", "-mppc" }, + { "403", "-m403" }, + { "405", "-m405" }, + { "405fp", "-m405" }, + { "440", "-m440" }, + { "440fp", "-m440" }, + { "464", "-m440" }, + { "464fp", "-m440" }, + { "505", "-mppc" }, + { "601", "-m601" }, + { "602", "-mppc" }, + { "603", "-mppc" }, + { "603e", "-mppc" }, + { "ec603e", "-mppc" }, + { "604", "-mppc" }, + { "604e", "-mppc" }, + { "620", "-mppc64" }, + { "630", "-mppc64" }, + { "740", "-mppc" }, + { "750", "-mppc" }, + { "G3", "-mppc" }, + { "7400", "-mppc -maltivec" }, + { "7450", "-mppc -maltivec" }, + { "G4", "-mppc -maltivec" }, + { "801", "-mppc" }, + { "821", "-mppc" }, + { "823", "-mppc" }, + { "860", "-mppc" }, + { "970", "-mpower4 -maltivec" }, + { "G5", "-mpower4 -maltivec" }, + { "8540", "-me500" }, + { "8548", "-me500" }, + { "e300c2", "-me300" }, + { "e300c3", "-me300" }, + { "e500mc", "-me500mc" }, + { NULL, "\ +%{mpowerpc64*: -mppc64} \ +%{!mpowerpc64*: %(asm_default)}" }, +#endif +}; + +/* This will be called by the spec parser in gcc.c when it sees + a %:local_cpu_detect(args) construct. Currently it will be called + with either "arch" or "tune" as argument depending on if -march=native + or -mtune=native is to be substituted. + + Additionally it will be called with "asm" to select the appropriate flags + for the assembler. + + It returns a string containing new command line parameters to be + put at the place of the above two options, depending on what CPU + this is executed. + + ARGC and ARGV are set depending on the actual arguments given + in the spec. */ +const char * +host_detect_local_cpu (int argc, const char **argv) +{ + const char *cpu = NULL; + const char *cache = ""; + const char *options = ""; + bool arch; + bool assembler; + size_t i; + + if (argc < 1) + return NULL; + + arch = strcmp (argv[0], "cpu") == 0; + assembler = (!arch && strcmp (argv[0], "asm") == 0); + if (!arch && !assembler && strcmp (argv[0], "tune")) + return NULL; + + if (! assembler) + { +#if defined (_AIX) + cache = detect_caches_aix (); +#elif defined (__APPLE__) + cache = detect_caches_darwin (); +#elif defined (__FreeBSD__) + cache = detect_caches_freebsd (); + /* FreeBSD PPC does not provide any cache information yet. */ + cache = ""; +#elif defined (__linux__) + cache = detect_caches_linux (); + /* PPC Linux does not provide any cache information yet. 
*/ + cache = ""; +#else + cache = ""; +#endif + } + +#if defined (_AIX) + cpu = detect_processor_aix (); +#elif defined (__APPLE__) + cpu = detect_processor_darwin (); +#elif defined (__FreeBSD__) + cpu = detect_processor_freebsd (); +#elif defined (__linux__) + cpu = detect_processor_linux (); +#else + cpu = "powerpc"; +#endif + + if (assembler) + { + for (i = 0; i < sizeof (asm_names) / sizeof (asm_names[0]); i++) + { + if (!asm_names[i].cpu || !strcmp (asm_names[i].cpu, cpu)) + return asm_names[i].asm_sw; + } + + return NULL; + } + + return concat (cache, "-m", argv[0], "=", cpu, " ", options, NULL); +} + +#else /* GCC_VERSION */ + +/* If we aren't compiling with GCC we just provide a minimal + default value. */ +const char * +host_detect_local_cpu (int argc, const char **argv) +{ + const char *cpu; + bool arch; + + if (argc < 1) + return NULL; + + arch = strcmp (argv[0], "cpu") == 0; + if (!arch && strcmp (argv[0], "tune")) + return NULL; + + if (arch) + cpu = "powerpc"; + + return concat ("-m", argv[0], "=", cpu, NULL); +} + +#endif /* GCC_VERSION */ + diff --git a/gcc/config/powerpcspe/e300c2c3.md b/gcc/config/powerpcspe/e300c2c3.md new file mode 100644 index 000000000000..f014f817d8a1 --- /dev/null +++ b/gcc/config/powerpcspe/e300c2c3.md @@ -0,0 +1,193 @@ +;; Pipeline description for Motorola PowerPC e300c3 core. +;; Copyright (C) 2008-2017 Free Software Foundation, Inc. +;; Contributed by Edmar Wienskoski (edmar@freescale.com) +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "ppce300c3_most,ppce300c3_long,ppce300c3_retire") +(define_cpu_unit "ppce300c3_decode_0,ppce300c3_decode_1" "ppce300c3_most") + +;; We don't simulate general issue queue (GIC). If we have SU insn +;; and then SU1 insn, they can not be issued on the same cycle +;; (although SU1 insn and then SU insn can be issued) because the SU +;; insn will go to SU1 from GIC0 entry. Fortunately, the first cycle +;; multipass insn scheduling will find the situation and issue the SU1 +;; insn and then the SU insn. +(define_cpu_unit "ppce300c3_issue_0,ppce300c3_issue_1" "ppce300c3_most") + +;; We could describe completion buffers slots in combination with the +;; retirement units and the order of completion but the result +;; automaton would behave in the same way because we can not describe +;; real latency time with taking in order completion into account. +;; Actually we could define the real latency time by querying reserved +;; automaton units but the current scheduler uses latency time before +;; issuing insns and making any reservations. +;; +;; So our description is aimed to achieve a insn schedule in which the +;; insns would not wait in the completion buffer. 
+(define_cpu_unit "ppce300c3_retire_0,ppce300c3_retire_1" "ppce300c3_retire") + +;; Branch unit: +(define_cpu_unit "ppce300c3_bu" "ppce300c3_most") + +;; IU: +(define_cpu_unit "ppce300c3_iu0_stage0,ppce300c3_iu1_stage0" "ppce300c3_most") + +;; IU: This used to describe non-pipelined division. +(define_cpu_unit "ppce300c3_mu_div" "ppce300c3_long") + +;; SRU: +(define_cpu_unit "ppce300c3_sru_stage0" "ppce300c3_most") + +;; Here we simplified LSU unit description not describing the stages. +(define_cpu_unit "ppce300c3_lsu" "ppce300c3_most") + +;; FPU: +(define_cpu_unit "ppce300c3_fpu" "ppce300c3_most") + +;; The following units are used to make automata deterministic +(define_cpu_unit "present_ppce300c3_decode_0" "ppce300c3_most") +(define_cpu_unit "present_ppce300c3_issue_0" "ppce300c3_most") +(define_cpu_unit "present_ppce300c3_retire_0" "ppce300c3_retire") +(define_cpu_unit "present_ppce300c3_iu0_stage0" "ppce300c3_most") + +;; The following sets to make automata deterministic when option ndfa is used. +(presence_set "present_ppce300c3_decode_0" "ppce300c3_decode_0") +(presence_set "present_ppce300c3_issue_0" "ppce300c3_issue_0") +(presence_set "present_ppce300c3_retire_0" "ppce300c3_retire_0") +(presence_set "present_ppce300c3_iu0_stage0" "ppce300c3_iu0_stage0") + +;; Some useful abbreviations. +(define_reservation "ppce300c3_decode" + "ppce300c3_decode_0|ppce300c3_decode_1+present_ppce300c3_decode_0") +(define_reservation "ppce300c3_issue" + "ppce300c3_issue_0|ppce300c3_issue_1+present_ppce300c3_issue_0") +(define_reservation "ppce300c3_retire" + "ppce300c3_retire_0|ppce300c3_retire_1+present_ppce300c3_retire_0") +(define_reservation "ppce300c3_iu_stage0" + "ppce300c3_iu0_stage0|ppce300c3_iu1_stage0+present_ppce300c3_iu0_stage0") + +;; Compares can be executed either one of the IU or SRU +(define_insn_reservation "ppce300c3_cmp" 1 + (and (ior (eq_attr "type" "cmp") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "yes"))) + (ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3"))) + "ppce300c3_decode,ppce300c3_issue+(ppce300c3_iu_stage0|ppce300c3_sru_stage0) \ + +ppce300c3_retire") + +;; Other one cycle IU insns +(define_insn_reservation "ppce300c3_iu" 1 + (and (ior (eq_attr "type" "integer,insert,isel") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "no"))) + (ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3"))) + "ppce300c3_decode,ppce300c3_issue+ppce300c3_iu_stage0+ppce300c3_retire") + +;; Branch. Actually this latency time is not used by the scheduler. +(define_insn_reservation "ppce300c3_branch" 1 + (and (eq_attr "type" "jmpreg,branch") + (ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3"))) + "ppce300c3_decode,ppce300c3_bu,ppce300c3_retire") + +;; Multiply is non-pipelined but can be executed in any IU +(define_insn_reservation "ppce300c3_multiply" 2 + (and (eq_attr "type" "mul") + (ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3"))) + "ppce300c3_decode,ppce300c3_issue+ppce300c3_iu_stage0, \ + ppce300c3_iu_stage0+ppce300c3_retire") + +;; Divide. We use the average latency time here. We omit reserving a +;; retire unit because of the result automata will be huge. 
+(define_insn_reservation "ppce300c3_divide" 20 + (and (eq_attr "type" "div") + (ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3"))) + "ppce300c3_decode,ppce300c3_issue+ppce300c3_iu_stage0+ppce300c3_mu_div,\ + ppce300c3_mu_div*19") + +;; CR logical +(define_insn_reservation "ppce300c3_cr_logical" 1 + (and (eq_attr "type" "cr_logical,delayed_cr") + (ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3"))) + "ppce300c3_decode,ppce300c3_issue+ppce300c3_sru_stage0+ppce300c3_retire") + +;; Mfcr +(define_insn_reservation "ppce300c3_mfcr" 1 + (and (eq_attr "type" "mfcr") + (ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3"))) + "ppce300c3_decode,ppce300c3_issue+ppce300c3_sru_stage0+ppce300c3_retire") + +;; Mtcrf +(define_insn_reservation "ppce300c3_mtcrf" 1 + (and (eq_attr "type" "mtcr") + (ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3"))) + "ppce300c3_decode,ppce300c3_issue+ppce300c3_sru_stage0+ppce300c3_retire") + +;; Mtjmpr +(define_insn_reservation "ppce300c3_mtjmpr" 1 + (and (eq_attr "type" "mtjmpr,mfjmpr") + (ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3"))) + "ppce300c3_decode,ppce300c3_issue+ppce300c3_sru_stage0+ppce300c3_retire") + +;; Float point instructions +(define_insn_reservation "ppce300c3_fpcompare" 3 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "ppce300c3")) + "ppce300c3_decode,ppce300c3_issue+ppce300c3_fpu,nothing,ppce300c3_retire") + +(define_insn_reservation "ppce300c3_fp" 3 + (and (eq_attr "type" "fp,fpsimple") + (eq_attr "cpu" "ppce300c3")) + "ppce300c3_decode,ppce300c3_issue+ppce300c3_fpu,nothing,ppce300c3_retire") + +(define_insn_reservation "ppce300c3_dmul" 4 + (and (eq_attr "type" "dmul") + (eq_attr "cpu" "ppce300c3")) + "ppce300c3_decode,ppce300c3_issue+ppce300c3_fpu,ppce300c3_fpu,nothing,ppce300c3_retire") + +; Divides are not pipelined +(define_insn_reservation "ppce300c3_sdiv" 18 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppce300c3")) + "ppce300c3_decode,ppce300c3_issue+ppce300c3_fpu,ppce300c3_fpu*17") + +(define_insn_reservation "ppce300c3_ddiv" 33 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppce300c3")) + "ppce300c3_decode,ppce300c3_issue+ppce300c3_fpu,ppce300c3_fpu*32") + +;; Loads +(define_insn_reservation "ppce300c3_load" 2 + (and (eq_attr "type" "load") + (ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3"))) + "ppce300c3_decode,ppce300c3_issue+ppce300c3_lsu,ppce300c3_retire") + +(define_insn_reservation "ppce300c3_fpload" 2 + (and (eq_attr "type" "fpload") + (eq_attr "cpu" "ppce300c3")) + "ppce300c3_decode,ppce300c3_issue+ppce300c3_lsu,ppce300c3_retire") + +;; Stores. +(define_insn_reservation "ppce300c3_store" 2 + (and (eq_attr "type" "store") + (ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3"))) + "ppce300c3_decode,ppce300c3_issue+ppce300c3_lsu,ppce300c3_retire") + +(define_insn_reservation "ppce300c3_fpstore" 2 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "ppce300c3")) + "ppce300c3_decode,ppce300c3_issue+ppce300c3_lsu,ppce300c3_retire") diff --git a/gcc/config/powerpcspe/e500.h b/gcc/config/powerpcspe/e500.h new file mode 100644 index 000000000000..3da9a6ecff23 --- /dev/null +++ b/gcc/config/powerpcspe/e500.h @@ -0,0 +1,45 @@ +/* Enable E500 support. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. 
+ + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#undef TARGET_SPE_ABI +#undef TARGET_SPE +#undef TARGET_FPRS +#undef TARGET_E500_SINGLE +#undef TARGET_E500_DOUBLE +#undef CHECK_E500_OPTIONS + +#define TARGET_SPE_ABI rs6000_spe_abi +#define TARGET_SPE rs6000_spe +#define TARGET_FPRS (rs6000_float_gprs == 0) +#define TARGET_E500_SINGLE (TARGET_HARD_FLOAT && rs6000_float_gprs == 1) +#define TARGET_E500_DOUBLE (TARGET_HARD_FLOAT && rs6000_float_gprs == 2) +#define CHECK_E500_OPTIONS \ + do { \ + if (TARGET_SPE || TARGET_SPE_ABI \ + || TARGET_E500_SINGLE || TARGET_E500_DOUBLE) \ + { \ + if (TARGET_ALTIVEC) \ + error ("AltiVec and SPE instructions cannot coexist"); \ + if (TARGET_VSX) \ + error ("VSX and SPE instructions cannot coexist"); \ + if (TARGET_64BIT) \ + error ("64-bit SPE not supported"); \ + if (TARGET_HARD_FLOAT && TARGET_FPRS) \ + error ("E500 and FPRs not supported"); \ + } \ + } while (0) diff --git a/gcc/config/powerpcspe/e500mc.md b/gcc/config/powerpcspe/e500mc.md new file mode 100644 index 000000000000..9878aaacba89 --- /dev/null +++ b/gcc/config/powerpcspe/e500mc.md @@ -0,0 +1,198 @@ +;; Pipeline description for Motorola PowerPC e500mc core. +;; Copyright (C) 2008-2017 Free Software Foundation, Inc. +;; Contributed by Edmar Wienskoski (edmar@freescale.com) +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. +;; +;; e500mc 32-bit SU(2), LSU, FPU, BPU +;; Max issue 3 insns/clock cycle (includes 1 branch) +;; FP is half clocked, timings of other instructions are as in the e500v2. + +(define_automaton "e500mc_most,e500mc_long,e500mc_retire") +(define_cpu_unit "e500mc_decode_0,e500mc_decode_1" "e500mc_most") +(define_cpu_unit "e500mc_issue_0,e500mc_issue_1" "e500mc_most") +(define_cpu_unit "e500mc_retire_0,e500mc_retire_1" "e500mc_retire") + +;; SU. +(define_cpu_unit "e500mc_su0_stage0,e500mc_su1_stage0" "e500mc_most") + +;; MU. +(define_cpu_unit "e500mc_mu_stage0,e500mc_mu_stage1" "e500mc_most") +(define_cpu_unit "e500mc_mu_stage2,e500mc_mu_stage3" "e500mc_most") + +;; Non-pipelined division. +(define_cpu_unit "e500mc_mu_div" "e500mc_long") + +;; LSU. +(define_cpu_unit "e500mc_lsu" "e500mc_most") + +;; FPU. +(define_cpu_unit "e500mc_fpu" "e500mc_most") + +;; Branch unit. +(define_cpu_unit "e500mc_bu" "e500mc_most") + +;; The following units are used to make the automata deterministic. 
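+;; Each "present_*" unit is a dummy that is reserved only together with the
+;; second decode/issue/retire/SU slot; the presence_set declarations below
+;; then require the corresponding first slot to be in use as well.  In
+;; effect, slot 1 can only be taken once slot 0 is taken, which keeps the
+;; generated automaton deterministic.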
+(define_cpu_unit "present_e500mc_decode_0" "e500mc_most") +(define_cpu_unit "present_e500mc_issue_0" "e500mc_most") +(define_cpu_unit "present_e500mc_retire_0" "e500mc_retire") +(define_cpu_unit "present_e500mc_su0_stage0" "e500mc_most") + +;; The following sets to make automata deterministic when option ndfa is used. +(presence_set "present_e500mc_decode_0" "e500mc_decode_0") +(presence_set "present_e500mc_issue_0" "e500mc_issue_0") +(presence_set "present_e500mc_retire_0" "e500mc_retire_0") +(presence_set "present_e500mc_su0_stage0" "e500mc_su0_stage0") + +;; Some useful abbreviations. +(define_reservation "e500mc_decode" + "e500mc_decode_0|e500mc_decode_1+present_e500mc_decode_0") +(define_reservation "e500mc_issue" + "e500mc_issue_0|e500mc_issue_1+present_e500mc_issue_0") +(define_reservation "e500mc_retire" + "e500mc_retire_0|e500mc_retire_1+present_e500mc_retire_0") +(define_reservation "e500mc_su_stage0" + "e500mc_su0_stage0|e500mc_su1_stage0+present_e500mc_su0_stage0") + +;; Simple SU insns. +(define_insn_reservation "e500mc_su" 1 + (and (eq_attr "type" "integer,add,logical,insert,cmp,\ + shift,trap,cntlz,exts,isel") + (eq_attr "cpu" "ppce500mc")) + "e500mc_decode,e500mc_issue+e500mc_su_stage0+e500mc_retire") + +(define_insn_reservation "e500mc_two" 1 + (and (eq_attr "type" "two") + (eq_attr "cpu" "ppce500mc")) + "e500mc_decode,e500mc_issue+e500mc_su_stage0+e500mc_retire,\ + e500mc_issue+e500mc_su_stage0+e500mc_retire") + +(define_insn_reservation "e500mc_three" 1 + (and (eq_attr "type" "three") + (eq_attr "cpu" "ppce500mc")) + "e500mc_decode,e500mc_issue+e500mc_su_stage0+e500mc_retire,\ + e500mc_issue+e500mc_su_stage0+e500mc_retire,\ + e500mc_issue+e500mc_su_stage0+e500mc_retire") + +;; Multiply. +(define_insn_reservation "e500mc_multiply" 4 + (and (eq_attr "type" "mul") + (eq_attr "cpu" "ppce500mc")) + "e500mc_decode,e500mc_issue+e500mc_mu_stage0,e500mc_mu_stage1,\ + e500mc_mu_stage2,e500mc_mu_stage3+e500mc_retire") + +;; Divide. We use the average latency time here. +(define_insn_reservation "e500mc_divide" 14 + (and (eq_attr "type" "div") + (eq_attr "cpu" "ppce500mc")) + "e500mc_decode,e500mc_issue+e500mc_mu_stage0+e500mc_mu_div,\ + e500mc_mu_div*13") + +;; Branch. +(define_insn_reservation "e500mc_branch" 1 + (and (eq_attr "type" "jmpreg,branch,isync") + (eq_attr "cpu" "ppce500mc")) + "e500mc_decode,e500mc_bu,e500mc_retire") + +;; CR logical. +(define_insn_reservation "e500mc_cr_logical" 1 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "ppce500mc")) + "e500mc_decode,e500mc_bu,e500mc_retire") + +;; Mfcr. +(define_insn_reservation "e500mc_mfcr" 1 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "ppce500mc")) + "e500mc_decode,e500mc_issue+e500mc_su1_stage0+e500mc_retire") + +;; Mtcrf. +(define_insn_reservation "e500mc_mtcrf" 1 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "ppce500mc")) + "e500mc_decode,e500mc_issue+e500mc_su1_stage0+e500mc_retire") + +;; Mtjmpr. +(define_insn_reservation "e500mc_mtjmpr" 1 + (and (eq_attr "type" "mtjmpr,mfjmpr") + (eq_attr "cpu" "ppce500mc")) + "e500mc_decode,e500mc_issue+e500mc_su_stage0+e500mc_retire") + +;; Brinc. +(define_insn_reservation "e500mc_brinc" 1 + (and (eq_attr "type" "brinc") + (eq_attr "cpu" "ppce500mc")) + "e500mc_decode,e500mc_issue+e500mc_su_stage0+e500mc_retire") + +;; Loads. 
+(define_insn_reservation "e500mc_load" 3 + (and (eq_attr "type" "load,load_l,sync") + (eq_attr "cpu" "ppce500mc")) + "e500mc_decode,e500mc_issue+e500mc_lsu,nothing,e500mc_retire") + +(define_insn_reservation "e500mc_fpload" 4 + (and (eq_attr "type" "fpload") + (eq_attr "cpu" "ppce500mc")) + "e500mc_decode,e500mc_issue+e500mc_lsu,nothing*2,e500mc_retire") + +;; Stores. +(define_insn_reservation "e500mc_store" 3 + (and (eq_attr "type" "store,store_c") + (eq_attr "cpu" "ppce500mc")) + "e500mc_decode,e500mc_issue+e500mc_lsu,nothing,e500mc_retire") + +(define_insn_reservation "e500mc_fpstore" 3 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "ppce500mc")) + "e500mc_decode,e500mc_issue+e500mc_lsu,nothing,e500mc_retire") + +;; The following ignores the retire unit to avoid a large automata. + +;; Simple FP. +(define_insn_reservation "e500mc_simple_float" 8 + (and (eq_attr "type" "fpsimple") + (eq_attr "cpu" "ppce500mc")) + "e500mc_decode,e500mc_issue+e500mc_fpu") +; "e500mc_decode,e500mc_issue+e500mc_fpu,nothing*6,e500mc_retire") + +;; FP. +(define_insn_reservation "e500mc_float" 8 + (and (eq_attr "type" "fp") + (eq_attr "cpu" "ppce500mc")) + "e500mc_decode,e500mc_issue+e500mc_fpu") +; "e500mc_decode,e500mc_issue+e500mc_fpu,nothing*6,e500mc_retire") + +(define_insn_reservation "e500mc_fpcompare" 8 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "ppce500mc")) + "e500mc_decode,e500mc_issue+e500mc_fpu") + +(define_insn_reservation "e500mc_dmul" 10 + (and (eq_attr "type" "dmul") + (eq_attr "cpu" "ppce500mc")) + "e500mc_decode,e500mc_issue+e500mc_fpu") + +;; FP divides are not pipelined. +(define_insn_reservation "e500mc_sdiv" 36 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppce500mc")) + "e500mc_decode,e500mc_issue+e500mc_fpu,e500mc_fpu*35") + +(define_insn_reservation "e500mc_ddiv" 66 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppce500mc")) + "e500mc_decode,e500mc_issue+e500mc_fpu,e500mc_fpu*65") diff --git a/gcc/config/powerpcspe/e500mc64.md b/gcc/config/powerpcspe/e500mc64.md new file mode 100644 index 000000000000..366b4c497918 --- /dev/null +++ b/gcc/config/powerpcspe/e500mc64.md @@ -0,0 +1,200 @@ +;; Pipeline description for Freescale PowerPC e500mc64 core. +;; Copyright (C) 2009-2017 Free Software Foundation, Inc. +;; Contributed by Edmar Wienskoski (edmar@freescale.com) +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. +;; +;; e500mc64 64-bit SU(2), LSU, FPU, BPU +;; Max issue 3 insns/clock cycle (includes 1 branch) + +(define_automaton "e500mc64_most,e500mc64_long,e500mc64_retire") +(define_cpu_unit "e500mc64_decode_0,e500mc64_decode_1" "e500mc64_most") +(define_cpu_unit "e500mc64_issue_0,e500mc64_issue_1" "e500mc64_most") +(define_cpu_unit "e500mc64_retire_0,e500mc64_retire_1" "e500mc64_retire") + +;; SU. +(define_cpu_unit "e500mc64_su0_stage0,e500mc64_su1_stage0" "e500mc64_most") + +;; MU. 
+(define_cpu_unit "e500mc64_mu_stage0,e500mc64_mu_stage1" "e500mc64_most") +(define_cpu_unit "e500mc64_mu_stage2,e500mc64_mu_stage3" "e500mc64_most") + +;; Non-pipelined division. +(define_cpu_unit "e500mc64_mu_div" "e500mc64_long") + +;; LSU. +(define_cpu_unit "e500mc64_lsu" "e500mc64_most") + +;; FPU. +(define_cpu_unit "e500mc64_fpu" "e500mc64_most") + +;; Branch unit. +(define_cpu_unit "e500mc64_bu" "e500mc64_most") + +;; The following units are used to make the automata deterministic. +(define_cpu_unit "present_e500mc64_decode_0" "e500mc64_most") +(define_cpu_unit "present_e500mc64_issue_0" "e500mc64_most") +(define_cpu_unit "present_e500mc64_retire_0" "e500mc64_retire") +(define_cpu_unit "present_e500mc64_su0_stage0" "e500mc64_most") + +;; The following sets to make automata deterministic when option ndfa is used. +(presence_set "present_e500mc64_decode_0" "e500mc64_decode_0") +(presence_set "present_e500mc64_issue_0" "e500mc64_issue_0") +(presence_set "present_e500mc64_retire_0" "e500mc64_retire_0") +(presence_set "present_e500mc64_su0_stage0" "e500mc64_su0_stage0") + +;; Some useful abbreviations. +(define_reservation "e500mc64_decode" + "e500mc64_decode_0|e500mc64_decode_1+present_e500mc64_decode_0") +(define_reservation "e500mc64_issue" + "e500mc64_issue_0|e500mc64_issue_1+present_e500mc64_issue_0") +(define_reservation "e500mc64_retire" + "e500mc64_retire_0|e500mc64_retire_1+present_e500mc64_retire_0") +(define_reservation "e500mc64_su_stage0" + "e500mc64_su0_stage0|e500mc64_su1_stage0+present_e500mc64_su0_stage0") + +;; Simple SU insns. +(define_insn_reservation "e500mc64_su" 1 + (and (ior (eq_attr "type" "integer,insert,cntlz") + (and (eq_attr "type" "add,logical,exts") + (eq_attr "dot" "no")) + (and (eq_attr "type" "shift") + (eq_attr "dot" "no") + (eq_attr "var_shift" "no"))) + (eq_attr "cpu" "ppce500mc64")) + "e500mc64_decode,e500mc64_issue+e500mc64_su_stage0+e500mc64_retire") + +(define_insn_reservation "e500mc64_su2" 2 + (and (ior (eq_attr "type" "cmp,trap") + (and (eq_attr "type" "add,logical,exts") + (eq_attr "dot" "yes")) + (and (eq_attr "type" "shift") + (eq_attr "dot" "yes") + (eq_attr "var_shift" "no"))) + (eq_attr "cpu" "ppce500mc64")) + "e500mc64_decode,e500mc64_issue+e500mc64_su_stage0,e500mc64_retire") + +(define_insn_reservation "e500mc64_delayed" 2 + (and (eq_attr "type" "shift") + (eq_attr "var_shift" "yes") + (eq_attr "cpu" "ppce500mc64")) + "e500mc64_decode,e500mc64_issue+e500mc64_su_stage0,e500mc64_retire") + +(define_insn_reservation "e500mc64_two" 2 + (and (eq_attr "type" "two") + (eq_attr "cpu" "ppce500mc64")) + "e500mc64_decode,e500mc64_issue+e500mc64_su_stage0+e500mc64_retire,\ + e500mc64_issue+e500mc64_su_stage0+e500mc64_retire") + +(define_insn_reservation "e500mc64_three" 3 + (and (eq_attr "type" "three") + (eq_attr "cpu" "ppce500mc64")) + "e500mc64_decode,e500mc64_issue+e500mc64_su_stage0+e500mc64_retire,\ + e500mc64_issue+e500mc64_su_stage0+e500mc64_retire,\ + e500mc64_issue+e500mc64_su_stage0+e500mc64_retire") + +;; Multiply. +(define_insn_reservation "e500mc64_multiply" 4 + (and (eq_attr "type" "mul") + (eq_attr "cpu" "ppce500mc64")) + "e500mc64_decode,e500mc64_issue+e500mc64_mu_stage0,e500mc64_mu_stage1,\ + e500mc64_mu_stage2,e500mc64_mu_stage3+e500mc64_retire") + +;; Divide. We use the average latency time here. +(define_insn_reservation "e500mc64_divide" 14 + (and (eq_attr "type" "div") + (eq_attr "cpu" "ppce500mc64")) + "e500mc64_decode,e500mc64_issue+e500mc64_mu_stage0+e500mc64_mu_div,\ + e500mc64_mu_div*13") + +;; Branch. 
+(define_insn_reservation "e500mc64_branch" 1 + (and (eq_attr "type" "jmpreg,branch,isync") + (eq_attr "cpu" "ppce500mc64")) + "e500mc64_decode,e500mc64_bu,e500mc64_retire") + +;; CR logical. +(define_insn_reservation "e500mc64_cr_logical" 1 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "ppce500mc64")) + "e500mc64_decode,e500mc64_bu,e500mc64_retire") + +;; Mfcr. +(define_insn_reservation "e500mc64_mfcr" 4 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "ppce500mc64")) + "e500mc64_decode,e500mc64_issue+e500mc64_su1_stage0,e500mc64_su1_stage0*3+e500mc64_retire") + +;; Mtcrf. +(define_insn_reservation "e500mc64_mtcrf" 1 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "ppce500mc64")) + "e500mc64_decode,e500mc64_issue+e500mc64_su1_stage0+e500mc64_retire") + +;; Mtjmpr. +(define_insn_reservation "e500mc64_mtjmpr" 1 + (and (eq_attr "type" "mtjmpr,mfjmpr") + (eq_attr "cpu" "ppce500mc64")) + "e500mc64_decode,e500mc64_issue+e500mc64_su_stage0+e500mc64_retire") + +;; Brinc. +(define_insn_reservation "e500mc64_brinc" 1 + (and (eq_attr "type" "brinc") + (eq_attr "cpu" "ppce500mc64")) + "e500mc64_decode,e500mc64_issue+e500mc64_su_stage0+e500mc64_retire") + +;; Loads. +(define_insn_reservation "e500mc64_load" 3 + (and (eq_attr "type" "load,load_l,sync") + (eq_attr "cpu" "ppce500mc64")) + "e500mc64_decode,e500mc64_issue+e500mc64_lsu,nothing,e500mc64_retire") + +(define_insn_reservation "e500mc64_fpload" 4 + (and (eq_attr "type" "fpload") + (eq_attr "cpu" "ppce500mc64")) + "e500mc64_decode,e500mc64_issue+e500mc64_lsu,nothing*2,e500mc64_retire") + +;; Stores. +(define_insn_reservation "e500mc64_store" 3 + (and (eq_attr "type" "store,store_c") + (eq_attr "cpu" "ppce500mc64")) + "e500mc64_decode,e500mc64_issue+e500mc64_lsu,nothing,e500mc64_retire") + +(define_insn_reservation "e500mc64_fpstore" 3 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "ppce500mc64")) + "e500mc64_decode,e500mc64_issue+e500mc64_lsu,nothing,e500mc64_retire") + +;; The following ignores the retire unit to avoid a large automata. + +;; FP. +(define_insn_reservation "e500mc64_float" 7 + (and (eq_attr "type" "fpsimple,fp,fpcompare,dmul") + (eq_attr "cpu" "ppce500mc64")) + "e500mc64_decode,e500mc64_issue+e500mc64_fpu") +; "e500mc64_decode,e500mc64_issue+e500mc64_fpu,nothing*5,e500mc64_retire") + +;; FP divides are not pipelined. +(define_insn_reservation "e500mc64_sdiv" 20 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppce500mc64")) + "e500mc64_decode,e500mc64_issue+e500mc64_fpu,e500mc64_fpu*19") + +(define_insn_reservation "e500mc64_ddiv" 35 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppce500mc64")) + "e500mc64_decode,e500mc64_issue+e500mc64_fpu,e500mc64_fpu*34") diff --git a/gcc/config/powerpcspe/e5500.md b/gcc/config/powerpcspe/e5500.md new file mode 100644 index 000000000000..d66f8a584212 --- /dev/null +++ b/gcc/config/powerpcspe/e5500.md @@ -0,0 +1,190 @@ +;; Pipeline description for Freescale PowerPC e5500 core. +;; Copyright (C) 2012-2017 Free Software Foundation, Inc. +;; Contributed by Edmar Wienskoski (edmar@freescale.com) +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. +;; +;; e5500 64-bit SFX(2), CFX, LSU, FPU, BU +;; Max issue 3 insns/clock cycle (includes 1 branch) + +(define_automaton "e5500_most,e5500_long") +(define_cpu_unit "e5500_decode_0,e5500_decode_1" "e5500_most") + +;; SFX. +(define_cpu_unit "e5500_sfx_0,e5500_sfx_1" "e5500_most") + +;; CFX. +(define_cpu_unit "e5500_cfx_stage0,e5500_cfx_stage1" "e5500_most") + +;; Non-pipelined division. +(define_cpu_unit "e5500_cfx_div" "e5500_long") + +;; LSU. +(define_cpu_unit "e5500_lsu" "e5500_most") + +;; FPU. +(define_cpu_unit "e5500_fpu" "e5500_long") + +;; BU. +(define_cpu_unit "e5500_bu" "e5500_most") + +;; The following units are used to make the automata deterministic. +(define_cpu_unit "present_e5500_decode_0" "e5500_most") +(define_cpu_unit "present_e5500_sfx_0" "e5500_most") +(presence_set "present_e5500_decode_0" "e5500_decode_0") +(presence_set "present_e5500_sfx_0" "e5500_sfx_0") + +;; Some useful abbreviations. +(define_reservation "e5500_decode" + "e5500_decode_0|e5500_decode_1+present_e5500_decode_0") +(define_reservation "e5500_sfx" + "e5500_sfx_0|e5500_sfx_1+present_e5500_sfx_0") + +;; SFX. +(define_insn_reservation "e5500_sfx" 1 + (and (ior (eq_attr "type" "integer,insert,cntlz") + (and (eq_attr "type" "add,logical,exts") + (eq_attr "dot" "no")) + (and (eq_attr "type" "shift") + (eq_attr "var_shift" "no"))) + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_sfx") + +(define_insn_reservation "e5500_sfx2" 2 + (and (ior (eq_attr "type" "cmp,trap") + (and (eq_attr "type" "add,logical,exts") + (eq_attr "dot" "yes")) + (and (eq_attr "type" "shift") + (eq_attr "dot" "yes") + (eq_attr "var_shift" "no"))) + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_sfx") + +(define_insn_reservation "e5500_delayed" 2 + (and (eq_attr "type" "shift") + (eq_attr "var_shift" "yes") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_sfx*2") + +(define_insn_reservation "e5500_two" 2 + (and (eq_attr "type" "two") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_decode+e5500_sfx,e5500_sfx") + +(define_insn_reservation "e5500_three" 3 + (and (eq_attr "type" "three") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,(e5500_decode+e5500_sfx)*2,e5500_sfx") + +;; SFX - Mfcr. +(define_insn_reservation "e5500_mfcr" 4 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_sfx_0*4") + +;; SFX - Mtcrf. +(define_insn_reservation "e5500_mtcrf" 1 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_sfx_0") + +;; SFX - Mtjmpr. +(define_insn_reservation "e5500_mtjmpr" 1 + (and (eq_attr "type" "mtjmpr,mfjmpr") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_sfx") + +;; CFX - Multiply. +(define_insn_reservation "e5500_multiply" 4 + (and (eq_attr "type" "mul") + (eq_attr "dot" "no") + (eq_attr "size" "32") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_cfx_stage0,e5500_cfx_stage1") + +(define_insn_reservation "e5500_multiply_i" 5 + (and (eq_attr "type" "mul") + (ior (eq_attr "dot" "yes") + (eq_attr "size" "8,16")) + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_cfx_stage0,\ + e5500_cfx_stage0+e5500_cfx_stage1,e5500_cfx_stage1") + +;; CFX - Divide. 
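+;; Division is not pipelined: the reservations below hold e5500_cfx_div for
+;; the full 16-cycle (32-bit) or 26-cycle (64-bit) latency, selected by the
+;; "size" attribute of the divide insn.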
+(define_insn_reservation "e5500_divide" 16 + (and (eq_attr "type" "div") + (eq_attr "size" "32") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_cfx_stage0+e5500_cfx_div,\ + e5500_cfx_div*15") + +(define_insn_reservation "e5500_divide_d" 26 + (and (eq_attr "type" "div") + (eq_attr "size" "64") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_cfx_stage0+e5500_cfx_div,\ + e5500_cfx_div*25") + +;; LSU - Loads. +(define_insn_reservation "e5500_load" 3 + (and (eq_attr "type" "load,load_l,sync") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_lsu") + +(define_insn_reservation "e5500_fpload" 4 + (and (eq_attr "type" "fpload") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_lsu") + +;; LSU - Stores. +(define_insn_reservation "e5500_store" 3 + (and (eq_attr "type" "store,store_c") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_lsu") + +(define_insn_reservation "e5500_fpstore" 3 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_lsu") + +;; FP. +(define_insn_reservation "e5500_float" 7 + (and (eq_attr "type" "fpsimple,fp,fpcompare,dmul") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_fpu") + +(define_insn_reservation "e5500_sdiv" 20 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_fpu*20") + +(define_insn_reservation "e5500_ddiv" 35 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_fpu*35") + +;; BU. +(define_insn_reservation "e5500_branch" 1 + (and (eq_attr "type" "jmpreg,branch,isync") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_bu") + +;; BU - CR logical. +(define_insn_reservation "e5500_cr_logical" 1 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_bu") diff --git a/gcc/config/powerpcspe/e6500.md b/gcc/config/powerpcspe/e6500.md new file mode 100644 index 000000000000..779aa4f8c45a --- /dev/null +++ b/gcc/config/powerpcspe/e6500.md @@ -0,0 +1,228 @@ +;; Pipeline description for Freescale PowerPC e6500 core. +;; Copyright (C) 2012-2017 Free Software Foundation, Inc. +;; Contributed by Edmar Wienskoski (edmar@freescale.com) +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. +;; +;; e6500 64-bit SFX(2), CFX, LSU, FPU, BU, VSFX, VCFX, VFPU, VPERM +;; Max issue 3 insns/clock cycle (includes 1 branch) + +(define_automaton "e6500_most,e6500_long,e6500_vec") +(define_cpu_unit "e6500_decode_0,e6500_decode_1" "e6500_most") + +;; SFX. +(define_cpu_unit "e6500_sfx_0,e6500_sfx_1" "e6500_most") + +;; CFX. +(define_cpu_unit "e6500_cfx_stage0,e6500_cfx_stage1" "e6500_most") + +;; Non-pipelined division. +(define_cpu_unit "e6500_cfx_div" "e6500_long") + +;; LSU. +(define_cpu_unit "e6500_lsu" "e6500_most") + +;; FPU. +(define_cpu_unit "e6500_fpu" "e6500_long") + +;; BU. 
+(define_cpu_unit "e6500_bu" "e6500_most") + +;; Altivec unit +(define_cpu_unit "e6500_vec,e6500_vecperm" "e6500_vec") + +;; The following units are used to make the automata deterministic. +(define_cpu_unit "present_e6500_decode_0" "e6500_most") +(define_cpu_unit "present_e6500_sfx_0" "e6500_most") +(presence_set "present_e6500_decode_0" "e6500_decode_0") +(presence_set "present_e6500_sfx_0" "e6500_sfx_0") + +;; Some useful abbreviations. +(define_reservation "e6500_decode" + "e6500_decode_0|e6500_decode_1+present_e6500_decode_0") +(define_reservation "e6500_sfx" + "e6500_sfx_0|e6500_sfx_1+present_e6500_sfx_0") + +;; SFX. +(define_insn_reservation "e6500_sfx" 1 + (and (ior (eq_attr "type" "integer,insert,cntlz") + (and (eq_attr "type" "add,logical,exts") + (eq_attr "dot" "no")) + (and (eq_attr "type" "shift") + (eq_attr "dot" "no") + (eq_attr "var_shift" "no"))) + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_sfx") + +(define_insn_reservation "e6500_sfx2" 2 + (and (ior (eq_attr "type" "cmp,trap") + (and (eq_attr "type" "add,logical,exts") + (eq_attr "dot" "yes")) + (and (eq_attr "type" "shift") + (eq_attr "dot" "yes") + (eq_attr "var_shift" "no"))) + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_sfx") + +(define_insn_reservation "e6500_delayed" 2 + (and (eq_attr "type" "shift") + (eq_attr "var_shift" "yes") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_sfx*2") + +(define_insn_reservation "e6500_two" 2 + (and (eq_attr "type" "two") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_decode+e6500_sfx,e6500_sfx") + +(define_insn_reservation "e6500_three" 3 + (and (eq_attr "type" "three") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,(e6500_decode+e6500_sfx)*2,e6500_sfx") + +;; SFX - Mfcr. +(define_insn_reservation "e6500_mfcr" 4 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_sfx_0*4") + +;; SFX - Mtcrf. +(define_insn_reservation "e6500_mtcrf" 1 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_sfx_0") + +;; SFX - Mtjmpr. +(define_insn_reservation "e6500_mtjmpr" 1 + (and (eq_attr "type" "mtjmpr,mfjmpr") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_sfx") + +;; CFX - Multiply. +(define_insn_reservation "e6500_multiply" 4 + (and (eq_attr "type" "mul") + (eq_attr "dot" "no") + (eq_attr "size" "32") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_cfx_stage0,e6500_cfx_stage1") + +(define_insn_reservation "e6500_multiply_i" 5 + (and (eq_attr "type" "mul") + (ior (eq_attr "dot" "yes") + (eq_attr "size" "8,16")) + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_cfx_stage0,\ + e6500_cfx_stage0+e6500_cfx_stage1,e6500_cfx_stage1") + +;; CFX - Divide. +(define_insn_reservation "e6500_divide" 16 + (and (eq_attr "type" "div") + (eq_attr "size" "32") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_cfx_stage0+e6500_cfx_div,\ + e6500_cfx_div*15") + +(define_insn_reservation "e6500_divide_d" 26 + (and (eq_attr "type" "div") + (eq_attr "size" "64") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_cfx_stage0+e6500_cfx_div,\ + e6500_cfx_div*25") + +;; LSU - Loads. +(define_insn_reservation "e6500_load" 3 + (and (eq_attr "type" "load,load_l,sync") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_lsu") + +(define_insn_reservation "e6500_fpload" 4 + (and (eq_attr "type" "fpload") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_lsu") + +(define_insn_reservation "e6500_vecload" 4 + (and (eq_attr "type" "vecload") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_lsu") + +;; LSU - Stores. 
+(define_insn_reservation "e6500_store" 3 + (and (eq_attr "type" "store,store_c") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_lsu") + +(define_insn_reservation "e6500_fpstore" 3 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_lsu") + +(define_insn_reservation "e6500_vecstore" 4 + (and (eq_attr "type" "vecstore") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_lsu") + +;; FP. +(define_insn_reservation "e6500_float" 7 + (and (eq_attr "type" "fpsimple,fp,fpcompare,dmul") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_fpu") + +(define_insn_reservation "e6500_sdiv" 20 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_fpu*20") + +(define_insn_reservation "e6500_ddiv" 35 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_fpu*35") + +;; BU. +(define_insn_reservation "e6500_branch" 1 + (and (eq_attr "type" "jmpreg,branch,isync") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_bu") + +;; BU - CR logical. +(define_insn_reservation "e6500_cr_logical" 1 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_bu") + +;; VSFX. +(define_insn_reservation "e6500_vecsimple" 1 + (and (eq_attr "type" "vecsimple,veclogical,vecmove,veccmp,veccmpfx") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_vec") + +;; VCFX. +(define_insn_reservation "e6500_veccomplex" 4 + (and (eq_attr "type" "veccomplex") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_vec") + +;; VFPU. +(define_insn_reservation "e6500_vecfloat" 6 + (and (eq_attr "type" "vecfloat") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_vec") + +;; VPERM. +(define_insn_reservation "e6500_vecperm" 2 + (and (eq_attr "type" "vecperm") + (eq_attr "cpu" "ppce6500")) + "e6500_decode,e6500_vecperm") diff --git a/gcc/config/powerpcspe/eabi.h b/gcc/config/powerpcspe/eabi.h new file mode 100644 index 000000000000..ae1a73c03d9c --- /dev/null +++ b/gcc/config/powerpcspe/eabi.h @@ -0,0 +1,41 @@ +/* Core target definitions for GNU compiler + for IBM RS/6000 PowerPC targeted to embedded ELF systems. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + Contributed by Cygnus Support. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* Add -meabi to target flags. */ +#undef TARGET_DEFAULT +#define TARGET_DEFAULT MASK_EABI + +/* Invoke an initializer function to set up the GOT. 
*/ +#define NAME__MAIN "__eabi" +#define INVOKE__main + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("PPC"); \ + builtin_define ("__embedded__"); \ + builtin_assert ("system=embedded"); \ + builtin_assert ("cpu=powerpc"); \ + builtin_assert ("machine=powerpc"); \ + TARGET_OS_SYSV_CPP_BUILTINS (); \ + } \ + while (0) diff --git a/gcc/config/powerpcspe/eabialtivec.h b/gcc/config/powerpcspe/eabialtivec.h new file mode 100644 index 000000000000..a8fc9f0a8252 --- /dev/null +++ b/gcc/config/powerpcspe/eabialtivec.h @@ -0,0 +1,27 @@ +/* Core target definitions for GNU compiler + for PowerPC targeted systems with AltiVec support. + Copyright (C) 2001-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez (aldyh@redhat.com). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* Add -meabi and -maltivec to target flags. */ +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_EABI | MASK_ALTIVEC) + +#undef SUBSUBTARGET_OVERRIDE_OPTIONS +#define SUBSUBTARGET_OVERRIDE_OPTIONS rs6000_altivec_abi = 1 diff --git a/gcc/config/powerpcspe/eabisim.h b/gcc/config/powerpcspe/eabisim.h new file mode 100644 index 000000000000..fda50ba14088 --- /dev/null +++ b/gcc/config/powerpcspe/eabisim.h @@ -0,0 +1,51 @@ +/* Support for GCC on simulated PowerPC systems targeted to embedded ELF + systems. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + Contributed by Cygnus Support. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. 
*/ + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("PPC"); \ + builtin_define ("__embedded__"); \ + builtin_define ("__simulator__"); \ + builtin_assert ("system=embedded"); \ + builtin_assert ("system=simulator"); \ + builtin_assert ("cpu=powerpc"); \ + builtin_assert ("machine=powerpc"); \ + TARGET_OS_SYSV_CPP_BUILTINS (); \ + } \ + while (0) + +/* Make the simulator the default */ +#undef LIB_DEFAULT_SPEC +#define LIB_DEFAULT_SPEC "%(lib_sim)" + +#undef STARTFILE_DEFAULT_SPEC +#define STARTFILE_DEFAULT_SPEC "%(startfile_sim)" + +#undef ENDFILE_DEFAULT_SPEC +#define ENDFILE_DEFAULT_SPEC "%(endfile_sim)" + +#undef LINK_START_DEFAULT_SPEC +#define LINK_START_DEFAULT_SPEC "%(link_start_sim)" + +#undef LINK_OS_DEFAULT_SPEC +#define LINK_OS_DEFAULT_SPEC "%(link_os_sim)" diff --git a/gcc/config/powerpcspe/eabispe.h b/gcc/config/powerpcspe/eabispe.h new file mode 100644 index 000000000000..db8030a791b8 --- /dev/null +++ b/gcc/config/powerpcspe/eabispe.h @@ -0,0 +1,26 @@ +/* Core target definitions for GNU compiler + for PowerPC embedded targeted systems with SPE support. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez (aldyh@redhat.com). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_STRICT_ALIGN | MASK_EABI) + +#undef ASM_DEFAULT_SPEC +#define ASM_DEFAULT_SPEC "-mppc -mspe -me500" diff --git a/gcc/config/powerpcspe/freebsd.h b/gcc/config/powerpcspe/freebsd.h new file mode 100644 index 000000000000..5201dd791b5d --- /dev/null +++ b/gcc/config/powerpcspe/freebsd.h @@ -0,0 +1,79 @@ +/* Definitions for PowerPC running FreeBSD using the ELF format + Copyright (C) 2001-2017 Free Software Foundation, Inc. + Contributed by David E. O'Brien <obrien@FreeBSD.org> and BSDi. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* Override the defaults, which exist to force the proper definition. 
*/ + +#undef CPP_OS_DEFAULT_SPEC +#define CPP_OS_DEFAULT_SPEC "%(cpp_os_freebsd)" + +#undef STARTFILE_DEFAULT_SPEC +#define STARTFILE_DEFAULT_SPEC "%(startfile_freebsd)" + +#undef ENDFILE_DEFAULT_SPEC +#define ENDFILE_DEFAULT_SPEC "%(endfile_freebsd)" + +#undef LIB_DEFAULT_SPEC +#define LIB_DEFAULT_SPEC "%(lib_freebsd)" + +#undef LINK_START_DEFAULT_SPEC +#define LINK_START_DEFAULT_SPEC "%(link_start_freebsd)" + +#undef LINK_OS_DEFAULT_SPEC +#define LINK_OS_DEFAULT_SPEC "%(link_os_freebsd)" + +/* XXX: This is wrong for many platforms in sysv4.h. + We should work on getting that definition fixed. */ +#undef LINK_SHLIB_SPEC +#define LINK_SHLIB_SPEC "%{shared:-shared} %{!shared: %{static:-static}}" + + +/************************[ Target stuff ]***********************************/ + +/* Define the actual types of some ANSI-mandated types. + Needs to agree with <machine/ansi.h>. GCC defaults come from c-decl.c, + c-common.c, and config/<arch>/<arch>.h. */ + +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" + +/* rs6000.h gets this wrong for FreeBSD. We use the GCC defaults instead. */ +#undef WCHAR_TYPE + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +/* Override rs6000.h definition. */ +#undef ASM_APP_ON +#define ASM_APP_ON "#APP\n" + +/* Override rs6000.h definition. */ +#undef ASM_APP_OFF +#define ASM_APP_OFF "#NO_APP\n" + +/* We don't need to generate entries in .fixup, except when + -mrelocatable or -mrelocatable-lib is given. */ +#undef RELOCATABLE_NEEDS_FIXUP +#define RELOCATABLE_NEEDS_FIXUP \ + (rs6000_isa_flags & rs6000_isa_flags_explicit & OPTION_MASK_RELOCATABLE) + +/* Use standard DWARF numbering for DWARF debugging information. */ +#define RS6000_USE_DWARF_NUMBERING + +#define POWERPC_FREEBSD diff --git a/gcc/config/powerpcspe/freebsd64.h b/gcc/config/powerpcspe/freebsd64.h new file mode 100644 index 000000000000..d75ac2b9a5d6 --- /dev/null +++ b/gcc/config/powerpcspe/freebsd64.h @@ -0,0 +1,433 @@ +/* Definitions for 64-bit PowerPC running FreeBSD using the ELF format + Copyright (C) 2012-2017 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* Override the defaults, which exist to force the proper definition. */ + +#ifdef IN_LIBGCC2 +#undef TARGET_64BIT +#ifdef __powerpc64__ +#define TARGET_64BIT 1 +#else +#define TARGET_64BIT 0 +#endif +#endif + +#undef TARGET_AIX +#define TARGET_AIX TARGET_64BIT + +#ifdef HAVE_LD_NO_DOT_SYMS +/* New ABI uses a local sym for the function entry point. */ +extern int dot_symbols; +#undef DOT_SYMBOLS +#define DOT_SYMBOLS dot_symbols +#endif + +#define TARGET_USES_LINUX64_OPT 1 +#ifdef HAVE_LD_LARGE_TOC +#undef TARGET_CMODEL +#define TARGET_CMODEL rs6000_current_cmodel +#define SET_CMODEL(opt) rs6000_current_cmodel = opt +#else +#define SET_CMODEL(opt) do {} while (0) +#endif + +/* Until now the 970 is the only Processor where FreeBSD 64-bit runs on. 
*/ +#undef PROCESSOR_DEFAULT +#define PROCESSOR_DEFAULT PROCESSOR_POWER4 +#undef PROCESSOR_DEFAULT64 +#define PROCESSOR_DEFAULT64 PROCESSOR_POWER4 + +/* We don't need to generate entries in .fixup, except when + -mrelocatable or -mrelocatable-lib is given. */ +#undef RELOCATABLE_NEEDS_FIXUP +#define RELOCATABLE_NEEDS_FIXUP \ + (rs6000_isa_flags & rs6000_isa_flags_explicit & OPTION_MASK_RELOCATABLE) + +#undef RS6000_ABI_NAME +#define RS6000_ABI_NAME "freebsd" + +#define INVALID_64BIT "-m%s not supported in this configuration" +#define INVALID_32BIT INVALID_64BIT + +/* Use LINUX64 instead of FREEBSD64 for compat with e.g. sysv4le.h */ +#ifdef LINUX64_DEFAULT_ABI_ELFv2 +#define ELFv2_ABI_CHECK (rs6000_elf_abi != 1) +#else +#define ELFv2_ABI_CHECK (rs6000_elf_abi == 2) +#endif + +#undef SUBSUBTARGET_OVERRIDE_OPTIONS +#define SUBSUBTARGET_OVERRIDE_OPTIONS \ + do \ + { \ + if (!global_options_set.x_rs6000_alignment_flags) \ + rs6000_alignment_flags = MASK_ALIGN_NATURAL; \ + if (TARGET_64BIT) \ + { \ + if (DEFAULT_ABI != ABI_AIX) \ + { \ + rs6000_current_abi = ABI_AIX; \ + error (INVALID_64BIT, "call"); \ + } \ + dot_symbols = !strcmp (rs6000_abi_name, "aixdesc"); \ + if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE) \ + { \ + rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE; \ + error (INVALID_64BIT, "relocatable"); \ + } \ + if (ELFv2_ABI_CHECK) \ + { \ + rs6000_current_abi = ABI_ELFv2; \ + if (dot_symbols) \ + error ("-mcall-aixdesc incompatible with -mabi=elfv2"); \ + } \ + if (rs6000_isa_flags & OPTION_MASK_EABI) \ + { \ + rs6000_isa_flags &= ~OPTION_MASK_EABI; \ + error (INVALID_64BIT, "eabi"); \ + } \ + if (TARGET_PROTOTYPE) \ + { \ + target_prototype = 0; \ + error (INVALID_64BIT, "prototype"); \ + } \ + if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0) \ + { \ + rs6000_isa_flags |= OPTION_MASK_POWERPC64; \ + error ("-m64 requires a PowerPC64 cpu"); \ + } \ + if ((rs6000_isa_flags_explicit \ + & OPTION_MASK_MINIMAL_TOC) != 0) \ + { \ + if (global_options_set.x_rs6000_current_cmodel \ + && rs6000_current_cmodel != CMODEL_SMALL) \ + error ("-mcmodel incompatible with other toc options"); \ + SET_CMODEL (CMODEL_SMALL); \ + } \ + else \ + { \ + if (!global_options_set.x_rs6000_current_cmodel) \ + SET_CMODEL (CMODEL_MEDIUM); \ + if (rs6000_current_cmodel != CMODEL_SMALL) \ + { \ + TARGET_NO_FP_IN_TOC = 0; \ + TARGET_NO_SUM_IN_TOC = 0; \ + } \ + } \ + } \ + } \ + while (0) + +#undef ASM_DEFAULT_SPEC +#undef ASM_SPEC +#undef LINK_OS_FREEBSD_SPEC + +#define ASM_DEFAULT_SPEC "-mppc%{!m32:64}" +#define ASM_SPEC "%{m32:%(asm_spec32)}%{!m32:%(asm_spec64)} %(asm_spec_common)" +#define LINK_OS_FREEBSD_SPEC "%{m32:%(link_os_freebsd_spec32)}%{!m32:%(link_os_freebsd_spec64)}" + +#define ASM_SPEC32 "-a32 \ +%{mrelocatable} %{mrelocatable-lib} %{" FPIE_OR_FPIC_SPEC ":-K PIC} \ +%{memb} %{!memb: %{msdata=eabi: -memb}} \ +%{!mlittle: %{!mlittle-endian: %{!mbig: %{!mbig-endian: \ + %{mcall-freebsd: -mbig} \ + %{mcall-i960-old: -mlittle} \ + %{mcall-linux: -mbig} \ + %{mcall-gnu: -mbig} \ + %{mcall-netbsd: -mbig} \ +}}}}" + +#define ASM_SPEC64 "-a64" + +#define ASM_SPEC_COMMON "%(asm_cpu) \ +%{,assembler|,assembler-with-cpp: %{mregnames} %{mno-regnames}} \ +%{mlittle} %{mlittle-endian} %{mbig} %{mbig-endian}" + +#undef SUBSUBTARGET_EXTRA_SPECS +#define SUBSUBTARGET_EXTRA_SPECS \ + { "asm_spec_common", ASM_SPEC_COMMON }, \ + { "asm_spec32", ASM_SPEC32 }, \ + { "asm_spec64", ASM_SPEC64 }, \ + { "link_os_freebsd_spec32", LINK_OS_FREEBSD_SPEC32 }, \ + { "link_os_freebsd_spec64", LINK_OS_FREEBSD_SPEC64 }, + +#define 
LINK_OS_FREEBSD_SPEC_DEF "\ + %{p:%nconsider using `-pg' instead of `-p' with gprof(1)} \ + %{v:-V} \ + %{assert*} %{R*} %{rpath*} %{defsym*} \ + %{shared:-Bshareable %{h*} %{soname*}} \ + %{!shared: \ + %{!static: \ + %{rdynamic: -export-dynamic} \ + %{!dynamic-linker:-dynamic-linker " FBSD_DYNAMIC_LINKER "}} \ + %{static:-Bstatic}} \ + %{symbolic:-Bsymbolic}" + +#define LINK_OS_FREEBSD_SPEC32 "-melf32ppc_fbsd " LINK_OS_FREEBSD_SPEC_DEF + +#define LINK_OS_FREEBSD_SPEC64 "-melf64ppc_fbsd " LINK_OS_FREEBSD_SPEC_DEF + +#undef MULTILIB_DEFAULTS +#define MULTILIB_DEFAULTS { "m64" } + +/* PowerPC-64 FreeBSD increases natural record alignment to doubleword if + the first field is an FP double, only if in power alignment mode. */ +#undef ROUND_TYPE_ALIGN +#define ROUND_TYPE_ALIGN(STRUCT, COMPUTED, SPECIFIED) \ + ((TARGET_64BIT \ + && (TREE_CODE (STRUCT) == RECORD_TYPE \ + || TREE_CODE (STRUCT) == UNION_TYPE \ + || TREE_CODE (STRUCT) == QUAL_UNION_TYPE) \ + && TARGET_ALIGN_NATURAL == 0) \ + ? rs6000_special_round_type_align (STRUCT, COMPUTED, SPECIFIED) \ + : MAX ((COMPUTED), (SPECIFIED))) + +/* Use the default for compiling target libs. */ +#ifdef IN_TARGET_LIBS +#undef TARGET_ALIGN_NATURAL +#define TARGET_ALIGN_NATURAL 1 +#endif + +/* Indicate that jump tables go in the text section. */ +#undef JUMP_TABLES_IN_TEXT_SECTION +#define JUMP_TABLES_IN_TEXT_SECTION TARGET_64BIT + +/* The linux ppc64 ABI isn't explicit on whether aggregates smaller + than a doubleword should be padded upward or downward. You could + reasonably assume that they follow the normal rules for structure + layout treating the parameter area as any other block of memory, + then map the reg param area to registers. i.e. pad upward. + Setting both of the following defines results in this behavior. + Setting just the first one will result in aggregates that fit in a + doubleword being padded downward, and others being padded upward. + Not a bad idea as this results in struct { int x; } being passed + the same way as an int. */ +#define AGGREGATE_PADDING_FIXED TARGET_64BIT +#define AGGREGATES_PAD_UPWARD_ALWAYS 0 + +/* Specify padding for the last element of a block move between + registers and memory. FIRST is nonzero if this is the only + element. */ +#define BLOCK_REG_PADDING(MODE, TYPE, FIRST) \ + (!(FIRST) ? upward : FUNCTION_ARG_PADDING (MODE, TYPE)) + +/* FreeBSD doesn't support saving and restoring 64-bit regs with a 32-bit + kernel. This is supported when running on a 64-bit kernel with + COMPAT_FREEBSD32, but tell GCC it isn't so that our 32-bit binaries + are compatible. 
*/ +#define OS_MISSING_POWERPC64 !TARGET_64BIT + +#undef FBSD_TARGET_CPU_CPP_BUILTINS +#define FBSD_TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__PPC__"); \ + builtin_define ("__ppc__"); \ + builtin_define ("__powerpc__"); \ + if (TARGET_64BIT) \ + { \ + builtin_define ("__arch64__"); \ + builtin_define ("__LP64__"); \ + builtin_define ("__PPC64__"); \ + builtin_define ("__powerpc64__"); \ + builtin_assert ("cpu=powerpc64"); \ + builtin_assert ("machine=powerpc64"); \ + } \ + else \ + { \ + builtin_define_std ("PPC"); \ + builtin_define_std ("powerpc"); \ + builtin_assert ("cpu=powerpc"); \ + builtin_assert ("machine=powerpc"); \ + TARGET_OS_SYSV_CPP_BUILTINS (); \ + } \ + } \ + while (0) + +#undef CPP_OS_DEFAULT_SPEC +#define CPP_OS_DEFAULT_SPEC "%(cpp_os_freebsd)" + +#undef CPP_OS_FREEBSD_SPEC +#define CPP_OS_FREEBSD_SPEC "" + +#undef STARTFILE_DEFAULT_SPEC +#define STARTFILE_DEFAULT_SPEC "%(startfile_freebsd)" + +#undef ENDFILE_DEFAULT_SPEC +#define ENDFILE_DEFAULT_SPEC "%(endfile_freebsd)" + +#undef LIB_DEFAULT_SPEC +#define LIB_DEFAULT_SPEC "%(lib_freebsd)" + +#undef LINK_START_DEFAULT_SPEC +#define LINK_START_DEFAULT_SPEC "%(link_start_freebsd)" + +#undef LINK_OS_DEFAULT_SPEC +#define LINK_OS_DEFAULT_SPEC "%(link_os_freebsd)" + +/* XXX: This is wrong for many platforms in sysv4.h. + We should work on getting that definition fixed. */ +#undef LINK_SHLIB_SPEC +#define LINK_SHLIB_SPEC "%{shared:-shared} %{!shared: %{static:-static}}" + + +/************************[ Target stuff ]***********************************/ + +/* Define the actual types of some ANSI-mandated types. + Needs to agree with <machine/ansi.h>. GCC defaults come from c-decl.c, + c-common.c, and config/<arch>/<arch>.h. */ + + +#undef SIZE_TYPE +#define SIZE_TYPE (TARGET_64BIT ? "long unsigned int" : "unsigned int") + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE (TARGET_64BIT ? "long int" : "int") + +/* rs6000.h gets this wrong for FreeBSD. We use the GCC defaults instead. */ +#undef WCHAR_TYPE + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + + +/* Override rs6000.h definition. */ +#undef ASM_APP_ON +#define ASM_APP_ON "#APP\n" + +/* Override rs6000.h definition. */ +#undef ASM_APP_OFF +#define ASM_APP_OFF "#NO_APP\n" + +/* Function profiling bits */ +#undef RS6000_MCOUNT +#define RS6000_MCOUNT "_mcount" + +#define PROFILE_HOOK(LABEL) \ + do { if (TARGET_64BIT) output_profile_hook (LABEL); } while (0) + +/* _init and _fini functions are built from bits spread across many + object files, each potentially with a different TOC pointer. For + that reason, place a nop after the call so that the linker can + restore the TOC pointer if a TOC adjusting call stub is needed. */ +#ifdef __powerpc64__ +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n" \ +" bl " #FUNC "\n" \ +" nop\n" \ +" .previous"); +#endif + +/* FP save and restore routines. */ +#undef SAVE_FP_PREFIX +#define SAVE_FP_PREFIX (TARGET_64BIT ? "._savef" : "_savefpr_") +#undef SAVE_FP_SUFFIX +#define SAVE_FP_SUFFIX "" +#undef RESTORE_FP_PREFIX +#define RESTORE_FP_PREFIX (TARGET_64BIT ? "._restf" : "_restfpr_") +#undef RESTORE_FP_SUFFIX +#define RESTORE_FP_SUFFIX "" + +/* Select a format to encode pointers in exception handling data. CODE + is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is + true if the symbol may be affected by dynamic relocations. */ +#undef ASM_PREFERRED_EH_DATA_FORMAT +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \ + (TARGET_64BIT || flag_pic \ + ? (((GLOBAL) ? 
DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel \ + | (TARGET_64BIT ? DW_EH_PE_udata8 : DW_EH_PE_sdata4)) \ + : DW_EH_PE_absptr) + +/* Static stack checking is supported by means of probes. */ +#define STACK_CHECK_STATIC_BUILTIN 1 + +/* The default value isn't sufficient in 64-bit mode. */ +#define STACK_CHECK_PROTECT (TARGET_64BIT ? 16 * 1024 : 12 * 1024) + +/* Use standard DWARF numbering for DWARF debugging information. */ +#define RS6000_USE_DWARF_NUMBERING + +/* PowerPC64 Linux word-aligns FP doubles when -malign-power is given. */ +#undef ADJUST_FIELD_ALIGN +#define ADJUST_FIELD_ALIGN(FIELD, TYPE, COMPUTED) \ + (rs6000_special_adjust_field_align_p ((TYPE), (COMPUTED)) \ + ? 128 \ + : (TARGET_64BIT \ + && TARGET_ALIGN_NATURAL == 0 \ + && TYPE_MODE (strip_array_types (TYPE)) == DFmode) \ + ? MIN ((COMPUTED), 32) \ + : (COMPUTED)) + +#undef TOC_SECTION_ASM_OP +#define TOC_SECTION_ASM_OP \ + (TARGET_64BIT \ + ? "\t.section\t\".toc\",\"aw\"" \ + : "\t.section\t\".got\",\"aw\"") + +#undef MINIMAL_TOC_SECTION_ASM_OP +#define MINIMAL_TOC_SECTION_ASM_OP \ + (TARGET_64BIT \ + ? "\t.section\t\".toc1\",\"aw\"" \ + : (flag_pic \ + ? "\t.section\t\".got2\",\"aw\"" \ + : "\t.section\t\".got1\",\"aw\"")) + +/* This is how to declare the size of a function. */ +#undef ASM_DECLARE_FUNCTION_SIZE +#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \ + do \ + { \ + if (!flag_inhibit_size_directive) \ + { \ + fputs ("\t.size\t", (FILE)); \ + if (TARGET_64BIT && DOT_SYMBOLS) \ + putc ('.', (FILE)); \ + assemble_name ((FILE), (FNAME)); \ + fputs (",.-", (FILE)); \ + rs6000_output_function_entry (FILE, FNAME); \ + putc ('\n', (FILE)); \ + } \ + } \ + while (0) + +#undef ASM_OUTPUT_SPECIAL_POOL_ENTRY_P +#define ASM_OUTPUT_SPECIAL_POOL_ENTRY_P(X, MODE) \ + (TARGET_TOC \ + && (GET_CODE (X) == SYMBOL_REF \ + || (GET_CODE (X) == CONST && GET_CODE (XEXP (X, 0)) == PLUS \ + && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF) \ + || GET_CODE (X) == LABEL_REF \ + || (GET_CODE (X) == CONST_INT \ + && GET_MODE_BITSIZE (MODE) <= GET_MODE_BITSIZE (Pmode)) \ + || (GET_CODE (X) == CONST_DOUBLE \ + && ((TARGET_64BIT \ + && (TARGET_MINIMAL_TOC \ + || (SCALAR_FLOAT_MODE_P (GET_MODE (X)) \ + && ! TARGET_NO_FP_IN_TOC))) \ + || (!TARGET_64BIT \ + && !TARGET_NO_FP_IN_TOC \ + && SCALAR_FLOAT_MODE_P (GET_MODE (X)) \ + && BITS_PER_WORD == HOST_BITS_PER_INT))))) + +/* Use --as-needed -lgcc_s for eh support. */ +#ifdef HAVE_LD_AS_NEEDED +#define USE_LD_AS_NEEDED 1 +#endif + +#define POWERPC_FREEBSD diff --git a/gcc/config/powerpcspe/genopt.sh b/gcc/config/powerpcspe/genopt.sh new file mode 100755 index 000000000000..f04c00127c19 --- /dev/null +++ b/gcc/config/powerpcspe/genopt.sh @@ -0,0 +1,64 @@ +#!/bin/sh +# Generate powerpcspe-tables.opt from the list of CPUs in powerpcspe-cpus.def. +# Copyright (C) 2011-2017 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. 
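+
+# Illustrative usage (the single argument is the directory that contains
+# powerpcspe-cpus.def, e.g. gcc/config/powerpcspe):
+#
+#   sh genopt.sh gcc/config/powerpcspe > powerpcspe-tables.opt
+#
+# Each RS6000_CPU ("name", ...) entry in that file produces one EnumValue
+# record in the generated output, with String(name) and a Value() numbered
+# in file order.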
+ +cat <<EOF +; -*- buffer-read-only: t -*- +; Generated automatically by genopt.sh from powerpcspe-cpus.def. + +; Copyright (C) 2011-2017 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. + +Enum +Name(rs6000_cpu_opt_value) Type(int) +Known CPUs (for use with the -mcpu= and -mtune= options): + +EnumValue +Enum(rs6000_cpu_opt_value) String(native) Value(RS6000_CPU_OPTION_NATIVE) DriverOnly + +EOF + +awk -F'[(, ]+' ' +BEGIN { + value = 0 +} + +/^RS6000_CPU/ { + name = $2 + gsub("\"", "", name) + print "EnumValue" + print "Enum(rs6000_cpu_opt_value) String(" name ") Value(" value ")" + print "" + value++ +}' $1/powerpcspe-cpus.def diff --git a/gcc/config/powerpcspe/host-darwin.c b/gcc/config/powerpcspe/host-darwin.c new file mode 100644 index 000000000000..da90e2f64522 --- /dev/null +++ b/gcc/config/powerpcspe/host-darwin.c @@ -0,0 +1,153 @@ +/* Darwin/powerpc host-specific hook definitions. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "diagnostic.h" +#include <sys/ucontext.h> +#include "hosthooks.h" +#include "hosthooks-def.h" +#include "config/host-darwin.h" + +static void segv_crash_handler (int); +static void segv_handler (int, siginfo_t *, void *); +static void darwin_rs6000_extra_signals (void); + +#ifndef HAVE_DECL_SIGALTSTACK +/* This doesn't have a prototype in signal.h in 10.2.x and earlier, + fixed in later releases. */ +extern int sigaltstack(const struct sigaltstack *, struct sigaltstack *); +#endif + +/* The fields of the mcontext_t type have acquired underscores in later + OS versions. */ +#ifdef HAS_MCONTEXT_T_UNDERSCORES +#define MC_FLD(x) __ ## x +#else +#define MC_FLD(x) x +#endif + +#undef HOST_HOOKS_EXTRA_SIGNALS +#define HOST_HOOKS_EXTRA_SIGNALS darwin_rs6000_extra_signals + +/* On Darwin/powerpc, hitting the stack limit turns into a SIGSEGV. + This code detects the difference between hitting the stack limit and + a true wild pointer dereference by looking at the instruction that + faulted; only a few kinds of instruction are used to access below + the previous bottom of the stack. 
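+   For instance, a prologue store-with-update such as stwu %r1,-64(%r1)
+   encodes as 0x9421ffc0 and is matched by the first mask test below,
+   (faulting_insn & 0xFFFF8000) == 0x94218000, because the 16-bit
+   displacement is negative.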
*/ + +static void +segv_crash_handler (int sig ATTRIBUTE_UNUSED) +{ + internal_error ("Segmentation Fault (code)"); +} + +static void +segv_handler (int sig ATTRIBUTE_UNUSED, + siginfo_t *sip ATTRIBUTE_UNUSED, + void *scp) +{ + ucontext_t *uc = (ucontext_t *)scp; + sigset_t sigset; + unsigned faulting_insn; + + /* The fault might have happened when trying to run some instruction, in + which case the next line will segfault _again_. Handle this case. */ + signal (SIGSEGV, segv_crash_handler); + sigemptyset (&sigset); + sigaddset (&sigset, SIGSEGV); + sigprocmask (SIG_UNBLOCK, &sigset, NULL); + + faulting_insn = *(unsigned *)uc->uc_mcontext->MC_FLD(ss).MC_FLD(srr0); + + /* Note that this only has to work for GCC, so we don't have to deal + with all the possible cases (GCC has no AltiVec code, for + instance). It's complicated because Darwin allows stores to + below the stack pointer, and the prologue code takes advantage of + this. */ + + if ((faulting_insn & 0xFFFF8000) == 0x94218000 /* stwu %r1, -xxx(%r1) */ + || (faulting_insn & 0xFC1F03FF) == 0x7C01016E /* stwux xxx, %r1, xxx */ + || (faulting_insn & 0xFC1F8000) == 0x90018000 /* stw xxx, -yyy(%r1) */ + || (faulting_insn & 0xFC1F8000) == 0xD8018000 /* stfd xxx, -yyy(%r1) */ + || (faulting_insn & 0xFC1F8000) == 0xBC018000 /* stmw xxx, -yyy(%r1) */) + { + char *shell_name; + + fnotice (stderr, "Out of stack space.\n"); + shell_name = getenv ("SHELL"); + if (shell_name != NULL) + shell_name = strrchr (shell_name, '/'); + if (shell_name != NULL) + { + static const char * shell_commands[][2] = { + { "sh", "ulimit -S -s unlimited" }, + { "bash", "ulimit -S -s unlimited" }, + { "tcsh", "limit stacksize unlimited" }, + { "csh", "limit stacksize unlimited" }, + /* zsh doesn't have "unlimited", this will work under the + default configuration. */ + { "zsh", "limit stacksize 32m" } + }; + size_t i; + + for (i = 0; i < ARRAY_SIZE (shell_commands); i++) + if (strcmp (shell_commands[i][0], shell_name + 1) == 0) + { + fnotice (stderr, + "Try running '%s' in the shell to raise its limit.\n", + shell_commands[i][1]); + } + } + + if (global_dc->abort_on_error) + fancy_abort (__FILE__, __LINE__, __FUNCTION__); + + exit (FATAL_EXIT_CODE); + } + + fprintf (stderr, "[address=%08lx pc=%08x]\n", + uc->uc_mcontext->MC_FLD(es).MC_FLD(dar), + uc->uc_mcontext->MC_FLD(ss).MC_FLD(srr0)); + internal_error ("Segmentation Fault"); + exit (FATAL_EXIT_CODE); +} + +static void +darwin_rs6000_extra_signals (void) +{ + struct sigaction sact; + stack_t sigstk; + + sigstk.ss_sp = (char*)xmalloc (SIGSTKSZ); + sigstk.ss_size = SIGSTKSZ; + sigstk.ss_flags = 0; + if (sigaltstack (&sigstk, NULL) < 0) + fatal_error (input_location, "While setting up signal stack: %m"); + + sigemptyset(&sact.sa_mask); + sact.sa_flags = SA_ONSTACK | SA_SIGINFO; + sact.sa_sigaction = segv_handler; + if (sigaction (SIGSEGV, &sact, 0) < 0) + fatal_error (input_location, "While setting up signal handler: %m"); +} + + +const struct host_hooks host_hooks = HOST_HOOKS_INITIALIZER; diff --git a/gcc/config/powerpcspe/host-ppc64-darwin.c b/gcc/config/powerpcspe/host-ppc64-darwin.c new file mode 100644 index 000000000000..381fe1673ae5 --- /dev/null +++ b/gcc/config/powerpcspe/host-ppc64-darwin.c @@ -0,0 +1,30 @@ +/* ppc64-darwin host-specific hook definitions. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "hosthooks.h" +#include "hosthooks-def.h" +#include "config/host-darwin.h" + +/* Darwin doesn't do anything special for ppc64 hosts; this file exists just + to include config/host-darwin.h. */ + +const struct host_hooks host_hooks = HOST_HOOKS_INITIALIZER; diff --git a/gcc/config/powerpcspe/htm.md b/gcc/config/powerpcspe/htm.md new file mode 100644 index 000000000000..de769f7db3c7 --- /dev/null +++ b/gcc/config/powerpcspe/htm.md @@ -0,0 +1,296 @@ +;; Hardware Transactional Memory (HTM) patterns. +;; Copyright (C) 2013-2017 Free Software Foundation, Inc. +;; Contributed by Peter Bergner <bergner@vnet.ibm.com>. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_constants + [(TFHAR_SPR 128) + (TFIAR_SPR 129) + (TEXASR_SPR 130) + (TEXASRU_SPR 131) + (MAX_HTM_OPERANDS 4) + ]) + +;; +;; UNSPEC usage +;; + +(define_c_enum "unspec" + [UNSPEC_HTM_FENCE + ]) + +;; +;; UNSPEC_VOLATILE usage +;; + +(define_c_enum "unspecv" + [UNSPECV_HTM_TABORT + UNSPECV_HTM_TABORTXC + UNSPECV_HTM_TABORTXCI + UNSPECV_HTM_TBEGIN + UNSPECV_HTM_TCHECK + UNSPECV_HTM_TEND + UNSPECV_HTM_TRECHKPT + UNSPECV_HTM_TRECLAIM + UNSPECV_HTM_TSR + UNSPECV_HTM_TTEST + UNSPECV_HTM_MFSPR + UNSPECV_HTM_MTSPR + ]) + +(define_expand "tabort" + [(parallel + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand:SI 0 "base_reg_operand" "b")] + UNSPECV_HTM_TABORT)) + (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])] + "TARGET_HTM" +{ + operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[2]) = 1; +}) + +(define_insn "*tabort" + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand:SI 0 "base_reg_operand" "b")] + UNSPECV_HTM_TABORT)) + (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tabort. 
%0" + [(set_attr "type" "htmsimple") + (set_attr "length" "4")]) + +(define_expand "tabort<wd>c" + [(parallel + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n") + (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "gpc_reg_operand" "r")] + UNSPECV_HTM_TABORTXC)) + (set (match_dup 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))])] + "TARGET_HTM" +{ + operands[4] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[4]) = 1; +}) + +(define_insn "*tabort<wd>c" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n") + (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "gpc_reg_operand" "r")] + UNSPECV_HTM_TABORTXC)) + (set (match_operand:BLK 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tabort<wd>c. %0,%1,%2" + [(set_attr "type" "htmsimple") + (set_attr "length" "4")]) + +(define_expand "tabort<wd>ci" + [(parallel + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n") + (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand 2 "s5bit_cint_operand" "n")] + UNSPECV_HTM_TABORTXCI)) + (set (match_dup 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))])] + "TARGET_HTM" +{ + operands[4] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[4]) = 1; +}) + +(define_insn "*tabort<wd>ci" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n") + (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand 2 "s5bit_cint_operand" "n")] + UNSPECV_HTM_TABORTXCI)) + (set (match_operand:BLK 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tabort<wd>ci. %0,%1,%2" + [(set_attr "type" "htmsimple") + (set_attr "length" "4")]) + +(define_expand "tbegin" + [(parallel + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] + UNSPECV_HTM_TBEGIN)) + (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])] + "TARGET_HTM" +{ + operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[2]) = 1; +}) + +(define_insn "*tbegin" + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] + UNSPECV_HTM_TBEGIN)) + (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tbegin. 
%0" + [(set_attr "type" "htm") + (set_attr "length" "4")]) + +(define_expand "tcheck" + [(parallel + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TCHECK)) + (set (match_dup 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))])] + "TARGET_HTM" +{ + operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[1]) = 1; +}) + +(define_insn "*tcheck" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TCHECK)) + (set (match_operand:BLK 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tcheck %0" + [(set_attr "type" "htm") + (set_attr "length" "4")]) + +(define_expand "tend" + [(parallel + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] + UNSPECV_HTM_TEND)) + (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])] + "TARGET_HTM" +{ + operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[2]) = 1; +}) + +(define_insn "*tend" + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] + UNSPECV_HTM_TEND)) + (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tend. %0" + [(set_attr "type" "htm") + (set_attr "length" "4")]) + +(define_expand "trechkpt" + [(parallel + [(set (match_operand:CC 0 "cc_reg_operand" "=x") + (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TRECHKPT)) + (set (match_dup 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))])] + "TARGET_HTM" +{ + operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[1]) = 1; +}) + +(define_insn "*trechkpt" + [(set (match_operand:CC 0 "cc_reg_operand" "=x") + (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TRECHKPT)) + (set (match_operand:BLK 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "trechkpt." + [(set_attr "type" "htmsimple") + (set_attr "length" "4")]) + +(define_expand "treclaim" + [(parallel + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand:SI 0 "gpc_reg_operand" "r")] + UNSPECV_HTM_TRECLAIM)) + (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])] + "TARGET_HTM" +{ + operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[2]) = 1; +}) + +(define_insn "*treclaim" + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand:SI 0 "gpc_reg_operand" "r")] + UNSPECV_HTM_TRECLAIM)) + (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "treclaim. %0" + [(set_attr "type" "htmsimple") + (set_attr "length" "4")]) + +(define_expand "tsr" + [(parallel + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] + UNSPECV_HTM_TSR)) + (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])] + "TARGET_HTM" +{ + operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[2]) = 1; +}) + +(define_insn "*tsr" + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] + UNSPECV_HTM_TSR)) + (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tsr. 
%0" + [(set_attr "type" "htmsimple") + (set_attr "length" "4")]) + +(define_expand "ttest" + [(parallel + [(set (match_operand:CC 0 "cc_reg_operand" "=x") + (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TTEST)) + (set (match_dup 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))])] + "TARGET_HTM" +{ + operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[1]) = 1; +}) + +(define_insn "*ttest" + [(set (match_operand:CC 0 "cc_reg_operand" "=x") + (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TTEST)) + (set (match_operand:BLK 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tabortwci. 0,1,0" + [(set_attr "type" "htmsimple") + (set_attr "length" "4")]) + +(define_insn "htm_mfspr_<mode>" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (unspec_volatile:GPR [(match_operand 1 "u10bit_cint_operand" "n") + (match_operand:GPR 2 "htm_spr_reg_operand" "")] + UNSPECV_HTM_MFSPR))] + "TARGET_HTM" + "mfspr %0,%1"; + [(set_attr "type" "htm") + (set_attr "length" "4")]) + +(define_insn "htm_mtspr_<mode>" + [(set (match_operand:GPR 2 "htm_spr_reg_operand" "") + (unspec_volatile:GPR [(match_operand:GPR 0 "gpc_reg_operand" "r") + (match_operand 1 "u10bit_cint_operand" "n")] + UNSPECV_HTM_MTSPR))] + "TARGET_HTM" + "mtspr %1,%0"; + [(set_attr "type" "htm") + (set_attr "length" "4")]) diff --git a/gcc/config/powerpcspe/htmintrin.h b/gcc/config/powerpcspe/htmintrin.h new file mode 100644 index 000000000000..55d5a661811f --- /dev/null +++ b/gcc/config/powerpcspe/htmintrin.h @@ -0,0 +1,131 @@ +/* Hardware Transactional Memory (HTM) intrinsics. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + Contributed by Peter Bergner <bergner@vnet.ibm.com>. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef __HTM__ +# error "HTM instruction set not enabled" +#endif /* __HTM__ */ + +#ifndef _HTMINTRIN_H +#define _HTMINTRIN_H + +#include <stdint.h> + +typedef uint64_t texasr_t; +typedef uint32_t texasru_t; +typedef uint32_t texasrl_t; +typedef uintptr_t tfiar_t; +typedef uintptr_t tfhar_t; + +#define _HTM_STATE(CR0) ((CR0 >> 1) & 0x3) +#define _HTM_NONTRANSACTIONAL 0x0 +#define _HTM_SUSPENDED 0x1 +#define _HTM_TRANSACTIONAL 0x2 + +/* The following macros use the IBM bit numbering for BITNUM + as used in the ISA documentation. 
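+   In IBM numbering bit 0 is the most significant bit, so, for
+   example, the 8-bit failure code occupying TEXASR bits 0:7 is
+   fetched with _TEXASR_EXTRACT_BITS (TEXASR, 7, 8), which evaluates
+   to (TEXASR >> 56) & 0xff.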
*/ + +#define _TEXASR_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \ + (((TEXASR) >> (63-(BITNUM))) & ((1<<(SIZE))-1)) +#define _TEXASRU_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \ + (((TEXASR) >> (31-(BITNUM))) & ((1<<(SIZE))-1)) + +#define _TEXASR_FAILURE_CODE(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 7, 8) +#define _TEXASRU_FAILURE_CODE(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 8) + +#define _TEXASR_FAILURE_PERSISTENT(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 7, 1) +#define _TEXASRU_FAILURE_PERSISTENT(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 1) + +#define _TEXASR_DISALLOWED(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 8, 1) +#define _TEXASRU_DISALLOWED(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 8, 1) + +#define _TEXASR_NESTING_OVERFLOW(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 9, 1) +#define _TEXASRU_NESTING_OVERFLOW(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 9, 1) + +#define _TEXASR_FOOTPRINT_OVERFLOW(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 10, 1) +#define _TEXASRU_FOOTPRINT_OVERFLOW(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 10, 1) + +#define _TEXASR_SELF_INDUCED_CONFLICT(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 11, 1) +#define _TEXASRU_SELF_INDUCED_CONFLICT(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 11, 1) + +#define _TEXASR_NON_TRANSACTIONAL_CONFLICT(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 12, 1) +#define _TEXASRU_NON_TRANSACTIONAL_CONFLICT(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 12, 1) + +#define _TEXASR_TRANSACTION_CONFLICT(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 13, 1) +#define _TEXASRU_TRANSACTION_CONFLICT(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 13, 1) + +#define _TEXASR_TRANSLATION_INVALIDATION_CONFLICT(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 14, 1) +#define _TEXASRU_TRANSLATION_INVALIDATION_CONFLICT(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 14, 1) + +#define _TEXASR_IMPLEMENTAION_SPECIFIC(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 15, 1) +#define _TEXASRU_IMPLEMENTAION_SPECIFIC(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 15, 1) + +#define _TEXASR_INSTRUCTION_FETCH_CONFLICT(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 16, 1) +#define _TEXASRU_INSTRUCTION_FETCH_CONFLICT(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 16, 1) + +#define _TEXASR_ABORT(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 31, 1) +#define _TEXASRU_ABORT(TEXASRU) \ + _TEXASRU_EXTRACT_BITS(TEXASRU, 31, 1) + + +#define _TEXASR_SUSPENDED(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 32, 1) + +#define _TEXASR_PRIVILEGE(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 35, 2) + +#define _TEXASR_FAILURE_SUMMARY(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 36, 1) + +#define _TEXASR_TFIAR_EXACT(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 37, 1) + +#define _TEXASR_ROT(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 38, 1) + +#define _TEXASR_TRANSACTION_LEVEL(TEXASR) \ + _TEXASR_EXTRACT_BITS(TEXASR, 63, 12) + +#endif /* _HTMINTRIN_H */ diff --git a/gcc/config/powerpcspe/htmxlintrin.h b/gcc/config/powerpcspe/htmxlintrin.h new file mode 100644 index 000000000000..d6fc583b4829 --- /dev/null +++ b/gcc/config/powerpcspe/htmxlintrin.h @@ -0,0 +1,214 @@ +/* XL compiler Hardware Transactional Memory (HTM) execution intrinsics. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + Contributed by Peter Bergner <bergner@vnet.ibm.com>. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. 
+ + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef __HTM__ +# error "HTM instruction set not enabled" +#endif /* __HTM__ */ + +#ifndef _HTMXLINTRIN_H +#define _HTMXLINTRIN_H + +#include <stdint.h> +#include <htmintrin.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define _TEXASR_PTR(TM_BUF) \ + ((texasr_t *)((TM_BUF)+0)) +#define _TEXASRU_PTR(TM_BUF) \ + ((texasru_t *)((TM_BUF)+0)) +#define _TEXASRL_PTR(TM_BUF) \ + ((texasrl_t *)((TM_BUF)+4)) +#define _TFIAR_PTR(TM_BUF) \ + ((tfiar_t *)((TM_BUF)+8)) + +typedef char TM_buff_type[16]; + +/* Compatibility macro with s390. This macro can be used to determine + whether a transaction was successfully started from the __TM_begin() + and __TM_simple_begin() intrinsic functions below. */ +#define _HTM_TBEGIN_STARTED 1 + +extern __inline long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_simple_begin (void) +{ + if (__builtin_expect (__builtin_tbegin (0), 1)) + return _HTM_TBEGIN_STARTED; + return 0; +} + +extern __inline long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_begin (void* const TM_buff) +{ + *_TEXASRL_PTR (TM_buff) = 0; + if (__builtin_expect (__builtin_tbegin (0), 1)) + return _HTM_TBEGIN_STARTED; +#ifdef __powerpc64__ + *_TEXASR_PTR (TM_buff) = __builtin_get_texasr (); +#else + *_TEXASRU_PTR (TM_buff) = __builtin_get_texasru (); + *_TEXASRL_PTR (TM_buff) = __builtin_get_texasr (); +#endif + *_TFIAR_PTR (TM_buff) = __builtin_get_tfiar (); + return 0; +} + +extern __inline long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_end (void) +{ + unsigned char status = _HTM_STATE (__builtin_tend (0)); + if (__builtin_expect (status, _HTM_TRANSACTIONAL)) + return 1; + return 0; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_abort (void) +{ + __builtin_tabort (0); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_named_abort (unsigned char const code) +{ + __builtin_tabort (code); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_resume (void) +{ + __builtin_tresume (); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_suspend (void) +{ + __builtin_tsuspend (); +} + +extern __inline long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_user_abort (void* const TM_buff) +{ + texasru_t texasru = *_TEXASRU_PTR (TM_buff); + return _TEXASRU_ABORT (texasru); +} + +extern __inline long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_named_user_abort (void* const TM_buff, unsigned char *code) +{ + texasru_t texasru = *_TEXASRU_PTR (TM_buff); + + *code = _TEXASRU_FAILURE_CODE (texasru); + return _TEXASRU_ABORT (texasru); +} + +extern __inline long +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_illegal (void* const TM_buff) +{ + texasru_t texasru = *_TEXASRU_PTR (TM_buff); + return _TEXASRU_DISALLOWED (texasru); +} + +extern __inline long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_footprint_exceeded (void* const TM_buff) +{ + texasru_t texasru = *_TEXASRU_PTR (TM_buff); + return _TEXASRU_FOOTPRINT_OVERFLOW (texasru); +} + +extern __inline long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_nesting_depth (void* const TM_buff) +{ + texasrl_t texasrl; + + if (_HTM_STATE (__builtin_ttest ()) == _HTM_NONTRANSACTIONAL) + { + texasrl = *_TEXASRL_PTR (TM_buff); + if (!_TEXASR_FAILURE_SUMMARY (texasrl)) + texasrl = 0; + } + else + texasrl = (texasrl_t) __builtin_get_texasr (); + + return _TEXASR_TRANSACTION_LEVEL (texasrl); +} + +extern __inline long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_nested_too_deep(void* const TM_buff) +{ + texasru_t texasru = *_TEXASRU_PTR (TM_buff); + return _TEXASRU_NESTING_OVERFLOW (texasru); +} + +extern __inline long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_conflict(void* const TM_buff) +{ + texasru_t texasru = *_TEXASRU_PTR (TM_buff); + /* Return TEXASR bits 11 (Self-Induced Conflict) through + 14 (Translation Invalidation Conflict). */ + return (_TEXASRU_EXTRACT_BITS (texasru, 14, 4)) ? 1 : 0; +} + +extern __inline long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_failure_persistent(void* const TM_buff) +{ + texasru_t texasru = *_TEXASRU_PTR (TM_buff); + return _TEXASRU_FAILURE_PERSISTENT (texasru); +} + +extern __inline long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_failure_address(void* const TM_buff) +{ + return *_TFIAR_PTR (TM_buff); +} + +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__TM_failure_code(void* const TM_buff) +{ + return *_TEXASR_PTR (TM_buff); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _HTMXLINTRIN_H */ diff --git a/gcc/config/powerpcspe/linux.h b/gcc/config/powerpcspe/linux.h new file mode 100644 index 000000000000..684afd6c1901 --- /dev/null +++ b/gcc/config/powerpcspe/linux.h @@ -0,0 +1,140 @@ +/* Definitions of target machine for GNU compiler, + for PowerPC machines running Linux. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + Contributed by Michael Meissner (meissner@cygnus.com). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* Linux doesn't support saving and restoring 64-bit regs in a 32-bit + process. */ +#define OS_MISSING_POWERPC64 1 + +/* We use glibc _mcount for profiling. 
*/ +#define NO_PROFILE_COUNTERS 1 + +#ifdef SINGLE_LIBC +#define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) +#define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) +#define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) +#undef OPTION_MUSL +#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL) +#else +#define OPTION_GLIBC (linux_libc == LIBC_GLIBC) +#define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) +#define OPTION_BIONIC (linux_libc == LIBC_BIONIC) +#undef OPTION_MUSL +#define OPTION_MUSL (linux_libc == LIBC_MUSL) +#endif + +/* Determine what functions are present at the runtime; + this includes full c99 runtime and sincos. */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION linux_libc_has_function + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("PPC"); \ + builtin_define_std ("powerpc"); \ + builtin_assert ("cpu=powerpc"); \ + builtin_assert ("machine=powerpc"); \ + TARGET_OS_SYSV_CPP_BUILTINS (); \ + } \ + while (0) + +#undef CPP_OS_DEFAULT_SPEC +#define CPP_OS_DEFAULT_SPEC "%(cpp_os_linux)" + +#undef LINK_SHLIB_SPEC +#define LINK_SHLIB_SPEC "%{shared:-shared} %{!shared: %{static:-static}}" + +#undef LIB_DEFAULT_SPEC +#define LIB_DEFAULT_SPEC "%(lib_linux)" + +#undef STARTFILE_DEFAULT_SPEC +#define STARTFILE_DEFAULT_SPEC "%(startfile_linux)" + +#undef ENDFILE_DEFAULT_SPEC +#define ENDFILE_DEFAULT_SPEC "%(endfile_linux)" + +#undef LINK_START_DEFAULT_SPEC +#define LINK_START_DEFAULT_SPEC "%(link_start_linux)" + +#undef LINK_OS_DEFAULT_SPEC +#define LINK_OS_DEFAULT_SPEC "%(link_os_linux)" + +#undef DEFAULT_ASM_ENDIAN +#if (TARGET_DEFAULT & MASK_LITTLE_ENDIAN) +#define DEFAULT_ASM_ENDIAN " -mlittle" +#define LINK_OS_LINUX_EMUL ENDIAN_SELECT(" -m elf32ppclinux", \ + " -m elf32lppclinux", \ + " -m elf32lppclinux") +#else +#define DEFAULT_ASM_ENDIAN " -mbig" +#define LINK_OS_LINUX_EMUL ENDIAN_SELECT(" -m elf32ppclinux", \ + " -m elf32lppclinux", \ + " -m elf32ppclinux") +#endif + +#undef LINK_OS_LINUX_SPEC +#define LINK_OS_LINUX_SPEC LINK_OS_LINUX_EMUL " %{!shared: %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " GNU_USER_DYNAMIC_LINKER "}}" + +/* For backward compatibility, we must continue to use the AIX + structure return convention. */ +#undef DRAFT_V4_STRUCT_RET +#define DRAFT_V4_STRUCT_RET 1 + +/* We are 32-bit all the time, so optimize a little. */ +#undef TARGET_64BIT +#define TARGET_64BIT 0 + +/* We don't need to generate entries in .fixup, except when + -mrelocatable or -mrelocatable-lib is given. */ +#undef RELOCATABLE_NEEDS_FIXUP +#define RELOCATABLE_NEEDS_FIXUP \ + (rs6000_isa_flags & rs6000_isa_flags_explicit & OPTION_MASK_RELOCATABLE) + +#ifdef TARGET_LIBC_PROVIDES_SSP +/* ppc32 glibc provides __stack_chk_guard in -0x7008(2). */ +#define TARGET_THREAD_SSP_OFFSET -0x7008 +#endif + +#define POWERPC_LINUX + +/* ppc linux has 128-bit long double support in glibc 2.4 and later. */ +#ifdef TARGET_DEFAULT_LONG_DOUBLE_128 +#define RS6000_DEFAULT_LONG_DOUBLE_SIZE 128 +#endif + +/* Static stack checking is supported by means of probes. */ +#define STACK_CHECK_STATIC_BUILTIN 1 + +/* Software floating point support for exceptions and rounding modes + depends on the C library in use. */ +#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P +#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \ + rs6000_linux_float_exceptions_rounding_supported_p + +/* Support for TARGET_ATOMIC_ASSIGN_EXPAND_FENV without FPRs depends + on glibc 2.19 or greater. 
*/ +#if TARGET_GLIBC_MAJOR > 2 \ + || (TARGET_GLIBC_MAJOR == 2 && TARGET_GLIBC_MINOR >= 19) +#define RS6000_GLIBC_ATOMIC_FENV 1 +#endif diff --git a/gcc/config/powerpcspe/linux64.h b/gcc/config/powerpcspe/linux64.h new file mode 100644 index 000000000000..71e35b709adf --- /dev/null +++ b/gcc/config/powerpcspe/linux64.h @@ -0,0 +1,642 @@ +/* Definitions of target machine for GNU compiler, + for 64 bit PowerPC linux. + Copyright (C) 2000-2017 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef RS6000_BI_ARCH + +#undef TARGET_64BIT +#define TARGET_64BIT 1 + +#define DEFAULT_ARCH64_P 1 +#define RS6000_BI_ARCH_P 0 + +#else + +#define DEFAULT_ARCH64_P (TARGET_DEFAULT & MASK_64BIT) +#define RS6000_BI_ARCH_P 1 + +#endif + +#ifdef IN_LIBGCC2 +#undef TARGET_64BIT +#ifdef __powerpc64__ +#define TARGET_64BIT 1 +#else +#define TARGET_64BIT 0 +#endif +#endif + +#undef TARGET_AIX +#define TARGET_AIX TARGET_64BIT + +#ifdef HAVE_LD_NO_DOT_SYMS +/* New ABI uses a local sym for the function entry point. */ +extern int dot_symbols; +#undef DOT_SYMBOLS +#define DOT_SYMBOLS dot_symbols +#endif + +#define TARGET_PROFILE_KERNEL profile_kernel + +#undef TARGET_KEEP_LEAF_WHEN_PROFILED +#define TARGET_KEEP_LEAF_WHEN_PROFILED rs6000_keep_leaf_when_profiled + +#define TARGET_USES_LINUX64_OPT 1 +#ifdef HAVE_LD_LARGE_TOC +#undef TARGET_CMODEL +#define TARGET_CMODEL rs6000_current_cmodel +#define SET_CMODEL(opt) rs6000_current_cmodel = opt +#else +#define SET_CMODEL(opt) do {} while (0) +#endif + +#undef PROCESSOR_DEFAULT +#define PROCESSOR_DEFAULT PROCESSOR_POWER7 +#undef PROCESSOR_DEFAULT64 +#define PROCESSOR_DEFAULT64 PROCESSOR_POWER8 + +/* We don't need to generate entries in .fixup, except when + -mrelocatable or -mrelocatable-lib is given. 
*/ +#undef RELOCATABLE_NEEDS_FIXUP +#define RELOCATABLE_NEEDS_FIXUP \ + (rs6000_isa_flags & rs6000_isa_flags_explicit & OPTION_MASK_RELOCATABLE) + +#undef RS6000_ABI_NAME +#define RS6000_ABI_NAME "linux" + +#define INVALID_64BIT "-m%s not supported in this configuration" +#define INVALID_32BIT INVALID_64BIT + +#ifdef LINUX64_DEFAULT_ABI_ELFv2 +#define ELFv2_ABI_CHECK (rs6000_elf_abi != 1) +#else +#define ELFv2_ABI_CHECK (rs6000_elf_abi == 2) +#endif + +#undef SUBSUBTARGET_OVERRIDE_OPTIONS +#define SUBSUBTARGET_OVERRIDE_OPTIONS \ + do \ + { \ + if (!global_options_set.x_rs6000_alignment_flags) \ + rs6000_alignment_flags = MASK_ALIGN_NATURAL; \ + if (rs6000_isa_flags & OPTION_MASK_64BIT) \ + { \ + if (DEFAULT_ABI != ABI_AIX) \ + { \ + rs6000_current_abi = ABI_AIX; \ + error (INVALID_64BIT, "call"); \ + } \ + dot_symbols = !strcmp (rs6000_abi_name, "aixdesc"); \ + if (ELFv2_ABI_CHECK) \ + { \ + rs6000_current_abi = ABI_ELFv2; \ + if (dot_symbols) \ + error ("-mcall-aixdesc incompatible with -mabi=elfv2"); \ + } \ + if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE) \ + { \ + rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE; \ + error (INVALID_64BIT, "relocatable"); \ + } \ + if (rs6000_isa_flags & OPTION_MASK_EABI) \ + { \ + rs6000_isa_flags &= ~OPTION_MASK_EABI; \ + error (INVALID_64BIT, "eabi"); \ + } \ + if (TARGET_PROTOTYPE) \ + { \ + target_prototype = 0; \ + error (INVALID_64BIT, "prototype"); \ + } \ + if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0) \ + { \ + rs6000_isa_flags |= OPTION_MASK_POWERPC64; \ + error ("-m64 requires a PowerPC64 cpu"); \ + } \ + if ((rs6000_isa_flags_explicit \ + & OPTION_MASK_MINIMAL_TOC) != 0) \ + { \ + if (global_options_set.x_rs6000_current_cmodel \ + && rs6000_current_cmodel != CMODEL_SMALL) \ + error ("-mcmodel incompatible with other toc options"); \ + SET_CMODEL (CMODEL_SMALL); \ + } \ + else \ + { \ + if (!global_options_set.x_rs6000_current_cmodel) \ + SET_CMODEL (CMODEL_MEDIUM); \ + if (rs6000_current_cmodel != CMODEL_SMALL) \ + { \ + if (!global_options_set.x_TARGET_NO_FP_IN_TOC) \ + TARGET_NO_FP_IN_TOC \ + = rs6000_current_cmodel == CMODEL_MEDIUM; \ + if (!global_options_set.x_TARGET_NO_SUM_IN_TOC) \ + TARGET_NO_SUM_IN_TOC = 0; \ + } \ + } \ + } \ + else \ + { \ + if (!RS6000_BI_ARCH_P) \ + error (INVALID_32BIT, "32"); \ + if (TARGET_PROFILE_KERNEL) \ + { \ + TARGET_PROFILE_KERNEL = 0; \ + error (INVALID_32BIT, "profile-kernel"); \ + } \ + if (global_options_set.x_rs6000_current_cmodel) \ + { \ + SET_CMODEL (CMODEL_SMALL); \ + error (INVALID_32BIT, "cmodel"); \ + } \ + } \ + } \ + while (0) + +#undef ASM_DEFAULT_SPEC +#undef ASM_SPEC +#undef LINK_OS_LINUX_SPEC +#undef LINK_SECURE_PLT_SPEC + +#ifndef RS6000_BI_ARCH +#define ASM_DEFAULT_SPEC "-mppc64" +#define ASM_SPEC "%(asm_spec64) %(asm_spec_common)" +#define LINK_OS_LINUX_SPEC "%(link_os_linux_spec64)" +#define LINK_SECURE_PLT_SPEC "" +#else +#if DEFAULT_ARCH64_P +#define ASM_DEFAULT_SPEC "-mppc%{!m32:64}" +#define ASM_SPEC "%{m32:%(asm_spec32)}%{!m32:%(asm_spec64)} %(asm_spec_common)" +#define LINK_OS_LINUX_SPEC "%{m32:%(link_os_linux_spec32)}%{!m32:%(link_os_linux_spec64)}" +#define LINK_SECURE_PLT_SPEC "%{m32: " LINK_SECURE_PLT_DEFAULT_SPEC "}" +#else +#define ASM_DEFAULT_SPEC "-mppc%{m64:64}" +#define ASM_SPEC "%{!m64:%(asm_spec32)}%{m64:%(asm_spec64)} %(asm_spec_common)" +#define LINK_OS_LINUX_SPEC "%{!m64:%(link_os_linux_spec32)}%{m64:%(link_os_linux_spec64)}" +#define LINK_SECURE_PLT_SPEC "%{!m64: " LINK_SECURE_PLT_DEFAULT_SPEC "}" +#endif +#endif + +#define ASM_SPEC32 "-a32 \ 
+%{mrelocatable} %{mrelocatable-lib} %{" FPIE_OR_FPIC_SPEC ":-K PIC} \ +%{memb|msdata=eabi: -memb}" + +#define ASM_SPEC64 "-a64" + +#define ASM_SPEC_COMMON "%(asm_cpu) \ +%{,assembler|,assembler-with-cpp: %{mregnames} %{mno-regnames}}" \ + ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN) + +#undef SUBSUBTARGET_EXTRA_SPECS +#define SUBSUBTARGET_EXTRA_SPECS \ + { "asm_spec_common", ASM_SPEC_COMMON }, \ + { "asm_spec32", ASM_SPEC32 }, \ + { "asm_spec64", ASM_SPEC64 }, \ + { "link_os_linux_spec32", LINK_OS_LINUX_SPEC32 }, \ + { "link_os_linux_spec64", LINK_OS_LINUX_SPEC64 }, \ + { "link_os_extra_spec32", LINK_OS_EXTRA_SPEC32 }, \ + { "link_os_extra_spec64", LINK_OS_EXTRA_SPEC64 }, \ + { "link_os_new_dtags", LINK_OS_NEW_DTAGS_SPEC }, \ + { "include_extra", INCLUDE_EXTRA_SPEC }, \ + { "dynamic_linker_prefix", DYNAMIC_LINKER_PREFIX }, + +/* Optional specs used for overriding the system include directory, default + -rpath links, and prefix for the dynamic linker. Normally, there are not + defined, but if the user configure with the --with-advance-toolchain=<xxx> + option, the advance-toolchain.h file will override these. */ +#ifndef INCLUDE_EXTRA_SPEC +#define INCLUDE_EXTRA_SPEC "" +#endif + +#ifndef LINK_OS_EXTRA_SPEC32 +#define LINK_OS_EXTRA_SPEC32 "" +#endif + +#ifndef LINK_OS_EXTRA_SPEC64 +#define LINK_OS_EXTRA_SPEC64 "" +#endif + +#ifndef LINK_OS_NEW_DTAGS_SPEC +#define LINK_OS_NEW_DTAGS_SPEC "" +#endif + +#ifndef DYNAMIC_LINKER_PREFIX +#define DYNAMIC_LINKER_PREFIX "" +#endif + +#undef MULTILIB_DEFAULTS +#if DEFAULT_ARCH64_P +#define MULTILIB_DEFAULTS { "m64" } +#else +#define MULTILIB_DEFAULTS { "m32" } +#endif + +/* Split stack is only supported for 64 bit, and requires glibc >= 2.18. */ +#if TARGET_GLIBC_MAJOR * 1000 + TARGET_GLIBC_MINOR >= 2018 +# ifndef RS6000_BI_ARCH +# define TARGET_CAN_SPLIT_STACK +# else +# if DEFAULT_ARCH64_P +/* Supported, and the default is -m64 */ +# define TARGET_CAN_SPLIT_STACK_64BIT 1 +# else +/* Supported, and the default is -m32 */ +# define TARGET_CAN_SPLIT_STACK_64BIT 0 +# endif +# endif +#endif + +#ifndef RS6000_BI_ARCH + +/* 64-bit PowerPC Linux always has a TOC. */ +#undef TARGET_TOC +#define TARGET_TOC 1 + +/* Some things from sysv4.h we don't do when 64 bit. */ +#undef OPTION_RELOCATABLE +#define OPTION_RELOCATABLE 0 +#undef OPTION_EABI +#define OPTION_EABI 0 +#undef OPTION_PROTOTYPE +#define OPTION_PROTOTYPE 0 +#undef RELOCATABLE_NEEDS_FIXUP +#define RELOCATABLE_NEEDS_FIXUP 0 + +#endif + +/* We use glibc _mcount for profiling. */ +#define NO_PROFILE_COUNTERS 1 +#define PROFILE_HOOK(LABEL) \ + do { if (TARGET_64BIT) output_profile_hook (LABEL); } while (0) + +/* PowerPC64 Linux word-aligns FP doubles when -malign-power is given. */ +#undef ADJUST_FIELD_ALIGN +#define ADJUST_FIELD_ALIGN(FIELD, TYPE, COMPUTED) \ + (rs6000_special_adjust_field_align_p ((TYPE), (COMPUTED)) \ + ? 128 \ + : (TARGET_64BIT \ + && TARGET_ALIGN_NATURAL == 0 \ + && TYPE_MODE (strip_array_types (TYPE)) == DFmode) \ + ? MIN ((COMPUTED), 32) \ + : (COMPUTED)) + +/* PowerPC64 Linux increases natural record alignment to doubleword if + the first field is an FP double, only if in power alignment mode. */ +#undef ROUND_TYPE_ALIGN +#define ROUND_TYPE_ALIGN(STRUCT, COMPUTED, SPECIFIED) \ + ((TARGET_64BIT \ + && (TREE_CODE (STRUCT) == RECORD_TYPE \ + || TREE_CODE (STRUCT) == UNION_TYPE \ + || TREE_CODE (STRUCT) == QUAL_UNION_TYPE) \ + && TARGET_ALIGN_NATURAL == 0) \ + ? 
rs6000_special_round_type_align (STRUCT, COMPUTED, SPECIFIED) \ + : MAX ((COMPUTED), (SPECIFIED))) + +/* Use the default for compiling target libs. */ +#ifdef IN_TARGET_LIBS +#undef TARGET_ALIGN_NATURAL +#define TARGET_ALIGN_NATURAL 1 +#endif + +/* Indicate that jump tables go in the text section. */ +#undef JUMP_TABLES_IN_TEXT_SECTION +#define JUMP_TABLES_IN_TEXT_SECTION TARGET_64BIT + +/* The linux ppc64 ABI isn't explicit on whether aggregates smaller + than a doubleword should be padded upward or downward. You could + reasonably assume that they follow the normal rules for structure + layout treating the parameter area as any other block of memory, + then map the reg param area to registers. i.e. pad upward. + Setting both of the following defines results in this behavior. + Setting just the first one will result in aggregates that fit in a + doubleword being padded downward, and others being padded upward. + Not a bad idea as this results in struct { int x; } being passed + the same way as an int. */ +#define AGGREGATE_PADDING_FIXED TARGET_64BIT +#define AGGREGATES_PAD_UPWARD_ALWAYS 0 + +/* Specify padding for the last element of a block move between + registers and memory. FIRST is nonzero if this is the only + element. */ +#define BLOCK_REG_PADDING(MODE, TYPE, FIRST) \ + (!(FIRST) ? upward : FUNCTION_ARG_PADDING (MODE, TYPE)) + +/* Linux doesn't support saving and restoring 64-bit regs in a 32-bit + process. */ +#define OS_MISSING_POWERPC64 !TARGET_64BIT + +#ifdef SINGLE_LIBC +#define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) +#define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) +#define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) +#undef OPTION_MUSL +#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL) +#else +#define OPTION_GLIBC (linux_libc == LIBC_GLIBC) +#define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) +#define OPTION_BIONIC (linux_libc == LIBC_BIONIC) +#undef OPTION_MUSL +#define OPTION_MUSL (linux_libc == LIBC_MUSL) +#endif + +/* Determine what functions are present at the runtime; + this includes full c99 runtime and sincos. 
*/ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION linux_libc_has_function + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + if (TARGET_64BIT) \ + { \ + builtin_define ("__PPC__"); \ + builtin_define ("__PPC64__"); \ + builtin_define ("__powerpc__"); \ + builtin_define ("__powerpc64__"); \ + if (!DOT_SYMBOLS) \ + builtin_define ("_CALL_LINUX"); \ + builtin_assert ("cpu=powerpc64"); \ + builtin_assert ("machine=powerpc64"); \ + } \ + else \ + { \ + builtin_define_std ("PPC"); \ + builtin_define_std ("powerpc"); \ + builtin_assert ("cpu=powerpc"); \ + builtin_assert ("machine=powerpc"); \ + TARGET_OS_SYSV_CPP_BUILTINS (); \ + } \ + } \ + while (0) + +#undef CPP_OS_DEFAULT_SPEC +#define CPP_OS_DEFAULT_SPEC "%(cpp_os_linux) %(include_extra)" + +#undef LINK_SHLIB_SPEC +#define LINK_SHLIB_SPEC "%{shared:-shared} %{!shared: %{static:-static}}" + +#undef LIB_DEFAULT_SPEC +#define LIB_DEFAULT_SPEC "%(lib_linux)" + +#undef STARTFILE_DEFAULT_SPEC +#define STARTFILE_DEFAULT_SPEC "%(startfile_linux)" + +#undef ENDFILE_DEFAULT_SPEC +#define ENDFILE_DEFAULT_SPEC "%(endfile_linux)" + +#undef LINK_START_DEFAULT_SPEC +#define LINK_START_DEFAULT_SPEC "%(link_start_linux)" + +#undef LINK_OS_DEFAULT_SPEC +#define LINK_OS_DEFAULT_SPEC "%(link_os_linux)" + +#define GLIBC_DYNAMIC_LINKER32 "%(dynamic_linker_prefix)/lib/ld.so.1" + +#ifdef LINUX64_DEFAULT_ABI_ELFv2 +#define GLIBC_DYNAMIC_LINKER64 \ +"%{mabi=elfv1:%(dynamic_linker_prefix)/lib64/ld64.so.1;" \ +":%(dynamic_linker_prefix)/lib64/ld64.so.2}" +#else +#define GLIBC_DYNAMIC_LINKER64 \ +"%{mabi=elfv2:%(dynamic_linker_prefix)/lib64/ld64.so.2;" \ +":%(dynamic_linker_prefix)/lib64/ld64.so.1}" +#endif + +#define MUSL_DYNAMIC_LINKER32 \ + "/lib/ld-musl-powerpc" MUSL_DYNAMIC_LINKER_E "%{msoft-float:-sf}.so.1" +#define MUSL_DYNAMIC_LINKER64 \ + "/lib/ld-musl-powerpc64" MUSL_DYNAMIC_LINKER_E "%{msoft-float:-sf}.so.1" + +#define UCLIBC_DYNAMIC_LINKER32 "/lib/ld-uClibc.so.0" +#define UCLIBC_DYNAMIC_LINKER64 "/lib/ld64-uClibc.so.0" +#if DEFAULT_LIBC == LIBC_UCLIBC +#define CHOOSE_DYNAMIC_LINKER(G, U, M) \ + "%{mglibc:" G ";:%{mmusl:" M ";:" U "}}" +#elif DEFAULT_LIBC == LIBC_GLIBC +#define CHOOSE_DYNAMIC_LINKER(G, U, M) \ + "%{muclibc:" U ";:%{mmusl:" M ";:" G "}}" +#elif DEFAULT_LIBC == LIBC_MUSL +#define CHOOSE_DYNAMIC_LINKER(G, U, M) \ + "%{mglibc:" G ";:%{muclibc:" U ";:" M "}}" +#else +#error "Unsupported DEFAULT_LIBC" +#endif +#define GNU_USER_DYNAMIC_LINKER32 \ + CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER32, UCLIBC_DYNAMIC_LINKER32, \ + MUSL_DYNAMIC_LINKER32) +#define GNU_USER_DYNAMIC_LINKER64 \ + CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER64, UCLIBC_DYNAMIC_LINKER64, \ + MUSL_DYNAMIC_LINKER64) + +#undef DEFAULT_ASM_ENDIAN +#if (TARGET_DEFAULT & MASK_LITTLE_ENDIAN) +#define DEFAULT_ASM_ENDIAN " -mlittle" +#define LINK_OS_LINUX_EMUL32 ENDIAN_SELECT(" -m elf32ppclinux", \ + " -m elf32lppclinux", \ + " -m elf32lppclinux") +#define LINK_OS_LINUX_EMUL64 ENDIAN_SELECT(" -m elf64ppc", \ + " -m elf64lppc", \ + " -m elf64lppc") +#else +#define DEFAULT_ASM_ENDIAN " -mbig" +#define LINK_OS_LINUX_EMUL32 ENDIAN_SELECT(" -m elf32ppclinux", \ + " -m elf32lppclinux", \ + " -m elf32ppclinux") +#define LINK_OS_LINUX_EMUL64 ENDIAN_SELECT(" -m elf64ppc", \ + " -m elf64lppc", \ + " -m elf64ppc") +#endif + +#define LINK_OS_LINUX_SPEC32 LINK_OS_LINUX_EMUL32 " %{!shared: %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " GNU_USER_DYNAMIC_LINKER32 "}} \ + %(link_os_extra_spec32)" + +#define LINK_OS_LINUX_SPEC64 
LINK_OS_LINUX_EMUL64 " %{!shared: %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " GNU_USER_DYNAMIC_LINKER64 "}} \ + %(link_os_extra_spec64)" + +#undef TOC_SECTION_ASM_OP +#define TOC_SECTION_ASM_OP \ + (TARGET_64BIT \ + ? "\t.section\t\".toc\",\"aw\"" \ + : "\t.section\t\".got\",\"aw\"") + +#undef MINIMAL_TOC_SECTION_ASM_OP +#define MINIMAL_TOC_SECTION_ASM_OP \ + (TARGET_64BIT \ + ? "\t.section\t\".toc1\",\"aw\"" \ + : (flag_pic \ + ? "\t.section\t\".got2\",\"aw\"" \ + : "\t.section\t\".got1\",\"aw\"")) + +/* Must be at least as big as our pointer type. */ +#undef SIZE_TYPE +#define SIZE_TYPE (TARGET_64BIT ? "long unsigned int" : "unsigned int") + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE (TARGET_64BIT ? "long int" : "int") + +#undef WCHAR_TYPE +#define WCHAR_TYPE (TARGET_64BIT ? "int" : "long int") +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +#undef RS6000_MCOUNT +#define RS6000_MCOUNT "_mcount" + +#ifdef __powerpc64__ +/* _init and _fini functions are built from bits spread across many + object files, each potentially with a different TOC pointer. For + that reason, place a nop after the call so that the linker can + restore the TOC pointer if a TOC adjusting call stub is needed. */ +#if DOT_SYMBOLS +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n" \ +" bl ." #FUNC "\n" \ +" nop\n" \ +" .previous"); +#else +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n" \ +" bl " #FUNC "\n" \ +" nop\n" \ +" .previous"); +#endif +#endif + +/* FP save and restore routines. */ +#undef SAVE_FP_PREFIX +#define SAVE_FP_PREFIX (TARGET_64BIT ? "._savef" : "_savefpr_") +#undef SAVE_FP_SUFFIX +#define SAVE_FP_SUFFIX "" +#undef RESTORE_FP_PREFIX +#define RESTORE_FP_PREFIX (TARGET_64BIT ? "._restf" : "_restfpr_") +#undef RESTORE_FP_SUFFIX +#define RESTORE_FP_SUFFIX "" + +/* Dwarf2 debugging. */ +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + +/* This is how to declare the size of a function. */ +#undef ASM_DECLARE_FUNCTION_SIZE +#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \ + do \ + { \ + if (!flag_inhibit_size_directive) \ + { \ + fputs ("\t.size\t", (FILE)); \ + if (TARGET_64BIT && DOT_SYMBOLS) \ + putc ('.', (FILE)); \ + assemble_name ((FILE), (FNAME)); \ + fputs (",.-", (FILE)); \ + rs6000_output_function_entry (FILE, FNAME); \ + putc ('\n', (FILE)); \ + } \ + } \ + while (0) + +/* Return nonzero if this entry is to be written into the constant + pool in a special way. We do so if this is a SYMBOL_REF, LABEL_REF + or a CONST containing one of them. If -mfp-in-toc (the default), + we also do this for floating-point constants. We actually can only + do this if the FP formats of the target and host machines are the + same, but we can't check that since not every file that uses + the macros includes real.h. We also do this when we can write the + entry into the TOC and the entry is not larger than a TOC entry. */ + +#undef ASM_OUTPUT_SPECIAL_POOL_ENTRY_P +#define ASM_OUTPUT_SPECIAL_POOL_ENTRY_P(X, MODE) \ + (TARGET_TOC \ + && (GET_CODE (X) == SYMBOL_REF \ + || (GET_CODE (X) == CONST && GET_CODE (XEXP (X, 0)) == PLUS \ + && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF) \ + || GET_CODE (X) == LABEL_REF \ + || (GET_CODE (X) == CONST_INT \ + && GET_MODE_BITSIZE (MODE) <= GET_MODE_BITSIZE (Pmode)) \ + || (GET_CODE (X) == CONST_DOUBLE \ + && ((TARGET_64BIT \ + && (TARGET_MINIMAL_TOC \ + || (SCALAR_FLOAT_MODE_P (GET_MODE (X)) \ + && ! 
TARGET_NO_FP_IN_TOC))) \ + || (!TARGET_64BIT \ + && !TARGET_NO_FP_IN_TOC \ + && SCALAR_FLOAT_MODE_P (GET_MODE (X)) \ + && BITS_PER_WORD == HOST_BITS_PER_INT))))) + +/* Select a format to encode pointers in exception handling data. CODE + is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is + true if the symbol may be affected by dynamic relocations. */ +#undef ASM_PREFERRED_EH_DATA_FORMAT +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \ + (TARGET_64BIT || flag_pic \ + ? (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel \ + | (TARGET_64BIT ? DW_EH_PE_udata8 : DW_EH_PE_sdata4)) \ + : DW_EH_PE_absptr) + +/* For backward compatibility, we must continue to use the AIX + structure return convention. */ +#undef DRAFT_V4_STRUCT_RET +#define DRAFT_V4_STRUCT_RET (!TARGET_64BIT) + +#ifdef TARGET_LIBC_PROVIDES_SSP +/* ppc32 glibc provides __stack_chk_guard in -0x7008(2), + ppc64 glibc provides it at -0x7010(13). */ +#define TARGET_THREAD_SSP_OFFSET (TARGET_64BIT ? -0x7010 : -0x7008) +#endif + +#define POWERPC_LINUX + +/* ppc{32,64} linux has 128-bit long double support in glibc 2.4 and later. */ +#ifdef TARGET_DEFAULT_LONG_DOUBLE_128 +#define RS6000_DEFAULT_LONG_DOUBLE_SIZE 128 +#endif + +/* Static stack checking is supported by means of probes. */ +#define STACK_CHECK_STATIC_BUILTIN 1 + +/* The default value isn't sufficient in 64-bit mode. */ +#define STACK_CHECK_PROTECT (TARGET_64BIT ? 16 * 1024 : 12 * 1024) + +/* Software floating point support for exceptions and rounding modes + depends on the C library in use. */ +#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P +#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \ + rs6000_linux_float_exceptions_rounding_supported_p + +/* Support for TARGET_ATOMIC_ASSIGN_EXPAND_FENV without FPRs depends + on glibc 2.19 or greater. */ +#if TARGET_GLIBC_MAJOR > 2 \ + || (TARGET_GLIBC_MAJOR == 2 && TARGET_GLIBC_MINOR >= 19) +#define RS6000_GLIBC_ATOMIC_FENV 1 +#endif + +/* The IEEE 128-bit emulator is only built on Linux systems. Flag that we + should enable the type handling for KFmode on VSX systems even if we are not + enabling the __float128 keyword. */ +#undef TARGET_FLOAT128_ENABLE_TYPE +#define TARGET_FLOAT128_ENABLE_TYPE 1 diff --git a/gcc/config/powerpcspe/linux64.opt b/gcc/config/powerpcspe/linux64.opt new file mode 100644 index 000000000000..b0ca976fe184 --- /dev/null +++ b/gcc/config/powerpcspe/linux64.opt @@ -0,0 +1,42 @@ +; Options for 64-bit PowerPC Linux. +; +; Copyright (C) 2005-2017 Free Software Foundation, Inc. +; Contributed by Aldy Hernandez <aldy@quesejoda.com>. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. + +mprofile-kernel +Target Report Var(profile_kernel) Save +Call mcount for profiling before a function prologue. + +mcmodel= +Target RejectNegative Joined Enum(rs6000_cmodel) Var(rs6000_current_cmodel) +Select code model. 
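+
+; When -mcmodel= is not given, 64-bit code normally defaults to the
+; medium model; see SUBSUBTARGET_OVERRIDE_OPTIONS in linux64.h.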
+ +Enum +Name(rs6000_cmodel) Type(enum rs6000_cmodel) +Known code models (for use with the -mcmodel= option): + +EnumValue +Enum(rs6000_cmodel) String(small) Value(CMODEL_SMALL) + +EnumValue +Enum(rs6000_cmodel) String(medium) Value(CMODEL_MEDIUM) + +EnumValue +Enum(rs6000_cmodel) String(large) Value(CMODEL_LARGE) + diff --git a/gcc/config/powerpcspe/linuxaltivec.h b/gcc/config/powerpcspe/linuxaltivec.h new file mode 100644 index 000000000000..fddb18d8e229 --- /dev/null +++ b/gcc/config/powerpcspe/linuxaltivec.h @@ -0,0 +1,32 @@ +/* Definitions of target machine for GNU compiler, + for AltiVec enhanced PowerPC machines running GNU/Linux. + Copyright (C) 2001-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez (aldyh@redhat.com). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* Override rs6000.h and sysv4.h definition. */ +#if (TARGET_DEFAULT & MASK_LITTLE_ENDIAN) +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_ALTIVEC | MASK_LITTLE_ENDIAN) +#else +#undef TARGET_DEFAULT +#define TARGET_DEFAULT MASK_ALTIVEC +#endif + +#undef SUBSUBTARGET_OVERRIDE_OPTIONS +#define SUBSUBTARGET_OVERRIDE_OPTIONS rs6000_altivec_abi = 1 diff --git a/gcc/config/powerpcspe/linuxspe.h b/gcc/config/powerpcspe/linuxspe.h new file mode 100644 index 000000000000..92efabfe664a --- /dev/null +++ b/gcc/config/powerpcspe/linuxspe.h @@ -0,0 +1,32 @@ +/* Definitions of target machine for GNU compiler, + for PowerPC e500 machines running GNU/Linux. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez (aldy@quesejoda.com). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* Override rs6000.h and sysv4.h definition. */ +#if (TARGET_DEFAULT & MASK_LITTLE_ENDIAN) +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_STRICT_ALIGN | MASK_LITTLE_ENDIAN) +#else +#undef TARGET_DEFAULT +#define TARGET_DEFAULT MASK_STRICT_ALIGN +#endif + +#undef ASM_DEFAULT_SPEC +#define ASM_DEFAULT_SPEC "-mppc -mspe -me500" diff --git a/gcc/config/powerpcspe/lynx.h b/gcc/config/powerpcspe/lynx.h new file mode 100644 index 000000000000..2f9356f969d2 --- /dev/null +++ b/gcc/config/powerpcspe/lynx.h @@ -0,0 +1,120 @@ +/* Definitions for Rs6000 running LynxOS. + Copyright (C) 1995-2017 Free Software Foundation, Inc. 
+ Contributed by David Henkel-Wallace, Cygnus Support (gumby@cygnus.com) + Rewritten by Adam Nemet, LynuxWorks Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* Undefine the definition to enable the LynxOS default from the + top-level lynx.h. */ + +#undef SUBTARGET_EXTRA_SPECS + +/* Get rid off the spec definitions from rs6000/sysv4.h. */ + +#undef CPP_SPEC +#define CPP_SPEC \ +"%{msoft-float: -D_SOFT_FLOAT} \ + %(cpp_cpu) \ + %(cpp_os_lynx)" + +/* LynxOS only supports big-endian on PPC so we override the + definition from sysv4.h. Since the LynxOS 4.0 compiler was set to + return every structure in memory regardless of their size we have + to emulate the same behavior here with disabling the SVR4 structure + returning. */ + +#undef CC1_SPEC +#define CC1_SPEC \ +"%{G*} %{mno-sdata:-msdata=none} \ + %{maltivec:-mabi=altivec} \ + -maix-struct-return" + +#undef ASM_SPEC +#define ASM_SPEC \ +"%(asm_cpu) \ + %{,assembler|,assembler-with-cpp: %{mregnames} %{mno-regnames}}" + +#undef STARTFILE_SPEC +#undef ENDFILE_SPEC +#undef LIB_SPEC +#undef LINK_SPEC +#define LINK_SPEC \ +"%{!msdata=none:%{G*}} %{msdata=none:-G0} \ + %(link_os_lynx)" + +/* Override the definition from sysv4.h. */ + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__BIG_ENDIAN__"); \ + builtin_define ("__powerpc__"); \ + builtin_assert ("cpu=powerpc"); \ + builtin_assert ("machine=powerpc"); \ + builtin_define ("__PPC__"); \ + } \ + while (0) + +/* Override the rs6000.h definition. */ + +#undef ASM_APP_ON +#define ASM_APP_ON "#APP\n" + +/* Override the rs6000.h definition. */ + +#undef ASM_APP_OFF +#define ASM_APP_OFF "#NO_APP\n" + +/* LynxOS does not do anything with .fixup plus let's not create + writable section for linkonce.r and linkonce.t. */ + +#undef RELOCATABLE_NEEDS_FIXUP + +/* Override these from rs6000.h with the generic definition. */ + +#undef SIZE_TYPE +#undef ASM_OUTPUT_ALIGN +#undef PREFERRED_DEBUGGING_TYPE + +/* The file rs6000.c defines TARGET_HAVE_TLS unconditionally to the + value of HAVE_AS_TLS. HAVE_AS_TLS is true as gas support for TLS + is detected by configure. Override the definition to false. */ + +#undef HAVE_AS_TLS +#define HAVE_AS_TLS 0 + +/* Use standard DWARF numbering for DWARF debugging information. */ +#define RS6000_USE_DWARF_NUMBERING + +#ifdef CRT_BEGIN +/* This function is part of crtbegin*.o which is at the beginning of + the link and is called from .fini which is usually toward the end + of the executable. Make it longcall so that we don't limit the + text size of the executables to 32M. */ + +static void __do_global_dtors_aux (void) __attribute__ ((longcall)); +#endif /* CRT_BEGIN */ + +#ifdef CRT_END +/* Similarly here. This function resides in crtend*.o which is toward + to end of the link and is called from .init which is at the + beginning. 
*/ + +static void __do_global_ctors_aux (void) __attribute__ ((longcall)); +#endif /* CRT_END */ diff --git a/gcc/config/powerpcspe/milli.exp b/gcc/config/powerpcspe/milli.exp new file mode 100644 index 000000000000..ea3a2b757fe3 --- /dev/null +++ b/gcc/config/powerpcspe/milli.exp @@ -0,0 +1,7 @@ +#! +__mulh 0x3100 +__mull 0x3180 +__divss 0x3200 +__divus 0x3280 +__quoss 0x3300 +__quous 0x3380 diff --git a/gcc/config/powerpcspe/mpc.md b/gcc/config/powerpcspe/mpc.md new file mode 100644 index 000000000000..d302b73a0ea8 --- /dev/null +++ b/gcc/config/powerpcspe/mpc.md @@ -0,0 +1,112 @@ +;; Scheduling description for Motorola PowerPC processor cores. +;; Copyright (C) 2003-2017 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "mpc,mpcfp") +(define_cpu_unit "iu_mpc,mciu_mpc" "mpc") +(define_cpu_unit "fpu_mpc" "mpcfp") +(define_cpu_unit "lsu_mpc,bpu_mpc" "mpc") + +;; MPCCORE 32-bit SCIU, MCIU, LSU, FPU, BPU +;; 505/801/821/823 + +(define_insn_reservation "mpccore-load" 2 + (and (eq_attr "type" "load,load_l,store_c,sync") + (eq_attr "cpu" "mpccore")) + "lsu_mpc") + +(define_insn_reservation "mpccore-store" 2 + (and (eq_attr "type" "store,fpstore") + (eq_attr "cpu" "mpccore")) + "lsu_mpc") + +(define_insn_reservation "mpccore-fpload" 2 + (and (eq_attr "type" "fpload") + (eq_attr "cpu" "mpccore")) + "lsu_mpc") + +(define_insn_reservation "mpccore-integer" 1 + (and (ior (eq_attr "type" "integer,insert,trap,cntlz,isel") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "no"))) + (eq_attr "cpu" "mpccore")) + "iu_mpc") + +(define_insn_reservation "mpccore-two" 1 + (and (eq_attr "type" "two") + (eq_attr "cpu" "mpccore")) + "iu_mpc,iu_mpc") + +(define_insn_reservation "mpccore-three" 1 + (and (eq_attr "type" "three") + (eq_attr "cpu" "mpccore")) + "iu_mpc,iu_mpc,iu_mpc") + +(define_insn_reservation "mpccore-imul" 2 + (and (eq_attr "type" "mul") + (eq_attr "cpu" "mpccore")) + "mciu_mpc") + +; Divide latency varies greatly from 2-11, use 6 as average +(define_insn_reservation "mpccore-idiv" 6 + (and (eq_attr "type" "div") + (eq_attr "cpu" "mpccore")) + "mciu_mpc*6") + +(define_insn_reservation "mpccore-compare" 3 + (and (ior (eq_attr "type" "cmp") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "yes"))) + (eq_attr "cpu" "mpccore")) + "iu_mpc,nothing,bpu_mpc") + +(define_insn_reservation "mpccore-fpcompare" 2 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "mpccore")) + "fpu_mpc,bpu_mpc") + +(define_insn_reservation "mpccore-fp" 4 + (and (eq_attr "type" "fp,fpsimple") + (eq_attr "cpu" "mpccore")) + "fpu_mpc*2") + +(define_insn_reservation "mpccore-dmul" 5 + (and (eq_attr "type" "dmul") + (eq_attr "cpu" "mpccore")) + "fpu_mpc*5") + +(define_insn_reservation "mpccore-sdiv" 10 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "mpccore")) + "fpu_mpc*10") + +(define_insn_reservation "mpccore-ddiv" 17 + (and 
(eq_attr "type" "ddiv") + (eq_attr "cpu" "mpccore")) + "fpu_mpc*17") + +(define_insn_reservation "mpccore-mtjmpr" 4 + (and (eq_attr "type" "mtjmpr,mfjmpr") + (eq_attr "cpu" "mpccore")) + "bpu_mpc") + +(define_insn_reservation "mpccore-jmpreg" 1 + (and (eq_attr "type" "jmpreg,branch,cr_logical,delayed_cr,mfcr,mtcr,isync") + (eq_attr "cpu" "mpccore")) + "bpu_mpc") + diff --git a/gcc/config/powerpcspe/netbsd.h b/gcc/config/powerpcspe/netbsd.h new file mode 100644 index 000000000000..3eb8baa5ffdf --- /dev/null +++ b/gcc/config/powerpcspe/netbsd.h @@ -0,0 +1,92 @@ +/* Definitions of target machine for GNU compiler, + for PowerPC NetBSD systems. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Wasabi Systems, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#undef TARGET_OS_CPP_BUILTINS /* FIXME: sysv4.h should not define this! */ +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + NETBSD_OS_CPP_BUILTINS_ELF(); \ + builtin_define ("__powerpc__"); \ + builtin_assert ("cpu=powerpc"); \ + builtin_assert ("machine=powerpc"); \ + } \ + while (0) + +/* Override the default from rs6000.h to avoid conflicts with macros + defined in NetBSD header files. */ + +#undef RS6000_CPU_CPP_ENDIAN_BUILTINS +#define RS6000_CPU_CPP_ENDIAN_BUILTINS() \ + do \ + { \ + if (BYTES_BIG_ENDIAN) \ + { \ + builtin_define ("__BIG_ENDIAN__"); \ + builtin_assert ("machine=bigendian"); \ + } \ + else \ + { \ + builtin_define ("__LITTLE_ENDIAN__"); \ + builtin_assert ("machine=littleendian"); \ + } \ + } \ + while (0) + +/* Make GCC agree with <machine/ansi.h>. */ + +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +/* Undo the spec mess from sysv4.h, and just define the specs + the way NetBSD systems actually expect. */ + +#undef CPP_SPEC +#define CPP_SPEC NETBSD_CPP_SPEC + +#undef LINK_SPEC +#define LINK_SPEC \ + "%{!msdata=none:%{G*}} %{msdata=none:-G0} \ + %(netbsd_link_spec)" + +#define NETBSD_ENTRY_POINT "_start" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC NETBSD_STARTFILE_SPEC + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "%(netbsd_endfile_spec)" + +#undef LIB_SPEC +#define LIB_SPEC NETBSD_LIB_SPEC + +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "netbsd_link_spec", NETBSD_LINK_SPEC_ELF }, \ + { "netbsd_entry_point", NETBSD_ENTRY_POINT }, \ + { "netbsd_endfile_spec", NETBSD_ENDFILE_SPEC }, + + +/* Use standard DWARF numbering for DWARF debugging information. */ +#define RS6000_USE_DWARF_NUMBERING + diff --git a/gcc/config/powerpcspe/option-defaults.h b/gcc/config/powerpcspe/option-defaults.h new file mode 100644 index 000000000000..09ddc36a16d0 --- /dev/null +++ b/gcc/config/powerpcspe/option-defaults.h @@ -0,0 +1,64 @@ +/* Definitions of default options for config/rs6000 configurations. + Copyright (C) 1992-2017 Free Software Foundation, Inc. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This header needs to be included after any other headers affecting + TARGET_DEFAULT. */ + +#if TARGET_AIX_OS +#define OPT_64 "maix64" +#define OPT_32 "maix32" +#else +#define OPT_64 "m64" +#define OPT_32 "m32" +#endif + +#ifndef OPTION_MASK_64BIT +#define OPTION_MASK_64BIT 0 +#define MASK_64BIT 0 +#endif + +#if TARGET_DEFAULT & OPTION_MASK_64BIT +#define OPT_ARCH64 "!" OPT_32 +#define OPT_ARCH32 OPT_32 +#else +#define OPT_ARCH64 OPT_64 +#define OPT_ARCH32 "!" OPT_64 +#endif + +/* Support for a compile-time default CPU, et cetera. The rules are: + --with-cpu is ignored if -mcpu is specified; likewise --with-cpu-32 + and --with-cpu-64. + --with-tune is ignored if -mtune or -mcpu is specified; likewise + --with-tune-32 and --with-tune-64. + --with-float is ignored if -mhard-float or -msoft-float are + specified. */ +#define OPTION_DEFAULT_SPECS \ + {"abi", "%{!mabi=elfv*:-mabi=%(VALUE)}" }, \ + {"tune", "%{!mtune=*:%{!mcpu=*:-mtune=%(VALUE)}}" }, \ + {"tune_32", "%{" OPT_ARCH32 ":%{!mtune=*:%{!mcpu=*:-mtune=%(VALUE)}}}" }, \ + {"tune_64", "%{" OPT_ARCH64 ":%{!mtune=*:%{!mcpu=*:-mtune=%(VALUE)}}}" }, \ + {"cpu", "%{!mcpu=*:-mcpu=%(VALUE)}" }, \ + {"cpu_32", "%{" OPT_ARCH32 ":%{!mcpu=*:-mcpu=%(VALUE)}}" }, \ + {"cpu_64", "%{" OPT_ARCH64 ":%{!mcpu=*:-mcpu=%(VALUE)}}" }, \ + {"float", "%{!msoft-float:%{!mhard-float:-m%(VALUE)-float}}" } diff --git a/gcc/config/powerpcspe/paired.h b/gcc/config/powerpcspe/paired.h new file mode 100644 index 000000000000..e51162bf8e20 --- /dev/null +++ b/gcc/config/powerpcspe/paired.h @@ -0,0 +1,75 @@ +/* PowerPC 750CL user include file. + Copyright (C) 2007-2017 Free Software Foundation, Inc. + Contributed by Revital Eres (eres@il.ibm.com). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _PAIRED_H +#define _PAIRED_H + +#define vector __attribute__((vector_size(8))) + +#define paired_msub __builtin_paired_msub +#define paired_madd __builtin_paired_madd +#define paired_nmsub __builtin_paired_nmsub +#define paired_nmadd __builtin_paired_nmadd +#define paired_sum0 __builtin_paired_sum0 +#define paired_sum1 __builtin_paired_sum1 +#define paired_div __builtin_paired_divv2sf3 +#define paired_add __builtin_paired_addv2sf3 +#define paired_sub __builtin_paired_subv2sf3 +#define paired_mul __builtin_paired_mulv2sf3 +#define paired_muls0 __builtin_paired_muls0 +#define paired_muls1 __builtin_paired_muls1 +#define paired_madds0 __builtin_paired_madds0 +#define paired_madds1 __builtin_paired_madds1 +#define paired_merge00 __builtin_paired_merge00 +#define paired_merge01 __builtin_paired_merge01 +#define paired_merge10 __builtin_paired_merge10 +#define paired_merge11 __builtin_paired_merge11 +#define paired_abs __builtin_paired_absv2sf2 +#define paired_nabs __builtin_paired_nabsv2sf2 +#define paired_neg __builtin_paired_negv2sf2 +#define paired_sqrt __builtin_paired_sqrtv2sf2 +#define paired_res __builtin_paired_resv2sf2 +#define paired_stx __builtin_paired_stx +#define paired_lx __builtin_paired_lx +#define paired_cmpu0 __builtin_paired_cmpu0 +#define paired_cmpu1 __builtin_paired_cmpu1 +#define paired_sel __builtin_paired_selv2sf4 + +/* Condition register codes for Paired predicates. */ +#define LT 0 +#define GT 1 +#define EQ 2 +#define UN 3 + +#define paired_cmpu0_un(a,b) __builtin_paired_cmpu0 (UN, (a), (b)) +#define paired_cmpu0_eq(a,b) __builtin_paired_cmpu0 (EQ, (a), (b)) +#define paired_cmpu0_lt(a,b) __builtin_paired_cmpu0 (LT, (a), (b)) +#define paired_cmpu0_gt(a,b) __builtin_paired_cmpu0 (GT, (a), (b)) +#define paired_cmpu1_un(a,b) __builtin_paired_cmpu1 (UN, (a), (b)) +#define paired_cmpu1_eq(a,b) __builtin_paired_cmpu1 (EQ, (a), (b)) +#define paired_cmpu1_lt(a,b) __builtin_paired_cmpu1 (LT, (a), (b)) +#define paired_cmpu1_gt(a,b) __builtin_paired_cmpu1 (GT, (a), (b)) + +#endif /* _PAIRED_H */ diff --git a/gcc/config/powerpcspe/paired.md b/gcc/config/powerpcspe/paired.md new file mode 100644 index 000000000000..09123eec8871 --- /dev/null +++ b/gcc/config/powerpcspe/paired.md @@ -0,0 +1,505 @@ +;; PowerPC paired single and double hummer description +;; Copyright (C) 2007-2017 Free Software Foundation, Inc. +;; Contributed by David Edelsohn <edelsohn@gnu.org> and Revital Eres +;; <eres@il.ibm.com> + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with this program; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. 
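A minimal usage sketch of the paired.h interface above, assuming a 750CL-class target with paired-single code generation enabled (e.g. -mpaired); the variable and function names are made up, and each macro expands directly to the GCC built-in it names:

#include <paired.h>

vector float a, b, sum, diff, mag;   /* "vector float" here is two 32-bit floats */

void
example (void)
{
  sum  = paired_add (a, b);    /* __builtin_paired_addv2sf3, emits ps_add */
  diff = paired_sub (a, b);    /* __builtin_paired_subv2sf3, emits ps_sub */
  mag  = paired_abs (diff);    /* __builtin_paired_absv2sf2, emits ps_abs */
}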
+ +(define_c_enum "unspec" + [UNSPEC_INTERHI_V2SF + UNSPEC_INTERLO_V2SF + UNSPEC_EXTEVEN_V2SF + UNSPEC_EXTODD_V2SF + ]) + +(define_insn "paired_negv2sf2" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (neg:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "f")))] + "TARGET_PAIRED_FLOAT" + "ps_neg %0,%1" + [(set_attr "type" "fp")]) + +(define_insn "sqrtv2sf2" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (sqrt:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "f")))] + "TARGET_PAIRED_FLOAT" + "ps_rsqrte %0,%1" + [(set_attr "type" "fp")]) + +(define_insn "paired_absv2sf2" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (abs:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "f")))] + "TARGET_PAIRED_FLOAT" + "ps_abs %0,%1" + [(set_attr "type" "fp")]) + +(define_insn "nabsv2sf2" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (neg:V2SF (abs:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "f"))))] + "TARGET_PAIRED_FLOAT" + "ps_nabs %0,%1" + [(set_attr "type" "fp")]) + +(define_insn "paired_addv2sf3" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (plus:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "%f") + (match_operand:V2SF 2 "gpc_reg_operand" "f")))] + "TARGET_PAIRED_FLOAT" + "ps_add %0,%1,%2" + [(set_attr "type" "fp")]) + +(define_insn "paired_subv2sf3" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (minus:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "f") + (match_operand:V2SF 2 "gpc_reg_operand" "f")))] + "TARGET_PAIRED_FLOAT" + "ps_sub %0,%1,%2" + [(set_attr "type" "fp")]) + +(define_insn "paired_mulv2sf3" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (mult:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "%f") + (match_operand:V2SF 2 "gpc_reg_operand" "f")))] + "TARGET_PAIRED_FLOAT" + "ps_mul %0,%1,%2" + [(set_attr "type" "fp")]) + +(define_insn "resv2sf2" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (unspec:V2SF [(match_operand:V2SF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))] + "TARGET_PAIRED_FLOAT && flag_finite_math_only" + "ps_res %0,%1" + [(set_attr "type" "fp")]) + +(define_insn "paired_divv2sf3" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (div:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "f") + (match_operand:V2SF 2 "gpc_reg_operand" "f")))] + "TARGET_PAIRED_FLOAT" + "ps_div %0,%1,%2" + [(set_attr "type" "sdiv")]) + +(define_insn "paired_madds0" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (vec_concat:V2SF + (fma:SF + (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f") + (parallel [(const_int 0)])) + (vec_select:SF (match_operand:V2SF 2 "gpc_reg_operand" "f") + (parallel [(const_int 0)])) + (vec_select:SF (match_operand:V2SF 3 "gpc_reg_operand" "f") + (parallel [(const_int 0)]))) + (fma:SF + (vec_select:SF (match_dup 1) + (parallel [(const_int 1)])) + (vec_select:SF (match_dup 2) + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 3) + (parallel [(const_int 1)])))))] + "TARGET_PAIRED_FLOAT" + "ps_madds0 %0,%1,%2,%3" + [(set_attr "type" "fp")]) + +(define_insn "paired_madds1" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (vec_concat:V2SF + (fma:SF + (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f") + (parallel [(const_int 0)])) + (vec_select:SF (match_operand:V2SF 2 "gpc_reg_operand" "f") + (parallel [(const_int 1)])) + (vec_select:SF (match_operand:V2SF 3 "gpc_reg_operand" "f") + (parallel [(const_int 0)]))) + (fma:SF + (vec_select:SF (match_dup 1) + (parallel [(const_int 1)])) + (vec_select:SF (match_dup 2) + (parallel [(const_int 1)])) + (vec_select:SF (match_dup 3) + (parallel 
[(const_int 1)])))))] + "TARGET_PAIRED_FLOAT" + "ps_madds1 %0,%1,%2,%3" + [(set_attr "type" "fp")]) + +(define_insn "*paired_madd" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (fma:V2SF + (match_operand:V2SF 1 "gpc_reg_operand" "f") + (match_operand:V2SF 2 "gpc_reg_operand" "f") + (match_operand:V2SF 3 "gpc_reg_operand" "f")))] + "TARGET_PAIRED_FLOAT" + "ps_madd %0,%1,%2,%3" + [(set_attr "type" "fp")]) + +(define_insn "*paired_msub" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (fma:V2SF + (match_operand:V2SF 1 "gpc_reg_operand" "f") + (match_operand:V2SF 2 "gpc_reg_operand" "f") + (neg:V2SF (match_operand:V2SF 3 "gpc_reg_operand" "f"))))] + "TARGET_PAIRED_FLOAT" + "ps_msub %0,%1,%2,%3" + [(set_attr "type" "fp")]) + +(define_insn "*paired_nmadd" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (neg:V2SF + (fma:V2SF + (match_operand:V2SF 1 "gpc_reg_operand" "f") + (match_operand:V2SF 2 "gpc_reg_operand" "f") + (match_operand:V2SF 3 "gpc_reg_operand" "f"))))] + "TARGET_PAIRED_FLOAT" + "ps_nmadd %0,%1,%2,%3" + [(set_attr "type" "fp")]) + +(define_insn "*paired_nmsub" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (neg:V2SF + (fma:V2SF + (match_operand:V2SF 1 "gpc_reg_operand" "f") + (match_operand:V2SF 2 "gpc_reg_operand" "f") + (neg:V2SF (match_operand:V2SF 3 "gpc_reg_operand" "f")))))] + "TARGET_PAIRED_FLOAT" + "ps_nmsub %0,%1,%2,%3" + [(set_attr "type" "dmul")]) + +(define_insn "selv2sf4" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (vec_concat:V2SF + (if_then_else:SF (ge (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f") + (parallel [(const_int 0)])) + (match_operand:SF 4 "zero_fp_constant" "F")) + (vec_select:SF (match_operand:V2SF 2 "gpc_reg_operand" "f") + (parallel [(const_int 0)])) + (vec_select:SF (match_operand:V2SF 3 "gpc_reg_operand" "f") + (parallel [(const_int 0)]))) + (if_then_else:SF (ge (vec_select:SF (match_dup 1) + (parallel [(const_int 1)])) + (match_dup 4)) + (vec_select:SF (match_dup 2) + (parallel [(const_int 1)])) + (vec_select:SF (match_dup 3) + (parallel [(const_int 1)])))))] + + "TARGET_PAIRED_FLOAT" + "ps_sel %0,%1,%2,%3" + [(set_attr "type" "fp")]) + +(define_insn "*movv2sf_paired" + [(set (match_operand:V2SF 0 "nonimmediate_operand" "=Z,f,f,Y,r,r,f") + (match_operand:V2SF 1 "input_operand" "f,Z,f,r,Y,r,W"))] + "TARGET_PAIRED_FLOAT + && (register_operand (operands[0], V2SFmode) + || register_operand (operands[1], V2SFmode))" +{ + switch (which_alternative) + { + case 0: return "psq_stx %1,%y0,0,0"; + case 1: return "psq_lx %0,%y1,0,0"; + case 2: return "ps_mr %0,%1"; + case 3: return "#"; + case 4: return "#"; + case 5: return "#"; + case 6: return "#"; + default: gcc_unreachable (); + } +} + [(set_attr "type" "fpstore,fpload,fp,*,*,*,*")]) + +(define_insn "paired_stx" + [(set (match_operand:V2SF 0 "memory_operand" "=Z") + (match_operand:V2SF 1 "gpc_reg_operand" "f"))] + "TARGET_PAIRED_FLOAT" + "psq_stx %1,%y0,0,0" + [(set_attr "type" "fpstore")]) + +(define_insn "paired_lx" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (match_operand:V2SF 1 "memory_operand" "Z"))] + "TARGET_PAIRED_FLOAT" + "psq_lx %0,%y1,0,0" + [(set_attr "type" "fpload")]) + + +(define_split + [(set (match_operand:V2SF 0 "nonimmediate_operand" "") + (match_operand:V2SF 1 "input_operand" ""))] + "TARGET_PAIRED_FLOAT && reload_completed + && gpr_or_gpr_p (operands[0], operands[1])" + [(pc)] + { + rs6000_split_multireg_move (operands[0], operands[1]); DONE; + }) + +(define_insn "paired_cmpu0" + [(set (match_operand:CCFP 0 
"cc_reg_operand" "=y") + (compare:CCFP (vec_select:SF + (match_operand:V2SF 1 "gpc_reg_operand" "f") + (parallel [(const_int 0)])) + (vec_select:SF + (match_operand:V2SF 2 "gpc_reg_operand" "f") + (parallel [(const_int 0)]))))] + "TARGET_PAIRED_FLOAT" + "ps_cmpu0 %0,%1,%2" + [(set_attr "type" "fpcompare")]) + +(define_insn "paired_cmpu1" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (compare:CCFP (vec_select:SF + (match_operand:V2SF 1 "gpc_reg_operand" "f") + (parallel [(const_int 1)])) + (vec_select:SF + (match_operand:V2SF 2 "gpc_reg_operand" "f") + (parallel [(const_int 1)]))))] + "TARGET_PAIRED_FLOAT" + "ps_cmpu1 %0,%1,%2" + [(set_attr "type" "fpcompare")]) + +(define_insn "paired_merge00" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (vec_select:V2SF + (vec_concat:V4SF + (match_operand:V2SF 1 "gpc_reg_operand" "f") + (match_operand:V2SF 2 "gpc_reg_operand" "f")) + (parallel [(const_int 0) (const_int 2)])))] + "TARGET_PAIRED_FLOAT" + "ps_merge00 %0, %1, %2" + [(set_attr "type" "fp")]) + +(define_insn "paired_merge01" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (vec_select:V2SF + (vec_concat:V4SF + (match_operand:V2SF 1 "gpc_reg_operand" "f") + (match_operand:V2SF 2 "gpc_reg_operand" "f")) + (parallel [(const_int 0) (const_int 3)])))] + "TARGET_PAIRED_FLOAT" + "ps_merge01 %0, %1, %2" + [(set_attr "type" "fp")]) + +(define_insn "paired_merge10" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (vec_select:V2SF + (vec_concat:V4SF + (match_operand:V2SF 1 "gpc_reg_operand" "f") + (match_operand:V2SF 2 "gpc_reg_operand" "f")) + (parallel [(const_int 1) (const_int 2)])))] + "TARGET_PAIRED_FLOAT" + "ps_merge10 %0, %1, %2" + [(set_attr "type" "fp")]) + +(define_insn "paired_merge11" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (vec_select:V2SF + (vec_concat:V4SF + (match_operand:V2SF 1 "gpc_reg_operand" "f") + (match_operand:V2SF 2 "gpc_reg_operand" "f")) + (parallel [(const_int 1) (const_int 3)])))] + "TARGET_PAIRED_FLOAT" + "ps_merge11 %0, %1, %2" + [(set_attr "type" "fp")]) + +(define_expand "vec_perm_constv2sf" + [(match_operand:V2SF 0 "gpc_reg_operand" "") + (match_operand:V2SF 1 "gpc_reg_operand" "") + (match_operand:V2SF 2 "gpc_reg_operand" "") + (match_operand:V2SI 3 "" "")] + "TARGET_PAIRED_FLOAT" +{ + if (rs6000_expand_vec_perm_const (operands)) + DONE; + else + FAIL; +}) + +(define_insn "paired_sum0" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (vec_concat:V2SF (plus:SF (vec_select:SF + (match_operand:V2SF 1 "gpc_reg_operand" "f") + (parallel [(const_int 0)])) + (vec_select:SF + (match_operand:V2SF 2 "gpc_reg_operand" "f") + (parallel [(const_int 1)]))) + (vec_select:SF + (match_operand:V2SF 3 "gpc_reg_operand" "f") + (parallel [(const_int 1)]))))] + "TARGET_PAIRED_FLOAT" + "ps_sum0 %0,%1,%2,%3" + [(set_attr "type" "fp")]) + +(define_insn "paired_sum1" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (vec_concat:V2SF (vec_select:SF + (match_operand:V2SF 2 "gpc_reg_operand" "f") + (parallel [(const_int 1)])) + (plus:SF (vec_select:SF + (match_operand:V2SF 1 "gpc_reg_operand" "f") + (parallel [(const_int 0)])) + (vec_select:SF + (match_operand:V2SF 3 "gpc_reg_operand" "f") + (parallel [(const_int 1)])))))] + "TARGET_PAIRED_FLOAT" + "ps_sum1 %0,%1,%2,%3" + [(set_attr "type" "fp")]) + +(define_insn "paired_muls0" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (mult:V2SF (match_operand:V2SF 2 "gpc_reg_operand" "f") + (vec_duplicate:V2SF + (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f") + (parallel 
[(const_int 0)])))))] + "TARGET_PAIRED_FLOAT" + "ps_muls0 %0, %1, %2" + [(set_attr "type" "fp")]) + + +(define_insn "paired_muls1" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (mult:V2SF (match_operand:V2SF 2 "gpc_reg_operand" "f") + (vec_duplicate:V2SF + (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f") + (parallel [(const_int 1)])))))] + "TARGET_PAIRED_FLOAT" + "ps_muls1 %0, %1, %2" + [(set_attr "type" "fp")]) + +(define_expand "vec_initv2sf" + [(match_operand:V2SF 0 "gpc_reg_operand" "=f") + (match_operand 1 "" "")] + "TARGET_PAIRED_FLOAT" +{ + paired_expand_vector_init (operands[0], operands[1]); + DONE; +}) + +(define_insn "*vconcatsf" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (vec_concat:V2SF + (match_operand:SF 1 "gpc_reg_operand" "f") + (match_operand:SF 2 "gpc_reg_operand" "f")))] + "TARGET_PAIRED_FLOAT" + "ps_merge00 %0, %1, %2" + [(set_attr "type" "fp")]) + +(define_expand "sminv2sf3" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (smin:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "f") + (match_operand:V2SF 2 "gpc_reg_operand" "f")))] + "TARGET_PAIRED_FLOAT" +{ + rtx tmp = gen_reg_rtx (V2SFmode); + + emit_insn (gen_subv2sf3 (tmp, operands[1], operands[2])); + emit_insn (gen_selv2sf4 (operands[0], tmp, operands[2], operands[1], CONST0_RTX (SFmode))); + DONE; +}) + +(define_expand "smaxv2sf3" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (smax:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "f") + (match_operand:V2SF 2 "gpc_reg_operand" "f")))] + "TARGET_PAIRED_FLOAT" +{ + rtx tmp = gen_reg_rtx (V2SFmode); + + emit_insn (gen_subv2sf3 (tmp, operands[1], operands[2])); + emit_insn (gen_selv2sf4 (operands[0], tmp, operands[1], operands[2], CONST0_RTX (SFmode))); + DONE; +}) + +(define_expand "reduc_smax_scal_v2sf" + [(match_operand:SF 0 "gpc_reg_operand" "=f") + (match_operand:V2SF 1 "gpc_reg_operand" "f")] + "TARGET_PAIRED_FLOAT" +{ + rtx tmp_swap = gen_reg_rtx (V2SFmode); + rtx tmp = gen_reg_rtx (V2SFmode); + rtx vec_res = gen_reg_rtx (V2SFmode); + rtx di_res = gen_reg_rtx (DImode); + + emit_insn (gen_paired_merge10 (tmp_swap, operands[1], operands[1])); + emit_insn (gen_subv2sf3 (tmp, operands[1], tmp_swap)); + emit_insn (gen_selv2sf4 (vec_res, tmp, operands[1], tmp_swap, + CONST0_RTX (SFmode))); + emit_move_insn (di_res, simplify_gen_subreg (DImode, vec_res, V2SFmode, 0)); + emit_move_insn (operands[0], simplify_gen_subreg (SFmode, di_res, DImode, + BYTES_BIG_ENDIAN ? 4 : 0)); + + DONE; +}) + +(define_expand "reduc_smin_scal_v2sf" + [(match_operand:SF 0 "gpc_reg_operand" "=f") + (match_operand:V2SF 1 "gpc_reg_operand" "f")] + "TARGET_PAIRED_FLOAT" +{ + rtx tmp_swap = gen_reg_rtx (V2SFmode); + rtx tmp = gen_reg_rtx (V2SFmode); + rtx vec_res = gen_reg_rtx (V2SFmode); + rtx di_res = gen_reg_rtx (DImode); + + emit_insn (gen_paired_merge10 (tmp_swap, operands[1], operands[1])); + emit_insn (gen_subv2sf3 (tmp, operands[1], tmp_swap)); + emit_insn (gen_selv2sf4 (vec_res, tmp, tmp_swap, operands[1], + CONST0_RTX (SFmode))); + emit_move_insn (di_res, simplify_gen_subreg (DImode, vec_res, V2SFmode, 0)); + emit_move_insn (operands[0], simplify_gen_subreg (SFmode, di_res, DImode, + BYTES_BIG_ENDIAN ? 
4 : 0)); + + DONE; +}) + +(define_expand "reduc_plus_scal_v2sf" + [(set (match_operand:SF 0 "gpc_reg_operand" "=f") + (match_operand:V2SF 1 "gpc_reg_operand" "f"))] + "TARGET_PAIRED_FLOAT" +{ + rtx vec_res = gen_reg_rtx (V2SFmode); + rtx di_res = gen_reg_rtx (DImode); + + emit_insn (gen_paired_sum1 (vec_res, operands[1], operands[1], operands[1])); + emit_move_insn (di_res, simplify_gen_subreg (DImode, vec_res, V2SFmode, 0)); + emit_move_insn (operands[0], simplify_gen_subreg (SFmode, di_res, DImode, + BYTES_BIG_ENDIAN ? 4 : 0)); + DONE; +}) + +(define_expand "movmisalignv2sf" + [(set (match_operand:V2SF 0 "nonimmediate_operand" "") + (match_operand:V2SF 1 "any_operand" ""))] + "TARGET_PAIRED_FLOAT" +{ + paired_expand_vector_move (operands); + DONE; +}) + +(define_expand "vcondv2sfv2sf" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") + (if_then_else:V2SF + (match_operator 3 "gpc_reg_operand" + [(match_operand:V2SF 4 "gpc_reg_operand" "f") + (match_operand:V2SF 5 "gpc_reg_operand" "f")]) + (match_operand:V2SF 1 "gpc_reg_operand" "f") + (match_operand:V2SF 2 "gpc_reg_operand" "f")))] + "TARGET_PAIRED_FLOAT && flag_unsafe_math_optimizations" +{ + if (paired_emit_vector_cond_expr (operands[0], operands[1], operands[2], + operands[3], operands[4], operands[5])) + DONE; + else + FAIL; +}) diff --git a/gcc/config/powerpcspe/power4.md b/gcc/config/powerpcspe/power4.md new file mode 100644 index 000000000000..962e1699376d --- /dev/null +++ b/gcc/config/powerpcspe/power4.md @@ -0,0 +1,451 @@ +;; Scheduling description for IBM Power4 and PowerPC 970 processors. +;; Copyright (C) 2003-2017 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; Sources: IBM Red Book and White Paper on POWER4 + +;; The POWER4 has 2 iu, 2 fpu, 2 lsu per engine (2 engines per chip). +;; Instructions that update more than one register get broken into two +;; (split) or more internal ops. The chip can issue up to 5 +;; internal ops per cycle. 
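;; For instance, a load with update (lwzu and friends) writes both its
;; target register and its base register, so it is cracked into a load op
;; plus an add op; that is why the *-update reservations below claim two
;; or more dispatch slots.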
+ +(define_automaton "power4iu,power4fpu,power4vec,power4misc") + +(define_cpu_unit "iu1_power4,iu2_power4" "power4iu") +(define_cpu_unit "lsu1_power4,lsu2_power4" "power4misc") +(define_cpu_unit "fpu1_power4,fpu2_power4" "power4fpu") +(define_cpu_unit "bpu_power4,cru_power4" "power4misc") +(define_cpu_unit "vec_power4,vecperm_power4" "power4vec") +(define_cpu_unit "du1_power4,du2_power4,du3_power4,du4_power4,du5_power4" + "power4misc") + +(define_reservation "lsq_power4" + "(du1_power4,lsu1_power4)\ + |(du2_power4,lsu2_power4)\ + |(du3_power4,lsu2_power4)\ + |(du4_power4,lsu1_power4)") + +(define_reservation "lsuq_power4" + "((du1_power4+du2_power4,lsu1_power4)\ + |(du2_power4+du3_power4,lsu2_power4)\ + |(du3_power4+du4_power4,lsu2_power4))\ + +(nothing,iu2_power4|nothing,iu1_power4)") + +(define_reservation "iq_power4" + "(du1_power4|du2_power4|du3_power4|du4_power4),\ + (iu1_power4|iu2_power4)") + +(define_reservation "fpq_power4" + "(du1_power4|du2_power4|du3_power4|du4_power4),\ + (fpu1_power4|fpu2_power4)") + +(define_reservation "vq_power4" + "(du1_power4,vec_power4)\ + |(du2_power4,vec_power4)\ + |(du3_power4,vec_power4)\ + |(du4_power4,vec_power4)") + +(define_reservation "vpq_power4" + "(du1_power4,vecperm_power4)\ + |(du2_power4,vecperm_power4)\ + |(du3_power4,vecperm_power4)\ + |(du4_power4,vecperm_power4)") + + +; Dispatch slots are allocated in order conforming to program order. +(absence_set "du1_power4" "du2_power4,du3_power4,du4_power4,du5_power4") +(absence_set "du2_power4" "du3_power4,du4_power4,du5_power4") +(absence_set "du3_power4" "du4_power4,du5_power4") +(absence_set "du4_power4" "du5_power4") + + +; Load/store +(define_insn_reservation "power4-load" 4 ; 3 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "no") + (eq_attr "update" "no") + (eq_attr "cpu" "power4")) + "lsq_power4") + +(define_insn_reservation "power4-load-ext" 5 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "yes") + (eq_attr "update" "no") + (eq_attr "cpu" "power4")) + "(du1_power4+du2_power4,lsu1_power4\ + |du2_power4+du3_power4,lsu2_power4\ + |du3_power4+du4_power4,lsu2_power4),\ + nothing,nothing,\ + (iu2_power4|iu1_power4)") + +(define_insn_reservation "power4-load-ext-update" 5 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "yes") + (eq_attr "update" "yes") + (eq_attr "indexed" "no") + (eq_attr "cpu" "power4")) + "du1_power4+du2_power4+du3_power4+du4_power4,\ + lsu1_power4+iu2_power4,nothing,nothing,iu2_power4") + +(define_insn_reservation "power4-load-ext-update-indexed" 5 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "yes") + (eq_attr "update" "yes") + (eq_attr "indexed" "yes") + (eq_attr "cpu" "power4")) + "du1_power4+du2_power4+du3_power4+du4_power4,\ + iu1_power4,lsu2_power4+iu1_power4,nothing,nothing,iu2_power4") + +(define_insn_reservation "power4-load-update-indexed" 3 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "no") + (eq_attr "update" "yes") + (eq_attr "indexed" "yes") + (eq_attr "cpu" "power4")) + "du1_power4+du2_power4+du3_power4+du4_power4,\ + iu1_power4,lsu2_power4+iu2_power4") + +(define_insn_reservation "power4-load-update" 4 ; 3 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "no") + (eq_attr "update" "yes") + (eq_attr "indexed" "no") + (eq_attr "cpu" "power4")) + "lsuq_power4") + +(define_insn_reservation "power4-fpload" 6 ; 5 + (and (eq_attr "type" "fpload") + (eq_attr "update" "no") + (eq_attr "cpu" "power4")) + "lsq_power4") + +(define_insn_reservation "power4-fpload-update" 6 ; 5 + (and (eq_attr "type" "fpload") + 
(eq_attr "update" "yes") + (eq_attr "cpu" "power4")) + "lsuq_power4") + +(define_insn_reservation "power4-vecload" 6 ; 5 + (and (eq_attr "type" "vecload") + (eq_attr "cpu" "power4")) + "lsq_power4") + +(define_insn_reservation "power4-store" 12 + (and (eq_attr "type" "store") + (eq_attr "update" "no") + (eq_attr "cpu" "power4")) + "((du1_power4,lsu1_power4)\ + |(du2_power4,lsu2_power4)\ + |(du3_power4,lsu2_power4)\ + |(du4_power4,lsu1_power4)),\ + (iu1_power4|iu2_power4)") + +(define_insn_reservation "power4-store-update" 12 + (and (eq_attr "type" "store") + (eq_attr "update" "yes") + (eq_attr "indexed" "no") + (eq_attr "cpu" "power4")) + "((du1_power4+du2_power4,lsu1_power4)\ + |(du2_power4+du3_power4,lsu2_power4)\ + |(du3_power4+du4_power4,lsu2_power4))+\ + ((nothing,iu1_power4,iu2_power4)\ + |(nothing,iu2_power4,iu2_power4)\ + |(nothing,iu2_power4,iu1_power4))") + +(define_insn_reservation "power4-store-update-indexed" 12 + (and (eq_attr "type" "store") + (eq_attr "update" "yes") + (eq_attr "indexed" "yes") + (eq_attr "cpu" "power4")) + "du1_power4+du2_power4+du3_power4+du4_power4,\ + iu1_power4,lsu2_power4+iu2_power4,iu2_power4") + +(define_insn_reservation "power4-fpstore" 12 + (and (eq_attr "type" "fpstore") + (eq_attr "update" "no") + (eq_attr "cpu" "power4")) + "((du1_power4,lsu1_power4)\ + |(du2_power4,lsu2_power4)\ + |(du3_power4,lsu2_power4)\ + |(du4_power4,lsu1_power4)),\ + (fpu1_power4|fpu2_power4)") + +(define_insn_reservation "power4-fpstore-update" 12 + (and (eq_attr "type" "fpstore") + (eq_attr "update" "yes") + (eq_attr "cpu" "power4")) + "((du1_power4+du2_power4,lsu1_power4)\ + |(du2_power4+du3_power4,lsu2_power4)\ + |(du3_power4+du4_power4,lsu2_power4))\ + +(nothing,(iu1_power4|iu2_power4),(fpu1_power4|fpu2_power4))") + +(define_insn_reservation "power4-vecstore" 12 + (and (eq_attr "type" "vecstore") + (eq_attr "cpu" "power4")) + "(du1_power4,lsu1_power4,vec_power4)\ + |(du2_power4,lsu2_power4,vec_power4)\ + |(du3_power4,lsu2_power4,vec_power4)\ + |(du4_power4,lsu1_power4,vec_power4)") + +(define_insn_reservation "power4-llsc" 11 + (and (eq_attr "type" "load_l,store_c,sync") + (eq_attr "cpu" "power4")) + "du1_power4+du2_power4+du3_power4+du4_power4,lsu1_power4") + + +; Integer latency is 2 cycles +(define_insn_reservation "power4-integer" 2 + (and (ior (eq_attr "type" "integer,trap,cntlz,isel") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "no")) + (and (eq_attr "type" "insert") + (eq_attr "size" "64"))) + (eq_attr "cpu" "power4")) + "iq_power4") + +(define_insn_reservation "power4-two" 2 + (and (eq_attr "type" "two") + (eq_attr "cpu" "power4")) + "((du1_power4+du2_power4)\ + |(du2_power4+du3_power4)\ + |(du3_power4+du4_power4)\ + |(du4_power4+du1_power4)),\ + ((iu1_power4,nothing,iu2_power4)\ + |(iu2_power4,nothing,iu2_power4)\ + |(iu2_power4,nothing,iu1_power4)\ + |(iu1_power4,nothing,iu1_power4))") + +(define_insn_reservation "power4-three" 2 + (and (eq_attr "type" "three") + (eq_attr "cpu" "power4")) + "(du1_power4+du2_power4+du3_power4|du2_power4+du3_power4+du4_power4\ + |du3_power4+du4_power4+du1_power4|du4_power4+du1_power4+du2_power4),\ + ((iu1_power4,nothing,iu2_power4,nothing,iu2_power4)\ + |(iu2_power4,nothing,iu2_power4,nothing,iu1_power4)\ + |(iu2_power4,nothing,iu1_power4,nothing,iu1_power4)\ + |(iu1_power4,nothing,iu1_power4,nothing,iu2_power4))") + +(define_insn_reservation "power4-insert" 4 + (and (eq_attr "type" "insert") + (eq_attr "size" "32") + (eq_attr "cpu" "power4")) + 
"(du1_power4+du2_power4|du2_power4+du3_power4|du3_power4+du4_power4),\ + ((iu1_power4,nothing,iu2_power4)\ + |(iu2_power4,nothing,iu2_power4)\ + |(iu2_power4,nothing,iu1_power4))") + +(define_insn_reservation "power4-cmp" 3 + (and (ior (eq_attr "type" "cmp") + (and (eq_attr "type" "add,logical") + (eq_attr "dot" "yes"))) + (eq_attr "cpu" "power4")) + "iq_power4") + +(define_insn_reservation "power4-compare" 2 + (and (eq_attr "type" "shift,exts") + (eq_attr "dot" "yes") + (eq_attr "cpu" "power4")) + "(du1_power4+du2_power4|du2_power4+du3_power4|du3_power4+du4_power4),\ + ((iu1_power4,iu2_power4)\ + |(iu2_power4,iu2_power4)\ + |(iu2_power4,iu1_power4))") + +(define_bypass 4 "power4-compare" "power4-branch,power4-crlogical,power4-delayedcr,power4-mfcr,power4-mfcrf") + +(define_insn_reservation "power4-lmul-cmp" 7 + (and (eq_attr "type" "mul") + (eq_attr "dot" "yes") + (eq_attr "size" "64") + (eq_attr "cpu" "power4")) + "(du1_power4+du2_power4|du2_power4+du3_power4|du3_power4+du4_power4),\ + ((iu1_power4*6,iu2_power4)\ + |(iu2_power4*6,iu2_power4)\ + |(iu2_power4*6,iu1_power4))") + +(define_bypass 10 "power4-lmul-cmp" "power4-branch,power4-crlogical,power4-delayedcr,power4-mfcr,power4-mfcrf") + +(define_insn_reservation "power4-imul-cmp" 5 + (and (eq_attr "type" "mul") + (eq_attr "dot" "yes") + (eq_attr "size" "32") + (eq_attr "cpu" "power4")) + "(du1_power4+du2_power4|du2_power4+du3_power4|du3_power4+du4_power4),\ + ((iu1_power4*4,iu2_power4)\ + |(iu2_power4*4,iu2_power4)\ + |(iu2_power4*4,iu1_power4))") + +(define_bypass 8 "power4-imul-cmp" "power4-branch,power4-crlogical,power4-delayedcr,power4-mfcr,power4-mfcrf") + +(define_insn_reservation "power4-lmul" 7 + (and (eq_attr "type" "mul") + (eq_attr "dot" "no") + (eq_attr "size" "64") + (eq_attr "cpu" "power4")) + "(du1_power4|du2_power4|du3_power4|du4_power4),\ + (iu1_power4*6|iu2_power4*6)") + +(define_insn_reservation "power4-imul" 5 + (and (eq_attr "type" "mul") + (eq_attr "dot" "no") + (eq_attr "size" "32") + (eq_attr "cpu" "power4")) + "(du1_power4|du2_power4|du3_power4|du4_power4),\ + (iu1_power4*4|iu2_power4*4)") + +(define_insn_reservation "power4-imul3" 4 + (and (eq_attr "type" "mul") + (eq_attr "size" "8,16") + (eq_attr "cpu" "power4")) + "(du1_power4|du2_power4|du3_power4|du4_power4),\ + (iu1_power4*3|iu2_power4*3)") + + +; SPR move only executes in first IU. +; Integer division only executes in second IU. +(define_insn_reservation "power4-idiv" 36 + (and (eq_attr "type" "div") + (eq_attr "size" "32") + (eq_attr "cpu" "power4")) + "du1_power4+du2_power4,iu2_power4*35") + +(define_insn_reservation "power4-ldiv" 68 + (and (eq_attr "type" "div") + (eq_attr "size" "64") + (eq_attr "cpu" "power4")) + "du1_power4+du2_power4,iu2_power4*67") + + +(define_insn_reservation "power4-mtjmpr" 3 + (and (eq_attr "type" "mtjmpr,mfjmpr") + (eq_attr "cpu" "power4")) + "du1_power4,bpu_power4") + + +; Branches take dispatch Slot 4. The presence_sets prevent other insn from +; grabbing previous dispatch slots once this is assigned. 
+(define_insn_reservation "power4-branch" 2 + (and (eq_attr "type" "jmpreg,branch") + (eq_attr "cpu" "power4")) + "(du5_power4\ + |du4_power4+du5_power4\ + |du3_power4+du4_power4+du5_power4\ + |du2_power4+du3_power4+du4_power4+du5_power4\ + |du1_power4+du2_power4+du3_power4+du4_power4+du5_power4),bpu_power4") + + +; Condition Register logical ops are split if non-destructive (RT != RB) +(define_insn_reservation "power4-crlogical" 2 + (and (eq_attr "type" "cr_logical") + (eq_attr "cpu" "power4")) + "du1_power4,cru_power4") + +(define_insn_reservation "power4-delayedcr" 4 + (and (eq_attr "type" "delayed_cr") + (eq_attr "cpu" "power4")) + "du1_power4+du2_power4,cru_power4,cru_power4") + +; 4 mfcrf (each 3 cyc, 1/cyc) + 3 fxu +(define_insn_reservation "power4-mfcr" 6 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "power4")) + "du1_power4+du2_power4+du3_power4+du4_power4,\ + du1_power4+du2_power4+du3_power4+du4_power4+cru_power4,\ + cru_power4,cru_power4,cru_power4") + +; mfcrf (1 field) +(define_insn_reservation "power4-mfcrf" 3 + (and (eq_attr "type" "mfcrf") + (eq_attr "cpu" "power4")) + "du1_power4,cru_power4") + +; mtcrf (1 field) +(define_insn_reservation "power4-mtcr" 4 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "power4")) + "du1_power4,iu1_power4") + +; Basic FP latency is 6 cycles +(define_insn_reservation "power4-fp" 6 + (and (eq_attr "type" "fp,fpsimple,dmul") + (eq_attr "cpu" "power4")) + "fpq_power4") + +(define_insn_reservation "power4-fpcompare" 5 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "power4")) + "fpq_power4") + +(define_insn_reservation "power4-sdiv" 33 + (and (eq_attr "type" "sdiv,ddiv") + (eq_attr "cpu" "power4")) + "(du1_power4|du2_power4|du3_power4|du4_power4),\ + (fpu1_power4*28|fpu2_power4*28)") + +(define_insn_reservation "power4-sqrt" 40 + (and (eq_attr "type" "ssqrt,dsqrt") + (eq_attr "cpu" "power4")) + "(du1_power4|du2_power4|du3_power4|du4_power4),\ + (fpu1_power4*35|fpu2_power4*35)") + +(define_insn_reservation "power4-isync" 2 + (and (eq_attr "type" "isync") + (eq_attr "cpu" "power4")) + "du1_power4+du2_power4+du3_power4+du4_power4,lsu1_power4") + + +; VMX +(define_insn_reservation "power4-vecsimple" 2 + (and (eq_attr "type" "vecsimple,veclogical,vecmove") + (eq_attr "cpu" "power4")) + "vq_power4") + +(define_insn_reservation "power4-veccomplex" 5 + (and (eq_attr "type" "veccomplex") + (eq_attr "cpu" "power4")) + "vq_power4") + +; vecfp compare +(define_insn_reservation "power4-veccmp" 8 + (and (eq_attr "type" "veccmp,veccmpfx") + (eq_attr "cpu" "power4")) + "vq_power4") + +(define_insn_reservation "power4-vecfloat" 8 + (and (eq_attr "type" "vecfloat") + (eq_attr "cpu" "power4")) + "vq_power4") + +(define_insn_reservation "power4-vecperm" 2 + (and (eq_attr "type" "vecperm") + (eq_attr "cpu" "power4")) + "vpq_power4") + +(define_bypass 4 "power4-vecload" "power4-vecperm") + +(define_bypass 3 "power4-vecsimple" "power4-vecperm") +(define_bypass 6 "power4-veccomplex" "power4-vecperm") +(define_bypass 3 "power4-vecperm" + "power4-vecsimple,power4-veccomplex,power4-vecfloat") +(define_bypass 9 "power4-vecfloat" "power4-vecperm") + +(define_bypass 5 "power4-vecsimple,power4-veccomplex" + "power4-branch,power4-crlogical,power4-delayedcr,power4-mfcr,power4-mfcrf") + +(define_bypass 4 "power4-vecsimple,power4-vecperm" "power4-vecstore") +(define_bypass 7 "power4-veccomplex" "power4-vecstore") +(define_bypass 10 "power4-vecfloat" "power4-vecstore") diff --git a/gcc/config/powerpcspe/power5.md b/gcc/config/powerpcspe/power5.md new file mode 100644 index 
000000000000..6c3ed7fe5814 --- /dev/null +++ b/gcc/config/powerpcspe/power5.md @@ -0,0 +1,351 @@ +;; Scheduling description for IBM POWER5 processor. +;; Copyright (C) 2003-2017 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; Sources: IBM Red Book and White Paper on POWER5 + +;; The POWER5 has 2 iu, 2 fpu, 2 lsu per engine (2 engines per chip). +;; Instructions that update more than one register get broken into two +;; (split) or more internal ops. The chip can issue up to 5 +;; internal ops per cycle. + +(define_automaton "power5iu,power5fpu,power5misc") + +(define_cpu_unit "iu1_power5,iu2_power5" "power5iu") +(define_cpu_unit "lsu1_power5,lsu2_power5" "power5misc") +(define_cpu_unit "fpu1_power5,fpu2_power5" "power5fpu") +(define_cpu_unit "bpu_power5,cru_power5" "power5misc") +(define_cpu_unit "du1_power5,du2_power5,du3_power5,du4_power5,du5_power5" + "power5misc") + +(define_reservation "lsq_power5" + "(du1_power5,lsu1_power5)\ + |(du2_power5,lsu2_power5)\ + |(du3_power5,lsu2_power5)\ + |(du4_power5,lsu1_power5)") + +(define_reservation "iq_power5" + "(du1_power5|du2_power5|du3_power5|du4_power5),\ + (iu1_power5|iu2_power5)") + +(define_reservation "fpq_power5" + "(du1_power5|du2_power5|du3_power5|du4_power5),\ + (fpu1_power5|fpu2_power5)") + +; Dispatch slots are allocated in order conforming to program order. 
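; (The absence_set declarations below encode this: du1_power5 may only be
; reserved while du2_power5-du5_power5 are still free, du2_power5 only
; while du3_power5-du5_power5 are free, and so on, so ops fill the slots
; in increasing order within a cycle.)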
+(absence_set "du1_power5" "du2_power5,du3_power5,du4_power5,du5_power5") +(absence_set "du2_power5" "du3_power5,du4_power5,du5_power5") +(absence_set "du3_power5" "du4_power5,du5_power5") +(absence_set "du4_power5" "du5_power5") + + +; Load/store +(define_insn_reservation "power5-load" 4 ; 3 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "no") + (eq_attr "update" "no") + (eq_attr "cpu" "power5")) + "lsq_power5") + +(define_insn_reservation "power5-load-ext" 5 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "yes") + (eq_attr "update" "no") + (eq_attr "cpu" "power5")) + "du1_power5+du2_power5,lsu1_power5,nothing,nothing,iu2_power5") + +(define_insn_reservation "power5-load-ext-update" 5 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "yes") + (eq_attr "update" "yes") + (eq_attr "indexed" "no") + (eq_attr "cpu" "power5")) + "du1_power5+du2_power5+du3_power5+du4_power5,\ + lsu1_power5+iu2_power5,nothing,nothing,iu2_power5") + +(define_insn_reservation "power5-load-ext-update-indexed" 5 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "yes") + (eq_attr "update" "yes") + (eq_attr "indexed" "yes") + (eq_attr "cpu" "power5")) + "du1_power5+du2_power5+du3_power5+du4_power5,\ + iu1_power5,lsu2_power5+iu1_power5,nothing,nothing,iu2_power5") + +(define_insn_reservation "power5-load-update-indexed" 3 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "no") + (eq_attr "update" "yes") + (eq_attr "indexed" "yes") + (eq_attr "cpu" "power5")) + "du1_power5+du2_power5+du3_power5+du4_power5,\ + iu1_power5,lsu2_power5+iu2_power5") + +(define_insn_reservation "power5-load-update" 4 ; 3 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "no") + (eq_attr "update" "yes") + (eq_attr "indexed" "no") + (eq_attr "cpu" "power5")) + "du1_power5+du2_power5,lsu1_power5+iu2_power5") + +(define_insn_reservation "power5-fpload" 6 ; 5 + (and (eq_attr "type" "fpload") + (eq_attr "update" "no") + (eq_attr "cpu" "power5")) + "lsq_power5") + +(define_insn_reservation "power5-fpload-update" 6 ; 5 + (and (eq_attr "type" "fpload") + (eq_attr "update" "yes") + (eq_attr "cpu" "power5")) + "du1_power5+du2_power5,lsu1_power5+iu2_power5") + +(define_insn_reservation "power5-store" 12 + (and (eq_attr "type" "store") + (eq_attr "update" "no") + (eq_attr "cpu" "power5")) + "((du1_power5,lsu1_power5)\ + |(du2_power5,lsu2_power5)\ + |(du3_power5,lsu2_power5)\ + |(du4_power5,lsu1_power5)),\ + (iu1_power5|iu2_power5)") + +(define_insn_reservation "power5-store-update" 12 + (and (eq_attr "type" "store") + (eq_attr "update" "yes") + (eq_attr "indexed" "no") + (eq_attr "cpu" "power5")) + "du1_power5+du2_power5,lsu1_power5+iu2_power5,iu1_power5") + +(define_insn_reservation "power5-store-update-indexed" 12 + (and (eq_attr "type" "store") + (eq_attr "update" "yes") + (eq_attr "indexed" "yes") + (eq_attr "cpu" "power5")) + "du1_power5+du2_power5+du3_power5+du4_power5,\ + iu1_power5,lsu2_power5+iu2_power5,iu2_power5") + +(define_insn_reservation "power5-fpstore" 12 + (and (eq_attr "type" "fpstore") + (eq_attr "update" "no") + (eq_attr "cpu" "power5")) + "((du1_power5,lsu1_power5)\ + |(du2_power5,lsu2_power5)\ + |(du3_power5,lsu2_power5)\ + |(du4_power5,lsu1_power5)),\ + (fpu1_power5|fpu2_power5)") + +(define_insn_reservation "power5-fpstore-update" 12 + (and (eq_attr "type" "fpstore") + (eq_attr "update" "yes") + (eq_attr "cpu" "power5")) + "du1_power5+du2_power5,lsu1_power5+iu2_power5,fpu1_power5") + +(define_insn_reservation "power5-llsc" 11 + (and (eq_attr "type" "load_l,store_c,sync") + (eq_attr "cpu" 
"power5")) + "du1_power5+du2_power5+du3_power5+du4_power5,\ + lsu1_power5") + + +; Integer latency is 2 cycles +(define_insn_reservation "power5-integer" 2 + (and (ior (eq_attr "type" "integer,trap,cntlz,isel,popcnt") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "no")) + (and (eq_attr "type" "insert") + (eq_attr "size" "64"))) + (eq_attr "cpu" "power5")) + "iq_power5") + +(define_insn_reservation "power5-two" 2 + (and (eq_attr "type" "two") + (eq_attr "cpu" "power5")) + "((du1_power5+du2_power5)\ + |(du2_power5+du3_power5)\ + |(du3_power5+du4_power5)\ + |(du4_power5+du1_power5)),\ + ((iu1_power5,nothing,iu2_power5)\ + |(iu2_power5,nothing,iu2_power5)\ + |(iu2_power5,nothing,iu1_power5)\ + |(iu1_power5,nothing,iu1_power5))") + +(define_insn_reservation "power5-three" 2 + (and (eq_attr "type" "three") + (eq_attr "cpu" "power5")) + "(du1_power5+du2_power5+du3_power5|du2_power5+du3_power5+du4_power5\ + |du3_power5+du4_power5+du1_power5|du4_power5+du1_power5+du2_power5),\ + ((iu1_power5,nothing,iu2_power5,nothing,iu2_power5)\ + |(iu2_power5,nothing,iu2_power5,nothing,iu1_power5)\ + |(iu2_power5,nothing,iu1_power5,nothing,iu1_power5)\ + |(iu1_power5,nothing,iu2_power5,nothing,iu2_power5))") + +(define_insn_reservation "power5-insert" 4 + (and (eq_attr "type" "insert") + (eq_attr "size" "32") + (eq_attr "cpu" "power5")) + "du1_power5+du2_power5,iu1_power5,nothing,iu2_power5") + +(define_insn_reservation "power5-cmp" 3 + (and (ior (eq_attr "type" "cmp") + (and (eq_attr "type" "add,logical") + (eq_attr "dot" "yes"))) + (eq_attr "cpu" "power5")) + "iq_power5") + +(define_insn_reservation "power5-compare" 2 + (and (eq_attr "type" "shift,exts") + (eq_attr "dot" "yes") + (eq_attr "cpu" "power5")) + "du1_power5+du2_power5,iu1_power5,iu2_power5") + +(define_bypass 4 "power5-compare" "power5-branch,power5-crlogical,power5-delayedcr,power5-mfcr,power5-mfcrf") + +(define_insn_reservation "power5-lmul-cmp" 7 + (and (eq_attr "type" "mul") + (eq_attr "dot" "yes") + (eq_attr "size" "64") + (eq_attr "cpu" "power5")) + "du1_power5+du2_power5,iu1_power5*6,iu2_power5") + +(define_bypass 10 "power5-lmul-cmp" "power5-branch,power5-crlogical,power5-delayedcr,power5-mfcr,power5-mfcrf") + +(define_insn_reservation "power5-imul-cmp" 5 + (and (eq_attr "type" "mul") + (eq_attr "dot" "yes") + (eq_attr "size" "32") + (eq_attr "cpu" "power5")) + "du1_power5+du2_power5,iu1_power5*4,iu2_power5") + +(define_bypass 8 "power5-imul-cmp" "power5-branch,power5-crlogical,power5-delayedcr,power5-mfcr,power5-mfcrf") + +(define_insn_reservation "power5-lmul" 7 + (and (eq_attr "type" "mul") + (eq_attr "dot" "no") + (eq_attr "size" "64") + (eq_attr "cpu" "power5")) + "(du1_power5|du2_power5|du3_power5|du4_power5),(iu1_power5*6|iu2_power5*6)") + +(define_insn_reservation "power5-imul" 5 + (and (eq_attr "type" "mul") + (eq_attr "dot" "no") + (eq_attr "size" "32") + (eq_attr "cpu" "power5")) + "(du1_power5|du2_power5|du3_power5|du4_power5),(iu1_power5*4|iu2_power5*4)") + +(define_insn_reservation "power5-imul3" 4 + (and (eq_attr "type" "mul") + (eq_attr "size" "8,16") + (eq_attr "cpu" "power5")) + "(du1_power5|du2_power5|du3_power5|du4_power5),(iu1_power5*3|iu2_power5*3)") + + +; SPR move only executes in first IU. +; Integer division only executes in second IU. 
+(define_insn_reservation "power5-idiv" 36 + (and (eq_attr "type" "div") + (eq_attr "size" "32") + (eq_attr "cpu" "power5")) + "du1_power5+du2_power5,iu2_power5*35") + +(define_insn_reservation "power5-ldiv" 68 + (and (eq_attr "type" "div") + (eq_attr "size" "64") + (eq_attr "cpu" "power5")) + "du1_power5+du2_power5,iu2_power5*67") + + +(define_insn_reservation "power5-mtjmpr" 3 + (and (eq_attr "type" "mtjmpr,mfjmpr") + (eq_attr "cpu" "power5")) + "du1_power5,bpu_power5") + + +; Branches take dispatch Slot 4. The presence_sets prevent other insn from +; grabbing previous dispatch slots once this is assigned. +(define_insn_reservation "power5-branch" 2 + (and (eq_attr "type" "jmpreg,branch") + (eq_attr "cpu" "power5")) + "(du5_power5\ + |du4_power5+du5_power5\ + |du3_power5+du4_power5+du5_power5\ + |du2_power5+du3_power5+du4_power5+du5_power5\ + |du1_power5+du2_power5+du3_power5+du4_power5+du5_power5),bpu_power5") + + +; Condition Register logical ops are split if non-destructive (RT != RB) +(define_insn_reservation "power5-crlogical" 2 + (and (eq_attr "type" "cr_logical") + (eq_attr "cpu" "power5")) + "du1_power5,cru_power5") + +(define_insn_reservation "power5-delayedcr" 4 + (and (eq_attr "type" "delayed_cr") + (eq_attr "cpu" "power5")) + "du1_power5+du2_power5,cru_power5,cru_power5") + +; 4 mfcrf (each 3 cyc, 1/cyc) + 3 fxu +(define_insn_reservation "power5-mfcr" 6 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "power5")) + "du1_power5+du2_power5+du3_power5+du4_power5,\ + du1_power5+du2_power5+du3_power5+du4_power5+cru_power5,\ + cru_power5,cru_power5,cru_power5") + +; mfcrf (1 field) +(define_insn_reservation "power5-mfcrf" 3 + (and (eq_attr "type" "mfcrf") + (eq_attr "cpu" "power5")) + "du1_power5,cru_power5") + +; mtcrf (1 field) +(define_insn_reservation "power5-mtcr" 4 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "power5")) + "du1_power5,iu1_power5") + +; Basic FP latency is 6 cycles +(define_insn_reservation "power5-fp" 6 + (and (eq_attr "type" "fp,fpsimple,dmul") + (eq_attr "cpu" "power5")) + "fpq_power5") + +(define_insn_reservation "power5-fpcompare" 5 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "power5")) + "fpq_power5") + +(define_insn_reservation "power5-sdiv" 33 + (and (eq_attr "type" "sdiv,ddiv") + (eq_attr "cpu" "power5")) + "(du1_power5|du2_power5|du3_power5|du4_power5),\ + (fpu1_power5*28|fpu2_power5*28)") + +(define_insn_reservation "power5-sqrt" 40 + (and (eq_attr "type" "ssqrt,dsqrt") + (eq_attr "cpu" "power5")) + "(du1_power5|du2_power5|du3_power5|du4_power5),\ + (fpu1_power5*35|fpu2_power5*35)") + +(define_insn_reservation "power5-isync" 2 + (and (eq_attr "type" "isync") + (eq_attr "cpu" "power5")) + "du1_power5+du2_power5+du3_power5+du4_power5,\ + lsu1_power5") + diff --git a/gcc/config/powerpcspe/power6.md b/gcc/config/powerpcspe/power6.md new file mode 100644 index 000000000000..0d81cdebda52 --- /dev/null +++ b/gcc/config/powerpcspe/power6.md @@ -0,0 +1,629 @@ +;; Scheduling description for IBM POWER6 processor. +;; Copyright (C) 2006-2017 Free Software Foundation, Inc. +;; Contributed by Peter Steinmetz (steinmtz@us.ibm.com) +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; Sources: + +;; The POWER6 has 2 iu, 2 fpu, 2 lsu, and 1 bu/cru unit per engine +;; (2 engines per chip). The chip can issue up to 5 internal ops +;; per cycle. + +(define_automaton "power6iu,power6lsu,power6fpu,power6bu") + +(define_cpu_unit "iu1_power6,iu2_power6" "power6iu") +(define_cpu_unit "lsu1_power6,lsu2_power6" "power6lsu") +(define_cpu_unit "bpu_power6" "power6bu") +(define_cpu_unit "fpu1_power6,fpu2_power6" "power6fpu") + +(define_reservation "LS2_power6" + "lsu1_power6+lsu2_power6") + +(define_reservation "FPU_power6" + "fpu1_power6|fpu2_power6") + +(define_reservation "BRU_power6" + "bpu_power6") + +(define_reservation "LSU_power6" + "lsu1_power6|lsu2_power6") + +(define_reservation "LSF_power6" + "(lsu1_power6+fpu1_power6)\ + |(lsu1_power6+fpu2_power6)\ + |(lsu2_power6+fpu1_power6)\ + |(lsu2_power6+fpu2_power6)") + +(define_reservation "LX2_power6" + "(iu1_power6+iu2_power6+lsu1_power6)\ + |(iu1_power6+iu2_power6+lsu2_power6)") + +(define_reservation "FX2_power6" + "iu1_power6+iu2_power6") + +(define_reservation "X2F_power6" + "(iu1_power6+iu2_power6+fpu1_power6)\ + |(iu1_power6+iu2_power6+fpu2_power6)") + +(define_reservation "BX2_power6" + "iu1_power6+iu2_power6+bpu_power6") + +(define_reservation "LSX_power6" + "(iu1_power6+lsu1_power6)\ + |(iu1_power6+lsu2_power6)\ + |(iu2_power6+lsu1_power6)\ + |(iu2_power6+lsu2_power6)") + +(define_reservation "FXU_power6" + "iu1_power6|iu2_power6") + +(define_reservation "XLF_power6" + "(iu1_power6+lsu1_power6+fpu1_power6)\ + |(iu1_power6+lsu1_power6+fpu2_power6)\ + |(iu1_power6+lsu2_power6+fpu1_power6)\ + |(iu1_power6+lsu2_power6+fpu2_power6)\ + |(iu2_power6+lsu1_power6+fpu1_power6)\ + |(iu2_power6+lsu1_power6+fpu2_power6)\ + |(iu2_power6+lsu2_power6+fpu1_power6)\ + |(iu2_power6+lsu2_power6+fpu2_power6)") + +(define_reservation "BRX_power6" + "(bpu_power6+iu1_power6)\ + |(bpu_power6+iu2_power6)") + +; Load/store + +; The default for a value written by a fixed point load +; that is read/written by a subsequent fixed point op. +(define_insn_reservation "power6-load" 2 ; fx + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "no") + (eq_attr "update" "no") + (eq_attr "cpu" "power6")) + "LSU_power6") + +; define the bypass for the case where the value written +; by a fixed point load is used as the source value on +; a store. +(define_bypass 1 "power6-load,\ + power6-load-update,\ + power6-load-update-indexed" + "power6-store,\ + power6-store-update,\ + power6-store-update-indexed,\ + power6-fpstore,\ + power6-fpstore-update" + "rs6000_store_data_bypass_p") + +(define_insn_reservation "power6-load-ext" 4 ; fx + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "yes") + (eq_attr "update" "no") + (eq_attr "cpu" "power6")) + "LSU_power6") + +; define the bypass for the case where the value written +; by a fixed point load ext is used as the source value on +; a store. 
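; (In these bypasses, the rs6000_store_data_bypass_p guard restricts the
; shortened latency to dependences carried through the value being stored,
; not through the store's address registers.)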
+(define_bypass 1 "power6-load-ext,\ + power6-load-ext-update,\ + power6-load-ext-update-indexed" + "power6-store,\ + power6-store-update,\ + power6-store-update-indexed,\ + power6-fpstore,\ + power6-fpstore-update" + "rs6000_store_data_bypass_p") + +(define_insn_reservation "power6-load-update" 2 ; fx + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "no") + (eq_attr "update" "yes") + (eq_attr "indexed" "no") + (eq_attr "cpu" "power6")) + "LSX_power6") + +(define_insn_reservation "power6-load-update-indexed" 2 ; fx + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "no") + (eq_attr "update" "yes") + (eq_attr "indexed" "yes") + (eq_attr "cpu" "power6")) + "LSX_power6") + +(define_insn_reservation "power6-load-ext-update" 4 ; fx + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "yes") + (eq_attr "update" "yes") + (eq_attr "indexed" "no") + (eq_attr "cpu" "power6")) + "LSX_power6") + +(define_insn_reservation "power6-load-ext-update-indexed" 4 ; fx + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "yes") + (eq_attr "update" "yes") + (eq_attr "indexed" "yes") + (eq_attr "cpu" "power6")) + "LSX_power6") + +(define_insn_reservation "power6-fpload" 1 + (and (eq_attr "type" "fpload") + (eq_attr "update" "no") + (eq_attr "cpu" "power6")) + "LSU_power6") + +(define_insn_reservation "power6-fpload-update" 1 + (and (eq_attr "type" "fpload") + (eq_attr "update" "yes") + (eq_attr "cpu" "power6")) + "LSX_power6") + +(define_insn_reservation "power6-store" 14 + (and (eq_attr "type" "store") + (eq_attr "update" "no") + (eq_attr "cpu" "power6")) + "LSU_power6") + +(define_insn_reservation "power6-store-update" 14 + (and (eq_attr "type" "store") + (eq_attr "update" "yes") + (eq_attr "indexed" "no") + (eq_attr "cpu" "power6")) + "LSX_power6") + +(define_insn_reservation "power6-store-update-indexed" 14 + (and (eq_attr "type" "store") + (eq_attr "update" "yes") + (eq_attr "indexed" "yes") + (eq_attr "cpu" "power6")) + "LX2_power6") + +(define_insn_reservation "power6-fpstore" 14 + (and (eq_attr "type" "fpstore") + (eq_attr "update" "no") + (eq_attr "cpu" "power6")) + "LSF_power6") + +(define_insn_reservation "power6-fpstore-update" 14 + (and (eq_attr "type" "fpstore") + (eq_attr "update" "yes") + (eq_attr "cpu" "power6")) + "XLF_power6") + +(define_insn_reservation "power6-larx" 3 + (and (eq_attr "type" "load_l") + (eq_attr "cpu" "power6")) + "LS2_power6") + +(define_insn_reservation "power6-stcx" 10 ; best case + (and (eq_attr "type" "store_c") + (eq_attr "cpu" "power6")) + "LSX_power6") + +(define_insn_reservation "power6-sync" 11 ; N/A + (and (eq_attr "type" "sync") + (eq_attr "cpu" "power6")) + "LSU_power6") + +(define_insn_reservation "power6-integer" 1 + (and (ior (eq_attr "type" "integer") + (and (eq_attr "type" "add,logical") + (eq_attr "dot" "no"))) + (eq_attr "cpu" "power6")) + "FXU_power6") + +(define_insn_reservation "power6-isel" 1 + (and (eq_attr "type" "isel") + (eq_attr "cpu" "power6")) + "FXU_power6") + +(define_insn_reservation "power6-exts" 1 + (and (eq_attr "type" "exts") + (eq_attr "dot" "no") + (eq_attr "cpu" "power6")) + "FXU_power6") + +(define_insn_reservation "power6-shift" 1 + (and (eq_attr "type" "shift") + (eq_attr "var_shift" "no") + (eq_attr "dot" "no") + (eq_attr "cpu" "power6")) + "FXU_power6") + +(define_insn_reservation "power6-popcnt" 1 + (and (eq_attr "type" "popcnt") + (eq_attr "cpu" "power6")) + "FXU_power6") + +(define_insn_reservation "power6-insert" 1 + (and (eq_attr "type" "insert") + (eq_attr "size" "32") + (eq_attr "cpu" "power6")) + 
"FX2_power6") + +(define_insn_reservation "power6-insert-dword" 1 + (and (eq_attr "type" "insert") + (eq_attr "size" "64") + (eq_attr "cpu" "power6")) + "FX2_power6") + +; define the bypass for the case where the value written +; by a fixed point op is used as the source value on a +; store. +(define_bypass 1 "power6-integer,\ + power6-exts,\ + power6-shift,\ + power6-insert,\ + power6-insert-dword" + "power6-store,\ + power6-store-update,\ + power6-store-update-indexed,\ + power6-fpstore,\ + power6-fpstore-update" + "rs6000_store_data_bypass_p") + +(define_insn_reservation "power6-cntlz" 2 + (and (eq_attr "type" "cntlz") + (eq_attr "cpu" "power6")) + "FXU_power6") + +(define_bypass 1 "power6-cntlz" + "power6-store,\ + power6-store-update,\ + power6-store-update-indexed,\ + power6-fpstore,\ + power6-fpstore-update" + "rs6000_store_data_bypass_p") + +(define_insn_reservation "power6-var-rotate" 4 + (and (eq_attr "type" "shift") + (eq_attr "var_shift" "yes") + (eq_attr "dot" "no") + (eq_attr "cpu" "power6")) + "FXU_power6") + +(define_insn_reservation "power6-trap" 1 ; N/A + (and (eq_attr "type" "trap") + (eq_attr "cpu" "power6")) + "BRX_power6") + +(define_insn_reservation "power6-two" 1 + (and (eq_attr "type" "two") + (eq_attr "cpu" "power6")) + "(iu1_power6,iu1_power6)\ + |(iu1_power6+iu2_power6,nothing)\ + |(iu1_power6,iu2_power6)\ + |(iu2_power6,iu1_power6)\ + |(iu2_power6,iu2_power6)") + +(define_insn_reservation "power6-three" 1 + (and (eq_attr "type" "three") + (eq_attr "cpu" "power6")) + "(iu1_power6,iu1_power6,iu1_power6)\ + |(iu1_power6,iu1_power6,iu2_power6)\ + |(iu1_power6,iu2_power6,iu1_power6)\ + |(iu1_power6,iu2_power6,iu2_power6)\ + |(iu2_power6,iu1_power6,iu1_power6)\ + |(iu2_power6,iu1_power6,iu2_power6)\ + |(iu2_power6,iu2_power6,iu1_power6)\ + |(iu2_power6,iu2_power6,iu2_power6)\ + |(iu1_power6+iu2_power6,iu1_power6)\ + |(iu1_power6+iu2_power6,iu2_power6)\ + |(iu1_power6,iu1_power6+iu2_power6)\ + |(iu2_power6,iu1_power6+iu2_power6)") + +(define_insn_reservation "power6-cmp" 1 + (and (eq_attr "type" "cmp") + (eq_attr "cpu" "power6")) + "FXU_power6") + +(define_insn_reservation "power6-compare" 1 + (and (eq_attr "type" "exts") + (eq_attr "dot" "yes") + (eq_attr "cpu" "power6")) + "FXU_power6") + +(define_insn_reservation "power6-fast-compare" 1 + (and (eq_attr "type" "add,logical") + (eq_attr "dot" "yes") + (eq_attr "cpu" "power6")) + "FXU_power6") + +; define the bypass for the case where the value written +; by a fixed point rec form op is used as the source value +; on a store. 
+(define_bypass 1 "power6-compare,\ + power6-fast-compare" + "power6-store,\ + power6-store-update,\ + power6-store-update-indexed,\ + power6-fpstore,\ + power6-fpstore-update" + "rs6000_store_data_bypass_p") + +(define_insn_reservation "power6-delayed-compare" 2 ; N/A + (and (eq_attr "type" "shift") + (eq_attr "var_shift" "no") + (eq_attr "dot" "yes") + (eq_attr "cpu" "power6")) + "FXU_power6") + +(define_insn_reservation "power6-var-delayed-compare" 4 + (and (eq_attr "type" "shift") + (eq_attr "var_shift" "yes") + (eq_attr "dot" "yes") + (eq_attr "cpu" "power6")) + "FXU_power6") + +(define_insn_reservation "power6-lmul-cmp" 16 + (and (eq_attr "type" "mul") + (eq_attr "dot" "yes") + (eq_attr "size" "64") + (eq_attr "cpu" "power6")) + "(iu1_power6*16+iu2_power6*16+fpu1_power6*16)\ + |(iu1_power6*16+iu2_power6*16+fpu2_power6*16)"); + +(define_insn_reservation "power6-imul-cmp" 16 + (and (eq_attr "type" "mul") + (eq_attr "dot" "yes") + (eq_attr "size" "32") + (eq_attr "cpu" "power6")) + "(iu1_power6*16+iu2_power6*16+fpu1_power6*16)\ + |(iu1_power6*16+iu2_power6*16+fpu2_power6*16)"); + +(define_insn_reservation "power6-lmul" 16 + (and (eq_attr "type" "mul") + (eq_attr "dot" "no") + (eq_attr "size" "64") + (eq_attr "cpu" "power6")) + "(iu1_power6*16+iu2_power6*16+fpu1_power6*16)\ + |(iu1_power6*16+iu2_power6*16+fpu2_power6*16)"); + +(define_insn_reservation "power6-imul" 16 + (and (eq_attr "type" "mul") + (eq_attr "dot" "no") + (eq_attr "size" "32") + (eq_attr "cpu" "power6")) + "(iu1_power6*16+iu2_power6*16+fpu1_power6*16)\ + |(iu1_power6*16+iu2_power6*16+fpu2_power6*16)"); + +(define_insn_reservation "power6-imul3" 16 + (and (eq_attr "type" "mul") + (eq_attr "size" "8,16") + (eq_attr "cpu" "power6")) + "(iu1_power6*16+iu2_power6*16+fpu1_power6*16)\ + |(iu1_power6*16+iu2_power6*16+fpu2_power6*16)"); + +(define_bypass 9 "power6-imul,\ + power6-lmul,\ + power6-imul-cmp,\ + power6-lmul-cmp,\ + power6-imul3" + "power6-store,\ + power6-store-update,\ + power6-store-update-indexed,\ + power6-fpstore,\ + power6-fpstore-update" + "rs6000_store_data_bypass_p") + +(define_insn_reservation "power6-idiv" 44 + (and (eq_attr "type" "div") + (eq_attr "size" "32") + (eq_attr "cpu" "power6")) + "(iu1_power6*44+iu2_power6*44+fpu1_power6*44)\ + |(iu1_power6*44+iu2_power6*44+fpu2_power6*44)"); + +; The latency for this bypass is yet to be defined +;(define_bypass ? "power6-idiv" +; "power6-store,\ +; power6-store-update,\ +; power6-store-update-indexed,\ +; power6-fpstore,\ +; power6-fpstore-update" +; "rs6000_store_data_bypass_p") + +(define_insn_reservation "power6-ldiv" 56 + (and (eq_attr "type" "div") + (eq_attr "size" "64") + (eq_attr "cpu" "power6")) + "(iu1_power6*56+iu2_power6*56+fpu1_power6*56)\ + |(iu1_power6*56+iu2_power6*56+fpu2_power6*56)"); + +; The latency for this bypass is yet to be defined +;(define_bypass ? 
"power6-ldiv" +; "power6-store,\ +; power6-store-update,\ +; power6-store-update-indexed,\ +; power6-fpstore,\ +; power6-fpstore-update" +; "rs6000_store_data_bypass_p") + +(define_insn_reservation "power6-mtjmpr" 2 + (and (eq_attr "type" "mtjmpr,mfjmpr") + (eq_attr "cpu" "power6")) + "BX2_power6") + +(define_bypass 5 "power6-mtjmpr" "power6-branch") + +(define_insn_reservation "power6-branch" 2 + (and (eq_attr "type" "jmpreg,branch") + (eq_attr "cpu" "power6")) + "BRU_power6") + +(define_bypass 5 "power6-branch" "power6-mtjmpr") + +(define_insn_reservation "power6-crlogical" 3 + (and (eq_attr "type" "cr_logical") + (eq_attr "cpu" "power6")) + "BRU_power6") + +(define_bypass 3 "power6-crlogical" "power6-branch") + +(define_insn_reservation "power6-delayedcr" 3 + (and (eq_attr "type" "delayed_cr") + (eq_attr "cpu" "power6")) + "BRU_power6") + +(define_insn_reservation "power6-mfcr" 6 ; N/A + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "power6")) + "BX2_power6") + +; mfcrf (1 field) +(define_insn_reservation "power6-mfcrf" 3 ; N/A + (and (eq_attr "type" "mfcrf") + (eq_attr "cpu" "power6")) + "BX2_power6") ; + +; mtcrf (1 field) +(define_insn_reservation "power6-mtcr" 4 ; N/A + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "power6")) + "BX2_power6") + +(define_bypass 9 "power6-mtcr" "power6-branch") + +(define_insn_reservation "power6-fp" 6 + (and (eq_attr "type" "fp,fpsimple,dmul,dfp") + (eq_attr "cpu" "power6")) + "FPU_power6") + +; Any fp instruction that updates a CR has a latency +; of 6 to a dependent branch +(define_bypass 6 "power6-fp" "power6-branch") + +(define_bypass 1 "power6-fp" + "power6-fpstore,power6-fpstore-update" + "rs6000_store_data_bypass_p") + +(define_insn_reservation "power6-fpcompare" 8 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "power6")) + "FPU_power6") + +(define_bypass 12 "power6-fpcompare" + "power6-branch,power6-crlogical") + +(define_insn_reservation "power6-sdiv" 26 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "power6")) + "FPU_power6") + +(define_insn_reservation "power6-ddiv" 32 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "power6")) + "FPU_power6") + +(define_insn_reservation "power6-sqrt" 30 + (and (eq_attr "type" "ssqrt") + (eq_attr "cpu" "power6")) + "FPU_power6") + +(define_insn_reservation "power6-dsqrt" 42 + (and (eq_attr "type" "dsqrt") + (eq_attr "cpu" "power6")) + "FPU_power6") + +(define_insn_reservation "power6-isync" 2 ; N/A + (and (eq_attr "type" "isync") + (eq_attr "cpu" "power6")) + "FXU_power6") + +(define_insn_reservation "power6-vecload" 1 + (and (eq_attr "type" "vecload") + (eq_attr "cpu" "power6")) + "LSU_power6") + +(define_insn_reservation "power6-vecstore" 1 + (and (eq_attr "type" "vecstore") + (eq_attr "cpu" "power6")) + "LSF_power6") + +(define_insn_reservation "power6-vecsimple" 3 + (and (eq_attr "type" "vecsimple,veclogical,vecmove") + (eq_attr "cpu" "power6")) + "FPU_power6") + +(define_bypass 6 "power6-vecsimple" "power6-veccomplex,\ + power6-vecperm") + +(define_bypass 5 "power6-vecsimple" "power6-vecfloat") + +(define_bypass 4 "power6-vecsimple" "power6-vecstore" ) + +(define_insn_reservation "power6-veccmp" 1 + (and (eq_attr "type" "veccmp,veccmpfx") + (eq_attr "cpu" "power6")) + "FPU_power6") + +(define_bypass 10 "power6-veccmp" "power6-branch") + +(define_insn_reservation "power6-vecfloat" 7 + (and (eq_attr "type" "vecfloat") + (eq_attr "cpu" "power6")) + "FPU_power6") + +(define_bypass 10 "power6-vecfloat" "power6-vecsimple") + +(define_bypass 11 "power6-vecfloat" "power6-veccomplex,\ + power6-vecperm") + 
+(define_bypass 9 "power6-vecfloat" "power6-vecstore" ) + +(define_insn_reservation "power6-veccomplex" 7 + (and (eq_attr "type" "vecsimple") + (eq_attr "cpu" "power6")) + "FPU_power6") + +(define_bypass 10 "power6-veccomplex" "power6-vecsimple,\ + power6-vecfloat" ) + +(define_bypass 9 "power6-veccomplex" "power6-vecperm" ) + +(define_bypass 8 "power6-veccomplex" "power6-vecstore" ) + +(define_insn_reservation "power6-vecperm" 4 + (and (eq_attr "type" "vecperm") + (eq_attr "cpu" "power6")) + "FPU_power6") + +(define_bypass 7 "power6-vecperm" "power6-vecsimple,\ + power6-vecfloat" ) + +(define_bypass 6 "power6-vecperm" "power6-veccomplex" ) + +(define_bypass 5 "power6-vecperm" "power6-vecstore" ) + +(define_insn_reservation "power6-mftgpr" 8 + (and (eq_attr "type" "mftgpr") + (eq_attr "cpu" "power6")) + "X2F_power6") + +(define_insn_reservation "power6-mffgpr" 14 + (and (eq_attr "type" "mffgpr") + (eq_attr "cpu" "power6")) + "LX2_power6") + +(define_bypass 4 "power6-mftgpr" "power6-imul,\ + power6-lmul,\ + power6-imul-cmp,\ + power6-lmul-cmp,\ + power6-imul3,\ + power6-idiv,\ + power6-ldiv" ) diff --git a/gcc/config/powerpcspe/power7.md b/gcc/config/powerpcspe/power7.md new file mode 100644 index 000000000000..723a7950bb35 --- /dev/null +++ b/gcc/config/powerpcspe/power7.md @@ -0,0 +1,366 @@ +;; Scheduling description for IBM POWER7 processor. +;; Copyright (C) 2009-2017 Free Software Foundation, Inc. +;; +;; Contributed by Pat Haugen (pthaugen@us.ibm.com). + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "power7iu,power7lsu,power7vsu,power7misc") + +(define_cpu_unit "iu1_power7,iu2_power7" "power7iu") +(define_cpu_unit "lsu1_power7,lsu2_power7" "power7lsu") +(define_cpu_unit "vsu1_power7,vsu2_power7" "power7vsu") +(define_cpu_unit "bpu_power7,cru_power7" "power7misc") +(define_cpu_unit "du1_power7,du2_power7,du3_power7,du4_power7,du5_power7" + "power7misc") + + +(define_reservation "DU_power7" + "du1_power7|du2_power7|du3_power7|du4_power7") + +(define_reservation "DU2F_power7" + "du1_power7+du2_power7") + +(define_reservation "DU4_power7" + "du1_power7+du2_power7+du3_power7+du4_power7") + +(define_reservation "FXU_power7" + "iu1_power7|iu2_power7") + +(define_reservation "VSU_power7" + "vsu1_power7|vsu2_power7") + +(define_reservation "LSU_power7" + "lsu1_power7|lsu2_power7") + + +; Dispatch slots are allocated in order conforming to program order. 
+(absence_set "du1_power7" "du2_power7,du3_power7,du4_power7,du5_power7") +(absence_set "du2_power7" "du3_power7,du4_power7,du5_power7") +(absence_set "du3_power7" "du4_power7,du5_power7") +(absence_set "du4_power7" "du5_power7") + + +; LS Unit +(define_insn_reservation "power7-load" 2 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "no") + (eq_attr "update" "no") + (eq_attr "cpu" "power7")) + "DU_power7,LSU_power7") + +(define_insn_reservation "power7-load-ext" 3 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "yes") + (eq_attr "update" "no") + (eq_attr "cpu" "power7")) + "DU2F_power7,LSU_power7,FXU_power7") + +(define_insn_reservation "power7-load-update" 2 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "no") + (eq_attr "update" "yes") + (eq_attr "indexed" "no") + (eq_attr "cpu" "power7")) + "DU2F_power7,LSU_power7+FXU_power7") + +(define_insn_reservation "power7-load-update-indexed" 3 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "no") + (eq_attr "update" "yes") + (eq_attr "indexed" "yes") + (eq_attr "cpu" "power7")) + "DU4_power7,FXU_power7,LSU_power7+FXU_power7") + +(define_insn_reservation "power7-load-ext-update" 4 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "yes") + (eq_attr "update" "yes") + (eq_attr "indexed" "no") + (eq_attr "cpu" "power7")) + "DU2F_power7,LSU_power7+FXU_power7,FXU_power7") + +(define_insn_reservation "power7-load-ext-update-indexed" 4 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "yes") + (eq_attr "update" "yes") + (eq_attr "indexed" "yes") + (eq_attr "cpu" "power7")) + "DU4_power7,FXU_power7,LSU_power7+FXU_power7,FXU_power7") + +(define_insn_reservation "power7-fpload" 3 + (and (eq_attr "type" "fpload") + (eq_attr "update" "no") + (eq_attr "cpu" "power7")) + "DU_power7,LSU_power7") + +(define_insn_reservation "power7-fpload-update" 3 + (and (eq_attr "type" "fpload") + (eq_attr "update" "yes") + (eq_attr "cpu" "power7")) + "DU2F_power7,LSU_power7+FXU_power7") + +(define_insn_reservation "power7-store" 6 ; store-forwarding latency + (and (eq_attr "type" "store") + (eq_attr "update" "no") + (eq_attr "cpu" "power7")) + "DU_power7,LSU_power7+FXU_power7") + +(define_insn_reservation "power7-store-update" 6 + (and (eq_attr "type" "store") + (eq_attr "update" "yes") + (eq_attr "indexed" "no") + (eq_attr "cpu" "power7")) + "DU2F_power7,LSU_power7+FXU_power7,FXU_power7") + +(define_insn_reservation "power7-store-update-indexed" 6 + (and (eq_attr "type" "store") + (eq_attr "update" "yes") + (eq_attr "indexed" "yes") + (eq_attr "cpu" "power7")) + "DU4_power7,LSU_power7+FXU_power7,FXU_power7") + +(define_insn_reservation "power7-fpstore" 6 + (and (eq_attr "type" "fpstore") + (eq_attr "update" "no") + (eq_attr "cpu" "power7")) + "DU_power7,LSU_power7+VSU_power7") + +(define_insn_reservation "power7-fpstore-update" 6 + (and (eq_attr "type" "fpstore") + (eq_attr "update" "yes") + (eq_attr "cpu" "power7")) + "DU_power7,LSU_power7+VSU_power7+FXU_power7") + +(define_insn_reservation "power7-larx" 3 + (and (eq_attr "type" "load_l") + (eq_attr "cpu" "power7")) + "DU4_power7,LSU_power7") + +(define_insn_reservation "power7-stcx" 10 + (and (eq_attr "type" "store_c") + (eq_attr "cpu" "power7")) + "DU4_power7,LSU_power7") + +(define_insn_reservation "power7-vecload" 3 + (and (eq_attr "type" "vecload") + (eq_attr "cpu" "power7")) + "DU_power7,LSU_power7") + +(define_insn_reservation "power7-vecstore" 6 + (and (eq_attr "type" "vecstore") + (eq_attr "cpu" "power7")) + "DU_power7,LSU_power7+vsu2_power7") + +(define_insn_reservation 
"power7-sync" 11 + (and (eq_attr "type" "sync") + (eq_attr "cpu" "power7")) + "DU4_power7,LSU_power7") + + +; FX Unit +(define_insn_reservation "power7-integer" 1 + (and (ior (eq_attr "type" "integer,insert,trap,isel,popcnt") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "no"))) + (eq_attr "cpu" "power7")) + "DU_power7,FXU_power7") + +(define_insn_reservation "power7-cntlz" 2 + (and (eq_attr "type" "cntlz") + (eq_attr "cpu" "power7")) + "DU_power7,FXU_power7") + +(define_insn_reservation "power7-two" 2 + (and (eq_attr "type" "two") + (eq_attr "cpu" "power7")) + "DU_power7+DU_power7,FXU_power7,FXU_power7") + +(define_insn_reservation "power7-three" 3 + (and (eq_attr "type" "three") + (eq_attr "cpu" "power7")) + "DU_power7+DU_power7+DU_power7,FXU_power7,FXU_power7,FXU_power7") + +(define_insn_reservation "power7-cmp" 1 + (and (ior (eq_attr "type" "cmp") + (and (eq_attr "type" "add,logical") + (eq_attr "dot" "yes"))) + (eq_attr "cpu" "power7")) + "DU_power7,FXU_power7") + +(define_insn_reservation "power7-compare" 2 + (and (eq_attr "type" "shift,exts") + (eq_attr "dot" "yes") + (eq_attr "cpu" "power7")) + "DU2F_power7,FXU_power7,FXU_power7") + +(define_bypass 3 "power7-cmp,power7-compare" "power7-crlogical,power7-delayedcr") + +(define_insn_reservation "power7-mul" 4 + (and (eq_attr "type" "mul") + (eq_attr "dot" "no") + (eq_attr "cpu" "power7")) + "DU_power7,FXU_power7") + +(define_insn_reservation "power7-mul-compare" 5 + (and (eq_attr "type" "mul") + (eq_attr "dot" "yes") + (eq_attr "cpu" "power7")) + "DU2F_power7,FXU_power7,nothing*3,FXU_power7") + +(define_insn_reservation "power7-idiv" 36 + (and (eq_attr "type" "div") + (eq_attr "size" "32") + (eq_attr "cpu" "power7")) + "DU2F_power7,iu1_power7*36|iu2_power7*36") + +(define_insn_reservation "power7-ldiv" 68 + (and (eq_attr "type" "div") + (eq_attr "size" "64") + (eq_attr "cpu" "power7")) + "DU2F_power7,iu1_power7*68|iu2_power7*68") + +(define_insn_reservation "power7-isync" 1 ; + (and (eq_attr "type" "isync") + (eq_attr "cpu" "power7")) + "DU4_power7,FXU_power7") + + +; CR Unit +(define_insn_reservation "power7-mtjmpr" 4 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "power7")) + "du1_power7,FXU_power7") + +(define_insn_reservation "power7-mfjmpr" 5 + (and (eq_attr "type" "mfjmpr") + (eq_attr "cpu" "power7")) + "du1_power7,cru_power7+FXU_power7") + +(define_insn_reservation "power7-crlogical" 3 + (and (eq_attr "type" "cr_logical") + (eq_attr "cpu" "power7")) + "du1_power7,cru_power7") + +(define_insn_reservation "power7-delayedcr" 3 + (and (eq_attr "type" "delayed_cr") + (eq_attr "cpu" "power7")) + "du1_power7,cru_power7") + +(define_insn_reservation "power7-mfcr" 6 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "power7")) + "du1_power7,cru_power7") + +(define_insn_reservation "power7-mfcrf" 3 + (and (eq_attr "type" "mfcrf") + (eq_attr "cpu" "power7")) + "du1_power7,cru_power7") + +(define_insn_reservation "power7-mtcr" 3 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "power7")) + "DU4_power7,cru_power7+FXU_power7") + + +; BR Unit +; Branches take dispatch Slot 4. The presence_sets prevent other insn from +; grabbing previous dispatch slots once this is assigned. 
+(define_insn_reservation "power7-branch" 3 + (and (eq_attr "type" "jmpreg,branch") + (eq_attr "cpu" "power7")) + "(du5_power7\ + |du4_power7+du5_power7\ + |du3_power7+du4_power7+du5_power7\ + |du2_power7+du3_power7+du4_power7+du5_power7\ + |du1_power7+du2_power7+du3_power7+du4_power7+du5_power7),bpu_power7") + + +; VS Unit (includes FP/VSX/VMX/DFP) +(define_insn_reservation "power7-fp" 6 + (and (eq_attr "type" "fp,fpsimple,dmul,dfp") + (eq_attr "cpu" "power7")) + "DU_power7,VSU_power7") + +(define_bypass 8 "power7-fp" "power7-branch") + +(define_insn_reservation "power7-fpcompare" 8 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "power7")) + "DU_power7,VSU_power7") + +(define_insn_reservation "power7-sdiv" 27 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "power7")) + "DU_power7,VSU_power7") + +(define_insn_reservation "power7-ddiv" 33 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "power7")) + "DU_power7,VSU_power7") + +(define_insn_reservation "power7-sqrt" 32 + (and (eq_attr "type" "ssqrt") + (eq_attr "cpu" "power7")) + "DU_power7,VSU_power7") + +(define_insn_reservation "power7-dsqrt" 44 + (and (eq_attr "type" "dsqrt") + (eq_attr "cpu" "power7")) + "DU_power7,VSU_power7") + +(define_insn_reservation "power7-vecsimple" 2 + (and (eq_attr "type" "vecsimple,veclogical,vecmove,veccmp,veccmpfx") + (eq_attr "cpu" "power7")) + "DU_power7,vsu1_power7") + +(define_insn_reservation "power7-vecfloat" 6 + (and (eq_attr "type" "vecfloat") + (eq_attr "cpu" "power7")) + "DU_power7,vsu1_power7") + +(define_bypass 7 "power7-vecfloat" "power7-vecsimple,power7-veccomplex,\ + power7-vecperm") + +(define_insn_reservation "power7-veccomplex" 7 + (and (eq_attr "type" "veccomplex") + (eq_attr "cpu" "power7")) + "DU_power7,vsu1_power7") + +(define_insn_reservation "power7-vecperm" 3 + (and (eq_attr "type" "vecperm") + (eq_attr "cpu" "power7")) + "DU_power7,vsu2_power7") + +(define_insn_reservation "power7-vecdouble" 6 + (and (eq_attr "type" "vecdouble") + (eq_attr "cpu" "power7")) + "DU_power7,VSU_power7") + +(define_bypass 7 "power7-vecdouble" "power7-vecsimple,power7-veccomplex,\ + power7-vecperm") + +(define_insn_reservation "power7-vecfdiv" 26 + (and (eq_attr "type" "vecfdiv") + (eq_attr "cpu" "power7")) + "DU_power7,VSU_power7") + +(define_insn_reservation "power7-vecdiv" 32 + (and (eq_attr "type" "vecdiv") + (eq_attr "cpu" "power7")) + "DU_power7,VSU_power7") + diff --git a/gcc/config/powerpcspe/power8.md b/gcc/config/powerpcspe/power8.md new file mode 100644 index 000000000000..21ebab45d746 --- /dev/null +++ b/gcc/config/powerpcspe/power8.md @@ -0,0 +1,396 @@ +;; Scheduling description for IBM POWER8 processor. +;; Copyright (C) 2013-2017 Free Software Foundation, Inc. +;; +;; Contributed by Pat Haugen (pthaugen@us.ibm.com). + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. 
+ +(define_automaton "power8fxu,power8lsu,power8vsu,power8misc") + +(define_cpu_unit "fxu0_power8,fxu1_power8" "power8fxu") +(define_cpu_unit "lu0_power8,lu1_power8" "power8lsu") +(define_cpu_unit "lsu0_power8,lsu1_power8" "power8lsu") +(define_cpu_unit "vsu0_power8,vsu1_power8" "power8vsu") +(define_cpu_unit "bpu_power8,cru_power8" "power8misc") +(define_cpu_unit "du0_power8,du1_power8,du2_power8,du3_power8,du4_power8,\ + du5_power8,du6_power8" "power8misc") + + +; Dispatch group reservations +(define_reservation "DU_any_power8" + "du0_power8|du1_power8|du2_power8|du3_power8|du4_power8|\ + du5_power8") + +; 2-way Cracked instructions go in slots 0-1 +; (can also have a second in slots 3-4 if insns are adjacent) +(define_reservation "DU_cracked_power8" + "du0_power8+du1_power8") + +; Insns that are first in group +(define_reservation "DU_first_power8" + "du0_power8") + +; Insns that are first and last in group +(define_reservation "DU_both_power8" + "du0_power8+du1_power8+du2_power8+du3_power8+du4_power8+\ + du5_power8+du6_power8") + +; Dispatch slots are allocated in order conforming to program order. +(absence_set "du0_power8" "du1_power8,du2_power8,du3_power8,du4_power8,\ + du5_power8,du6_power8") +(absence_set "du1_power8" "du2_power8,du3_power8,du4_power8,du5_power8,\ + du6_power8") +(absence_set "du2_power8" "du3_power8,du4_power8,du5_power8,du6_power8") +(absence_set "du3_power8" "du4_power8,du5_power8,du6_power8") +(absence_set "du4_power8" "du5_power8,du6_power8") +(absence_set "du5_power8" "du6_power8") + + +; Execution unit reservations +(define_reservation "FXU_power8" + "fxu0_power8|fxu1_power8") + +(define_reservation "LU_power8" + "lu0_power8|lu1_power8") + +(define_reservation "LSU_power8" + "lsu0_power8|lsu1_power8") + +(define_reservation "LU_or_LSU_power8" + "lu0_power8|lu1_power8|lsu0_power8|lsu1_power8") + +(define_reservation "VSU_power8" + "vsu0_power8|vsu1_power8") + + +; LS Unit +(define_insn_reservation "power8-load" 3 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "no") + (eq_attr "update" "no") + (eq_attr "cpu" "power8")) + "DU_any_power8,LU_or_LSU_power8") + +(define_insn_reservation "power8-load-update" 3 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "no") + (eq_attr "update" "yes") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LU_or_LSU_power8+FXU_power8") + +(define_insn_reservation "power8-load-ext" 3 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "yes") + (eq_attr "update" "no") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LU_or_LSU_power8,FXU_power8") + +(define_insn_reservation "power8-load-ext-update" 3 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "yes") + (eq_attr "update" "yes") + (eq_attr "cpu" "power8")) + "DU_both_power8,LU_or_LSU_power8+FXU_power8,FXU_power8") + +(define_insn_reservation "power8-fpload" 5 + (and (ior (eq_attr "type" "vecload") + (and (eq_attr "type" "fpload") + (eq_attr "update" "no"))) + (eq_attr "cpu" "power8")) + "DU_any_power8,LU_power8") + +(define_insn_reservation "power8-fpload-update" 5 + (and (eq_attr "type" "fpload") + (eq_attr "update" "yes") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LU_power8+FXU_power8") + +(define_insn_reservation "power8-store" 5 ; store-forwarding latency + (and (eq_attr "type" "store") + (not (and (eq_attr "update" "yes") + (eq_attr "indexed" "yes"))) + (eq_attr "cpu" "power8")) + "DU_any_power8,LSU_power8+LU_power8") + +(define_insn_reservation "power8-store-update-indexed" 5 + (and (eq_attr "type" "store") + (eq_attr "update" "yes") + (eq_attr 
"indexed" "yes") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LSU_power8+LU_power8") + +(define_insn_reservation "power8-fpstore" 5 + (and (eq_attr "type" "fpstore") + (eq_attr "update" "no") + (eq_attr "cpu" "power8")) + "DU_any_power8,LSU_power8+VSU_power8") + +(define_insn_reservation "power8-fpstore-update" 5 + (and (eq_attr "type" "fpstore") + (eq_attr "update" "yes") + (eq_attr "cpu" "power8")) + "DU_any_power8,LSU_power8+VSU_power8") + +(define_insn_reservation "power8-vecstore" 5 + (and (eq_attr "type" "vecstore") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LSU_power8+VSU_power8") + +(define_insn_reservation "power8-larx" 3 + (and (eq_attr "type" "load_l") + (eq_attr "cpu" "power8")) + "DU_both_power8,LU_or_LSU_power8") + +(define_insn_reservation "power8-stcx" 10 + (and (eq_attr "type" "store_c") + (eq_attr "cpu" "power8")) + "DU_both_power8,LSU_power8+LU_power8") + +(define_insn_reservation "power8-sync" 1 + (and (eq_attr "type" "sync,isync") + (eq_attr "cpu" "power8")) + "DU_both_power8,LSU_power8") + + +; FX Unit +(define_insn_reservation "power8-1cyc" 1 + (and (ior (eq_attr "type" "integer,insert,trap,isel") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "no"))) + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +; Extra cycle to LU/LSU +(define_bypass 2 "power8-1cyc" + "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\ + power8-vecstore,power8-larx,power8-stcx") +; "power8-load,power8-load-update,power8-load-ext,\ +; power8-load-ext-update,power8-fpload,power8-fpload-update,\ +; power8-store,power8-store-update,power8-store-update-indexed,\ +; power8-fpstore,power8-fpstore-update,power8-vecstore,\ +; power8-larx,power8-stcx") + +(define_insn_reservation "power8-2cyc" 2 + (and (eq_attr "type" "cntlz,popcnt") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +(define_insn_reservation "power8-two" 2 + (and (eq_attr "type" "two") + (eq_attr "cpu" "power8")) + "DU_any_power8+DU_any_power8,FXU_power8,FXU_power8") + +(define_insn_reservation "power8-three" 3 + (and (eq_attr "type" "three") + (eq_attr "cpu" "power8")) + "DU_any_power8+DU_any_power8+DU_any_power8,FXU_power8,FXU_power8,FXU_power8") + +; cmp - Normal compare insns +(define_insn_reservation "power8-cmp" 2 + (and (eq_attr "type" "cmp") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +; add/logical with dot : add./and./nor./etc +(define_insn_reservation "power8-fast-compare" 2 + (and (eq_attr "type" "add,logical") + (eq_attr "dot" "yes") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +; exts/shift with dot : rldicl./exts./rlwinm./slwi./rlwnm./slw./etc +(define_insn_reservation "power8-compare" 2 + (and (eq_attr "type" "shift,exts") + (eq_attr "dot" "yes") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,FXU_power8,FXU_power8") + +; Extra cycle to LU/LSU +(define_bypass 3 "power8-fast-compare,power8-compare" + "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\ + power8-vecstore,power8-larx,power8-stcx") + +; 5 cycle CR latency +(define_bypass 5 "power8-fast-compare,power8-compare" + "power8-crlogical,power8-mfcr,power8-mfcrf,power8-branch") + +(define_insn_reservation "power8-mul" 4 + (and (eq_attr "type" "mul") + (eq_attr "dot" "no") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +(define_insn_reservation "power8-mul-compare" 4 + (and (eq_attr "type" "mul") + (eq_attr "dot" "yes") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,FXU_power8") + +; Extra cycle to LU/LSU +(define_bypass 5 "power8-mul,power8-mul-compare" + 
"power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\ + power8-vecstore,power8-larx,power8-stcx") + +; 7 cycle CR latency +(define_bypass 7 "power8-mul,power8-mul-compare" + "power8-crlogical,power8-mfcr,power8-mfcrf,power8-branch") + +; FXU divides are not pipelined +(define_insn_reservation "power8-idiv" 37 + (and (eq_attr "type" "div") + (eq_attr "size" "32") + (eq_attr "cpu" "power8")) + "DU_any_power8,fxu0_power8*37|fxu1_power8*37") + +(define_insn_reservation "power8-ldiv" 68 + (and (eq_attr "type" "div") + (eq_attr "size" "64") + (eq_attr "cpu" "power8")) + "DU_any_power8,fxu0_power8*68|fxu1_power8*68") + +(define_insn_reservation "power8-mtjmpr" 5 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "power8")) + "DU_first_power8,FXU_power8") + +; Should differentiate between 1 cr field and > 1 since mtocrf is not microcode +(define_insn_reservation "power8-mtcr" 3 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "power8")) + "DU_both_power8,FXU_power8") + + +; CR Unit +(define_insn_reservation "power8-mfjmpr" 5 + (and (eq_attr "type" "mfjmpr") + (eq_attr "cpu" "power8")) + "DU_first_power8,cru_power8+FXU_power8") + +(define_insn_reservation "power8-crlogical" 3 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "power8")) + "DU_first_power8,cru_power8") + +(define_insn_reservation "power8-mfcr" 5 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "power8")) + "DU_both_power8,cru_power8") + +(define_insn_reservation "power8-mfcrf" 3 + (and (eq_attr "type" "mfcrf") + (eq_attr "cpu" "power8")) + "DU_first_power8,cru_power8") + + +; BR Unit +; Branches take dispatch slot 7, but reserve any remaining prior slots to +; prevent other insns from grabbing them once this is assigned. +(define_insn_reservation "power8-branch" 3 + (and (eq_attr "type" "jmpreg,branch") + (eq_attr "cpu" "power8")) + "(du6_power8\ + |du5_power8+du6_power8\ + |du4_power8+du5_power8+du6_power8\ + |du3_power8+du4_power8+du5_power8+du6_power8\ + |du2_power8+du3_power8+du4_power8+du5_power8+du6_power8\ + |du1_power8+du2_power8+du3_power8+du4_power8+du5_power8+du6_power8\ + |du0_power8+du1_power8+du2_power8+du3_power8+du4_power8+du5_power8+\ + du6_power8),bpu_power8") + +; Branch updating LR/CTR feeding mf[lr|ctr] +(define_bypass 4 "power8-branch" "power8-mfjmpr") + + +; VS Unit (includes FP/VSX/VMX/DFP/Crypto) +(define_insn_reservation "power8-fp" 6 + (and (eq_attr "type" "fp,fpsimple,dmul,dfp") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +; Additional 3 cycles for any CR result +(define_bypass 9 "power8-fp" "power8-crlogical,power8-mfcr*,power8-branch") + +(define_insn_reservation "power8-fpcompare" 8 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-sdiv" 27 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-ddiv" 33 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-sqrt" 32 + (and (eq_attr "type" "ssqrt") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-dsqrt" 44 + (and (eq_attr "type" "dsqrt") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-vecsimple" 2 + (and (eq_attr "type" "vecperm,vecsimple,veclogical,vecmove,veccmp, + veccmpfx") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-vecnormal" 6 + (and (eq_attr "type" 
"vecfloat,vecdouble") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_bypass 7 "power8-vecnormal" + "power8-vecsimple,power8-veccomplex,power8-fpstore*,\ + power8-vecstore") + +(define_insn_reservation "power8-veccomplex" 7 + (and (eq_attr "type" "veccomplex") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-vecfdiv" 25 + (and (eq_attr "type" "vecfdiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-vecdiv" 31 + (and (eq_attr "type" "vecdiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-mffgpr" 5 + (and (eq_attr "type" "mffgpr") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-mftgpr" 6 + (and (eq_attr "type" "mftgpr") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-crypto" 7 + (and (eq_attr "type" "crypto") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + diff --git a/gcc/config/powerpcspe/power9.md b/gcc/config/powerpcspe/power9.md new file mode 100644 index 000000000000..217864faaed6 --- /dev/null +++ b/gcc/config/powerpcspe/power9.md @@ -0,0 +1,489 @@ +;; Scheduling description for IBM POWER9 processor. +;; Copyright (C) 2016-2017 Free Software Foundation, Inc. +;; +;; Contributed by Pat Haugen (pthaugen@us.ibm.com). + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "power9dsp,power9lsu,power9vsu,power9misc") + +(define_cpu_unit "lsu0_power9,lsu1_power9,lsu2_power9,lsu3_power9" "power9lsu") +(define_cpu_unit "vsu0_power9,vsu1_power9,vsu2_power9,vsu3_power9" "power9vsu") +; Two vector permute units, part of vsu +(define_cpu_unit "prm0_power9,prm1_power9" "power9vsu") +; Two fixed point divide units, not pipelined +(define_cpu_unit "fx_div0_power9,fx_div1_power9" "power9misc") +(define_cpu_unit "bru_power9,cryptu_power9,dfu_power9" "power9misc") + +(define_cpu_unit "x0_power9,x1_power9,xa0_power9,xa1_power9, + x2_power9,x3_power9,xb0_power9,xb1_power9, + br0_power9,br1_power9" "power9dsp") + + +; Dispatch port reservations +; +; Power9 can dispatch a maximum of 6 iops per cycle with the following +; general restrictions (other restrictions also apply): +; 1) At most 2 iops per execution slice +; 2) At most 2 iops to the branch unit +; Note that insn position in a dispatch group of 6 insns does not infer which +; execution slice the insn is routed to. The units are used to infer the +; conflicts that exist (i.e. an 'even' requirement will preclude dispatch +; with 2 insns with 'superslice' requirement). + +; The xa0/xa1 units really represent the 3rd dispatch port for a superslice but +; are listed as separate units to allow those insns that preclude its use to +; still be scheduled two to a superslice while reserving the 3rd slot. The +; same applies for xb0/xb1. 
+(define_reservation "DU_xa_power9" "xa0_power9+xa1_power9") +(define_reservation "DU_xb_power9" "xb0_power9+xb1_power9") + +; Any execution slice dispatch +(define_reservation "DU_any_power9" + "x0_power9|x1_power9|DU_xa_power9|x2_power9|x3_power9| + DU_xb_power9") + +; Even slice, actually takes even/odd slots +(define_reservation "DU_even_power9" "x0_power9+x1_power9|x2_power9+x3_power9") + +; Slice plus 3rd slot +(define_reservation "DU_slice_3_power9" + "x0_power9+xa0_power9|x1_power9+xa1_power9| + x2_power9+xb0_power9|x3_power9+xb1_power9") + +; Superslice +(define_reservation "DU_super_power9" + "x0_power9+x1_power9|x2_power9+x3_power9") + +; 2-way cracked +(define_reservation "DU_C2_power9" "x0_power9+x1_power9| + x1_power9+DU_xa_power9| + x1_power9+x2_power9| + DU_xa_power9+x2_power9| + x2_power9+x3_power9| + x3_power9+DU_xb_power9") + +; 2-way cracked plus 3rd slot +(define_reservation "DU_C2_3_power9" "x0_power9+x1_power9+xa0_power9| + x1_power9+x2_power9+xa0_power9| + x1_power9+x2_power9+xb0_power9| + x2_power9+x3_power9+xb0_power9") + +; 3-way cracked (consumes whole decode/dispatch cycle) +(define_reservation "DU_C3_power9" + "x0_power9+x1_power9+xa0_power9+xa1_power9+x2_power9+ + x3_power9+xb0_power9+xb1_power9+br0_power9+br1_power9") + +; Branch ports +(define_reservation "DU_branch_power9" "br0_power9|br1_power9") + + +; Execution unit reservations +(define_reservation "LSU_power9" + "lsu0_power9|lsu1_power9|lsu2_power9|lsu3_power9") + +(define_reservation "LSU_pair_power9" + "lsu0_power9+lsu1_power9|lsu1_power9+lsu2_power9| + lsu2_power9+lsu3_power9|lsu3_power9+lsu0_power9") + +(define_reservation "VSU_power9" + "vsu0_power9|vsu1_power9|vsu2_power9|vsu3_power9") + +(define_reservation "VSU_super_power9" + "vsu0_power9+vsu1_power9|vsu2_power9+vsu3_power9") + +(define_reservation "VSU_PRM_power9" "prm0_power9|prm1_power9") + + +; LS Unit +(define_insn_reservation "power9-load" 4 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "no") + (eq_attr "update" "no") + (eq_attr "cpu" "power9")) + "DU_any_power9,LSU_power9") + +(define_insn_reservation "power9-load-update" 4 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "no") + (eq_attr "update" "yes") + (eq_attr "cpu" "power9")) + "DU_C2_power9,LSU_power9+VSU_power9") + +(define_insn_reservation "power9-load-ext" 6 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "yes") + (eq_attr "update" "no") + (eq_attr "cpu" "power9")) + "DU_C2_power9,LSU_power9") + +(define_insn_reservation "power9-load-ext-update" 6 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "yes") + (eq_attr "update" "yes") + (eq_attr "cpu" "power9")) + "DU_C3_power9,LSU_power9+VSU_power9") + +(define_insn_reservation "power9-fpload-double" 4 + (and (eq_attr "type" "fpload") + (eq_attr "update" "no") + (eq_attr "size" "64") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,LSU_power9") + +(define_insn_reservation "power9-fpload-update-double" 4 + (and (eq_attr "type" "fpload") + (eq_attr "update" "yes") + (eq_attr "size" "64") + (eq_attr "cpu" "power9")) + "DU_C2_3_power9,LSU_power9+VSU_power9") + +; SFmode loads are cracked and have additional 2 cycles over DFmode +(define_insn_reservation "power9-fpload-single" 6 + (and (eq_attr "type" "fpload") + (eq_attr "update" "no") + (eq_attr "size" "32") + (eq_attr "cpu" "power9")) + "DU_C2_3_power9,LSU_power9") + +(define_insn_reservation "power9-fpload-update-single" 6 + (and (eq_attr "type" "fpload") + (eq_attr "update" "yes") + (eq_attr "size" "32") + (eq_attr "cpu" "power9")) + 
"DU_C3_power9,LSU_power9+VSU_power9") + +(define_insn_reservation "power9-vecload" 5 + (and (eq_attr "type" "vecload") + (eq_attr "cpu" "power9")) + "DU_any_power9,LSU_pair_power9") + +; Store data can issue 2 cycles after AGEN issue, 3 cycles for vector store +(define_insn_reservation "power9-store" 0 + (and (eq_attr "type" "store") + (eq_attr "update" "no") + (eq_attr "indexed" "no") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,LSU_power9") + +(define_insn_reservation "power9-store-indexed" 0 + (and (eq_attr "type" "store") + (eq_attr "update" "no") + (eq_attr "indexed" "yes") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,LSU_power9") + +; Update forms have 2 cycle latency for updated addr reg +(define_insn_reservation "power9-store-update" 2 + (and (eq_attr "type" "store") + (eq_attr "update" "yes") + (eq_attr "indexed" "no") + (eq_attr "cpu" "power9")) + "DU_C2_3_power9,LSU_power9+VSU_power9") + +; Update forms have 2 cycle latency for updated addr reg +(define_insn_reservation "power9-store-update-indexed" 2 + (and (eq_attr "type" "store") + (eq_attr "update" "yes") + (eq_attr "indexed" "yes") + (eq_attr "cpu" "power9")) + "DU_C2_3_power9,LSU_power9+VSU_power9") + +(define_insn_reservation "power9-fpstore" 0 + (and (eq_attr "type" "fpstore") + (eq_attr "update" "no") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,LSU_power9") + +; Update forms have 2 cycle latency for updated addr reg +(define_insn_reservation "power9-fpstore-update" 2 + (and (eq_attr "type" "fpstore") + (eq_attr "update" "yes") + (eq_attr "cpu" "power9")) + "DU_C2_3_power9,LSU_power9+VSU_power9") + +(define_insn_reservation "power9-vecstore" 0 + (and (eq_attr "type" "vecstore") + (eq_attr "cpu" "power9")) + "DU_super_power9,LSU_pair_power9") + +(define_insn_reservation "power9-larx" 4 + (and (eq_attr "type" "load_l") + (eq_attr "cpu" "power9")) + "DU_any_power9,LSU_power9") + +(define_insn_reservation "power9-stcx" 2 + (and (eq_attr "type" "store_c") + (eq_attr "cpu" "power9")) + "DU_C2_3_power9,LSU_power9+VSU_power9") + +(define_insn_reservation "power9-sync" 4 + (and (eq_attr "type" "sync,isync") + (eq_attr "cpu" "power9")) + "DU_any_power9,LSU_power9") + + +; VSU Execution Unit + +; Fixed point ops + +; Most ALU insns are simple 2 cycle, including record form +(define_insn_reservation "power9-alu" 2 + (and (ior (eq_attr "type" "add,exts,integer,logical,isel") + (and (eq_attr "type" "insert,shift") + (eq_attr "dot" "no"))) + (eq_attr "cpu" "power9")) + "DU_any_power9,VSU_power9") +; 5 cycle CR latency +(define_bypass 5 "power9-alu" + "power9-crlogical,power9-mfcr,power9-mfcrf") + +; Record form rotate/shift are cracked +(define_insn_reservation "power9-cracked-alu" 2 + (and (eq_attr "type" "insert,shift") + (eq_attr "dot" "yes") + (eq_attr "cpu" "power9")) + "DU_C2_power9,VSU_power9") +; 7 cycle CR latency +(define_bypass 7 "power9-cracked-alu" + "power9-crlogical,power9-mfcr,power9-mfcrf") + +(define_insn_reservation "power9-alu2" 3 + (and (eq_attr "type" "cntlz,popcnt,trap") + (eq_attr "cpu" "power9")) + "DU_any_power9,VSU_power9") +; 6 cycle CR latency +(define_bypass 6 "power9-alu2" + "power9-crlogical,power9-mfcr,power9-mfcrf") + +(define_insn_reservation "power9-cmp" 2 + (and (eq_attr "type" "cmp") + (eq_attr "cpu" "power9")) + "DU_any_power9,VSU_power9") + + +; Treat 'two' and 'three' types as 2 or 3 way cracked +(define_insn_reservation "power9-two" 4 + (and (eq_attr "type" "two") + (eq_attr "cpu" "power9")) + "DU_C2_power9,VSU_power9") + +(define_insn_reservation "power9-three" 6 + (and (eq_attr 
"type" "three") + (eq_attr "cpu" "power9")) + "DU_C3_power9,VSU_power9") + +(define_insn_reservation "power9-mul" 5 + (and (eq_attr "type" "mul") + (eq_attr "dot" "no") + (eq_attr "cpu" "power9")) + "DU_any_power9,VSU_power9") + +(define_insn_reservation "power9-mul-compare" 5 + (and (eq_attr "type" "mul") + (eq_attr "dot" "yes") + (eq_attr "cpu" "power9")) + "DU_C2_power9,VSU_power9") +; 10 cycle CR latency +(define_bypass 10 "power9-mul-compare" + "power9-crlogical,power9-mfcr,power9-mfcrf") + +; Fixed point divides reserve the divide units for a minimum of 8 cycles +(define_insn_reservation "power9-idiv" 16 + (and (eq_attr "type" "div") + (eq_attr "size" "32") + (eq_attr "cpu" "power9")) + "DU_even_power9,fx_div0_power9*8|fx_div1_power9*8") + +(define_insn_reservation "power9-ldiv" 24 + (and (eq_attr "type" "div") + (eq_attr "size" "64") + (eq_attr "cpu" "power9")) + "DU_even_power9,fx_div0_power9*8|fx_div1_power9*8") + +(define_insn_reservation "power9-crlogical" 2 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "power9")) + "DU_any_power9,VSU_power9") + +(define_insn_reservation "power9-mfcrf" 2 + (and (eq_attr "type" "mfcrf") + (eq_attr "cpu" "power9")) + "DU_any_power9,VSU_power9") + +(define_insn_reservation "power9-mfcr" 6 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "power9")) + "DU_C3_power9,VSU_power9") + +; Should differentiate between 1 cr field and > 1 since target of > 1 cr +; is cracked +(define_insn_reservation "power9-mtcr" 2 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "power9")) + "DU_any_power9,VSU_power9") + +; Move to LR/CTR are executed in VSU +(define_insn_reservation "power9-mtjmpr" 5 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "power9")) + "DU_any_power9,VSU_power9") + +; Floating point/Vector ops +(define_insn_reservation "power9-fpsimple" 2 + (and (eq_attr "type" "fpsimple") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,VSU_power9") + +(define_insn_reservation "power9-fp" 7 + (and (eq_attr "type" "fp,dmul") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,VSU_power9") + +(define_insn_reservation "power9-fpcompare" 3 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,VSU_power9") + +; FP div/sqrt are executed in the VSU slices. They are not pipelined wrt other +; divide insns, but for the most part do not block pipelined ops. 
+(define_insn_reservation "power9-sdiv" 22 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,VSU_power9") + +(define_insn_reservation "power9-ddiv" 33 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,VSU_power9") + +(define_insn_reservation "power9-sqrt" 26 + (and (eq_attr "type" "ssqrt") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,VSU_power9") + +(define_insn_reservation "power9-dsqrt" 36 + (and (eq_attr "type" "dsqrt") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,VSU_power9") + +(define_insn_reservation "power9-vec-2cyc" 2 + (and (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx") + (eq_attr "cpu" "power9")) + "DU_super_power9,VSU_super_power9") + +(define_insn_reservation "power9-veccmp" 3 + (and (eq_attr "type" "veccmp") + (eq_attr "cpu" "power9")) + "DU_super_power9,VSU_super_power9") + +(define_insn_reservation "power9-vecsimple" 3 + (and (eq_attr "type" "vecsimple") + (eq_attr "cpu" "power9")) + "DU_super_power9,VSU_super_power9") + +(define_insn_reservation "power9-vecnormal" 7 + (and (eq_attr "type" "vecfloat,vecdouble") + (eq_attr "size" "!128") + (eq_attr "cpu" "power9")) + "DU_super_power9,VSU_super_power9") + +; Quad-precision FP ops, execute in DFU +(define_insn_reservation "power9-qp" 12 + (and (eq_attr "type" "vecfloat,vecdouble") + (eq_attr "size" "128") + (eq_attr "cpu" "power9")) + "DU_super_power9,dfu_power9") + +(define_insn_reservation "power9-vecperm" 3 + (and (eq_attr "type" "vecperm") + (eq_attr "cpu" "power9")) + "DU_super_power9,VSU_PRM_power9") + +(define_insn_reservation "power9-veccomplex" 7 + (and (eq_attr "type" "veccomplex") + (eq_attr "cpu" "power9")) + "DU_super_power9,VSU_super_power9") + +(define_insn_reservation "power9-vecfdiv" 28 + (and (eq_attr "type" "vecfdiv") + (eq_attr "cpu" "power9")) + "DU_super_power9,VSU_super_power9") + +(define_insn_reservation "power9-vecdiv" 32 + (and (eq_attr "type" "vecdiv") + (eq_attr "size" "!128") + (eq_attr "cpu" "power9")) + "DU_super_power9,VSU_super_power9") + +(define_insn_reservation "power9-qpdiv" 56 + (and (eq_attr "type" "vecdiv") + (eq_attr "size" "128") + (eq_attr "cpu" "power9")) + "DU_super_power9,dfu_power9") + +(define_insn_reservation "power9-mffgpr" 2 + (and (eq_attr "type" "mffgpr") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,VSU_power9") + +(define_insn_reservation "power9-mftgpr" 2 + (and (eq_attr "type" "mftgpr") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,VSU_power9") + + +; Branch Unit +; Move from LR/CTR are executed in BRU but consume a writeback port from an +; execution slice. 
+(define_insn_reservation "power9-mfjmpr" 6 + (and (eq_attr "type" "mfjmpr") + (eq_attr "cpu" "power9")) + "DU_branch_power9,bru_power9+VSU_power9") + +; Branch is 2 cycles +(define_insn_reservation "power9-branch" 2 + (and (eq_attr "type" "jmpreg,branch") + (eq_attr "cpu" "power9")) + "DU_branch_power9,bru_power9") + + +; Crypto Unit +(define_insn_reservation "power9-crypto" 6 + (and (eq_attr "type" "crypto") + (eq_attr "cpu" "power9")) + "DU_super_power9,cryptu_power9") + + +; HTM Unit +(define_insn_reservation "power9-htm" 4 + (and (eq_attr "type" "htm") + (eq_attr "cpu" "power9")) + "DU_C2_power9,LSU_power9") + +(define_insn_reservation "power9-htm-simple" 2 + (and (eq_attr "type" "htmsimple") + (eq_attr "cpu" "power9")) + "DU_any_power9,VSU_power9") + + +; DFP Unit +(define_insn_reservation "power9-dfp" 12 + (and (eq_attr "type" "dfp") + (eq_attr "cpu" "power9")) + "DU_even_power9,dfu_power9") + diff --git a/gcc/config/powerpcspe/powerpcspe-builtin.def b/gcc/config/powerpcspe/powerpcspe-builtin.def new file mode 100644 index 000000000000..ebe005afb20a --- /dev/null +++ b/gcc/config/powerpcspe/powerpcspe-builtin.def @@ -0,0 +1,2674 @@ +/* Builtin functions for rs6000/powerpc. + Copyright (C) 2009-2017 Free Software Foundation, Inc. + Contributed by Michael Meissner (meissner@linux.vnet.ibm.com) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Before including this file, some macros must be defined: + RS6000_BUILTIN_0 -- 0 arg builtins + RS6000_BUILTIN_1 -- 1 arg builtins + RS6000_BUILTIN_2 -- 2 arg builtins + RS6000_BUILTIN_3 -- 3 arg builtins + RS6000_BUILTIN_A -- ABS builtins + RS6000_BUILTIN_D -- DST builtins + RS6000_BUILTIN_E -- SPE EVSEL builtins. + RS6000_BUILTIN_H -- HTM builtins + RS6000_BUILTIN_P -- Altivec, VSX, ISA 2.07 vector predicate builtins + RS6000_BUILTIN_Q -- Paired floating point VSX predicate builtins + RS6000_BUILTIN_S -- SPE predicate builtins + RS6000_BUILTIN_X -- special builtins + + Each of the above macros takes 4 arguments: + ENUM Enumeration name + NAME String literal for the name + MASK Mask of bits that indicate which options enables the builtin + ATTR builtin attribute information. + ICODE Insn code of the function that implements the builtin. */ + +#ifndef RS6000_BUILTIN_0 + #error "RS6000_BUILTIN_0 is not defined." +#endif + +#ifndef RS6000_BUILTIN_1 + #error "RS6000_BUILTIN_1 is not defined." +#endif + +#ifndef RS6000_BUILTIN_2 + #error "RS6000_BUILTIN_2 is not defined." +#endif + +#ifndef RS6000_BUILTIN_3 + #error "RS6000_BUILTIN_3 is not defined." +#endif + +#ifndef RS6000_BUILTIN_A + #error "RS6000_BUILTIN_A is not defined." 
+#endif + +#ifndef RS6000_BUILTIN_D + #error "RS6000_BUILTIN_D is not defined." +#endif + +#ifndef RS6000_BUILTIN_E + #error "RS6000_BUILTIN_E is not defined." +#endif + +#ifndef RS6000_BUILTIN_H + #error "RS6000_BUILTIN_H is not defined." +#endif + +#ifndef RS6000_BUILTIN_P + #error "RS6000_BUILTIN_P is not defined." +#endif + +#ifndef RS6000_BUILTIN_Q + #error "RS6000_BUILTIN_Q is not defined." +#endif + +#ifndef RS6000_BUILTIN_S + #error "RS6000_BUILTIN_S is not defined." +#endif + +#ifndef RS6000_BUILTIN_X + #error "RS6000_BUILTIN_X is not defined." +#endif + +#ifndef BU_AV_1 +/* Define convenience macros using token pasting to allow fitting everything in + one line. */ + +/* Altivec convenience macros. */ +#define BU_ALTIVEC_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (ALTIVEC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_ALTIVEC, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_ALTIVEC_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (ALTIVEC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_ALTIVEC, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_ALTIVEC_3(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_3 (ALTIVEC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_ALTIVEC, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_ALTIVEC_A(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_A (ALTIVEC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_ALTIVEC, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_ABS), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_ALTIVEC_D(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_D (ALTIVEC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_ALTIVEC, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_DST), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* All builtins defined with the RS6000_BUILTIN_P macro expect three + arguments, the first of which is an integer constant that clarifies + the implementation's use of CR6 flags. The integer constant + argument may have four values: __CR6_EQ (0) means the predicate is + considered true if the equality-test flag of the CR6 condition + register is true following execution of the code identified by the + ICODE pattern, __CR_EQ_REV (1) means the predicate is considered + true if the equality-test flag is false, __CR6_LT (2) means the + predicate is considered true if the less-than-test flag is true, and + __CR6_LT_REV (3) means the predicate is considered true if the + less-than-test flag is false. For all builtins defined by this + macro, the pattern selected by ICODE expects three operands, a + target and two inputs and is presumed to overwrite the flags of + condition register CR6 as a side effect of computing a result into + the target register. However, the built-in invocation provides + four operands, a target, an integer constant mode, and two inputs. + The second and third operands of the built-in function's invocation + are automatically mapped into operands 1 and 2 of the pattern + identifed by the ICODE argument and additional code is emitted, + depending on the value of the constant integer first argument. 
+ This special processing happens within the implementation of + altivec_expand_predicate_builtin(), which is defined within + rs6000.c. The implementation of altivec_expand_predicate_builtin() + allocates a scratch register having the same mode as operand 0 to hold + the result produced by evaluating ICODE. */ + +#define BU_ALTIVEC_P(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_P (ALTIVEC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_ALTIVEC, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_PREDICATE), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_ALTIVEC_X(ENUM, NAME, ATTR) \ + RS6000_BUILTIN_X (ALTIVEC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_ALTIVEC, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_SPECIAL), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_ALTIVEC_C(ENUM, NAME, ATTR) \ + RS6000_BUILTIN_X (ALTIVEC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + (RS6000_BTM_ALTIVEC /* MASK */ \ + | RS6000_BTM_CELL), \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_SPECIAL), \ + CODE_FOR_nothing) /* ICODE */ + +/* Altivec overloaded builtin function macros. */ +#define BU_ALTIVEC_OVERLOAD_1(ENUM, NAME) \ + RS6000_BUILTIN_1 (ALTIVEC_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_ALTIVEC, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_ALTIVEC_OVERLOAD_2(ENUM, NAME) \ + RS6000_BUILTIN_2 (ALTIVEC_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_ALTIVEC, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_ALTIVEC_OVERLOAD_3(ENUM, NAME) \ + RS6000_BUILTIN_3 (ALTIVEC_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_ALTIVEC, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_ALTIVEC_OVERLOAD_A(ENUM, NAME) \ + RS6000_BUILTIN_A (ALTIVEC_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_ALTIVEC, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_ABS), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_ALTIVEC_OVERLOAD_D(ENUM, NAME) \ + RS6000_BUILTIN_D (ALTIVEC_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_ALTIVEC, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_DST), \ + CODE_FOR_nothing) /* ICODE */ + +/* See the comment on BU_ALTIVEC_P. */ +#define BU_ALTIVEC_OVERLOAD_P(ENUM, NAME) \ + RS6000_BUILTIN_P (ALTIVEC_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_ALTIVEC, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_PREDICATE), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_ALTIVEC_OVERLOAD_X(ENUM, NAME) \ + RS6000_BUILTIN_X (ALTIVEC_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_ALTIVEC, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_SPECIAL), \ + CODE_FOR_nothing) /* ICODE */ + +/* VSX convenience macros. 
*/ +#define BU_VSX_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (VSX_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + RS6000_BTM_VSX, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_VSX_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (VSX_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + RS6000_BTM_VSX, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_VSX_3(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_3 (VSX_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + RS6000_BTM_VSX, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_VSX_A(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_A (VSX_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + RS6000_BTM_VSX, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_ABS), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* See the comment on BU_ALTIVEC_P. */ +#define BU_VSX_P(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_P (VSX_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + RS6000_BTM_VSX, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_PREDICATE), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_VSX_X(ENUM, NAME, ATTR) \ + RS6000_BUILTIN_X (VSX_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + RS6000_BTM_VSX, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_SPECIAL), \ + CODE_FOR_nothing) /* ICODE */ + +/* VSX overloaded builtin function macros. */ +#define BU_VSX_OVERLOAD_1(ENUM, NAME) \ + RS6000_BUILTIN_1 (VSX_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_VSX, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_VSX_OVERLOAD_2(ENUM, NAME) \ + RS6000_BUILTIN_2 (VSX_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_VSX, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_VSX_OVERLOAD_3(ENUM, NAME) \ + RS6000_BUILTIN_3 (VSX_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_VSX, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_nothing) /* ICODE */ + +/* xxpermdi and xxsldwi are overloaded functions, but had __builtin_vsx names + instead of __builtin_vec. */ +#define BU_VSX_OVERLOAD_3V(ENUM, NAME) \ + RS6000_BUILTIN_3 (VSX_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + RS6000_BTM_VSX, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_VSX_OVERLOAD_X(ENUM, NAME) \ + RS6000_BUILTIN_X (VSX_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_VSX, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_SPECIAL), \ + CODE_FOR_nothing) /* ICODE */ + +/* ISA 2.05 (power6) convenience macros. 
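   As an illustration of what these declare (a sketch; the __builtin_cmpb
   overload and its prototype are assumptions based on the entries this macro
   is used for elsewhere in the file, not text shown above), the ISA 2.05
   cmpb operation yields a byte-wise equality mask:

     unsigned long long
     byte_match_mask (unsigned long long a, unsigned long long b)
     {
       return __builtin_cmpb (a, b);
     }

   Each byte of the result is 0xff where the corresponding bytes of a and b
   are equal and 0x00 where they differ.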
*/ +/* For functions that depend on the CMPB instruction */ +#define BU_P6_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (P6_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_p6_" NAME, /* NAME */ \ + RS6000_BTM_CMPB, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* For functions that depend on 64-BIT support and on the CMPB instruction */ +#define BU_P6_64BIT_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (P6_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_p6_" NAME, /* NAME */ \ + RS6000_BTM_CMPB \ + | RS6000_BTM_64BIT, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P6_OVERLOAD_2(ENUM, NAME) \ + RS6000_BUILTIN_2 (P6_OV_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_CMPB, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_nothing) /* ICODE */ + +/* ISA 2.07 (power8) vector convenience macros. */ +/* For the instructions that are encoded as altivec instructions use + __builtin_altivec_ as the builtin name. */ +#define BU_P8V_AV_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P8V_AV_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P8V_AV_3(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_3 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* See the comment on BU_ALTIVEC_P. */ +#define BU_P8V_AV_P(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_P (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_PREDICATE), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* For the instructions encoded as VSX instructions use __builtin_vsx as the + builtin name. */ +#define BU_P8V_VSX_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P8V_OVERLOAD_1(ENUM, NAME) \ + RS6000_BUILTIN_1 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_P8V_OVERLOAD_2(ENUM, NAME) \ + RS6000_BUILTIN_2 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_P8V_OVERLOAD_3(ENUM, NAME) \ + RS6000_BUILTIN_3 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_nothing) /* ICODE */ + +/* Crypto convenience macros. 
*/ +#define BU_CRYPTO_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_CRYPTO_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_CRYPTO_2A(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_CRYPTO_3(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_CRYPTO_3A(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_CRYPTO_OVERLOAD_1(ENUM, NAME) \ + RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_CRYPTO_OVERLOAD_2A(ENUM, NAME) \ + RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_CRYPTO_OVERLOAD_3(ENUM, NAME) \ + RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_CRYPTO_OVERLOAD_3A(ENUM, NAME) \ + RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_nothing) /* ICODE */ + +/* HTM convenience macros. 
*/ +#define BU_HTM_0(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_HTM, /* MASK */ \ + RS6000_BTC_ ## ATTR, /* ATTR */ \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_HTM_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_HTM, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_HTM_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_HTM, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_HTM_3(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_HTM, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_HTM_V1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_HTM, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY \ + | RS6000_BTC_VOID), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* SPE convenience macros. */ +#define BU_SPE_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (SPE_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_spe_" NAME, /* NAME */ \ + RS6000_BTM_SPE, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_SPE_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (SPE_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_spe_" NAME, /* NAME */ \ + RS6000_BTM_SPE, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_SPE_3(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_3 (SPE_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_spe_" NAME, /* NAME */ \ + RS6000_BTM_SPE, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_SPE_E(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_E (SPE_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_spe_" NAME, /* NAME */ \ + RS6000_BTM_SPE, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_EVSEL), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_SPE_P(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_S (SPE_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_spe_" NAME, /* NAME */ \ + RS6000_BTM_SPE, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_PREDICATE), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_SPE_X(ENUM, NAME, ATTR) \ + RS6000_BUILTIN_X (SPE_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_spe_" NAME, /* NAME */ \ + RS6000_BTM_SPE, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_SPECIAL), \ + CODE_FOR_nothing) /* ICODE */ + +/* Paired floating point convenience macros. 
*/ +#define BU_PAIRED_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (PAIRED_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_paired_" NAME, /* NAME */ \ + RS6000_BTM_PAIRED, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_PAIRED_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (PAIRED_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_paired_" NAME, /* NAME */ \ + RS6000_BTM_PAIRED, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_PAIRED_3(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_3 (PAIRED_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_paired_" NAME, /* NAME */ \ + RS6000_BTM_PAIRED, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_PAIRED_P(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_Q (PAIRED_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_paired_" NAME, /* NAME */ \ + RS6000_BTM_PAIRED, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_PREDICATE), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_PAIRED_X(ENUM, NAME, ATTR) \ + RS6000_BUILTIN_X (PAIRED_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_paired_" NAME, /* NAME */ \ + RS6000_BTM_PAIRED, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_SPECIAL), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_SPECIAL_X(ENUM, NAME, MASK, ATTR) \ + RS6000_BUILTIN_X (ENUM, /* ENUM */ \ + NAME, /* NAME */ \ + MASK, /* MASK */ \ + (ATTR | RS6000_BTC_SPECIAL), /* ATTR */ \ + CODE_FOR_nothing) /* ICODE */ + + +/* Decimal floating point builtins for instructions. */ +#define BU_DFP_MISC_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_DFP, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_DFP_MISC_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_DFP, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + + +/* Miscellaneous builtins for instructions added in ISA 2.06. These + instructions don't require either the DFP or VSX options, just the basic ISA + 2.06 (popcntd) enablement since they operate on general purpose + registers. */ +#define BU_P7_MISC_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_POPCNTD, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P7_MISC_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_POPCNTD, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + + +/* Miscellaneous builtins for instructions added in ISA 2.07. These + instructions do require the ISA 2.07 vector support, but they aren't vector + instructions. */ +#define BU_P8V_MISC_3(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_3 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* 128-bit long double floating point builtins. 
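   Usage illustration (a sketch, assuming the __builtin_pack_longdouble entry
   that BU_LDBL128_2 is used to declare elsewhere in the file, with long
   double in the IBM double-double format):

     long double
     make_ibm128 (double high, double low)
     {
       return __builtin_pack_longdouble (high, low);
     }

   The two doubles become the high and low halves of the 128-bit value.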
*/ +#define BU_LDBL128_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + (RS6000_BTM_HARD_FLOAT /* MASK */ \ + | RS6000_BTM_LDBL128), \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* IEEE 128-bit floating-point builtins. */ +#define BU_FLOAT128_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_FLOAT128, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_FLOAT128_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_FLOAT128, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* Miscellaneous builtins for instructions added in ISA 3.0. These + instructions don't require either the DFP or VSX options, just the basic + ISA 3.0 enablement since they operate on general purpose registers. */ +#define BU_P9_MISC_0(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_0 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_P9_MISC, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_SPECIAL), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P9_MISC_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_P9_MISC, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* Miscellaneous builtins for instructions added in ISA 3.0. These + instructions don't require either the DFP or VSX options, just the basic + ISA 3.0 enablement since they operate on general purpose registers, + and they require 64-bit addressing. */ +#define BU_P9_64BIT_MISC_0(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_0 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_P9_MISC \ + | RS6000_BTM_64BIT, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_SPECIAL), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* Miscellaneous builtins for decimal floating point instructions + added in ISA 3.0. These instructions don't require the VSX + options, just the basic ISA 3.0 enablement since they operate on + general purpose registers. 
*/ +#define BU_P9_DFP_MISC_0(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_0 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_P9_MISC, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_SPECIAL), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P9_DFP_MISC_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_P9_MISC, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_SPECIAL), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P9_DFP_MISC_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_P9_MISC, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_SPECIAL), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* Decimal floating point overloaded functions added in ISA 3.0 */ +#define BU_P9_DFP_OVERLOAD_1(ENUM, NAME) \ + RS6000_BUILTIN_1 (P9_BUILTIN_DFP_ ## ENUM, /* ENUM */ \ + "__builtin_dfp_" NAME, /* NAME */ \ + RS6000_BTM_P9_MISC, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_P9_DFP_OVERLOAD_2(ENUM, NAME) \ + RS6000_BUILTIN_2 (P9_BUILTIN_DFP_ ## ENUM, /* ENUM */ \ + "__builtin_dfp_" NAME, /* NAME */ \ + RS6000_BTM_P9_MISC, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_P9_DFP_OVERLOAD_3(ENUM, NAME) \ + RS6000_BUILTIN_3 (P9_BUILTIN_DFP_ ## ENUM, /* ENUM */ \ + "__builtin_dfp_" NAME, /* NAME */ \ + RS6000_BTM_P9_MISC, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_nothing) /* ICODE */ + +/* ISA 3.0 (power9) vector convenience macros. */ +/* For the instructions that are encoded as altivec instructions use + __builtin_altivec_ as the builtin name. */ +#define BU_P9V_AV_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (P9V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P9V_AV_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (P9V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P9V_AV_3(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_3 (P9V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* See the comment on BU_ALTIVEC_P. 
*/ +#define BU_P9V_AV_P(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_P (P9V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_PREDICATE), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P9V_AV_X(ENUM, NAME, ATTR) \ + RS6000_BUILTIN_X (P9V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_SPECIAL), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_P9V_64BIT_AV_X(ENUM, NAME, ATTR) \ + RS6000_BUILTIN_X (P9V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + (RS6000_BTM_P9_VECTOR \ + | RS6000_BTM_64BIT), /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_SPECIAL), \ + CODE_FOR_nothing) /* ICODE */ + +/* For the instructions encoded as VSX instructions use __builtin_vsx as the + builtin name. */ +#define BU_P9V_VSX_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (P9V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P9V_64BIT_VSX_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (P9V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + (RS6000_BTM_64BIT \ + | RS6000_BTM_P9_VECTOR), /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P9V_VSX_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (P9V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P9V_64BIT_VSX_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (P9V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + (RS6000_BTM_64BIT \ + | RS6000_BTM_P9_VECTOR), /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P9V_VSX_3(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_3 (P9V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P9V_64BIT_VSX_3(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (P9V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + (RS6000_BTM_64BIT \ + | RS6000_BTM_P9_VECTOR), /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* See the comment on BU_ALTIVEC_P. 
*/ +#define BU_P9V_OVERLOAD_P(ENUM, NAME) \ + RS6000_BUILTIN_P (P9V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_ALTIVEC, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_PREDICATE), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_P9_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (P9_BUILTIN_SCALAR_ ## ENUM, /* ENUM */ \ + "__builtin_scalar_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P9_64BIT_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (P9_BUILTIN_SCALAR_ ## ENUM, /* ENUM */ \ + "__builtin_scalar_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR \ + | RS6000_BTM_64BIT, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P9V_OVERLOAD_1(ENUM, NAME) \ + RS6000_BUILTIN_1 (P9V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_P9V_OVERLOAD_2(ENUM, NAME) \ + RS6000_BUILTIN_2 (P9V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_P9V_OVERLOAD_3(ENUM, NAME) \ + RS6000_BUILTIN_3 (P9V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_P9_OVERLOAD_2(ENUM, NAME) \ + RS6000_BUILTIN_2 (P9_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_nothing) /* ICODE */ + +#endif + + +/* Insure 0 is not a legitimate index. */ +BU_SPECIAL_X (RS6000_BUILTIN_NONE, NULL, 0, RS6000_BTC_MISC) + +/* 3 argument Altivec builtins. 
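   Usage illustration (a sketch, assuming <altivec.h> and -maltivec; the
   builtin name follows from the VMADDFP entry just below):

     #include <altivec.h>

     vector float
     fused_madd (vector float a, vector float b, vector float c)
     {
       return __builtin_altivec_vmaddfp (a, b, c);
     }

   This computes a * b + c element-wise with a single vmaddfp instruction.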
*/ +BU_ALTIVEC_3 (VMADDFP, "vmaddfp", FP, fmav4sf4) +BU_ALTIVEC_3 (VMHADDSHS, "vmhaddshs", SAT, altivec_vmhaddshs) +BU_ALTIVEC_3 (VMHRADDSHS, "vmhraddshs", SAT, altivec_vmhraddshs) +BU_ALTIVEC_3 (VMLADDUHM, "vmladduhm", CONST, altivec_vmladduhm) +BU_ALTIVEC_3 (VMSUMUBM, "vmsumubm", CONST, altivec_vmsumubm) +BU_ALTIVEC_3 (VMSUMMBM, "vmsummbm", CONST, altivec_vmsummbm) +BU_ALTIVEC_3 (VMSUMUHM, "vmsumuhm", CONST, altivec_vmsumuhm) +BU_ALTIVEC_3 (VMSUMSHM, "vmsumshm", CONST, altivec_vmsumshm) +BU_ALTIVEC_3 (VMSUMUHS, "vmsumuhs", SAT, altivec_vmsumuhs) +BU_ALTIVEC_3 (VMSUMSHS, "vmsumshs", SAT, altivec_vmsumshs) +BU_ALTIVEC_3 (VNMSUBFP, "vnmsubfp", FP, nfmsv4sf4) +BU_ALTIVEC_3 (VPERM_1TI, "vperm_1ti", CONST, altivec_vperm_v1ti) +BU_ALTIVEC_3 (VPERM_2DF, "vperm_2df", CONST, altivec_vperm_v2df) +BU_ALTIVEC_3 (VPERM_2DI, "vperm_2di", CONST, altivec_vperm_v2di) +BU_ALTIVEC_3 (VPERM_4SF, "vperm_4sf", CONST, altivec_vperm_v4sf) +BU_ALTIVEC_3 (VPERM_4SI, "vperm_4si", CONST, altivec_vperm_v4si) +BU_ALTIVEC_3 (VPERM_8HI, "vperm_8hi", CONST, altivec_vperm_v8hi) +BU_ALTIVEC_3 (VPERM_16QI, "vperm_16qi", CONST, altivec_vperm_v16qi_uns) +BU_ALTIVEC_3 (VPERM_1TI_UNS, "vperm_1ti_uns", CONST, altivec_vperm_v1ti_uns) +BU_ALTIVEC_3 (VPERM_2DI_UNS, "vperm_2di_uns", CONST, altivec_vperm_v2di_uns) +BU_ALTIVEC_3 (VPERM_4SI_UNS, "vperm_4si_uns", CONST, altivec_vperm_v4si_uns) +BU_ALTIVEC_3 (VPERM_8HI_UNS, "vperm_8hi_uns", CONST, altivec_vperm_v8hi_uns) +BU_ALTIVEC_3 (VPERM_16QI_UNS, "vperm_16qi_uns", CONST, altivec_vperm_v16qi_uns) +BU_ALTIVEC_3 (VSEL_4SF, "vsel_4sf", CONST, vector_select_v4sf) +BU_ALTIVEC_3 (VSEL_4SI, "vsel_4si", CONST, vector_select_v4si) +BU_ALTIVEC_3 (VSEL_8HI, "vsel_8hi", CONST, vector_select_v8hi) +BU_ALTIVEC_3 (VSEL_16QI, "vsel_16qi", CONST, vector_select_v16qi) +BU_ALTIVEC_3 (VSEL_2DF, "vsel_2df", CONST, vector_select_v2df) +BU_ALTIVEC_3 (VSEL_2DI, "vsel_2di", CONST, vector_select_v2di) +BU_ALTIVEC_3 (VSEL_1TI, "vsel_1ti", CONST, vector_select_v1ti) +BU_ALTIVEC_3 (VSEL_4SI_UNS, "vsel_4si_uns", CONST, vector_select_v4si_uns) +BU_ALTIVEC_3 (VSEL_8HI_UNS, "vsel_8hi_uns", CONST, vector_select_v8hi_uns) +BU_ALTIVEC_3 (VSEL_16QI_UNS, "vsel_16qi_uns", CONST, vector_select_v16qi_uns) +BU_ALTIVEC_3 (VSEL_2DI_UNS, "vsel_2di_uns", CONST, vector_select_v2di_uns) +BU_ALTIVEC_3 (VSEL_1TI_UNS, "vsel_1ti_uns", CONST, vector_select_v1ti_uns) +BU_ALTIVEC_3 (VSLDOI_16QI, "vsldoi_16qi", CONST, altivec_vsldoi_v16qi) +BU_ALTIVEC_3 (VSLDOI_8HI, "vsldoi_8hi", CONST, altivec_vsldoi_v8hi) +BU_ALTIVEC_3 (VSLDOI_4SI, "vsldoi_4si", CONST, altivec_vsldoi_v4si) +BU_ALTIVEC_3 (VSLDOI_4SF, "vsldoi_4sf", CONST, altivec_vsldoi_v4sf) +BU_ALTIVEC_3 (VSLDOI_2DF, "vsldoi_2df", CONST, altivec_vsldoi_v2df) + +/* Altivec DST builtins. */ +BU_ALTIVEC_D (DST, "dst", MISC, altivec_dst) +BU_ALTIVEC_D (DSTT, "dstt", MISC, altivec_dstt) +BU_ALTIVEC_D (DSTST, "dstst", MISC, altivec_dstst) +BU_ALTIVEC_D (DSTSTT, "dststt", MISC, altivec_dststt) + +/* Altivec 2 argument builtin functions. 
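   Usage illustration (a sketch, assuming <altivec.h> and -maltivec; the
   builtin name follows from the VAVGUW entry below):

     #include <altivec.h>

     vector unsigned int
     average_v4si (vector unsigned int a, vector unsigned int b)
     {
       return __builtin_altivec_vavguw (a, b);
     }

   Most user code reaches these through the overloaded vec_* names declared
   further down; the type-specific entries here are what those resolve to.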
*/ +BU_ALTIVEC_2 (VADDUBM, "vaddubm", CONST, addv16qi3) +BU_ALTIVEC_2 (VADDUHM, "vadduhm", CONST, addv8hi3) +BU_ALTIVEC_2 (VADDUWM, "vadduwm", CONST, addv4si3) +BU_ALTIVEC_2 (VADDFP, "vaddfp", CONST, addv4sf3) +BU_ALTIVEC_2 (VADDCUW, "vaddcuw", CONST, altivec_vaddcuw) +BU_ALTIVEC_2 (VADDUBS, "vaddubs", CONST, altivec_vaddubs) +BU_ALTIVEC_2 (VADDSBS, "vaddsbs", CONST, altivec_vaddsbs) +BU_ALTIVEC_2 (VADDUHS, "vadduhs", CONST, altivec_vadduhs) +BU_ALTIVEC_2 (VADDSHS, "vaddshs", CONST, altivec_vaddshs) +BU_ALTIVEC_2 (VADDUWS, "vadduws", CONST, altivec_vadduws) +BU_ALTIVEC_2 (VADDSWS, "vaddsws", CONST, altivec_vaddsws) +BU_ALTIVEC_2 (VAND, "vand", CONST, andv4si3) +BU_ALTIVEC_2 (VANDC, "vandc", CONST, andcv4si3) +BU_ALTIVEC_2 (VAVGUB, "vavgub", CONST, altivec_vavgub) +BU_ALTIVEC_2 (VAVGSB, "vavgsb", CONST, altivec_vavgsb) +BU_ALTIVEC_2 (VAVGUH, "vavguh", CONST, altivec_vavguh) +BU_ALTIVEC_2 (VAVGSH, "vavgsh", CONST, altivec_vavgsh) +BU_ALTIVEC_2 (VAVGUW, "vavguw", CONST, altivec_vavguw) +BU_ALTIVEC_2 (VAVGSW, "vavgsw", CONST, altivec_vavgsw) +BU_ALTIVEC_2 (VCFUX, "vcfux", CONST, altivec_vcfux) +BU_ALTIVEC_2 (VCFSX, "vcfsx", CONST, altivec_vcfsx) +BU_ALTIVEC_2 (VCMPBFP, "vcmpbfp", CONST, altivec_vcmpbfp) +BU_ALTIVEC_2 (VCMPEQUB, "vcmpequb", CONST, vector_eqv16qi) +BU_ALTIVEC_2 (VCMPEQUH, "vcmpequh", CONST, vector_eqv8hi) +BU_ALTIVEC_2 (VCMPEQUW, "vcmpequw", CONST, vector_eqv4si) +BU_ALTIVEC_2 (VCMPEQFP, "vcmpeqfp", CONST, vector_eqv4sf) +BU_ALTIVEC_2 (VCMPGEFP, "vcmpgefp", CONST, vector_gev4sf) +BU_ALTIVEC_2 (VCMPGTUB, "vcmpgtub", CONST, vector_gtuv16qi) +BU_ALTIVEC_2 (VCMPGTSB, "vcmpgtsb", CONST, vector_gtv16qi) +BU_ALTIVEC_2 (VCMPGTUH, "vcmpgtuh", CONST, vector_gtuv8hi) +BU_ALTIVEC_2 (VCMPGTSH, "vcmpgtsh", CONST, vector_gtv8hi) +BU_ALTIVEC_2 (VCMPGTUW, "vcmpgtuw", CONST, vector_gtuv4si) +BU_ALTIVEC_2 (VCMPGTSW, "vcmpgtsw", CONST, vector_gtv4si) +BU_ALTIVEC_2 (VCMPGTFP, "vcmpgtfp", CONST, vector_gtv4sf) +BU_ALTIVEC_2 (VCTSXS, "vctsxs", CONST, altivec_vctsxs) +BU_ALTIVEC_2 (VCTUXS, "vctuxs", CONST, altivec_vctuxs) +BU_ALTIVEC_2 (VMAXUB, "vmaxub", CONST, umaxv16qi3) +BU_ALTIVEC_2 (VMAXSB, "vmaxsb", CONST, smaxv16qi3) +BU_ALTIVEC_2 (VMAXUH, "vmaxuh", CONST, umaxv8hi3) +BU_ALTIVEC_2 (VMAXSH, "vmaxsh", CONST, smaxv8hi3) +BU_ALTIVEC_2 (VMAXUW, "vmaxuw", CONST, umaxv4si3) +BU_ALTIVEC_2 (VMAXSW, "vmaxsw", CONST, smaxv4si3) +BU_ALTIVEC_2 (VMAXFP, "vmaxfp", CONST, smaxv4sf3) +BU_ALTIVEC_2 (VMRGHB, "vmrghb", CONST, altivec_vmrghb) +BU_ALTIVEC_2 (VMRGHH, "vmrghh", CONST, altivec_vmrghh) +BU_ALTIVEC_2 (VMRGHW, "vmrghw", CONST, altivec_vmrghw) +BU_ALTIVEC_2 (VMRGLB, "vmrglb", CONST, altivec_vmrglb) +BU_ALTIVEC_2 (VMRGLH, "vmrglh", CONST, altivec_vmrglh) +BU_ALTIVEC_2 (VMRGLW, "vmrglw", CONST, altivec_vmrglw) +BU_ALTIVEC_2 (VMINUB, "vminub", CONST, uminv16qi3) +BU_ALTIVEC_2 (VMINSB, "vminsb", CONST, sminv16qi3) +BU_ALTIVEC_2 (VMINUH, "vminuh", CONST, uminv8hi3) +BU_ALTIVEC_2 (VMINSH, "vminsh", CONST, sminv8hi3) +BU_ALTIVEC_2 (VMINUW, "vminuw", CONST, uminv4si3) +BU_ALTIVEC_2 (VMINSW, "vminsw", CONST, sminv4si3) +BU_ALTIVEC_2 (VMINFP, "vminfp", CONST, sminv4sf3) +BU_ALTIVEC_2 (VMULEUB, "vmuleub", CONST, vec_widen_umult_even_v16qi) +BU_ALTIVEC_2 (VMULESB, "vmulesb", CONST, vec_widen_smult_even_v16qi) +BU_ALTIVEC_2 (VMULEUH, "vmuleuh", CONST, vec_widen_umult_even_v8hi) +BU_ALTIVEC_2 (VMULESH, "vmulesh", CONST, vec_widen_smult_even_v8hi) +BU_ALTIVEC_2 (VMULOUB, "vmuloub", CONST, vec_widen_umult_odd_v16qi) +BU_ALTIVEC_2 (VMULOSB, "vmulosb", CONST, vec_widen_smult_odd_v16qi) +BU_ALTIVEC_2 (VMULOUH, "vmulouh", 
CONST, vec_widen_umult_odd_v8hi) +BU_ALTIVEC_2 (VMULOSH, "vmulosh", CONST, vec_widen_smult_odd_v8hi) +BU_ALTIVEC_2 (VNOR, "vnor", CONST, norv4si3) +BU_ALTIVEC_2 (VOR, "vor", CONST, iorv4si3) +BU_ALTIVEC_2 (VPKUHUM, "vpkuhum", CONST, altivec_vpkuhum) +BU_ALTIVEC_2 (VPKUWUM, "vpkuwum", CONST, altivec_vpkuwum) +BU_ALTIVEC_2 (VPKPX, "vpkpx", CONST, altivec_vpkpx) +BU_ALTIVEC_2 (VPKSHSS, "vpkshss", CONST, altivec_vpkshss) +BU_ALTIVEC_2 (VPKSWSS, "vpkswss", CONST, altivec_vpkswss) +BU_ALTIVEC_2 (VPKUHUS, "vpkuhus", CONST, altivec_vpkuhus) +BU_ALTIVEC_2 (VPKSHUS, "vpkshus", CONST, altivec_vpkshus) +BU_ALTIVEC_2 (VPKUWUS, "vpkuwus", CONST, altivec_vpkuwus) +BU_ALTIVEC_2 (VPKSWUS, "vpkswus", CONST, altivec_vpkswus) +BU_ALTIVEC_2 (VRECIPFP, "vrecipdivfp", CONST, recipv4sf3) +BU_ALTIVEC_2 (VRLB, "vrlb", CONST, vrotlv16qi3) +BU_ALTIVEC_2 (VRLH, "vrlh", CONST, vrotlv8hi3) +BU_ALTIVEC_2 (VRLW, "vrlw", CONST, vrotlv4si3) +BU_ALTIVEC_2 (VSLB, "vslb", CONST, vashlv16qi3) +BU_ALTIVEC_2 (VSLH, "vslh", CONST, vashlv8hi3) +BU_ALTIVEC_2 (VSLW, "vslw", CONST, vashlv4si3) +BU_ALTIVEC_2 (VSL, "vsl", CONST, altivec_vsl) +BU_ALTIVEC_2 (VSLO, "vslo", CONST, altivec_vslo) +BU_ALTIVEC_2 (VSPLTB, "vspltb", CONST, altivec_vspltb) +BU_ALTIVEC_2 (VSPLTH, "vsplth", CONST, altivec_vsplth) +BU_ALTIVEC_2 (VSPLTW, "vspltw", CONST, altivec_vspltw) +BU_ALTIVEC_2 (VSRB, "vsrb", CONST, vlshrv16qi3) +BU_ALTIVEC_2 (VSRH, "vsrh", CONST, vlshrv8hi3) +BU_ALTIVEC_2 (VSRW, "vsrw", CONST, vlshrv4si3) +BU_ALTIVEC_2 (VSRAB, "vsrab", CONST, vashrv16qi3) +BU_ALTIVEC_2 (VSRAH, "vsrah", CONST, vashrv8hi3) +BU_ALTIVEC_2 (VSRAW, "vsraw", CONST, vashrv4si3) +BU_ALTIVEC_2 (VSR, "vsr", CONST, altivec_vsr) +BU_ALTIVEC_2 (VSRO, "vsro", CONST, altivec_vsro) +BU_ALTIVEC_2 (VSUBUBM, "vsububm", CONST, subv16qi3) +BU_ALTIVEC_2 (VSUBUHM, "vsubuhm", CONST, subv8hi3) +BU_ALTIVEC_2 (VSUBUWM, "vsubuwm", CONST, subv4si3) +BU_ALTIVEC_2 (VSUBFP, "vsubfp", CONST, subv4sf3) +BU_ALTIVEC_2 (VSUBCUW, "vsubcuw", CONST, altivec_vsubcuw) +BU_ALTIVEC_2 (VSUBUBS, "vsububs", CONST, altivec_vsububs) +BU_ALTIVEC_2 (VSUBSBS, "vsubsbs", CONST, altivec_vsubsbs) +BU_ALTIVEC_2 (VSUBUHS, "vsubuhs", CONST, altivec_vsubuhs) +BU_ALTIVEC_2 (VSUBSHS, "vsubshs", CONST, altivec_vsubshs) +BU_ALTIVEC_2 (VSUBUWS, "vsubuws", CONST, altivec_vsubuws) +BU_ALTIVEC_2 (VSUBSWS, "vsubsws", CONST, altivec_vsubsws) +BU_ALTIVEC_2 (VSUM4UBS, "vsum4ubs", CONST, altivec_vsum4ubs) +BU_ALTIVEC_2 (VSUM4SBS, "vsum4sbs", CONST, altivec_vsum4sbs) +BU_ALTIVEC_2 (VSUM4SHS, "vsum4shs", CONST, altivec_vsum4shs) +BU_ALTIVEC_2 (VSUM2SWS, "vsum2sws", CONST, altivec_vsum2sws) +BU_ALTIVEC_2 (VSUMSWS, "vsumsws", CONST, altivec_vsumsws) +BU_ALTIVEC_2 (VXOR, "vxor", CONST, xorv4si3) +BU_ALTIVEC_2 (COPYSIGN_V4SF, "copysignfp", CONST, vector_copysignv4sf3) + +/* Altivec ABS functions. */ +BU_ALTIVEC_A (ABS_V4SI, "abs_v4si", CONST, absv4si2) +BU_ALTIVEC_A (ABS_V8HI, "abs_v8hi", CONST, absv8hi2) +BU_ALTIVEC_A (ABS_V4SF, "abs_v4sf", CONST, absv4sf2) +BU_ALTIVEC_A (ABS_V16QI, "abs_v16qi", CONST, absv16qi2) +BU_ALTIVEC_A (ABSS_V4SI, "abss_v4si", SAT, altivec_abss_v4si) +BU_ALTIVEC_A (ABSS_V8HI, "abss_v8hi", SAT, altivec_abss_v8hi) +BU_ALTIVEC_A (ABSS_V16QI, "abss_v16qi", SAT, altivec_abss_v16qi) + +/* Altivec NABS functions. 
*/ +BU_ALTIVEC_A (NABS_V2DI, "nabs_v2di", CONST, nabsv2di2) +BU_ALTIVEC_A (NABS_V4SI, "nabs_v4si", CONST, nabsv4si2) +BU_ALTIVEC_A (NABS_V8HI, "nabs_v8hi", CONST, nabsv8hi2) +BU_ALTIVEC_A (NABS_V16QI, "nabs_v16qi", CONST, nabsv16qi2) +BU_ALTIVEC_A (NABS_V4SF, "nabs_v4sf", CONST, vsx_nabsv4sf2) +BU_ALTIVEC_A (NABS_V2DF, "nabs_v2df", CONST, vsx_nabsv2df2) + +/* Altivec NEG functions. */ +BU_ALTIVEC_A (NEG_V2DI, "neg_v2di", CONST, negv2di2) +BU_ALTIVEC_A (NEG_V4SI, "neg_v4si", CONST, negv4si2) +BU_ALTIVEC_A (NEG_V8HI, "neg_v8hi", CONST, negv8hi2) +BU_ALTIVEC_A (NEG_V16QI, "neg_v16qi", CONST, negv16qi2) +BU_ALTIVEC_A (NEG_V4SF, "neg_v4sf", CONST, negv4sf2) +BU_ALTIVEC_A (NEG_V2DF, "neg_v2df", CONST, negv2df2) + +/* 1 argument Altivec builtin functions. */ +BU_ALTIVEC_1 (VEXPTEFP, "vexptefp", FP, altivec_vexptefp) +BU_ALTIVEC_1 (VLOGEFP, "vlogefp", FP, altivec_vlogefp) +BU_ALTIVEC_1 (VREFP, "vrefp", FP, rev4sf2) +BU_ALTIVEC_1 (VRFIM, "vrfim", FP, vector_floorv4sf2) +BU_ALTIVEC_1 (VRFIN, "vrfin", FP, altivec_vrfin) +BU_ALTIVEC_1 (VRFIP, "vrfip", FP, vector_ceilv4sf2) +BU_ALTIVEC_1 (VRFIZ, "vrfiz", FP, vector_btruncv4sf2) +BU_ALTIVEC_1 (VRSQRTFP, "vrsqrtfp", FP, rsqrtv4sf2) +BU_ALTIVEC_1 (VRSQRTEFP, "vrsqrtefp", FP, rsqrtev4sf2) +BU_ALTIVEC_1 (VSPLTISB, "vspltisb", CONST, altivec_vspltisb) +BU_ALTIVEC_1 (VSPLTISH, "vspltish", CONST, altivec_vspltish) +BU_ALTIVEC_1 (VSPLTISW, "vspltisw", CONST, altivec_vspltisw) +BU_ALTIVEC_1 (VUPKHSB, "vupkhsb", CONST, altivec_vupkhsb) +BU_ALTIVEC_1 (VUPKHPX, "vupkhpx", CONST, altivec_vupkhpx) +BU_ALTIVEC_1 (VUPKHSH, "vupkhsh", CONST, altivec_vupkhsh) +BU_ALTIVEC_1 (VUPKLSB, "vupklsb", CONST, altivec_vupklsb) +BU_ALTIVEC_1 (VUPKLPX, "vupklpx", CONST, altivec_vupklpx) +BU_ALTIVEC_1 (VUPKLSH, "vupklsh", CONST, altivec_vupklsh) + +BU_ALTIVEC_1 (FLOAT_V4SI_V4SF, "float_sisf", FP, floatv4siv4sf2) +BU_ALTIVEC_1 (UNSFLOAT_V4SI_V4SF, "uns_float_sisf", FP, floatunsv4siv4sf2) +BU_ALTIVEC_1 (FIX_V4SF_V4SI, "fix_sfsi", FP, fix_truncv4sfv4si2) +BU_ALTIVEC_1 (FIXUNS_V4SF_V4SI, "fixuns_sfsi", FP, fixuns_truncv4sfv4si2) + +/* Altivec predicate functions. */ +BU_ALTIVEC_P (VCMPBFP_P, "vcmpbfp_p", CONST, altivec_vcmpbfp_p) +BU_ALTIVEC_P (VCMPEQFP_P, "vcmpeqfp_p", CONST, vector_eq_v4sf_p) +BU_ALTIVEC_P (VCMPGEFP_P, "vcmpgefp_p", CONST, vector_ge_v4sf_p) +BU_ALTIVEC_P (VCMPGTFP_P, "vcmpgtfp_p", CONST, vector_gt_v4sf_p) +BU_ALTIVEC_P (VCMPEQUW_P, "vcmpequw_p", CONST, vector_eq_v4si_p) +BU_ALTIVEC_P (VCMPGTSW_P, "vcmpgtsw_p", CONST, vector_gt_v4si_p) +BU_ALTIVEC_P (VCMPGTUW_P, "vcmpgtuw_p", CONST, vector_gtu_v4si_p) +BU_ALTIVEC_P (VCMPEQUH_P, "vcmpequh_p", CONST, vector_eq_v8hi_p) +BU_ALTIVEC_P (VCMPGTSH_P, "vcmpgtsh_p", CONST, vector_gt_v8hi_p) +BU_ALTIVEC_P (VCMPGTUH_P, "vcmpgtuh_p", CONST, vector_gtu_v8hi_p) +BU_ALTIVEC_P (VCMPEQUB_P, "vcmpequb_p", CONST, vector_eq_v16qi_p) +BU_ALTIVEC_P (VCMPGTSB_P, "vcmpgtsb_p", CONST, vector_gt_v16qi_p) +BU_ALTIVEC_P (VCMPGTUB_P, "vcmpgtub_p", CONST, vector_gtu_v16qi_p) + +/* AltiVec builtins that are handled as special cases. 
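   Usage illustration (a sketch, assuming the vec_ld interface from
   <altivec.h>): loads and stores such as LVX and STVX below have no fixed
   insn pattern here (CODE_FOR_nothing) and are expanded specially, e.g.:

     #include <altivec.h>

     vector signed int
     load_aligned (const signed int *p)
     {
       return vec_ld (0, p);
     }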
*/ +BU_ALTIVEC_X (ST_INTERNAL_4si, "st_internal_4si", MEM) +BU_ALTIVEC_X (LD_INTERNAL_4si, "ld_internal_4si", MEM) +BU_ALTIVEC_X (ST_INTERNAL_8hi, "st_internal_8hi", MEM) +BU_ALTIVEC_X (LD_INTERNAL_8hi, "ld_internal_8hi", MEM) +BU_ALTIVEC_X (ST_INTERNAL_16qi, "st_internal_16qi", MEM) +BU_ALTIVEC_X (LD_INTERNAL_16qi, "ld_internal_16qi", MEM) +BU_ALTIVEC_X (ST_INTERNAL_4sf, "st_internal_4sf", MEM) +BU_ALTIVEC_X (LD_INTERNAL_4sf, "ld_internal_4sf", MEM) +BU_ALTIVEC_X (ST_INTERNAL_2df, "st_internal_2df", MEM) +BU_ALTIVEC_X (LD_INTERNAL_2df, "ld_internal_2df", MEM) +BU_ALTIVEC_X (ST_INTERNAL_2di, "st_internal_2di", MEM) +BU_ALTIVEC_X (LD_INTERNAL_2di, "ld_internal_2di", MEM) +BU_ALTIVEC_X (ST_INTERNAL_1ti, "st_internal_1ti", MEM) +BU_ALTIVEC_X (LD_INTERNAL_1ti, "ld_internal_1ti", MEM) +BU_ALTIVEC_X (MTVSCR, "mtvscr", MISC) +BU_ALTIVEC_X (MFVSCR, "mfvscr", MISC) +BU_ALTIVEC_X (DSSALL, "dssall", MISC) +BU_ALTIVEC_X (DSS, "dss", MISC) +BU_ALTIVEC_X (LVSL, "lvsl", MEM) +BU_ALTIVEC_X (LVSR, "lvsr", MEM) +BU_ALTIVEC_X (LVEBX, "lvebx", MEM) +BU_ALTIVEC_X (LVEHX, "lvehx", MEM) +BU_ALTIVEC_X (LVEWX, "lvewx", MEM) +BU_ALTIVEC_X (LVXL, "lvxl", MEM) +BU_ALTIVEC_X (LVXL_V2DF, "lvxl_v2df", MEM) +BU_ALTIVEC_X (LVXL_V2DI, "lvxl_v2di", MEM) +BU_ALTIVEC_X (LVXL_V4SF, "lvxl_v4sf", MEM) +BU_ALTIVEC_X (LVXL_V4SI, "lvxl_v4si", MEM) +BU_ALTIVEC_X (LVXL_V8HI, "lvxl_v8hi", MEM) +BU_ALTIVEC_X (LVXL_V16QI, "lvxl_v16qi", MEM) +BU_ALTIVEC_X (LVX, "lvx", MEM) +BU_ALTIVEC_X (LVX_V2DF, "lvx_v2df", MEM) +BU_ALTIVEC_X (LVX_V2DI, "lvx_v2di", MEM) +BU_ALTIVEC_X (LVX_V4SF, "lvx_v4sf", MEM) +BU_ALTIVEC_X (LVX_V4SI, "lvx_v4si", MEM) +BU_ALTIVEC_X (LVX_V8HI, "lvx_v8hi", MEM) +BU_ALTIVEC_X (LVX_V16QI, "lvx_v16qi", MEM) +BU_ALTIVEC_X (STVX, "stvx", MEM) +BU_ALTIVEC_X (STVX_V2DF, "stvx_v2df", MEM) +BU_ALTIVEC_X (STVX_V2DI, "stvx_v2di", MEM) +BU_ALTIVEC_X (STVX_V4SF, "stvx_v4sf", MEM) +BU_ALTIVEC_X (STVX_V4SI, "stvx_v4si", MEM) +BU_ALTIVEC_X (STVX_V8HI, "stvx_v8hi", MEM) +BU_ALTIVEC_X (STVX_V16QI, "stvx_v16qi", MEM) +BU_ALTIVEC_C (LVLX, "lvlx", MEM) +BU_ALTIVEC_C (LVLXL, "lvlxl", MEM) +BU_ALTIVEC_C (LVRX, "lvrx", MEM) +BU_ALTIVEC_C (LVRXL, "lvrxl", MEM) +BU_ALTIVEC_X (STVEBX, "stvebx", MEM) +BU_ALTIVEC_X (STVEHX, "stvehx", MEM) +BU_ALTIVEC_X (STVEWX, "stvewx", MEM) +BU_ALTIVEC_X (STVXL, "stvxl", MEM) +BU_ALTIVEC_X (STVXL_V2DF, "stvxl_v2df", MEM) +BU_ALTIVEC_X (STVXL_V2DI, "stvxl_v2di", MEM) +BU_ALTIVEC_X (STVXL_V4SF, "stvxl_v4sf", MEM) +BU_ALTIVEC_X (STVXL_V4SI, "stvxl_v4si", MEM) +BU_ALTIVEC_X (STVXL_V8HI, "stvxl_v8hi", MEM) +BU_ALTIVEC_X (STVXL_V16QI, "stvxl_v16qi", MEM) +BU_ALTIVEC_C (STVLX, "stvlx", MEM) +BU_ALTIVEC_C (STVLXL, "stvlxl", MEM) +BU_ALTIVEC_C (STVRX, "stvrx", MEM) +BU_ALTIVEC_C (STVRXL, "stvrxl", MEM) +BU_ALTIVEC_X (MASK_FOR_LOAD, "mask_for_load", MISC) +BU_ALTIVEC_X (MASK_FOR_STORE, "mask_for_store", MISC) +BU_ALTIVEC_X (VEC_INIT_V4SI, "vec_init_v4si", CONST) +BU_ALTIVEC_X (VEC_INIT_V8HI, "vec_init_v8hi", CONST) +BU_ALTIVEC_X (VEC_INIT_V16QI, "vec_init_v16qi", CONST) +BU_ALTIVEC_X (VEC_INIT_V4SF, "vec_init_v4sf", CONST) +BU_ALTIVEC_X (VEC_SET_V4SI, "vec_set_v4si", CONST) +BU_ALTIVEC_X (VEC_SET_V8HI, "vec_set_v8hi", CONST) +BU_ALTIVEC_X (VEC_SET_V16QI, "vec_set_v16qi", CONST) +BU_ALTIVEC_X (VEC_SET_V4SF, "vec_set_v4sf", CONST) +BU_ALTIVEC_X (VEC_EXT_V4SI, "vec_ext_v4si", CONST) +BU_ALTIVEC_X (VEC_EXT_V8HI, "vec_ext_v8hi", CONST) +BU_ALTIVEC_X (VEC_EXT_V16QI, "vec_ext_v16qi", CONST) +BU_ALTIVEC_X (VEC_EXT_V4SF, "vec_ext_v4sf", CONST) + +/* Altivec overloaded builtins. 
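   Usage illustration (a sketch, assuming the type-generic vec_msum macro
   from <altivec.h>): user code calls the overloaded name and the C front end
   picks the concrete entry from the argument types, here VMSUMMBM:

     #include <altivec.h>

     vector signed int
     dot_bytes (vector signed char a, vector unsigned char b,
                vector signed int acc)
     {
       return vec_msum (a, b, acc);
     }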
*/ +/* For now, don't set the classification for overloaded functions. + The function should be converted to the type specific instruction + before we get to the point about classifying the builtin type. */ + +/* 3 argument Altivec overloaded builtins. */ +BU_ALTIVEC_OVERLOAD_3 (MADD, "madd") +BU_ALTIVEC_OVERLOAD_3 (MADDS, "madds") +BU_ALTIVEC_OVERLOAD_3 (MLADD, "mladd") +BU_ALTIVEC_OVERLOAD_3 (MRADDS, "mradds") +BU_ALTIVEC_OVERLOAD_3 (MSUM, "msum") +BU_ALTIVEC_OVERLOAD_3 (MSUMS, "msums") +BU_ALTIVEC_OVERLOAD_3 (NMSUB, "nmsub") +BU_ALTIVEC_OVERLOAD_3 (PERM, "perm") +BU_ALTIVEC_OVERLOAD_3 (SEL, "sel") +BU_ALTIVEC_OVERLOAD_3 (VMSUMMBM, "vmsummbm") +BU_ALTIVEC_OVERLOAD_3 (VMSUMSHM, "vmsumshm") +BU_ALTIVEC_OVERLOAD_3 (VMSUMSHS, "vmsumshs") +BU_ALTIVEC_OVERLOAD_3 (VMSUMUBM, "vmsumubm") +BU_ALTIVEC_OVERLOAD_3 (VMSUMUHM, "vmsumuhm") +BU_ALTIVEC_OVERLOAD_3 (VMSUMUHS, "vmsumuhs") + +/* Altivec DST overloaded builtins. */ +BU_ALTIVEC_OVERLOAD_D (DST, "dst") +BU_ALTIVEC_OVERLOAD_D (DSTT, "dstt") +BU_ALTIVEC_OVERLOAD_D (DSTST, "dstst") +BU_ALTIVEC_OVERLOAD_D (DSTSTT, "dststt") + +/* 2 argument Altivec overloaded builtins. */ +BU_ALTIVEC_OVERLOAD_2 (ADD, "add") +BU_ALTIVEC_OVERLOAD_2 (ADDC, "addc") +BU_ALTIVEC_OVERLOAD_2 (ADDS, "adds") +BU_ALTIVEC_OVERLOAD_2 (AND, "and") +BU_ALTIVEC_OVERLOAD_2 (ANDC, "andc") +BU_ALTIVEC_OVERLOAD_2 (AVG, "avg") +BU_ALTIVEC_OVERLOAD_2 (CMPB, "cmpb") +BU_ALTIVEC_OVERLOAD_2 (CMPEQ, "cmpeq") +BU_ALTIVEC_OVERLOAD_2 (CMPGE, "cmpge") +BU_ALTIVEC_OVERLOAD_2 (CMPGT, "cmpgt") +BU_ALTIVEC_OVERLOAD_2 (CMPLE, "cmple") +BU_ALTIVEC_OVERLOAD_2 (CMPLT, "cmplt") +BU_ALTIVEC_OVERLOAD_2 (COPYSIGN, "copysign") +BU_ALTIVEC_OVERLOAD_2 (MAX, "max") +BU_ALTIVEC_OVERLOAD_2 (MERGEH, "mergeh") +BU_ALTIVEC_OVERLOAD_2 (MERGEL, "mergel") +BU_ALTIVEC_OVERLOAD_2 (MIN, "min") +BU_ALTIVEC_OVERLOAD_2 (MULE, "mule") +BU_ALTIVEC_OVERLOAD_2 (MULO, "mulo") +BU_ALTIVEC_OVERLOAD_2 (NOR, "nor") +BU_ALTIVEC_OVERLOAD_2 (OR, "or") +BU_ALTIVEC_OVERLOAD_2 (PACK, "pack") +BU_ALTIVEC_OVERLOAD_2 (PACKPX, "packpx") +BU_ALTIVEC_OVERLOAD_2 (PACKS, "packs") +BU_ALTIVEC_OVERLOAD_2 (PACKSU, "packsu") +BU_ALTIVEC_OVERLOAD_2 (RECIP, "recipdiv") +BU_ALTIVEC_OVERLOAD_2 (RL, "rl") +BU_ALTIVEC_OVERLOAD_2 (SL, "sl") +BU_ALTIVEC_OVERLOAD_2 (SLL, "sll") +BU_ALTIVEC_OVERLOAD_2 (SLO, "slo") +BU_ALTIVEC_OVERLOAD_2 (SR, "sr") +BU_ALTIVEC_OVERLOAD_2 (SRA, "sra") +BU_ALTIVEC_OVERLOAD_2 (SRL, "srl") +BU_ALTIVEC_OVERLOAD_2 (SRO, "sro") +BU_ALTIVEC_OVERLOAD_2 (SUB, "sub") +BU_ALTIVEC_OVERLOAD_2 (SUBC, "subc") +BU_ALTIVEC_OVERLOAD_2 (SUBS, "subs") +BU_ALTIVEC_OVERLOAD_2 (SUM2S, "sum2s") +BU_ALTIVEC_OVERLOAD_2 (SUM4S, "sum4s") +BU_ALTIVEC_OVERLOAD_2 (SUMS, "sums") +BU_ALTIVEC_OVERLOAD_2 (VADDFP, "vaddfp") +BU_ALTIVEC_OVERLOAD_2 (VADDSBS, "vaddsbs") +BU_ALTIVEC_OVERLOAD_2 (VADDSHS, "vaddshs") +BU_ALTIVEC_OVERLOAD_2 (VADDSWS, "vaddsws") +BU_ALTIVEC_OVERLOAD_2 (VADDUBM, "vaddubm") +BU_ALTIVEC_OVERLOAD_2 (VADDUBS, "vaddubs") +BU_ALTIVEC_OVERLOAD_2 (VADDUHM, "vadduhm") +BU_ALTIVEC_OVERLOAD_2 (VADDUHS, "vadduhs") +BU_ALTIVEC_OVERLOAD_2 (VADDUWM, "vadduwm") +BU_ALTIVEC_OVERLOAD_2 (VADDUWS, "vadduws") +BU_ALTIVEC_OVERLOAD_2 (VAVGSB, "vavgsb") +BU_ALTIVEC_OVERLOAD_2 (VAVGSH, "vavgsh") +BU_ALTIVEC_OVERLOAD_2 (VAVGSW, "vavgsw") +BU_ALTIVEC_OVERLOAD_2 (VAVGUB, "vavgub") +BU_ALTIVEC_OVERLOAD_2 (VAVGUH, "vavguh") +BU_ALTIVEC_OVERLOAD_2 (VAVGUW, "vavguw") +BU_ALTIVEC_OVERLOAD_2 (VCMPEQFP, "vcmpeqfp") +BU_ALTIVEC_OVERLOAD_2 (VCMPEQUB, "vcmpequb") +BU_ALTIVEC_OVERLOAD_2 (VCMPEQUH, "vcmpequh") +BU_ALTIVEC_OVERLOAD_2 (VCMPEQUW, "vcmpequw") +BU_ALTIVEC_OVERLOAD_2 
(VCMPGTFP, "vcmpgtfp") +BU_ALTIVEC_OVERLOAD_2 (VCMPGTSB, "vcmpgtsb") +BU_ALTIVEC_OVERLOAD_2 (VCMPGTSH, "vcmpgtsh") +BU_ALTIVEC_OVERLOAD_2 (VCMPGTSW, "vcmpgtsw") +BU_ALTIVEC_OVERLOAD_2 (VCMPGTUB, "vcmpgtub") +BU_ALTIVEC_OVERLOAD_2 (VCMPGTUH, "vcmpgtuh") +BU_ALTIVEC_OVERLOAD_2 (VCMPGTUW, "vcmpgtuw") +BU_ALTIVEC_OVERLOAD_2 (VMAXFP, "vmaxfp") +BU_ALTIVEC_OVERLOAD_2 (VMAXSB, "vmaxsb") +BU_ALTIVEC_OVERLOAD_2 (VMAXSH, "vmaxsh") +BU_ALTIVEC_OVERLOAD_2 (VMAXSW, "vmaxsw") +BU_ALTIVEC_OVERLOAD_2 (VMAXUB, "vmaxub") +BU_ALTIVEC_OVERLOAD_2 (VMAXUH, "vmaxuh") +BU_ALTIVEC_OVERLOAD_2 (VMAXUW, "vmaxuw") +BU_ALTIVEC_OVERLOAD_2 (VMINFP, "vminfp") +BU_ALTIVEC_OVERLOAD_2 (VMINSB, "vminsb") +BU_ALTIVEC_OVERLOAD_2 (VMINSH, "vminsh") +BU_ALTIVEC_OVERLOAD_2 (VMINSW, "vminsw") +BU_ALTIVEC_OVERLOAD_2 (VMINUB, "vminub") +BU_ALTIVEC_OVERLOAD_2 (VMINUH, "vminuh") +BU_ALTIVEC_OVERLOAD_2 (VMINUW, "vminuw") +BU_ALTIVEC_OVERLOAD_2 (VMRGHB, "vmrghb") +BU_ALTIVEC_OVERLOAD_2 (VMRGHH, "vmrghh") +BU_ALTIVEC_OVERLOAD_2 (VMRGHW, "vmrghw") +BU_ALTIVEC_OVERLOAD_2 (VMRGLB, "vmrglb") +BU_ALTIVEC_OVERLOAD_2 (VMRGLH, "vmrglh") +BU_ALTIVEC_OVERLOAD_2 (VMRGLW, "vmrglw") +BU_ALTIVEC_OVERLOAD_2 (VMULESB, "vmulesb") +BU_ALTIVEC_OVERLOAD_2 (VMULESH, "vmulesh") +BU_ALTIVEC_OVERLOAD_2 (VMULEUB, "vmuleub") +BU_ALTIVEC_OVERLOAD_2 (VMULEUH, "vmuleuh") +BU_ALTIVEC_OVERLOAD_2 (VMULOSB, "vmulosb") +BU_ALTIVEC_OVERLOAD_2 (VMULOSH, "vmulosh") +BU_ALTIVEC_OVERLOAD_2 (VMULOUB, "vmuloub") +BU_ALTIVEC_OVERLOAD_2 (VMULOUH, "vmulouh") +BU_ALTIVEC_OVERLOAD_2 (VPKSHSS, "vpkshss") +BU_ALTIVEC_OVERLOAD_2 (VPKSHUS, "vpkshus") +BU_ALTIVEC_OVERLOAD_2 (VPKSWSS, "vpkswss") +BU_ALTIVEC_OVERLOAD_2 (VPKSWUS, "vpkswus") +BU_ALTIVEC_OVERLOAD_2 (VPKUHUM, "vpkuhum") +BU_ALTIVEC_OVERLOAD_2 (VPKUHUS, "vpkuhus") +BU_ALTIVEC_OVERLOAD_2 (VPKUWUM, "vpkuwum") +BU_ALTIVEC_OVERLOAD_2 (VPKUWUS, "vpkuwus") +BU_ALTIVEC_OVERLOAD_2 (VRLB, "vrlb") +BU_ALTIVEC_OVERLOAD_2 (VRLH, "vrlh") +BU_ALTIVEC_OVERLOAD_2 (VRLW, "vrlw") +BU_ALTIVEC_OVERLOAD_2 (VSLB, "vslb") +BU_ALTIVEC_OVERLOAD_2 (VSLH, "vslh") +BU_ALTIVEC_OVERLOAD_2 (VSLW, "vslw") +BU_ALTIVEC_OVERLOAD_2 (VSRAB, "vsrab") +BU_ALTIVEC_OVERLOAD_2 (VSRAH, "vsrah") +BU_ALTIVEC_OVERLOAD_2 (VSRAW, "vsraw") +BU_ALTIVEC_OVERLOAD_2 (VSRB, "vsrb") +BU_ALTIVEC_OVERLOAD_2 (VSRH, "vsrh") +BU_ALTIVEC_OVERLOAD_2 (VSRW, "vsrw") +BU_ALTIVEC_OVERLOAD_2 (VSUBFP, "vsubfp") +BU_ALTIVEC_OVERLOAD_2 (VSUBSBS, "vsubsbs") +BU_ALTIVEC_OVERLOAD_2 (VSUBSHS, "vsubshs") +BU_ALTIVEC_OVERLOAD_2 (VSUBSWS, "vsubsws") +BU_ALTIVEC_OVERLOAD_2 (VSUBUBM, "vsububm") +BU_ALTIVEC_OVERLOAD_2 (VSUBUBS, "vsububs") +BU_ALTIVEC_OVERLOAD_2 (VSUBUHM, "vsubuhm") +BU_ALTIVEC_OVERLOAD_2 (VSUBUHS, "vsubuhs") +BU_ALTIVEC_OVERLOAD_2 (VSUBUWM, "vsubuwm") +BU_ALTIVEC_OVERLOAD_2 (VSUBUWS, "vsubuws") +BU_ALTIVEC_OVERLOAD_2 (VSUM4SBS, "vsum4sbs") +BU_ALTIVEC_OVERLOAD_2 (VSUM4SHS, "vsum4shs") +BU_ALTIVEC_OVERLOAD_2 (VSUM4UBS, "vsum4ubs") +BU_ALTIVEC_OVERLOAD_2 (XOR, "xor") + +/* 1 argument Altivec overloaded functions. 
*/ +BU_ALTIVEC_OVERLOAD_1 (ABS, "abs") +BU_ALTIVEC_OVERLOAD_1 (NABS, "nabs") +BU_ALTIVEC_OVERLOAD_1 (ABSS, "abss") +BU_ALTIVEC_OVERLOAD_1 (CEIL, "ceil") +BU_ALTIVEC_OVERLOAD_1 (EXPTE, "expte") +BU_ALTIVEC_OVERLOAD_1 (FLOOR, "floor") +BU_ALTIVEC_OVERLOAD_1 (LOGE, "loge") +BU_ALTIVEC_OVERLOAD_1 (MTVSCR, "mtvscr") +BU_ALTIVEC_OVERLOAD_1 (NEARBYINT, "nearbyint") +BU_ALTIVEC_OVERLOAD_1 (NEG, "neg") +BU_ALTIVEC_OVERLOAD_1 (RE, "re") +BU_ALTIVEC_OVERLOAD_1 (RINT, "rint") +BU_ALTIVEC_OVERLOAD_1 (ROUND, "round") +BU_ALTIVEC_OVERLOAD_1 (RSQRT, "rsqrt") +BU_ALTIVEC_OVERLOAD_1 (RSQRTE, "rsqrte") +BU_ALTIVEC_OVERLOAD_1 (SQRT, "sqrt") +BU_ALTIVEC_OVERLOAD_1 (TRUNC, "trunc") +BU_ALTIVEC_OVERLOAD_1 (UNPACKH, "unpackh") +BU_ALTIVEC_OVERLOAD_1 (UNPACKL, "unpackl") +BU_ALTIVEC_OVERLOAD_1 (VUPKHPX, "vupkhpx") +BU_ALTIVEC_OVERLOAD_1 (VUPKHSB, "vupkhsb") +BU_ALTIVEC_OVERLOAD_1 (VUPKHSH, "vupkhsh") +BU_ALTIVEC_OVERLOAD_1 (VUPKLPX, "vupklpx") +BU_ALTIVEC_OVERLOAD_1 (VUPKLSB, "vupklsb") +BU_ALTIVEC_OVERLOAD_1 (VUPKLSH, "vupklsh") + +/* Overloaded altivec predicates. */ +BU_ALTIVEC_OVERLOAD_P (VCMPEQ_P, "vcmpeq_p") +BU_ALTIVEC_OVERLOAD_P (VCMPGT_P, "vcmpgt_p") +BU_ALTIVEC_OVERLOAD_P (VCMPGE_P, "vcmpge_p") + +/* Overloaded Altivec builtins that are handled as special cases. */ +BU_ALTIVEC_OVERLOAD_X (ADDE, "adde") +BU_ALTIVEC_OVERLOAD_X (ADDEC, "addec") +BU_ALTIVEC_OVERLOAD_X (CMPNE, "cmpne") +BU_ALTIVEC_OVERLOAD_X (CTF, "ctf") +BU_ALTIVEC_OVERLOAD_X (CTS, "cts") +BU_ALTIVEC_OVERLOAD_X (CTU, "ctu") +BU_ALTIVEC_OVERLOAD_X (EXTRACT, "extract") +BU_ALTIVEC_OVERLOAD_X (INSERT, "insert") +BU_ALTIVEC_OVERLOAD_X (LD, "ld") +BU_ALTIVEC_OVERLOAD_X (LDE, "lde") +BU_ALTIVEC_OVERLOAD_X (LDL, "ldl") +BU_ALTIVEC_OVERLOAD_X (LVEBX, "lvebx") +BU_ALTIVEC_OVERLOAD_X (LVEHX, "lvehx") +BU_ALTIVEC_OVERLOAD_X (LVEWX, "lvewx") +BU_ALTIVEC_OVERLOAD_X (LVLX, "lvlx") +BU_ALTIVEC_OVERLOAD_X (LVLXL, "lvlxl") +BU_ALTIVEC_OVERLOAD_X (LVRX, "lvrx") +BU_ALTIVEC_OVERLOAD_X (LVRXL, "lvrxl") +BU_ALTIVEC_OVERLOAD_X (LVSL, "lvsl") +BU_ALTIVEC_OVERLOAD_X (LVSR, "lvsr") +BU_ALTIVEC_OVERLOAD_X (MUL, "mul") +BU_ALTIVEC_OVERLOAD_X (PROMOTE, "promote") +BU_ALTIVEC_OVERLOAD_X (SLD, "sld") +BU_ALTIVEC_OVERLOAD_X (SLDW, "sldw") +BU_ALTIVEC_OVERLOAD_X (SPLAT, "splat") +BU_ALTIVEC_OVERLOAD_X (SPLATS, "splats") +BU_ALTIVEC_OVERLOAD_X (ST, "st") +BU_ALTIVEC_OVERLOAD_X (STE, "ste") +BU_ALTIVEC_OVERLOAD_X (STEP, "step") +BU_ALTIVEC_OVERLOAD_X (STL, "stl") +BU_ALTIVEC_OVERLOAD_X (STVEBX, "stvebx") +BU_ALTIVEC_OVERLOAD_X (STVEHX, "stvehx") +BU_ALTIVEC_OVERLOAD_X (STVEWX, "stvewx") +BU_ALTIVEC_OVERLOAD_X (STVLX, "stvlx") +BU_ALTIVEC_OVERLOAD_X (STVLXL, "stvlxl") +BU_ALTIVEC_OVERLOAD_X (STVRX, "stvrx") +BU_ALTIVEC_OVERLOAD_X (STVRXL, "stvrxl") +BU_ALTIVEC_OVERLOAD_X (VCFSX, "vcfsx") +BU_ALTIVEC_OVERLOAD_X (VCFUX, "vcfux") +BU_ALTIVEC_OVERLOAD_X (VSPLTB, "vspltb") +BU_ALTIVEC_OVERLOAD_X (VSPLTH, "vsplth") +BU_ALTIVEC_OVERLOAD_X (VSPLTW, "vspltw") + +/* 3 argument VSX builtins. 
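   Usage illustration (a sketch, assuming a -mvsx build; the builtin name
   follows from the XVMADDDP entry below):

     #include <altivec.h>

     vector double
     fma_v2df (vector double a, vector double b, vector double c)
     {
       return __builtin_vsx_xvmadddp (a, b, c);
     }

   This is the double-precision fused multiply-add over both vector lanes.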
*/ +BU_VSX_3 (XVMADDSP, "xvmaddsp", CONST, fmav4sf4) +BU_VSX_3 (XVMSUBSP, "xvmsubsp", CONST, fmsv4sf4) +BU_VSX_3 (XVNMADDSP, "xvnmaddsp", CONST, nfmav4sf4) +BU_VSX_3 (XVNMSUBSP, "xvnmsubsp", CONST, nfmsv4sf4) + +BU_VSX_3 (XVMADDDP, "xvmadddp", CONST, fmav2df4) +BU_VSX_3 (XVMSUBDP, "xvmsubdp", CONST, fmsv2df4) +BU_VSX_3 (XVNMADDDP, "xvnmadddp", CONST, nfmav2df4) +BU_VSX_3 (XVNMSUBDP, "xvnmsubdp", CONST, nfmsv2df4) + +BU_VSX_3 (XXSEL_1TI, "xxsel_1ti", CONST, vector_select_v1ti) +BU_VSX_3 (XXSEL_2DI, "xxsel_2di", CONST, vector_select_v2di) +BU_VSX_3 (XXSEL_2DF, "xxsel_2df", CONST, vector_select_v2df) +BU_VSX_3 (XXSEL_4SF, "xxsel_4sf", CONST, vector_select_v4sf) +BU_VSX_3 (XXSEL_4SI, "xxsel_4si", CONST, vector_select_v4si) +BU_VSX_3 (XXSEL_8HI, "xxsel_8hi", CONST, vector_select_v8hi) +BU_VSX_3 (XXSEL_16QI, "xxsel_16qi", CONST, vector_select_v16qi) +BU_VSX_3 (XXSEL_1TI_UNS, "xxsel_1ti_uns", CONST, vector_select_v1ti_uns) +BU_VSX_3 (XXSEL_2DI_UNS, "xxsel_2di_uns", CONST, vector_select_v2di_uns) +BU_VSX_3 (XXSEL_4SI_UNS, "xxsel_4si_uns", CONST, vector_select_v4si_uns) +BU_VSX_3 (XXSEL_8HI_UNS, "xxsel_8hi_uns", CONST, vector_select_v8hi_uns) +BU_VSX_3 (XXSEL_16QI_UNS, "xxsel_16qi_uns", CONST, vector_select_v16qi_uns) + +BU_VSX_3 (VPERM_1TI, "vperm_1ti", CONST, altivec_vperm_v1ti) +BU_VSX_3 (VPERM_2DI, "vperm_2di", CONST, altivec_vperm_v2di) +BU_VSX_3 (VPERM_2DF, "vperm_2df", CONST, altivec_vperm_v2df) +BU_VSX_3 (VPERM_4SF, "vperm_4sf", CONST, altivec_vperm_v4sf) +BU_VSX_3 (VPERM_4SI, "vperm_4si", CONST, altivec_vperm_v4si) +BU_VSX_3 (VPERM_8HI, "vperm_8hi", CONST, altivec_vperm_v8hi) +BU_VSX_3 (VPERM_16QI, "vperm_16qi", CONST, altivec_vperm_v16qi) +BU_VSX_3 (VPERM_1TI_UNS, "vperm_1ti_uns", CONST, altivec_vperm_v1ti_uns) +BU_VSX_3 (VPERM_2DI_UNS, "vperm_2di_uns", CONST, altivec_vperm_v2di_uns) +BU_VSX_3 (VPERM_4SI_UNS, "vperm_4si_uns", CONST, altivec_vperm_v4si_uns) +BU_VSX_3 (VPERM_8HI_UNS, "vperm_8hi_uns", CONST, altivec_vperm_v8hi_uns) +BU_VSX_3 (VPERM_16QI_UNS, "vperm_16qi_uns", CONST, altivec_vperm_v16qi_uns) + +BU_VSX_3 (XXPERMDI_1TI, "xxpermdi_1ti", CONST, vsx_xxpermdi_v1ti) +BU_VSX_3 (XXPERMDI_2DF, "xxpermdi_2df", CONST, vsx_xxpermdi_v2df) +BU_VSX_3 (XXPERMDI_2DI, "xxpermdi_2di", CONST, vsx_xxpermdi_v2di) +BU_VSX_3 (XXPERMDI_4SF, "xxpermdi_4sf", CONST, vsx_xxpermdi_v4sf) +BU_VSX_3 (XXPERMDI_4SI, "xxpermdi_4si", CONST, vsx_xxpermdi_v4si) +BU_VSX_3 (XXPERMDI_8HI, "xxpermdi_8hi", CONST, vsx_xxpermdi_v8hi) +BU_VSX_3 (XXPERMDI_16QI, "xxpermdi_16qi", CONST, vsx_xxpermdi_v16qi) +BU_VSX_3 (SET_1TI, "set_1ti", CONST, vsx_set_v1ti) +BU_VSX_3 (SET_2DF, "set_2df", CONST, vsx_set_v2df) +BU_VSX_3 (SET_2DI, "set_2di", CONST, vsx_set_v2di) +BU_VSX_3 (XXSLDWI_2DI, "xxsldwi_2di", CONST, vsx_xxsldwi_v2di) +BU_VSX_3 (XXSLDWI_2DF, "xxsldwi_2df", CONST, vsx_xxsldwi_v2df) +BU_VSX_3 (XXSLDWI_4SF, "xxsldwi_4sf", CONST, vsx_xxsldwi_v4sf) +BU_VSX_3 (XXSLDWI_4SI, "xxsldwi_4si", CONST, vsx_xxsldwi_v4si) +BU_VSX_3 (XXSLDWI_8HI, "xxsldwi_8hi", CONST, vsx_xxsldwi_v8hi) +BU_VSX_3 (XXSLDWI_16QI, "xxsldwi_16qi", CONST, vsx_xxsldwi_v16qi) + +/* 2 argument VSX builtins. 
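   Usage illustration (a sketch, assuming a -mvsx build; the builtin name
   follows from the XVDIVDP entry below):

     #include <altivec.h>

     vector double
     div_v2df (vector double num, vector double den)
     {
       return __builtin_vsx_xvdivdp (num, den);
     }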
*/ +BU_VSX_2 (XVADDDP, "xvadddp", FP, addv2df3) +BU_VSX_2 (XVSUBDP, "xvsubdp", FP, subv2df3) +BU_VSX_2 (XVMULDP, "xvmuldp", FP, mulv2df3) +BU_VSX_2 (XVDIVDP, "xvdivdp", FP, divv2df3) +BU_VSX_2 (RECIP_V2DF, "xvrecipdivdp", FP, recipv2df3) +BU_VSX_2 (XVMINDP, "xvmindp", CONST, sminv2df3) +BU_VSX_2 (XVMAXDP, "xvmaxdp", CONST, smaxv2df3) +BU_VSX_2 (XVTDIVDP_FE, "xvtdivdp_fe", CONST, vsx_tdivv2df3_fe) +BU_VSX_2 (XVTDIVDP_FG, "xvtdivdp_fg", CONST, vsx_tdivv2df3_fg) +BU_VSX_2 (XVCMPEQDP, "xvcmpeqdp", CONST, vector_eqv2df) +BU_VSX_2 (XVCMPGTDP, "xvcmpgtdp", CONST, vector_gtv2df) +BU_VSX_2 (XVCMPGEDP, "xvcmpgedp", CONST, vector_gev2df) + +BU_VSX_2 (XVADDSP, "xvaddsp", FP, addv4sf3) +BU_VSX_2 (XVSUBSP, "xvsubsp", FP, subv4sf3) +BU_VSX_2 (XVMULSP, "xvmulsp", FP, mulv4sf3) +BU_VSX_2 (XVDIVSP, "xvdivsp", FP, divv4sf3) +BU_VSX_2 (RECIP_V4SF, "xvrecipdivsp", FP, recipv4sf3) +BU_VSX_2 (XVMINSP, "xvminsp", CONST, sminv4sf3) +BU_VSX_2 (XVMAXSP, "xvmaxsp", CONST, smaxv4sf3) +BU_VSX_2 (XVTDIVSP_FE, "xvtdivsp_fe", CONST, vsx_tdivv4sf3_fe) +BU_VSX_2 (XVTDIVSP_FG, "xvtdivsp_fg", CONST, vsx_tdivv4sf3_fg) +BU_VSX_2 (XVCMPEQSP, "xvcmpeqsp", CONST, vector_eqv4sf) +BU_VSX_2 (XVCMPGTSP, "xvcmpgtsp", CONST, vector_gtv4sf) +BU_VSX_2 (XVCMPGESP, "xvcmpgesp", CONST, vector_gev4sf) + +BU_VSX_2 (XSMINDP, "xsmindp", CONST, smindf3) +BU_VSX_2 (XSMAXDP, "xsmaxdp", CONST, smaxdf3) +BU_VSX_2 (XSTDIVDP_FE, "xstdivdp_fe", CONST, vsx_tdivdf3_fe) +BU_VSX_2 (XSTDIVDP_FG, "xstdivdp_fg", CONST, vsx_tdivdf3_fg) +BU_VSX_2 (CPSGNDP, "cpsgndp", CONST, vector_copysignv2df3) +BU_VSX_2 (CPSGNSP, "cpsgnsp", CONST, vector_copysignv4sf3) + +BU_VSX_2 (CONCAT_2DF, "concat_2df", CONST, vsx_concat_v2df) +BU_VSX_2 (CONCAT_2DI, "concat_2di", CONST, vsx_concat_v2di) +BU_VSX_2 (SPLAT_2DF, "splat_2df", CONST, vsx_splat_v2df) +BU_VSX_2 (SPLAT_2DI, "splat_2di", CONST, vsx_splat_v2di) +BU_VSX_2 (XXMRGHW_4SF, "xxmrghw", CONST, vsx_xxmrghw_v4sf) +BU_VSX_2 (XXMRGHW_4SI, "xxmrghw_4si", CONST, vsx_xxmrghw_v4si) +BU_VSX_2 (XXMRGLW_4SF, "xxmrglw", CONST, vsx_xxmrglw_v4sf) +BU_VSX_2 (XXMRGLW_4SI, "xxmrglw_4si", CONST, vsx_xxmrglw_v4si) +BU_VSX_2 (VEC_MERGEL_V2DF, "mergel_2df", CONST, vsx_mergel_v2df) +BU_VSX_2 (VEC_MERGEL_V2DI, "mergel_2di", CONST, vsx_mergel_v2di) +BU_VSX_2 (VEC_MERGEH_V2DF, "mergeh_2df", CONST, vsx_mergeh_v2df) +BU_VSX_2 (VEC_MERGEH_V2DI, "mergeh_2di", CONST, vsx_mergeh_v2di) +BU_VSX_2 (XXSPLTD_V2DF, "xxspltd_2df", CONST, vsx_xxspltd_v2df) +BU_VSX_2 (XXSPLTD_V2DI, "xxspltd_2di", CONST, vsx_xxspltd_v2di) +BU_VSX_2 (DIV_V2DI, "div_2di", CONST, vsx_div_v2di) +BU_VSX_2 (UDIV_V2DI, "udiv_2di", CONST, vsx_udiv_v2di) +BU_VSX_2 (MUL_V2DI, "mul_2di", CONST, vsx_mul_v2di) + +BU_VSX_2 (XVCVSXDDP_SCALE, "xvcvsxddp_scale", CONST, vsx_xvcvsxddp_scale) +BU_VSX_2 (XVCVUXDDP_SCALE, "xvcvuxddp_scale", CONST, vsx_xvcvuxddp_scale) +BU_VSX_2 (XVCVDPSXDS_SCALE, "xvcvdpsxds_scale", CONST, vsx_xvcvdpsxds_scale) +BU_VSX_2 (XVCVDPUXDS_SCALE, "xvcvdpuxds_scale", CONST, vsx_xvcvdpuxds_scale) + +BU_VSX_2 (CMPGE_16QI, "cmpge_16qi", CONST, vector_nltv16qi) +BU_VSX_2 (CMPGE_8HI, "cmpge_8hi", CONST, vector_nltv8hi) +BU_VSX_2 (CMPGE_4SI, "cmpge_4si", CONST, vector_nltv4si) +BU_VSX_2 (CMPGE_2DI, "cmpge_2di", CONST, vector_nltv2di) +BU_VSX_2 (CMPGE_U16QI, "cmpge_u16qi", CONST, vector_nltuv16qi) +BU_VSX_2 (CMPGE_U8HI, "cmpge_u8hi", CONST, vector_nltuv8hi) +BU_VSX_2 (CMPGE_U4SI, "cmpge_u4si", CONST, vector_nltuv4si) +BU_VSX_2 (CMPGE_U2DI, "cmpge_u2di", CONST, vector_nltuv2di) + +BU_VSX_2 (CMPLE_16QI, "cmple_16qi", CONST, vector_ngtv16qi) +BU_VSX_2 (CMPLE_8HI, "cmple_8hi", CONST, 
vector_ngtv8hi) +BU_VSX_2 (CMPLE_4SI, "cmple_4si", CONST, vector_ngtv4si) +BU_VSX_2 (CMPLE_2DI, "cmple_2di", CONST, vector_ngtv2di) +BU_VSX_2 (CMPLE_U16QI, "cmple_u16qi", CONST, vector_ngtuv16qi) +BU_VSX_2 (CMPLE_U8HI, "cmple_u8hi", CONST, vector_ngtuv8hi) +BU_VSX_2 (CMPLE_U4SI, "cmple_u4si", CONST, vector_ngtuv4si) +BU_VSX_2 (CMPLE_U2DI, "cmple_u2di", CONST, vector_ngtuv2di) + +/* VSX abs builtin functions. */ +BU_VSX_A (XVABSDP, "xvabsdp", CONST, absv2df2) +BU_VSX_A (XVNABSDP, "xvnabsdp", CONST, vsx_nabsv2df2) +BU_VSX_A (XVABSSP, "xvabssp", CONST, absv4sf2) +BU_VSX_A (XVNABSSP, "xvnabssp", CONST, vsx_nabsv4sf2) + +/* 1 argument VSX builtin functions. */ +BU_VSX_1 (XVNEGDP, "xvnegdp", CONST, negv2df2) +BU_VSX_1 (XVSQRTDP, "xvsqrtdp", CONST, sqrtv2df2) +BU_VSX_1 (RSQRT_2DF, "xvrsqrtdp", CONST, rsqrtv2df2) +BU_VSX_1 (XVRSQRTEDP, "xvrsqrtedp", CONST, rsqrtev2df2) +BU_VSX_1 (XVTSQRTDP_FE, "xvtsqrtdp_fe", CONST, vsx_tsqrtv2df2_fe) +BU_VSX_1 (XVTSQRTDP_FG, "xvtsqrtdp_fg", CONST, vsx_tsqrtv2df2_fg) +BU_VSX_1 (XVREDP, "xvredp", CONST, vsx_frev2df2) + +BU_VSX_1 (XVNEGSP, "xvnegsp", CONST, negv4sf2) +BU_VSX_1 (XVSQRTSP, "xvsqrtsp", CONST, sqrtv4sf2) +BU_VSX_1 (RSQRT_4SF, "xvrsqrtsp", CONST, rsqrtv4sf2) +BU_VSX_1 (XVRSQRTESP, "xvrsqrtesp", CONST, rsqrtev4sf2) +BU_VSX_1 (XVTSQRTSP_FE, "xvtsqrtsp_fe", CONST, vsx_tsqrtv4sf2_fe) +BU_VSX_1 (XVTSQRTSP_FG, "xvtsqrtsp_fg", CONST, vsx_tsqrtv4sf2_fg) +BU_VSX_1 (XVRESP, "xvresp", CONST, vsx_frev4sf2) + +BU_VSX_1 (XSCVDPSP, "xscvdpsp", CONST, vsx_xscvdpsp) +BU_VSX_1 (XSCVSPDP, "xscvspdp", CONST, vsx_xscvspdp) +BU_VSX_1 (XVCVDPSP, "xvcvdpsp", CONST, vsx_xvcvdpsp) +BU_VSX_1 (XVCVSPDP, "xvcvspdp", CONST, vsx_xvcvspdp) +BU_VSX_1 (XSTSQRTDP_FE, "xstsqrtdp_fe", CONST, vsx_tsqrtdf2_fe) +BU_VSX_1 (XSTSQRTDP_FG, "xstsqrtdp_fg", CONST, vsx_tsqrtdf2_fg) + +BU_VSX_1 (XVCVDPSXDS, "xvcvdpsxds", CONST, vsx_fix_truncv2dfv2di2) +BU_VSX_1 (XVCVDPUXDS, "xvcvdpuxds", CONST, vsx_fixuns_truncv2dfv2di2) +BU_VSX_1 (XVCVDPUXDS_UNS, "xvcvdpuxds_uns", CONST, vsx_fixuns_truncv2dfv2di2) +BU_VSX_1 (XVCVSXDDP, "xvcvsxddp", CONST, vsx_floatv2div2df2) +BU_VSX_1 (XVCVUXDDP, "xvcvuxddp", CONST, vsx_floatunsv2div2df2) +BU_VSX_1 (XVCVUXDDP_UNS, "xvcvuxddp_uns", CONST, vsx_floatunsv2div2df2) + +BU_VSX_1 (XVCVSPSXWS, "xvcvspsxws", CONST, vsx_fix_truncv4sfv4si2) +BU_VSX_1 (XVCVSPUXWS, "xvcvspuxws", CONST, vsx_fixuns_truncv4sfv4si2) +BU_VSX_1 (XVCVSXWSP, "xvcvsxwsp", CONST, vsx_floatv4siv4sf2) +BU_VSX_1 (XVCVUXWSP, "xvcvuxwsp", CONST, vsx_floatunsv4siv4sf2) + +BU_VSX_1 (XVCVDPSXWS, "xvcvdpsxws", CONST, vsx_xvcvdpsxws) +BU_VSX_1 (XVCVDPUXWS, "xvcvdpuxws", CONST, vsx_xvcvdpuxws) +BU_VSX_1 (XVCVSXWDP, "xvcvsxwdp", CONST, vsx_xvcvsxwdp) +BU_VSX_1 (XVCVUXWDP, "xvcvuxwdp", CONST, vsx_xvcvuxwdp) +BU_VSX_1 (XVRDPI, "xvrdpi", CONST, vsx_xvrdpi) +BU_VSX_1 (XVRDPIC, "xvrdpic", CONST, vsx_xvrdpic) +BU_VSX_1 (XVRDPIM, "xvrdpim", CONST, vsx_floorv2df2) +BU_VSX_1 (XVRDPIP, "xvrdpip", CONST, vsx_ceilv2df2) +BU_VSX_1 (XVRDPIZ, "xvrdpiz", CONST, vsx_btruncv2df2) + +BU_VSX_1 (XVCVSPSXDS, "xvcvspsxds", CONST, vsx_xvcvspsxds) +BU_VSX_1 (XVCVSPUXDS, "xvcvspuxds", CONST, vsx_xvcvspuxds) +BU_VSX_1 (XVCVSXDSP, "xvcvsxdsp", CONST, vsx_xvcvsxdsp) +BU_VSX_1 (XVCVUXDSP, "xvcvuxdsp", CONST, vsx_xvcvuxdsp) +BU_VSX_1 (XVRSPI, "xvrspi", CONST, vsx_xvrspi) +BU_VSX_1 (XVRSPIC, "xvrspic", CONST, vsx_xvrspic) +BU_VSX_1 (XVRSPIM, "xvrspim", CONST, vsx_floorv4sf2) +BU_VSX_1 (XVRSPIP, "xvrspip", CONST, vsx_ceilv4sf2) +BU_VSX_1 (XVRSPIZ, "xvrspiz", CONST, vsx_btruncv4sf2) + +BU_VSX_1 (XSRDPI, "xsrdpi", CONST, vsx_xsrdpi) +BU_VSX_1 (XSRDPIC, 
"xsrdpic", CONST, vsx_xsrdpic) +BU_VSX_1 (XSRDPIM, "xsrdpim", CONST, floordf2) +BU_VSX_1 (XSRDPIP, "xsrdpip", CONST, ceildf2) +BU_VSX_1 (XSRDPIZ, "xsrdpiz", CONST, btruncdf2) + +/* VSX predicate functions. */ +BU_VSX_P (XVCMPEQSP_P, "xvcmpeqsp_p", CONST, vector_eq_v4sf_p) +BU_VSX_P (XVCMPGESP_P, "xvcmpgesp_p", CONST, vector_ge_v4sf_p) +BU_VSX_P (XVCMPGTSP_P, "xvcmpgtsp_p", CONST, vector_gt_v4sf_p) +BU_VSX_P (XVCMPEQDP_P, "xvcmpeqdp_p", CONST, vector_eq_v2df_p) +BU_VSX_P (XVCMPGEDP_P, "xvcmpgedp_p", CONST, vector_ge_v2df_p) +BU_VSX_P (XVCMPGTDP_P, "xvcmpgtdp_p", CONST, vector_gt_v2df_p) + +/* VSX builtins that are handled as special cases. */ +BU_VSX_X (LXSDX, "lxsdx", MEM) +BU_VSX_X (LXVD2X_V1TI, "lxvd2x_v1ti", MEM) +BU_VSX_X (LXVD2X_V2DF, "lxvd2x_v2df", MEM) +BU_VSX_X (LXVD2X_V2DI, "lxvd2x_v2di", MEM) +BU_VSX_X (LXVDSX, "lxvdsx", MEM) +BU_VSX_X (LXVW4X_V4SF, "lxvw4x_v4sf", MEM) +BU_VSX_X (LXVW4X_V4SI, "lxvw4x_v4si", MEM) +BU_VSX_X (LXVW4X_V8HI, "lxvw4x_v8hi", MEM) +BU_VSX_X (LXVW4X_V16QI, "lxvw4x_v16qi", MEM) +BU_VSX_X (STXSDX, "stxsdx", MEM) +BU_VSX_X (STXVD2X_V1TI, "stxvd2x_v1ti", MEM) +BU_VSX_X (STXVD2X_V2DF, "stxvd2x_v2df", MEM) +BU_VSX_X (STXVD2X_V2DI, "stxvd2x_v2di", MEM) +BU_VSX_X (STXVW4X_V4SF, "stxvw4x_v4sf", MEM) +BU_VSX_X (STXVW4X_V4SI, "stxvw4x_v4si", MEM) +BU_VSX_X (STXVW4X_V8HI, "stxvw4x_v8hi", MEM) +BU_VSX_X (STXVW4X_V16QI, "stxvw4x_v16qi", MEM) +BU_VSX_X (LD_ELEMREV_V2DF, "ld_elemrev_v2df", MEM) +BU_VSX_X (LD_ELEMREV_V2DI, "ld_elemrev_v2di", MEM) +BU_VSX_X (LD_ELEMREV_V4SF, "ld_elemrev_v4sf", MEM) +BU_VSX_X (LD_ELEMREV_V4SI, "ld_elemrev_v4si", MEM) +BU_VSX_X (LD_ELEMREV_V8HI, "ld_elemrev_v8hi", MEM) +BU_VSX_X (LD_ELEMREV_V16QI, "ld_elemrev_v16qi", MEM) +BU_VSX_X (ST_ELEMREV_V2DF, "st_elemrev_v2df", MEM) +BU_VSX_X (ST_ELEMREV_V2DI, "st_elemrev_v2di", MEM) +BU_VSX_X (ST_ELEMREV_V4SF, "st_elemrev_v4sf", MEM) +BU_VSX_X (ST_ELEMREV_V4SI, "st_elemrev_v4si", MEM) +BU_VSX_X (ST_ELEMREV_V8HI, "st_elemrev_v8hi", MEM) +BU_VSX_X (ST_ELEMREV_V16QI, "st_elemrev_v16qi", MEM) +BU_VSX_X (XSABSDP, "xsabsdp", CONST) +BU_VSX_X (XSADDDP, "xsadddp", FP) +BU_VSX_X (XSCMPODP, "xscmpodp", FP) +BU_VSX_X (XSCMPUDP, "xscmpudp", FP) +BU_VSX_X (XSCVDPSXDS, "xscvdpsxds", FP) +BU_VSX_X (XSCVDPSXWS, "xscvdpsxws", FP) +BU_VSX_X (XSCVDPUXDS, "xscvdpuxds", FP) +BU_VSX_X (XSCVDPUXWS, "xscvdpuxws", FP) +BU_VSX_X (XSCVSXDDP, "xscvsxddp", FP) +BU_VSX_X (XSCVUXDDP, "xscvuxddp", FP) +BU_VSX_X (XSDIVDP, "xsdivdp", FP) +BU_VSX_X (XSMADDADP, "xsmaddadp", FP) +BU_VSX_X (XSMADDMDP, "xsmaddmdp", FP) +BU_VSX_X (XSMOVDP, "xsmovdp", FP) +BU_VSX_X (XSMSUBADP, "xsmsubadp", FP) +BU_VSX_X (XSMSUBMDP, "xsmsubmdp", FP) +BU_VSX_X (XSMULDP, "xsmuldp", FP) +BU_VSX_X (XSNABSDP, "xsnabsdp", FP) +BU_VSX_X (XSNEGDP, "xsnegdp", FP) +BU_VSX_X (XSNMADDADP, "xsnmaddadp", FP) +BU_VSX_X (XSNMADDMDP, "xsnmaddmdp", FP) +BU_VSX_X (XSNMSUBADP, "xsnmsubadp", FP) +BU_VSX_X (XSNMSUBMDP, "xsnmsubmdp", FP) +BU_VSX_X (XSSUBDP, "xssubdp", FP) +BU_VSX_X (VEC_INIT_V1TI, "vec_init_v1ti", CONST) +BU_VSX_X (VEC_INIT_V2DF, "vec_init_v2df", CONST) +BU_VSX_X (VEC_INIT_V2DI, "vec_init_v2di", CONST) +BU_VSX_X (VEC_SET_V1TI, "vec_set_v1ti", CONST) +BU_VSX_X (VEC_SET_V2DF, "vec_set_v2df", CONST) +BU_VSX_X (VEC_SET_V2DI, "vec_set_v2di", CONST) +BU_VSX_X (VEC_EXT_V1TI, "vec_ext_v1ti", CONST) +BU_VSX_X (VEC_EXT_V2DF, "vec_ext_v2df", CONST) +BU_VSX_X (VEC_EXT_V2DI, "vec_ext_v2di", CONST) + +/* VSX overloaded builtins, add the overloaded functions not present in + Altivec. */ + +/* 3 argument VSX overloaded builtins. 
*/ +BU_VSX_OVERLOAD_3 (MSUB, "msub") +BU_VSX_OVERLOAD_3 (NMADD, "nmadd") +BU_VSX_OVERLOAD_3V (XXPERMDI, "xxpermdi") +BU_VSX_OVERLOAD_3V (XXSLDWI, "xxsldwi") + +/* 2 argument VSX overloaded builtin functions. */ +BU_VSX_OVERLOAD_2 (DIV, "div") +BU_VSX_OVERLOAD_2 (XXMRGHW, "xxmrghw") +BU_VSX_OVERLOAD_2 (XXMRGLW, "xxmrglw") +BU_VSX_OVERLOAD_2 (XXSPLTD, "xxspltd") +BU_VSX_OVERLOAD_2 (XXSPLTW, "xxspltw") + +/* 1 argument VSX overloaded builtin functions. */ +BU_VSX_OVERLOAD_1 (DOUBLE, "double") + +/* VSX builtins that are handled as special cases. */ +BU_VSX_OVERLOAD_X (LD, "ld") +BU_VSX_OVERLOAD_X (ST, "st") +BU_VSX_OVERLOAD_X (XL, "xl") +BU_VSX_OVERLOAD_X (XST, "xst") + +/* 2 argument CMPB instructions added in ISA 2.05. */ +BU_P6_2 (CMPB_32, "cmpb_32", CONST, cmpbsi3) +BU_P6_64BIT_2 (CMPB, "cmpb", CONST, cmpbdi3) + +/* 1 argument VSX instructions added in ISA 2.07. */ +BU_P8V_VSX_1 (XSCVSPDPN, "xscvspdpn", CONST, vsx_xscvspdpn) +BU_P8V_VSX_1 (XSCVDPSPN, "xscvdpspn", CONST, vsx_xscvdpspn) + +/* 1 argument altivec instructions added in ISA 2.07. */ +BU_P8V_AV_1 (ABS_V2DI, "abs_v2di", CONST, absv2di2) +BU_P8V_AV_1 (VUPKHSW, "vupkhsw", CONST, altivec_vupkhsw) +BU_P8V_AV_1 (VUPKLSW, "vupklsw", CONST, altivec_vupklsw) +BU_P8V_AV_1 (VCLZB, "vclzb", CONST, clzv16qi2) +BU_P8V_AV_1 (VCLZH, "vclzh", CONST, clzv8hi2) +BU_P8V_AV_1 (VCLZW, "vclzw", CONST, clzv4si2) +BU_P8V_AV_1 (VCLZD, "vclzd", CONST, clzv2di2) +BU_P8V_AV_1 (VPOPCNTB, "vpopcntb", CONST, popcountv16qi2) +BU_P8V_AV_1 (VPOPCNTH, "vpopcnth", CONST, popcountv8hi2) +BU_P8V_AV_1 (VPOPCNTW, "vpopcntw", CONST, popcountv4si2) +BU_P8V_AV_1 (VPOPCNTD, "vpopcntd", CONST, popcountv2di2) +BU_P8V_AV_1 (VPOPCNTUB, "vpopcntub", CONST, popcountv16qi2) +BU_P8V_AV_1 (VPOPCNTUH, "vpopcntuh", CONST, popcountv8hi2) +BU_P8V_AV_1 (VPOPCNTUW, "vpopcntuw", CONST, popcountv4si2) +BU_P8V_AV_1 (VPOPCNTUD, "vpopcntud", CONST, popcountv2di2) +BU_P8V_AV_1 (VGBBD, "vgbbd", CONST, p8v_vgbbd) + +/* 2 argument altivec instructions added in ISA 2.07. 
*/ +BU_P8V_AV_2 (VADDCUQ, "vaddcuq", CONST, altivec_vaddcuq) +BU_P8V_AV_2 (VADDUDM, "vaddudm", CONST, addv2di3) +BU_P8V_AV_2 (VADDUQM, "vadduqm", CONST, altivec_vadduqm) +BU_P8V_AV_2 (VMINSD, "vminsd", CONST, sminv2di3) +BU_P8V_AV_2 (VMAXSD, "vmaxsd", CONST, smaxv2di3) +BU_P8V_AV_2 (VMINUD, "vminud", CONST, uminv2di3) +BU_P8V_AV_2 (VMAXUD, "vmaxud", CONST, umaxv2di3) +BU_P8V_AV_2 (VMRGEW, "vmrgew", CONST, p8_vmrgew) +BU_P8V_AV_2 (VMRGOW, "vmrgow", CONST, p8_vmrgow) +BU_P8V_AV_2 (VBPERMQ, "vbpermq", CONST, altivec_vbpermq) +BU_P8V_AV_2 (VBPERMQ2, "vbpermq2", CONST, altivec_vbpermq2) +BU_P8V_AV_2 (VPKUDUM, "vpkudum", CONST, altivec_vpkudum) +BU_P8V_AV_2 (VPKSDSS, "vpksdss", CONST, altivec_vpksdss) +BU_P8V_AV_2 (VPKUDUS, "vpkudus", CONST, altivec_vpkudus) +BU_P8V_AV_2 (VPKSDUS, "vpksdus", CONST, altivec_vpksdus) +BU_P8V_AV_2 (VPMSUMB, "vpmsumb", CONST, crypto_vpmsumb) +BU_P8V_AV_2 (VPMSUMH, "vpmsumh", CONST, crypto_vpmsumh) +BU_P8V_AV_2 (VPMSUMW, "vpmsumw", CONST, crypto_vpmsumw) +BU_P8V_AV_2 (VPMSUMD, "vpmsumd", CONST, crypto_vpmsumd) +BU_P8V_AV_2 (VRLD, "vrld", CONST, vrotlv2di3) +BU_P8V_AV_2 (VSLD, "vsld", CONST, vashlv2di3) +BU_P8V_AV_2 (VSRD, "vsrd", CONST, vlshrv2di3) +BU_P8V_AV_2 (VSRAD, "vsrad", CONST, vashrv2di3) +BU_P8V_AV_2 (VSUBCUQ, "vsubcuq", CONST, altivec_vsubcuq) +BU_P8V_AV_2 (VSUBUDM, "vsubudm", CONST, subv2di3) +BU_P8V_AV_2 (VSUBUQM, "vsubuqm", CONST, altivec_vsubuqm) + +BU_P8V_AV_2 (EQV_V16QI, "eqv_v16qi", CONST, eqvv16qi3) +BU_P8V_AV_2 (EQV_V8HI, "eqv_v8hi", CONST, eqvv8hi3) +BU_P8V_AV_2 (EQV_V4SI, "eqv_v4si", CONST, eqvv4si3) +BU_P8V_AV_2 (EQV_V2DI, "eqv_v2di", CONST, eqvv2di3) +BU_P8V_AV_2 (EQV_V1TI, "eqv_v1ti", CONST, eqvv1ti3) +BU_P8V_AV_2 (EQV_V4SF, "eqv_v4sf", CONST, eqvv4sf3) +BU_P8V_AV_2 (EQV_V2DF, "eqv_v2df", CONST, eqvv2df3) + +BU_P8V_AV_2 (NAND_V16QI, "nand_v16qi", CONST, nandv16qi3) +BU_P8V_AV_2 (NAND_V8HI, "nand_v8hi", CONST, nandv8hi3) +BU_P8V_AV_2 (NAND_V4SI, "nand_v4si", CONST, nandv4si3) +BU_P8V_AV_2 (NAND_V2DI, "nand_v2di", CONST, nandv2di3) +BU_P8V_AV_2 (NAND_V1TI, "nand_v1ti", CONST, nandv1ti3) +BU_P8V_AV_2 (NAND_V4SF, "nand_v4sf", CONST, nandv4sf3) +BU_P8V_AV_2 (NAND_V2DF, "nand_v2df", CONST, nandv2df3) + +BU_P8V_AV_2 (ORC_V16QI, "orc_v16qi", CONST, orcv16qi3) +BU_P8V_AV_2 (ORC_V8HI, "orc_v8hi", CONST, orcv8hi3) +BU_P8V_AV_2 (ORC_V4SI, "orc_v4si", CONST, orcv4si3) +BU_P8V_AV_2 (ORC_V2DI, "orc_v2di", CONST, orcv2di3) +BU_P8V_AV_2 (ORC_V1TI, "orc_v1ti", CONST, orcv1ti3) +BU_P8V_AV_2 (ORC_V4SF, "orc_v4sf", CONST, orcv4sf3) +BU_P8V_AV_2 (ORC_V2DF, "orc_v2df", CONST, orcv2df3) + +/* 3 argument altivec instructions added in ISA 2.07. */ +BU_P8V_AV_3 (VADDEUQM, "vaddeuqm", CONST, altivec_vaddeuqm) +BU_P8V_AV_3 (VADDECUQ, "vaddecuq", CONST, altivec_vaddecuq) +BU_P8V_AV_3 (VSUBEUQM, "vsubeuqm", CONST, altivec_vsubeuqm) +BU_P8V_AV_3 (VSUBECUQ, "vsubecuq", CONST, altivec_vsubecuq) + +/* Vector comparison instructions added in ISA 2.07. */ +BU_P8V_AV_2 (VCMPEQUD, "vcmpequd", CONST, vector_eqv2di) +BU_P8V_AV_2 (VCMPGTSD, "vcmpgtsd", CONST, vector_gtv2di) +BU_P8V_AV_2 (VCMPGTUD, "vcmpgtud", CONST, vector_gtuv2di) + +/* Vector comparison predicate instructions added in ISA 2.07. */ +BU_P8V_AV_P (VCMPEQUD_P, "vcmpequd_p", CONST, vector_eq_v2di_p) +BU_P8V_AV_P (VCMPGTSD_P, "vcmpgtsd_p", CONST, vector_gt_v2di_p) +BU_P8V_AV_P (VCMPGTUD_P, "vcmpgtud_p", CONST, vector_gtu_v2di_p) + +/* ISA 2.05 overloaded 2 argument functions. */ +BU_P6_OVERLOAD_2 (CMPB, "cmpb") + +/* ISA 2.07 vector overloaded 1 argument functions. 
*/ +BU_P8V_OVERLOAD_1 (VUPKHSW, "vupkhsw") +BU_P8V_OVERLOAD_1 (VUPKLSW, "vupklsw") +BU_P8V_OVERLOAD_1 (VCLZ, "vclz") +BU_P8V_OVERLOAD_1 (VCLZB, "vclzb") +BU_P8V_OVERLOAD_1 (VCLZH, "vclzh") +BU_P8V_OVERLOAD_1 (VCLZW, "vclzw") +BU_P8V_OVERLOAD_1 (VCLZD, "vclzd") +BU_P8V_OVERLOAD_1 (VPOPCNT, "vpopcnt") +BU_P8V_OVERLOAD_1 (VPOPCNTB, "vpopcntb") +BU_P8V_OVERLOAD_1 (VPOPCNTH, "vpopcnth") +BU_P8V_OVERLOAD_1 (VPOPCNTW, "vpopcntw") +BU_P8V_OVERLOAD_1 (VPOPCNTD, "vpopcntd") +BU_P8V_OVERLOAD_1 (VPOPCNTU, "vpopcntu") +BU_P8V_OVERLOAD_1 (VPOPCNTUB, "vpopcntub") +BU_P8V_OVERLOAD_1 (VPOPCNTUH, "vpopcntuh") +BU_P8V_OVERLOAD_1 (VPOPCNTUW, "vpopcntuw") +BU_P8V_OVERLOAD_1 (VPOPCNTUD, "vpopcntud") +BU_P8V_OVERLOAD_1 (VGBBD, "vgbbd") + +/* ISA 2.07 vector overloaded 2 argument functions. */ +BU_P8V_OVERLOAD_2 (EQV, "eqv") +BU_P8V_OVERLOAD_2 (NAND, "nand") +BU_P8V_OVERLOAD_2 (ORC, "orc") +BU_P8V_OVERLOAD_2 (VADDCUQ, "vaddcuq") +BU_P8V_OVERLOAD_2 (VADDUDM, "vaddudm") +BU_P8V_OVERLOAD_2 (VADDUQM, "vadduqm") +BU_P8V_OVERLOAD_2 (VBPERMQ, "vbpermq") +BU_P8V_OVERLOAD_2 (VMAXSD, "vmaxsd") +BU_P8V_OVERLOAD_2 (VMAXUD, "vmaxud") +BU_P8V_OVERLOAD_2 (VMINSD, "vminsd") +BU_P8V_OVERLOAD_2 (VMINUD, "vminud") +BU_P8V_OVERLOAD_2 (VMRGEW, "vmrgew") +BU_P8V_OVERLOAD_2 (VMRGOW, "vmrgow") +BU_P8V_OVERLOAD_2 (VPKSDSS, "vpksdss") +BU_P8V_OVERLOAD_2 (VPKSDUS, "vpksdus") +BU_P8V_OVERLOAD_2 (VPKUDUM, "vpkudum") +BU_P8V_OVERLOAD_2 (VPKUDUS, "vpkudus") +BU_P8V_OVERLOAD_2 (VPMSUM, "vpmsum") +BU_P8V_OVERLOAD_2 (VRLD, "vrld") +BU_P8V_OVERLOAD_2 (VSLD, "vsld") +BU_P8V_OVERLOAD_2 (VSRAD, "vsrad") +BU_P8V_OVERLOAD_2 (VSRD, "vsrd") +BU_P8V_OVERLOAD_2 (VSUBCUQ, "vsubcuq") +BU_P8V_OVERLOAD_2 (VSUBUDM, "vsubudm") +BU_P8V_OVERLOAD_2 (VSUBUQM, "vsubuqm") + +/* ISA 2.07 vector overloaded 3 argument functions. */ +BU_P8V_OVERLOAD_3 (VADDECUQ, "vaddecuq") +BU_P8V_OVERLOAD_3 (VADDEUQM, "vaddeuqm") +BU_P8V_OVERLOAD_3 (VSUBECUQ, "vsubecuq") +BU_P8V_OVERLOAD_3 (VSUBEUQM, "vsubeuqm") + +/* ISA 3.0 vector overloaded 2-argument functions. */ +BU_P9V_AV_2 (VSLV, "vslv", CONST, vslv) +BU_P9V_AV_2 (VSRV, "vsrv", CONST, vsrv) + +/* ISA 3.0 vector overloaded 2-argument functions. */ +BU_P9V_OVERLOAD_2 (VSLV, "vslv") +BU_P9V_OVERLOAD_2 (VSRV, "vsrv") + +/* 2 argument vector functions added in ISA 3.0 (power9). */ +BU_P9V_AV_2 (VADUB, "vadub", CONST, vaduv16qi3) +BU_P9V_AV_2 (VADUH, "vaduh", CONST, vaduv8hi3) +BU_P9V_AV_2 (VADUW, "vaduw", CONST, vaduv4si3) +BU_P9V_AV_2 (VRLWNM, "vrlwnm", CONST, altivec_vrlwnm) +BU_P9V_AV_2 (VRLDNM, "vrldnm", CONST, altivec_vrldnm) +BU_P9V_AV_2 (VBPERMD, "vbpermd", CONST, altivec_vbpermd) + +/* ISA 3.0 vector overloaded 2 argument functions. */ +BU_P9V_OVERLOAD_2 (VADU, "vadu") +BU_P9V_OVERLOAD_2 (VADUB, "vadub") +BU_P9V_OVERLOAD_2 (VADUH, "vaduh") +BU_P9V_OVERLOAD_2 (VADUW, "vaduw") +BU_P9V_OVERLOAD_2 (RLNM, "rlnm") +BU_P9V_OVERLOAD_2 (VBPERM, "vbperm_api") + +/* ISA 3.0 3-argument vector functions. */ +BU_P9V_AV_3 (VRLWMI, "vrlwmi", CONST, altivec_vrlwmi) +BU_P9V_AV_3 (VRLDMI, "vrldmi", CONST, altivec_vrldmi) + +/* ISA 3.0 vector overloaded 3-argument functions. */ +BU_P9V_OVERLOAD_3 (RLMI, "rlmi") + +/* 1 argument vsx scalar functions added in ISA 3.0 (power9). 
*/ +BU_P9V_64BIT_VSX_1 (VSEEDP, "scalar_extract_exp", CONST, xsxexpdp) +BU_P9V_64BIT_VSX_1 (VSESDP, "scalar_extract_sig", CONST, xsxsigdp) + +BU_P9V_VSX_1 (VSTDCNDP, "scalar_test_neg_dp", CONST, xststdcnegdp) +BU_P9V_VSX_1 (VSTDCNSP, "scalar_test_neg_sp", CONST, xststdcnegsp) + +BU_P9V_VSX_1 (XXBRQ_V16QI, "xxbrq_v16qi", CONST, p9_xxbrq_v16qi) +BU_P9V_VSX_1 (XXBRQ_V1TI, "xxbrq_v1ti", CONST, p9_xxbrq_v1ti) +BU_P9V_VSX_1 (XXBRD_V2DI, "xxbrd_v2di", CONST, p9_xxbrd_v2di) +BU_P9V_VSX_1 (XXBRD_V2DF, "xxbrd_v2df", CONST, p9_xxbrd_v2df) +BU_P9V_VSX_1 (XXBRW_V4SI, "xxbrw_v4si", CONST, p9_xxbrw_v4si) +BU_P9V_VSX_1 (XXBRW_V4SF, "xxbrw_v4sf", CONST, p9_xxbrw_v4sf) +BU_P9V_VSX_1 (XXBRH_V8HI, "xxbrh_v8hi", CONST, p9_xxbrh_v8hi) + +/* 2 argument vsx scalar functions added in ISA 3.0 (power9). */ +BU_P9V_64BIT_VSX_2 (VSIEDP, "scalar_insert_exp", CONST, xsiexpdp) +BU_P9V_64BIT_VSX_2 (VSIEDPF, "scalar_insert_exp_dp", CONST, xsiexpdpf) + +BU_P9V_VSX_2 (VSCEDPGT, "scalar_cmp_exp_dp_gt", CONST, xscmpexpdp_gt) +BU_P9V_VSX_2 (VSCEDPLT, "scalar_cmp_exp_dp_lt", CONST, xscmpexpdp_lt) +BU_P9V_VSX_2 (VSCEDPEQ, "scalar_cmp_exp_dp_eq", CONST, xscmpexpdp_eq) +BU_P9V_VSX_2 (VSCEDPUO, "scalar_cmp_exp_dp_unordered", CONST, xscmpexpdp_unordered) + +BU_P9V_VSX_2 (VSTDCDP, "scalar_test_data_class_dp", CONST, xststdcdp) +BU_P9V_VSX_2 (VSTDCSP, "scalar_test_data_class_sp", CONST, xststdcsp) + +/* ISA 3.0 vector scalar overloaded 1 argument functions. */ +BU_P9V_OVERLOAD_1 (VSEEDP, "scalar_extract_exp") +BU_P9V_OVERLOAD_1 (VSESDP, "scalar_extract_sig") + +BU_P9V_OVERLOAD_1 (VSTDCN, "scalar_test_neg") +BU_P9V_OVERLOAD_1 (VSTDCNDP, "scalar_test_neg_dp") +BU_P9V_OVERLOAD_1 (VSTDCNSP, "scalar_test_neg_sp") + +BU_P9V_OVERLOAD_1 (REVB, "revb") + +/* ISA 3.0 vector scalar overloaded 2 argument functions. */ +BU_P9V_OVERLOAD_2 (VSIEDP, "scalar_insert_exp") + +BU_P9V_OVERLOAD_2 (VSTDC, "scalar_test_data_class") +BU_P9V_OVERLOAD_2 (VSTDCDP, "scalar_test_data_class_dp") +BU_P9V_OVERLOAD_2 (VSTDCSP, "scalar_test_data_class_sp") + +BU_P9V_OVERLOAD_2 (VSCEDPGT, "scalar_cmp_exp_gt") +BU_P9V_OVERLOAD_2 (VSCEDPLT, "scalar_cmp_exp_lt") +BU_P9V_OVERLOAD_2 (VSCEDPEQ, "scalar_cmp_exp_eq") +BU_P9V_OVERLOAD_2 (VSCEDPUO, "scalar_cmp_exp_unordered") + +/* 1 argument vsx vector functions added in ISA 3.0 (power9). */ +BU_P9V_VSX_1 (VEEDP, "extract_exp_dp", CONST, xvxexpdp) +BU_P9V_VSX_1 (VEESP, "extract_exp_sp", CONST, xvxexpsp) +BU_P9V_VSX_1 (VESDP, "extract_sig_dp", CONST, xvxsigdp) +BU_P9V_VSX_1 (VESSP, "extract_sig_sp", CONST, xvxsigsp) + +/* 2 argument vsx vector functions added in ISA 3.0 (power9). */ +BU_P9V_VSX_2 (VIEDP, "insert_exp_dp", CONST, xviexpdp) +BU_P9V_VSX_2 (VIESP, "insert_exp_sp", CONST, xviexpsp) +BU_P9V_VSX_2 (VTDCDP, "test_data_class_dp", CONST, xvtstdcdp) +BU_P9V_VSX_2 (VTDCSP, "test_data_class_sp", CONST, xvtstdcsp) + +/* ISA 3.0 vector overloaded 1 argument functions. */ +BU_P9V_OVERLOAD_1 (VES, "extract_sig") +BU_P9V_OVERLOAD_1 (VESDP, "extract_sig_dp") +BU_P9V_OVERLOAD_1 (VESSP, "extract_sig_sp") + +BU_P9V_OVERLOAD_1 (VEE, "extract_exp") +BU_P9V_OVERLOAD_1 (VEEDP, "extract_exp_dp") +BU_P9V_OVERLOAD_1 (VEESP, "extract_exp_sp") + +/* ISA 3.0 vector overloaded 2 argument functions. */ +BU_P9V_OVERLOAD_2 (VTDC, "test_data_class") +BU_P9V_OVERLOAD_2 (VTDCDP, "test_data_class_dp") +BU_P9V_OVERLOAD_2 (VTDCSP, "test_data_class_sp") + +BU_P9V_OVERLOAD_2 (VIE, "insert_exp") +BU_P9V_OVERLOAD_2 (VIEDP, "insert_exp_dp") +BU_P9V_OVERLOAD_2 (VIESP, "insert_exp_sp") + +/* 2 argument vector functions added in ISA 3.0 (power9). 
*/ +BU_P9V_64BIT_VSX_2 (LXVL, "lxvl", CONST, lxvl) + +BU_P9V_AV_2 (VEXTUBLX, "vextublx", CONST, vextublx) +BU_P9V_AV_2 (VEXTUBRX, "vextubrx", CONST, vextubrx) +BU_P9V_AV_2 (VEXTUHLX, "vextuhlx", CONST, vextuhlx) +BU_P9V_AV_2 (VEXTUHRX, "vextuhrx", CONST, vextuhrx) +BU_P9V_AV_2 (VEXTUWLX, "vextuwlx", CONST, vextuwlx) +BU_P9V_AV_2 (VEXTUWRX, "vextuwrx", CONST, vextuwrx) + +/* Insert/extract 4 byte word into a vector. */ +BU_P9V_VSX_2 (VEXTRACT4B, "vextract4b", CONST, vextract4b) +BU_P9V_VSX_3 (VINSERT4B, "vinsert4b", CONST, vinsert4b) +BU_P9V_VSX_3 (VINSERT4B_DI, "vinsert4b_di", CONST, vinsert4b_di) + +/* 3 argument vector functions returning void, treated as SPECIAL, + added in ISA 3.0 (power9). */ +BU_P9V_64BIT_AV_X (STXVL, "stxvl", MISC) + +/* 1 argument vector functions added in ISA 3.0 (power9). */ +BU_P9V_AV_1 (VCLZLSBB, "vclzlsbb", CONST, vclzlsbb) +BU_P9V_AV_1 (VCTZLSBB, "vctzlsbb", CONST, vctzlsbb) + +/* Built-in support for Power9 "VSU option" string operations includes + new awareness of the "vector compare not equal" (vcmpneb, vcmpneb., + vcmpneh, vcmpneh., vcmpnew, vcmpnew.) and "vector compare + not equal or zero" (vcmpnezb, vcmpnezb., vcmpnezh, vcmpnezh., + vcmpnezw, vcmpnezw.) instructions. */ + +BU_P9V_AV_2 (CMPNEB, "vcmpneb", CONST, vcmpneb) +BU_P9V_AV_2 (CMPNEH, "vcmpneh", CONST, vcmpneh) +BU_P9V_AV_2 (CMPNEW, "vcmpnew", CONST, vcmpnew) + +BU_P9V_AV_2 (VCMPNEB_P, "vcmpneb_p", CONST, vector_ne_v16qi_p) +BU_P9V_AV_2 (VCMPNEH_P, "vcmpneh_p", CONST, vector_ne_v8hi_p) +BU_P9V_AV_2 (VCMPNEW_P, "vcmpnew_p", CONST, vector_ne_v4si_p) +BU_P9V_AV_2 (VCMPNED_P, "vcmpned_p", CONST, vector_ne_v2di_p) + +BU_P9V_AV_2 (VCMPNEFP_P, "vcmpnefp_p", CONST, vector_ne_v4sf_p) +BU_P9V_AV_2 (VCMPNEDP_P, "vcmpnedp_p", CONST, vector_ne_v2df_p) + +BU_P9V_AV_2 (VCMPAEB_P, "vcmpaeb_p", CONST, vector_ae_v16qi_p) +BU_P9V_AV_2 (VCMPAEH_P, "vcmpaeh_p", CONST, vector_ae_v8hi_p) +BU_P9V_AV_2 (VCMPAEW_P, "vcmpaew_p", CONST, vector_ae_v4si_p) +BU_P9V_AV_2 (VCMPAED_P, "vcmpaed_p", CONST, vector_ae_v2di_p) + +BU_P9V_AV_2 (VCMPAEFP_P, "vcmpaefp_p", CONST, vector_ae_v4sf_p) +BU_P9V_AV_2 (VCMPAEDP_P, "vcmpaedp_p", CONST, vector_ae_v2df_p) + +BU_P9V_AV_2 (CMPNEZB, "vcmpnezb", CONST, vcmpnezb) +BU_P9V_AV_2 (CMPNEZH, "vcmpnezh", CONST, vcmpnezh) +BU_P9V_AV_2 (CMPNEZW, "vcmpnezw", CONST, vcmpnezw) + +BU_P9V_AV_P (VCMPNEZB_P, "vcmpnezb_p", CONST, vector_nez_v16qi_p) +BU_P9V_AV_P (VCMPNEZH_P, "vcmpnezh_p", CONST, vector_nez_v8hi_p) +BU_P9V_AV_P (VCMPNEZW_P, "vcmpnezw_p", CONST, vector_nez_v4si_p) + +/* ISA 3.0 Vector scalar overloaded 2 argument functions */ +BU_P9V_OVERLOAD_2 (LXVL, "lxvl") +BU_P9V_OVERLOAD_2 (VEXTULX, "vextulx") +BU_P9V_OVERLOAD_2 (VEXTURX, "vexturx") +BU_P9V_OVERLOAD_2 (VEXTRACT4B, "vextract4b") + +/* ISA 3.0 Vector scalar overloaded 3 argument functions */ +BU_P9V_OVERLOAD_3 (STXVL, "stxvl") +BU_P9V_OVERLOAD_3 (VINSERT4B, "vinsert4b") + +/* Overloaded CMPNE support was implemented prior to Power 9, + so is not mentioned here. */ +BU_P9V_OVERLOAD_2 (CMPNEZ, "vcmpnez") + +BU_P9V_OVERLOAD_P (VCMPNEZ_P, "vcmpnez_p") +BU_P9V_OVERLOAD_2 (VCMPNE_P, "vcmpne_p") +BU_P9V_OVERLOAD_2 (VCMPAE_P, "vcmpae_p") + +/* ISA 3.0 Vector scalar overloaded 1 argument functions */ +BU_P9V_OVERLOAD_1 (VCLZLSBB, "vclzlsbb") +BU_P9V_OVERLOAD_1 (VCTZLSBB, "vctzlsbb") + +/* 2 argument extended divide functions added in ISA 2.06. 
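A minimal sketch of how the divide-extended entries below are used, assuming the plain "__builtin_" prefix applied by the BU_P7_MISC_* macros and a CPU implementing ISA 2.06:

unsigned int
scaled_quotient (unsigned int a, unsigned int b)
{
  /* divweu returns the quotient of (a * 2**32) / b, handy for fixed-point
     division without promoting to 64-bit arithmetic.  */
  return __builtin_divweu (a, b);
}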
*/ +BU_P7_MISC_2 (DIVWE, "divwe", CONST, dive_si) +BU_P7_MISC_2 (DIVWEO, "divweo", CONST, diveo_si) +BU_P7_MISC_2 (DIVWEU, "divweu", CONST, diveu_si) +BU_P7_MISC_2 (DIVWEUO, "divweuo", CONST, diveuo_si) +BU_P7_MISC_2 (DIVDE, "divde", CONST, dive_di) +BU_P7_MISC_2 (DIVDEO, "divdeo", CONST, diveo_di) +BU_P7_MISC_2 (DIVDEU, "divdeu", CONST, diveu_di) +BU_P7_MISC_2 (DIVDEUO, "divdeuo", CONST, diveuo_di) + +/* 1 argument DFP (decimal floating point) functions added in ISA 2.05. */ +BU_DFP_MISC_1 (DXEX, "dxex", CONST, dfp_dxex_dd) +BU_DFP_MISC_1 (DXEXQ, "dxexq", CONST, dfp_dxex_td) + +/* 2 argument DFP (decimal floating point) functions added in ISA 2.05. */ +BU_DFP_MISC_2 (DDEDPD, "ddedpd", CONST, dfp_ddedpd_dd) +BU_DFP_MISC_2 (DDEDPDQ, "ddedpdq", CONST, dfp_ddedpd_td) +BU_DFP_MISC_2 (DENBCD, "denbcd", CONST, dfp_denbcd_dd) +BU_DFP_MISC_2 (DENBCDQ, "denbcdq", CONST, dfp_denbcd_td) +BU_DFP_MISC_2 (DIEX, "diex", CONST, dfp_diex_dd) +BU_DFP_MISC_2 (DIEXQ, "diexq", CONST, dfp_diex_td) +BU_DFP_MISC_2 (DSCLI, "dscli", CONST, dfp_dscli_dd) +BU_DFP_MISC_2 (DSCLIQ, "dscliq", CONST, dfp_dscli_td) +BU_DFP_MISC_2 (DSCRI, "dscri", CONST, dfp_dscri_dd) +BU_DFP_MISC_2 (DSCRIQ, "dscriq", CONST, dfp_dscri_td) + +/* 1 argument BCD functions added in ISA 2.06. */ +BU_P7_MISC_1 (CDTBCD, "cdtbcd", CONST, cdtbcd) +BU_P7_MISC_1 (CBCDTD, "cbcdtd", CONST, cbcdtd) + +/* 2 argument BCD functions added in ISA 2.06. */ +BU_P7_MISC_2 (ADDG6S, "addg6s", CONST, addg6s) + +/* 3 argument BCD functions added in ISA 2.07. */ +BU_P8V_MISC_3 (BCDADD, "bcdadd", CONST, bcdadd) +BU_P8V_MISC_3 (BCDADD_LT, "bcdadd_lt", CONST, bcdadd_lt) +BU_P8V_MISC_3 (BCDADD_EQ, "bcdadd_eq", CONST, bcdadd_eq) +BU_P8V_MISC_3 (BCDADD_GT, "bcdadd_gt", CONST, bcdadd_gt) +BU_P8V_MISC_3 (BCDADD_OV, "bcdadd_ov", CONST, bcdadd_unordered) +BU_P8V_MISC_3 (BCDSUB, "bcdsub", CONST, bcdsub) +BU_P8V_MISC_3 (BCDSUB_LT, "bcdsub_lt", CONST, bcdsub_lt) +BU_P8V_MISC_3 (BCDSUB_EQ, "bcdsub_eq", CONST, bcdsub_eq) +BU_P8V_MISC_3 (BCDSUB_GT, "bcdsub_gt", CONST, bcdsub_gt) +BU_P8V_MISC_3 (BCDSUB_OV, "bcdsub_ov", CONST, bcdsub_unordered) + +/* 2 argument pack/unpack 128-bit floating point types. */ +BU_DFP_MISC_2 (PACK_TD, "pack_dec128", CONST, packtd) +BU_DFP_MISC_2 (UNPACK_TD, "unpack_dec128", CONST, unpacktd) + +/* 0 argument general-purpose register functions added in ISA 3.0 (power9). */ +BU_P9_MISC_0 (DARN_32, "darn_32", MISC, darn_32) +BU_P9_64BIT_MISC_0 (DARN_RAW, "darn_raw", MISC, darn_raw) +BU_P9_64BIT_MISC_0 (DARN, "darn", MISC, darn) + +BU_LDBL128_2 (PACK_TF, "pack_longdouble", CONST, packtf) +BU_LDBL128_2 (UNPACK_TF, "unpack_longdouble", CONST, unpacktf) + +BU_P7_MISC_2 (PACK_V1TI, "pack_vector_int128", CONST, packv1ti) +BU_P7_MISC_2 (UNPACK_V1TI, "unpack_vector_int128", CONST, unpackv1ti) + +/* 2 argument DFP (Decimal Floating Point) functions added in ISA 3.0. */ +BU_P9_DFP_MISC_2 (TSTSFI_LT_DD, "dtstsfi_lt_dd", CONST, dfptstsfi_lt_dd) +BU_P9_DFP_MISC_2 (TSTSFI_LT_TD, "dtstsfi_lt_td", CONST, dfptstsfi_lt_td) + +BU_P9_DFP_MISC_2 (TSTSFI_EQ_DD, "dtstsfi_eq_dd", CONST, dfptstsfi_eq_dd) +BU_P9_DFP_MISC_2 (TSTSFI_EQ_TD, "dtstsfi_eq_td", CONST, dfptstsfi_eq_td) + +BU_P9_DFP_MISC_2 (TSTSFI_GT_DD, "dtstsfi_gt_dd", CONST, dfptstsfi_gt_dd) +BU_P9_DFP_MISC_2 (TSTSFI_GT_TD, "dtstsfi_gt_td", CONST, dfptstsfi_gt_td) + +BU_P9_DFP_MISC_2 (TSTSFI_OV_DD, "dtstsfi_ov_dd", CONST, dfptstsfi_unordered_dd) +BU_P9_DFP_MISC_2 (TSTSFI_OV_TD, "dtstsfi_ov_td", CONST, dfptstsfi_unordered_td) + +/* 2 argument overloaded DFP functions added in ISA 3.0. 
*/ +BU_P9_DFP_OVERLOAD_2 (TSTSFI_LT, "dtstsfi_lt") +BU_P9_DFP_OVERLOAD_2 (TSTSFI_LT_DD, "dtstsfi_lt_dd") +BU_P9_DFP_OVERLOAD_2 (TSTSFI_LT_TD, "dtstsfi_lt_td") + +BU_P9_DFP_OVERLOAD_2 (TSTSFI_EQ, "dtstsfi_eq") +BU_P9_DFP_OVERLOAD_2 (TSTSFI_EQ_DD, "dtstsfi_eq_dd") +BU_P9_DFP_OVERLOAD_2 (TSTSFI_EQ_TD, "dtstsfi_eq_td") + +BU_P9_DFP_OVERLOAD_2 (TSTSFI_GT, "dtstsfi_gt") +BU_P9_DFP_OVERLOAD_2 (TSTSFI_GT_DD, "dtstsfi_gt_dd") +BU_P9_DFP_OVERLOAD_2 (TSTSFI_GT_TD, "dtstsfi_gt_td") + +BU_P9_DFP_OVERLOAD_2 (TSTSFI_OV, "dtstsfi_ov") +BU_P9_DFP_OVERLOAD_2 (TSTSFI_OV_DD, "dtstsfi_ov_dd") +BU_P9_DFP_OVERLOAD_2 (TSTSFI_OV_TD, "dtstsfi_ov_td") + +/* 1 argument vector functions added in ISA 3.0 (power9). */ +BU_P9V_AV_1 (VCTZB, "vctzb", CONST, ctzv16qi2) +BU_P9V_AV_1 (VCTZH, "vctzh", CONST, ctzv8hi2) +BU_P9V_AV_1 (VCTZW, "vctzw", CONST, ctzv4si2) +BU_P9V_AV_1 (VCTZD, "vctzd", CONST, ctzv2di2) +BU_P9V_AV_1 (VPRTYBD, "vprtybd", CONST, parityv2di2) +BU_P9V_AV_1 (VPRTYBQ, "vprtybq", CONST, parityv1ti2) +BU_P9V_AV_1 (VPRTYBW, "vprtybw", CONST, parityv4si2) + +/* ISA 3.0 vector overloaded 1 argument functions. */ +BU_P9V_OVERLOAD_1 (VCTZ, "vctz") +BU_P9V_OVERLOAD_1 (VCTZB, "vctzb") +BU_P9V_OVERLOAD_1 (VCTZH, "vctzh") +BU_P9V_OVERLOAD_1 (VCTZW, "vctzw") +BU_P9V_OVERLOAD_1 (VCTZD, "vctzd") +BU_P9V_OVERLOAD_1 (VPRTYB, "vprtyb") +BU_P9V_OVERLOAD_1 (VPRTYBD, "vprtybd") +BU_P9V_OVERLOAD_1 (VPRTYBQ, "vprtybq") +BU_P9V_OVERLOAD_1 (VPRTYBW, "vprtybw") + +/* 2 argument functions added in ISA 3.0 (power9). */ +BU_P9_2 (CMPRB, "byte_in_range", CONST, cmprb) +BU_P9_2 (CMPRB2, "byte_in_either_range", CONST, cmprb2) +BU_P9_64BIT_2 (CMPEQB, "byte_in_set", CONST, cmpeqb) + +/* 2 argument overloaded functions added in ISA 3.0 (power9). */ +BU_P9_OVERLOAD_2 (CMPRB, "byte_in_range") +BU_P9_OVERLOAD_2 (CMPRB2, "byte_in_either_range") +BU_P9_OVERLOAD_2 (CMPEQB, "byte_in_set") + +/* 1 argument IEEE 128-bit floating-point functions. */ +BU_FLOAT128_1 (FABSQ, "fabsq", CONST, abskf2) + +/* 2 argument IEEE 128-bit floating-point functions. */ +BU_FLOAT128_2 (COPYSIGNQ, "copysignq", CONST, copysignkf3) + +/* 1 argument crypto functions. */ +BU_CRYPTO_1 (VSBOX, "vsbox", CONST, crypto_vsbox) + +/* 2 argument crypto functions. */ +BU_CRYPTO_2 (VCIPHER, "vcipher", CONST, crypto_vcipher) +BU_CRYPTO_2 (VCIPHERLAST, "vcipherlast", CONST, crypto_vcipherlast) +BU_CRYPTO_2 (VNCIPHER, "vncipher", CONST, crypto_vncipher) +BU_CRYPTO_2 (VNCIPHERLAST, "vncipherlast", CONST, crypto_vncipherlast) +BU_CRYPTO_2A (VPMSUMB, "vpmsumb", CONST, crypto_vpmsumb) +BU_CRYPTO_2A (VPMSUMH, "vpmsumh", CONST, crypto_vpmsumh) +BU_CRYPTO_2A (VPMSUMW, "vpmsumw", CONST, crypto_vpmsumw) +BU_CRYPTO_2A (VPMSUMD, "vpmsumd", CONST, crypto_vpmsumd) + +/* 3 argument crypto functions. */ +BU_CRYPTO_3A (VPERMXOR_V2DI, "vpermxor_v2di", CONST, crypto_vpermxor_v2di) +BU_CRYPTO_3A (VPERMXOR_V4SI, "vpermxor_v4si", CONST, crypto_vpermxor_v4si) +BU_CRYPTO_3A (VPERMXOR_V8HI, "vpermxor_v8hi", CONST, crypto_vpermxor_v8hi) +BU_CRYPTO_3A (VPERMXOR_V16QI, "vpermxor_v16qi", CONST, crypto_vpermxor_v16qi) +BU_CRYPTO_3 (VSHASIGMAW, "vshasigmaw", CONST, crypto_vshasigmaw) +BU_CRYPTO_3 (VSHASIGMAD, "vshasigmad", CONST, crypto_vshasigmad) + +/* 2 argument crypto overloaded functions. */ +BU_CRYPTO_OVERLOAD_2A (VPMSUM, "vpmsum") + +/* 3 argument crypto overloaded functions. */ +BU_CRYPTO_OVERLOAD_3A (VPERMXOR, "vpermxor") +BU_CRYPTO_OVERLOAD_3 (VSHASIGMA, "vshasigma") + + +/* HTM functions. 
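A minimal sketch of the transaction begin/end pattern built from the HTM entries below, assuming -mhtm and the documented __builtin_tbegin / __builtin_tend interfaces (tbegin returns nonzero when the transaction starts):

#include <htmintrin.h>

int
try_transactional_increment (volatile long *counter)
{
  if (__builtin_tbegin (0))
    {
      ++*counter;               /* commits atomically at tend */
      __builtin_tend (0);
      return 1;
    }
  return 0;                     /* aborted or failed; caller takes a lock */
}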
*/ +BU_HTM_1 (TABORT, "tabort", CR, tabort) +BU_HTM_3 (TABORTDC, "tabortdc", CR, tabortdc) +BU_HTM_3 (TABORTDCI, "tabortdci", CR, tabortdci) +BU_HTM_3 (TABORTWC, "tabortwc", CR, tabortwc) +BU_HTM_3 (TABORTWCI, "tabortwci", CR, tabortwci) +BU_HTM_1 (TBEGIN, "tbegin", CR, tbegin) +BU_HTM_0 (TCHECK, "tcheck", CR, tcheck) +BU_HTM_1 (TEND, "tend", CR, tend) +BU_HTM_0 (TENDALL, "tendall", CR, tend) +BU_HTM_0 (TRECHKPT, "trechkpt", CR, trechkpt) +BU_HTM_1 (TRECLAIM, "treclaim", CR, treclaim) +BU_HTM_0 (TRESUME, "tresume", CR, tsr) +BU_HTM_0 (TSUSPEND, "tsuspend", CR, tsr) +BU_HTM_1 (TSR, "tsr", CR, tsr) +BU_HTM_0 (TTEST, "ttest", CR, ttest) + +BU_HTM_0 (GET_TFHAR, "get_tfhar", SPR, nothing) +BU_HTM_V1 (SET_TFHAR, "set_tfhar", SPR, nothing) +BU_HTM_0 (GET_TFIAR, "get_tfiar", SPR, nothing) +BU_HTM_V1 (SET_TFIAR, "set_tfiar", SPR, nothing) +BU_HTM_0 (GET_TEXASR, "get_texasr", SPR, nothing) +BU_HTM_V1 (SET_TEXASR, "set_texasr", SPR, nothing) +BU_HTM_0 (GET_TEXASRU, "get_texasru", SPR, nothing) +BU_HTM_V1 (SET_TEXASRU, "set_texasru", SPR, nothing) + + +/* 3 argument paired floating point builtins. */ +BU_PAIRED_3 (MSUB, "msub", FP, fmsv2sf4) +BU_PAIRED_3 (MADD, "madd", FP, fmav2sf4) +BU_PAIRED_3 (MADDS0, "madds0", FP, paired_madds0) +BU_PAIRED_3 (MADDS1, "madds1", FP, paired_madds1) +BU_PAIRED_3 (NMSUB, "nmsub", FP, nfmsv2sf4) +BU_PAIRED_3 (NMADD, "nmadd", FP, nfmav2sf4) +BU_PAIRED_3 (SUM0, "sum0", FP, paired_sum0) +BU_PAIRED_3 (SUM1, "sum1", FP, paired_sum1) +BU_PAIRED_3 (SELV2SF4, "selv2sf4", CONST, selv2sf4) + +/* 2 argument paired floating point builtins. */ +BU_PAIRED_2 (DIVV2SF3, "divv2sf3", FP, paired_divv2sf3) +BU_PAIRED_2 (ADDV2SF3, "addv2sf3", FP, paired_addv2sf3) +BU_PAIRED_2 (SUBV2SF3, "subv2sf3", FP, paired_subv2sf3) +BU_PAIRED_2 (MULV2SF3, "mulv2sf3", FP, paired_mulv2sf3) +BU_PAIRED_2 (MULS0, "muls0", FP, paired_muls0) +BU_PAIRED_2 (MULS1, "muls1", FP, paired_muls1) +BU_PAIRED_2 (MERGE00, "merge00", CONST, paired_merge00) +BU_PAIRED_2 (MERGE01, "merge01", CONST, paired_merge01) +BU_PAIRED_2 (MERGE10, "merge10", CONST, paired_merge10) +BU_PAIRED_2 (MERGE11, "merge11", CONST, paired_merge11) + +/* 1 argument paired floating point builtin functions. */ +BU_PAIRED_1 (ABSV2SF2, "absv2sf2", CONST, paired_absv2sf2) +BU_PAIRED_1 (NABSV2SF2, "nabsv2sf2", CONST, nabsv2sf2) +BU_PAIRED_1 (NEGV2SF2, "negv2sf2", CONST, paired_negv2sf2) +BU_PAIRED_1 (SQRTV2SF2, "sqrtv2sf2", FP, sqrtv2sf2) +BU_PAIRED_1 (RESV2SF, "resv2sf2", FP, resv2sf2) + +/* PAIRED builtins that are handled as special cases. */ +BU_PAIRED_X (STX, "stx", MISC) +BU_PAIRED_X (LX, "lx", MISC) + +/* Paired predicates. */ +BU_PAIRED_P (CMPU0, "cmpu0", CONST, paired_cmpu0) +BU_PAIRED_P (CMPU1, "cmpu1", CONST, paired_cmpu1) + +/* PowerPC E500 builtins (SPE). 
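A minimal sketch of calling one of the SPE entries below, assuming the "__builtin_spe_" prefix applied by the BU_SPE_* macros; the two-element type is built here with the generic vector extension rather than the <spe.h> wrappers, and an e500/SPE target is required:

typedef int v2si __attribute__ ((vector_size (8)));

v2si
add_halves (v2si a, v2si b)
{
  /* evaddw: independent 32-bit adds on the high and low halves of the
     64-bit GPR pair.  */
  return __builtin_spe_evaddw (a, b);
}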
*/ + +BU_SPE_2 (EVADDW, "evaddw", MISC, addv2si3) +BU_SPE_2 (EVAND, "evand", MISC, andv2si3) +BU_SPE_2 (EVANDC, "evandc", MISC, spe_evandc) +BU_SPE_2 (EVDIVWS, "evdivws", MISC, divv2si3) +BU_SPE_2 (EVDIVWU, "evdivwu", MISC, spe_evdivwu) +BU_SPE_2 (EVEQV, "eveqv", MISC, spe_eveqv) +BU_SPE_2 (EVFSADD, "evfsadd", MISC, spe_evfsadd) +BU_SPE_2 (EVFSDIV, "evfsdiv", MISC, spe_evfsdiv) +BU_SPE_2 (EVFSMUL, "evfsmul", MISC, spe_evfsmul) +BU_SPE_2 (EVFSSUB, "evfssub", MISC, spe_evfssub) +BU_SPE_2 (EVMERGEHI, "evmergehi", MISC, spe_evmergehi) +BU_SPE_2 (EVMERGEHILO, "evmergehilo", MISC, spe_evmergehilo) +BU_SPE_2 (EVMERGELO, "evmergelo", MISC, spe_evmergelo) +BU_SPE_2 (EVMERGELOHI, "evmergelohi", MISC, spe_evmergelohi) +BU_SPE_2 (EVMHEGSMFAA, "evmhegsmfaa", MISC, spe_evmhegsmfaa) +BU_SPE_2 (EVMHEGSMFAN, "evmhegsmfan", MISC, spe_evmhegsmfan) +BU_SPE_2 (EVMHEGSMIAA, "evmhegsmiaa", MISC, spe_evmhegsmiaa) +BU_SPE_2 (EVMHEGSMIAN, "evmhegsmian", MISC, spe_evmhegsmian) +BU_SPE_2 (EVMHEGUMIAA, "evmhegumiaa", MISC, spe_evmhegumiaa) +BU_SPE_2 (EVMHEGUMIAN, "evmhegumian", MISC, spe_evmhegumian) +BU_SPE_2 (EVMHESMF, "evmhesmf", MISC, spe_evmhesmf) +BU_SPE_2 (EVMHESMFA, "evmhesmfa", MISC, spe_evmhesmfa) +BU_SPE_2 (EVMHESMFAAW, "evmhesmfaaw", MISC, spe_evmhesmfaaw) +BU_SPE_2 (EVMHESMFANW, "evmhesmfanw", MISC, spe_evmhesmfanw) +BU_SPE_2 (EVMHESMI, "evmhesmi", MISC, spe_evmhesmi) +BU_SPE_2 (EVMHESMIA, "evmhesmia", MISC, spe_evmhesmia) +BU_SPE_2 (EVMHESMIAAW, "evmhesmiaaw", MISC, spe_evmhesmiaaw) +BU_SPE_2 (EVMHESMIANW, "evmhesmianw", MISC, spe_evmhesmianw) +BU_SPE_2 (EVMHESSF, "evmhessf", MISC, spe_evmhessf) +BU_SPE_2 (EVMHESSFA, "evmhessfa", MISC, spe_evmhessfa) +BU_SPE_2 (EVMHESSFAAW, "evmhessfaaw", MISC, spe_evmhessfaaw) +BU_SPE_2 (EVMHESSFANW, "evmhessfanw", MISC, spe_evmhessfanw) +BU_SPE_2 (EVMHESSIAAW, "evmhessiaaw", MISC, spe_evmhessiaaw) +BU_SPE_2 (EVMHESSIANW, "evmhessianw", MISC, spe_evmhessianw) +BU_SPE_2 (EVMHEUMI, "evmheumi", MISC, spe_evmheumi) +BU_SPE_2 (EVMHEUMIA, "evmheumia", MISC, spe_evmheumia) +BU_SPE_2 (EVMHEUMIAAW, "evmheumiaaw", MISC, spe_evmheumiaaw) +BU_SPE_2 (EVMHEUMIANW, "evmheumianw", MISC, spe_evmheumianw) +BU_SPE_2 (EVMHEUSIAAW, "evmheusiaaw", MISC, spe_evmheusiaaw) +BU_SPE_2 (EVMHEUSIANW, "evmheusianw", MISC, spe_evmheusianw) +BU_SPE_2 (EVMHOGSMFAA, "evmhogsmfaa", MISC, spe_evmhogsmfaa) +BU_SPE_2 (EVMHOGSMFAN, "evmhogsmfan", MISC, spe_evmhogsmfan) +BU_SPE_2 (EVMHOGSMIAA, "evmhogsmiaa", MISC, spe_evmhogsmiaa) +BU_SPE_2 (EVMHOGSMIAN, "evmhogsmian", MISC, spe_evmhogsmian) +BU_SPE_2 (EVMHOGUMIAA, "evmhogumiaa", MISC, spe_evmhogumiaa) +BU_SPE_2 (EVMHOGUMIAN, "evmhogumian", MISC, spe_evmhogumian) +BU_SPE_2 (EVMHOSMF, "evmhosmf", MISC, spe_evmhosmf) +BU_SPE_2 (EVMHOSMFA, "evmhosmfa", MISC, spe_evmhosmfa) +BU_SPE_2 (EVMHOSMFAAW, "evmhosmfaaw", MISC, spe_evmhosmfaaw) +BU_SPE_2 (EVMHOSMFANW, "evmhosmfanw", MISC, spe_evmhosmfanw) +BU_SPE_2 (EVMHOSMI, "evmhosmi", MISC, spe_evmhosmi) +BU_SPE_2 (EVMHOSMIA, "evmhosmia", MISC, spe_evmhosmia) +BU_SPE_2 (EVMHOSMIAAW, "evmhosmiaaw", MISC, spe_evmhosmiaaw) +BU_SPE_2 (EVMHOSMIANW, "evmhosmianw", MISC, spe_evmhosmianw) +BU_SPE_2 (EVMHOSSF, "evmhossf", MISC, spe_evmhossf) +BU_SPE_2 (EVMHOSSFA, "evmhossfa", MISC, spe_evmhossfa) +BU_SPE_2 (EVMHOSSFAAW, "evmhossfaaw", MISC, spe_evmhossfaaw) +BU_SPE_2 (EVMHOSSFANW, "evmhossfanw", MISC, spe_evmhossfanw) +BU_SPE_2 (EVMHOSSIAAW, "evmhossiaaw", MISC, spe_evmhossiaaw) +BU_SPE_2 (EVMHOSSIANW, "evmhossianw", MISC, spe_evmhossianw) +BU_SPE_2 (EVMHOUMI, "evmhoumi", MISC, spe_evmhoumi) +BU_SPE_2 (EVMHOUMIA, "evmhoumia", 
MISC, spe_evmhoumia) +BU_SPE_2 (EVMHOUMIAAW, "evmhoumiaaw", MISC, spe_evmhoumiaaw) +BU_SPE_2 (EVMHOUMIANW, "evmhoumianw", MISC, spe_evmhoumianw) +BU_SPE_2 (EVMHOUSIAAW, "evmhousiaaw", MISC, spe_evmhousiaaw) +BU_SPE_2 (EVMHOUSIANW, "evmhousianw", MISC, spe_evmhousianw) +BU_SPE_2 (EVMWHSMF, "evmwhsmf", MISC, spe_evmwhsmf) +BU_SPE_2 (EVMWHSMFA, "evmwhsmfa", MISC, spe_evmwhsmfa) +BU_SPE_2 (EVMWHSMI, "evmwhsmi", MISC, spe_evmwhsmi) +BU_SPE_2 (EVMWHSMIA, "evmwhsmia", MISC, spe_evmwhsmia) +BU_SPE_2 (EVMWHSSF, "evmwhssf", MISC, spe_evmwhssf) +BU_SPE_2 (EVMWHSSFA, "evmwhssfa", MISC, spe_evmwhssfa) +BU_SPE_2 (EVMWHUMI, "evmwhumi", MISC, spe_evmwhumi) +BU_SPE_2 (EVMWHUMIA, "evmwhumia", MISC, spe_evmwhumia) +BU_SPE_2 (EVMWLSMIAAW, "evmwlsmiaaw", MISC, spe_evmwlsmiaaw) +BU_SPE_2 (EVMWLSMIANW, "evmwlsmianw", MISC, spe_evmwlsmianw) +BU_SPE_2 (EVMWLSSIAAW, "evmwlssiaaw", MISC, spe_evmwlssiaaw) +BU_SPE_2 (EVMWLSSIANW, "evmwlssianw", MISC, spe_evmwlssianw) +BU_SPE_2 (EVMWLUMI, "evmwlumi", MISC, spe_evmwlumi) +BU_SPE_2 (EVMWLUMIA, "evmwlumia", MISC, spe_evmwlumia) +BU_SPE_2 (EVMWLUMIAAW, "evmwlumiaaw", MISC, spe_evmwlumiaaw) +BU_SPE_2 (EVMWLUMIANW, "evmwlumianw", MISC, spe_evmwlumianw) +BU_SPE_2 (EVMWLUSIAAW, "evmwlusiaaw", MISC, spe_evmwlusiaaw) +BU_SPE_2 (EVMWLUSIANW, "evmwlusianw", MISC, spe_evmwlusianw) +BU_SPE_2 (EVMWSMF, "evmwsmf", MISC, spe_evmwsmf) +BU_SPE_2 (EVMWSMFA, "evmwsmfa", MISC, spe_evmwsmfa) +BU_SPE_2 (EVMWSMFAA, "evmwsmfaa", MISC, spe_evmwsmfaa) +BU_SPE_2 (EVMWSMFAN, "evmwsmfan", MISC, spe_evmwsmfan) +BU_SPE_2 (EVMWSMI, "evmwsmi", MISC, spe_evmwsmi) +BU_SPE_2 (EVMWSMIA, "evmwsmia", MISC, spe_evmwsmia) +BU_SPE_2 (EVMWSMIAA, "evmwsmiaa", MISC, spe_evmwsmiaa) +BU_SPE_2 (EVMWSMIAN, "evmwsmian", MISC, spe_evmwsmian) +BU_SPE_2 (EVMWSSF, "evmwssf", MISC, spe_evmwssf) +BU_SPE_2 (EVMWSSFA, "evmwssfa", MISC, spe_evmwssfa) +BU_SPE_2 (EVMWSSFAA, "evmwssfaa", MISC, spe_evmwssfaa) +BU_SPE_2 (EVMWSSFAN, "evmwssfan", MISC, spe_evmwssfan) +BU_SPE_2 (EVMWUMI, "evmwumi", MISC, spe_evmwumi) +BU_SPE_2 (EVMWUMIA, "evmwumia", MISC, spe_evmwumia) +BU_SPE_2 (EVMWUMIAA, "evmwumiaa", MISC, spe_evmwumiaa) +BU_SPE_2 (EVMWUMIAN, "evmwumian", MISC, spe_evmwumian) +BU_SPE_2 (EVNAND, "evnand", MISC, spe_evnand) +BU_SPE_2 (EVNOR, "evnor", MISC, spe_evnor) +BU_SPE_2 (EVOR, "evor", MISC, spe_evor) +BU_SPE_2 (EVORC, "evorc", MISC, spe_evorc) +BU_SPE_2 (EVRLW, "evrlw", MISC, spe_evrlw) +BU_SPE_2 (EVSLW, "evslw", MISC, spe_evslw) +BU_SPE_2 (EVSRWS, "evsrws", MISC, spe_evsrws) +BU_SPE_2 (EVSRWU, "evsrwu", MISC, spe_evsrwu) +BU_SPE_2 (EVSUBFW, "evsubfw", MISC, subv2si3) + +/* SPE binary operations expecting a 5-bit unsigned literal. 
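As in the sketch above, and assuming the same "__builtin_spe_" prefix: the immediate forms below (evaddiw, evrlwi, evslwi, and friends) take their literal operand as a compile-time constant in the range 0..31, for example:

typedef int v2si __attribute__ ((vector_size (8)));

v2si
rotate_each_by_three (v2si a)
{
  return __builtin_spe_evrlwi (a, 3);   /* literal must be in [0, 31] */
}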
*/ +BU_SPE_2 (EVADDIW, "evaddiw", MISC, spe_evaddiw) + +BU_SPE_2 (EVRLWI, "evrlwi", MISC, spe_evrlwi) +BU_SPE_2 (EVSLWI, "evslwi", MISC, spe_evslwi) +BU_SPE_2 (EVSRWIS, "evsrwis", MISC, spe_evsrwis) +BU_SPE_2 (EVSRWIU, "evsrwiu", MISC, spe_evsrwiu) +BU_SPE_2 (EVSUBIFW, "evsubifw", MISC, spe_evsubifw) +BU_SPE_2 (EVMWHSSFAA, "evmwhssfaa", MISC, spe_evmwhssfaa) +BU_SPE_2 (EVMWHSSMAA, "evmwhssmaa", MISC, spe_evmwhssmaa) +BU_SPE_2 (EVMWHSMFAA, "evmwhsmfaa", MISC, spe_evmwhsmfaa) +BU_SPE_2 (EVMWHSMIAA, "evmwhsmiaa", MISC, spe_evmwhsmiaa) +BU_SPE_2 (EVMWHUSIAA, "evmwhusiaa", MISC, spe_evmwhusiaa) +BU_SPE_2 (EVMWHUMIAA, "evmwhumiaa", MISC, spe_evmwhumiaa) +BU_SPE_2 (EVMWHSSFAN, "evmwhssfan", MISC, spe_evmwhssfan) +BU_SPE_2 (EVMWHSSIAN, "evmwhssian", MISC, spe_evmwhssian) +BU_SPE_2 (EVMWHSMFAN, "evmwhsmfan", MISC, spe_evmwhsmfan) +BU_SPE_2 (EVMWHSMIAN, "evmwhsmian", MISC, spe_evmwhsmian) +BU_SPE_2 (EVMWHUSIAN, "evmwhusian", MISC, spe_evmwhusian) +BU_SPE_2 (EVMWHUMIAN, "evmwhumian", MISC, spe_evmwhumian) +BU_SPE_2 (EVMWHGSSFAA, "evmwhgssfaa", MISC, spe_evmwhgssfaa) +BU_SPE_2 (EVMWHGSMFAA, "evmwhgsmfaa", MISC, spe_evmwhgsmfaa) +BU_SPE_2 (EVMWHGSMIAA, "evmwhgsmiaa", MISC, spe_evmwhgsmiaa) +BU_SPE_2 (EVMWHGUMIAA, "evmwhgumiaa", MISC, spe_evmwhgumiaa) +BU_SPE_2 (EVMWHGSSFAN, "evmwhgssfan", MISC, spe_evmwhgssfan) +BU_SPE_2 (EVMWHGSMFAN, "evmwhgsmfan", MISC, spe_evmwhgsmfan) +BU_SPE_2 (EVMWHGSMIAN, "evmwhgsmian", MISC, spe_evmwhgsmian) +BU_SPE_2 (EVMWHGUMIAN, "evmwhgumian", MISC, spe_evmwhgumian) +BU_SPE_2 (BRINC, "brinc", MISC, spe_brinc) +BU_SPE_2 (EVXOR, "evxor", MISC, xorv2si3) + +/* SPE predicate builtins. */ +BU_SPE_P (EVCMPEQ, "evcmpeq", MISC, spe_evcmpeq) +BU_SPE_P (EVCMPGTS, "evcmpgts", MISC, spe_evcmpgts) +BU_SPE_P (EVCMPGTU, "evcmpgtu", MISC, spe_evcmpgtu) +BU_SPE_P (EVCMPLTS, "evcmplts", MISC, spe_evcmplts) +BU_SPE_P (EVCMPLTU, "evcmpltu", MISC, spe_evcmpltu) +BU_SPE_P (EVFSCMPEQ, "evfscmpeq", MISC, spe_evfscmpeq) +BU_SPE_P (EVFSCMPGT, "evfscmpgt", MISC, spe_evfscmpgt) +BU_SPE_P (EVFSCMPLT, "evfscmplt", MISC, spe_evfscmplt) +BU_SPE_P (EVFSTSTEQ, "evfststeq", MISC, spe_evfststeq) +BU_SPE_P (EVFSTSTGT, "evfststgt", MISC, spe_evfststgt) +BU_SPE_P (EVFSTSTLT, "evfststlt", MISC, spe_evfststlt) + +/* SPE evsel builtins. 
*/ +BU_SPE_E (EVSEL_CMPGTS, "evsel_gts", MISC, spe_evcmpgts) +BU_SPE_E (EVSEL_CMPGTU, "evsel_gtu", MISC, spe_evcmpgtu) +BU_SPE_E (EVSEL_CMPLTS, "evsel_lts", MISC, spe_evcmplts) +BU_SPE_E (EVSEL_CMPLTU, "evsel_ltu", MISC, spe_evcmpltu) +BU_SPE_E (EVSEL_CMPEQ, "evsel_eq", MISC, spe_evcmpeq) +BU_SPE_E (EVSEL_FSCMPGT, "evsel_fsgt", MISC, spe_evfscmpgt) +BU_SPE_E (EVSEL_FSCMPLT, "evsel_fslt", MISC, spe_evfscmplt) +BU_SPE_E (EVSEL_FSCMPEQ, "evsel_fseq", MISC, spe_evfscmpeq) +BU_SPE_E (EVSEL_FSTSTGT, "evsel_fststgt", MISC, spe_evfststgt) +BU_SPE_E (EVSEL_FSTSTLT, "evsel_fststlt", MISC, spe_evfststlt) +BU_SPE_E (EVSEL_FSTSTEQ, "evsel_fststeq", MISC, spe_evfststeq) + +BU_SPE_1 (EVABS, "evabs", CONST, absv2si2) +BU_SPE_1 (EVADDSMIAAW, "evaddsmiaaw", CONST, spe_evaddsmiaaw) +BU_SPE_1 (EVADDSSIAAW, "evaddssiaaw", CONST, spe_evaddssiaaw) +BU_SPE_1 (EVADDUMIAAW, "evaddumiaaw", CONST, spe_evaddumiaaw) +BU_SPE_1 (EVADDUSIAAW, "evaddusiaaw", CONST, spe_evaddusiaaw) +BU_SPE_1 (EVCNTLSW, "evcntlsw", CONST, spe_evcntlsw) +BU_SPE_1 (EVCNTLZW, "evcntlzw", CONST, spe_evcntlzw) +BU_SPE_1 (EVEXTSB, "evextsb", CONST, spe_evextsb) +BU_SPE_1 (EVEXTSH, "evextsh", CONST, spe_evextsh) +BU_SPE_1 (EVFSABS, "evfsabs", CONST, spe_evfsabs) +BU_SPE_1 (EVFSCFSF, "evfscfsf", CONST, spe_evfscfsf) +BU_SPE_1 (EVFSCFSI, "evfscfsi", CONST, spe_evfscfsi) +BU_SPE_1 (EVFSCFUF, "evfscfuf", CONST, spe_evfscfuf) +BU_SPE_1 (EVFSCFUI, "evfscfui", CONST, spe_evfscfui) +BU_SPE_1 (EVFSCTSF, "evfsctsf", CONST, spe_evfsctsf) +BU_SPE_1 (EVFSCTSI, "evfsctsi", CONST, spe_evfsctsi) +BU_SPE_1 (EVFSCTSIZ, "evfsctsiz", CONST, spe_evfsctsiz) +BU_SPE_1 (EVFSCTUF, "evfsctuf", CONST, spe_evfsctuf) +BU_SPE_1 (EVFSCTUI, "evfsctui", CONST, spe_evfsctui) +BU_SPE_1 (EVFSCTUIZ, "evfsctuiz", CONST, spe_evfsctuiz) +BU_SPE_1 (EVFSNABS, "evfsnabs", CONST, spe_evfsnabs) +BU_SPE_1 (EVFSNEG, "evfsneg", CONST, spe_evfsneg) +BU_SPE_1 (EVMRA, "evmra", CONST, spe_evmra) +BU_SPE_1 (EVNEG, "evneg", CONST, negv2si2) +BU_SPE_1 (EVRNDW, "evrndw", CONST, spe_evrndw) +BU_SPE_1 (EVSUBFSMIAAW, "evsubfsmiaaw", CONST, spe_evsubfsmiaaw) +BU_SPE_1 (EVSUBFSSIAAW, "evsubfssiaaw", CONST, spe_evsubfssiaaw) +BU_SPE_1 (EVSUBFUMIAAW, "evsubfumiaaw", CONST, spe_evsubfumiaaw) +BU_SPE_1 (EVSUBFUSIAAW, "evsubfusiaaw", CONST, spe_evsubfusiaaw) + +/* SPE builtins that are handled as special cases. 
*/ +BU_SPE_X (EVLDD, "evldd", MISC) +BU_SPE_X (EVLDDX, "evlddx", MISC) +BU_SPE_X (EVLDH, "evldh", MISC) +BU_SPE_X (EVLDHX, "evldhx", MISC) +BU_SPE_X (EVLDW, "evldw", MISC) +BU_SPE_X (EVLDWX, "evldwx", MISC) +BU_SPE_X (EVLHHESPLAT, "evlhhesplat", MISC) +BU_SPE_X (EVLHHESPLATX, "evlhhesplatx", MISC) +BU_SPE_X (EVLHHOSSPLAT, "evlhhossplat", MISC) +BU_SPE_X (EVLHHOSSPLATX, "evlhhossplatx", MISC) +BU_SPE_X (EVLHHOUSPLAT, "evlhhousplat", MISC) +BU_SPE_X (EVLHHOUSPLATX, "evlhhousplatx", MISC) +BU_SPE_X (EVLWHE, "evlwhe", MISC) +BU_SPE_X (EVLWHEX, "evlwhex", MISC) +BU_SPE_X (EVLWHOS, "evlwhos", MISC) +BU_SPE_X (EVLWHOSX, "evlwhosx", MISC) +BU_SPE_X (EVLWHOU, "evlwhou", MISC) +BU_SPE_X (EVLWHOUX, "evlwhoux", MISC) +BU_SPE_X (EVLWHSPLAT, "evlwhsplat", MISC) +BU_SPE_X (EVLWHSPLATX, "evlwhsplatx", MISC) +BU_SPE_X (EVLWWSPLAT, "evlwwsplat", MISC) +BU_SPE_X (EVLWWSPLATX, "evlwwsplatx", MISC) +BU_SPE_X (EVSPLATFI, "evsplatfi", MISC) +BU_SPE_X (EVSPLATI, "evsplati", MISC) +BU_SPE_X (EVSTDD, "evstdd", MISC) +BU_SPE_X (EVSTDDX, "evstddx", MISC) +BU_SPE_X (EVSTDH, "evstdh", MISC) +BU_SPE_X (EVSTDHX, "evstdhx", MISC) +BU_SPE_X (EVSTDW, "evstdw", MISC) +BU_SPE_X (EVSTDWX, "evstdwx", MISC) +BU_SPE_X (EVSTWHE, "evstwhe", MISC) +BU_SPE_X (EVSTWHEX, "evstwhex", MISC) +BU_SPE_X (EVSTWHO, "evstwho", MISC) +BU_SPE_X (EVSTWHOX, "evstwhox", MISC) +BU_SPE_X (EVSTWWE, "evstwwe", MISC) +BU_SPE_X (EVSTWWEX, "evstwwex", MISC) +BU_SPE_X (EVSTWWO, "evstwwo", MISC) +BU_SPE_X (EVSTWWOX, "evstwwox", MISC) +BU_SPE_X (MFSPEFSCR, "mfspefscr", MISC) +BU_SPE_X (MTSPEFSCR, "mtspefscr", MISC) + + +/* Power7 builtins, that aren't VSX instructions. */ +BU_SPECIAL_X (POWER7_BUILTIN_BPERMD, "__builtin_bpermd", RS6000_BTM_POPCNTD, + RS6000_BTC_CONST) + +/* Miscellaneous builtins. */ +BU_SPECIAL_X (RS6000_BUILTIN_RECIP, "__builtin_recipdiv", RS6000_BTM_FRE, + RS6000_BTC_FP) + +BU_SPECIAL_X (RS6000_BUILTIN_RECIPF, "__builtin_recipdivf", RS6000_BTM_FRES, + RS6000_BTC_FP) + +BU_SPECIAL_X (RS6000_BUILTIN_RSQRT, "__builtin_rsqrt", RS6000_BTM_FRSQRTE, + RS6000_BTC_FP) + +BU_SPECIAL_X (RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf", RS6000_BTM_FRSQRTES, + RS6000_BTC_FP) + +BU_SPECIAL_X (RS6000_BUILTIN_GET_TB, "__builtin_ppc_get_timebase", + RS6000_BTM_ALWAYS, RS6000_BTC_MISC) + +BU_SPECIAL_X (RS6000_BUILTIN_MFTB, "__builtin_ppc_mftb", + RS6000_BTM_ALWAYS, RS6000_BTC_MISC) + +BU_SPECIAL_X (RS6000_BUILTIN_MFFS, "__builtin_mffs", + RS6000_BTM_ALWAYS, RS6000_BTC_MISC) + +RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSF, "__builtin_mtfsf", + RS6000_BTM_ALWAYS, + RS6000_BTC_MISC | RS6000_BTC_UNARY | RS6000_BTC_VOID, + CODE_FOR_rs6000_mtfsf) + +BU_SPECIAL_X (RS6000_BUILTIN_CPU_INIT, "__builtin_cpu_init", + RS6000_BTM_ALWAYS, RS6000_BTC_MISC) + +BU_SPECIAL_X (RS6000_BUILTIN_CPU_IS, "__builtin_cpu_is", + RS6000_BTM_ALWAYS, RS6000_BTC_MISC) + +BU_SPECIAL_X (RS6000_BUILTIN_CPU_SUPPORTS, "__builtin_cpu_supports", + RS6000_BTM_ALWAYS, RS6000_BTC_MISC) + +BU_SPECIAL_X (RS6000_BUILTIN_NANQ, "__builtin_nanq", + RS6000_BTM_FLOAT128, RS6000_BTC_CONST) + +BU_SPECIAL_X (RS6000_BUILTIN_NANSQ, "__builtin_nansq", + RS6000_BTM_FLOAT128, RS6000_BTC_CONST) + +BU_SPECIAL_X (RS6000_BUILTIN_INFQ, "__builtin_infq", + RS6000_BTM_FLOAT128, RS6000_BTC_CONST) + +BU_SPECIAL_X (RS6000_BUILTIN_HUGE_VALQ, "__builtin_huge_valq", + RS6000_BTM_FLOAT128, RS6000_BTC_CONST) + +/* Darwin CfString builtin. 
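The __builtin_cpu_init / __builtin_cpu_is / __builtin_cpu_supports entries a few lines above are the run-time dispatch hooks; a typical use, with feature and CPU names as documented for GCC and availability depending on glibc support, looks like:

#include <stdio.h>

void
report_cpu (void)
{
  __builtin_cpu_init ();   /* essentially a no-op on PowerPC, kept for API parity */
  if (__builtin_cpu_supports ("vsx"))
    puts ("VSX available");
  if (__builtin_cpu_is ("power9"))
    puts ("running on a POWER9");
}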
*/ +BU_SPECIAL_X (RS6000_BUILTIN_CFSTRING, "__builtin_cfstring", RS6000_BTM_ALWAYS, + RS6000_BTC_MISC) diff --git a/gcc/config/powerpcspe/powerpcspe-c.c b/gcc/config/powerpcspe/powerpcspe-c.c new file mode 100644 index 000000000000..8039814b48ea --- /dev/null +++ b/gcc/config/powerpcspe/powerpcspe-c.c @@ -0,0 +1,6581 @@ +/* Subroutines for the C front end on the PowerPC architecture. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + + Contributed by Zack Weinberg <zack@codesourcery.com> + and Paolo Bonzini <bonzini@gnu.org> + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "target.h" +#include "c-family/c-common.h" +#include "memmodel.h" +#include "tm_p.h" +#include "stringpool.h" +#include "stor-layout.h" +#include "c-family/c-pragma.h" +#include "langhooks.h" +#include "c/c-tree.h" + + + +/* Handle the machine specific pragma longcall. Its syntax is + + # pragma longcall ( TOGGLE ) + + where TOGGLE is either 0 or 1. + + rs6000_default_long_calls is set to the value of TOGGLE, changing + whether or not new function declarations receive a longcall + attribute by default. */ + +#define SYNTAX_ERROR(gmsgid) do { \ + warning (OPT_Wpragmas, gmsgid); \ + warning (OPT_Wpragmas, "ignoring malformed #pragma longcall"); \ + return; \ +} while (0) + +void +rs6000_pragma_longcall (cpp_reader *pfile ATTRIBUTE_UNUSED) +{ + tree x, n; + + /* If we get here, generic code has already scanned the directive + leader and the word "longcall". */ + + if (pragma_lex (&x) != CPP_OPEN_PAREN) + SYNTAX_ERROR ("missing open paren"); + if (pragma_lex (&n) != CPP_NUMBER) + SYNTAX_ERROR ("missing number"); + if (pragma_lex (&x) != CPP_CLOSE_PAREN) + SYNTAX_ERROR ("missing close paren"); + + if (n != integer_zero_node && n != integer_one_node) + SYNTAX_ERROR ("number must be 0 or 1"); + + if (pragma_lex (&x) != CPP_EOF) + warning (OPT_Wpragmas, "junk at end of #pragma longcall"); + + rs6000_default_long_calls = (n == integer_one_node); +} + +/* Handle defining many CPP flags based on TARGET_xxx. As a general + policy, rather than trying to guess what flags a user might want a + #define for, it's better to define a flag for everything. */ + +#define builtin_define(TXT) cpp_define (pfile, TXT) +#define builtin_assert(TXT) cpp_assert (pfile, TXT) + +/* Keep the AltiVec keywords handy for fast comparisons. */ +static GTY(()) tree __vector_keyword; +static GTY(()) tree vector_keyword; +static GTY(()) tree __pixel_keyword; +static GTY(()) tree pixel_keyword; +static GTY(()) tree __bool_keyword; +static GTY(()) tree bool_keyword; +static GTY(()) tree _Bool_keyword; +static GTY(()) tree __int128_type; +static GTY(()) tree __uint128_type; + +/* Preserved across calls. 
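The #pragma longcall handler defined earlier in this file is driven entirely by the TOGGLE value; a short usage sketch (the helper names are hypothetical):

#pragma longcall (1)
extern void far_helper (void);    /* gets the longcall attribute by default */
#pragma longcall (0)
extern void near_helper (void);   /* back to ordinary direct calls */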
*/ +static tree expand_bool_pixel; + +static cpp_hashnode * +altivec_categorize_keyword (const cpp_token *tok) +{ + if (tok->type == CPP_NAME) + { + cpp_hashnode *ident = tok->val.node.node; + + if (ident == C_CPP_HASHNODE (vector_keyword)) + return C_CPP_HASHNODE (__vector_keyword); + + if (ident == C_CPP_HASHNODE (pixel_keyword)) + return C_CPP_HASHNODE (__pixel_keyword); + + if (ident == C_CPP_HASHNODE (bool_keyword)) + return C_CPP_HASHNODE (__bool_keyword); + + if (ident == C_CPP_HASHNODE (_Bool_keyword)) + return C_CPP_HASHNODE (__bool_keyword); + + return ident; + } + + return 0; +} + +static void +init_vector_keywords (void) +{ + /* Keywords without two leading underscores are context-sensitive, and hence + implemented as conditional macros, controlled by the + rs6000_macro_to_expand() function below. If we have ISA 2.07 64-bit + support, record the __int128_t and __uint128_t types. */ + + __vector_keyword = get_identifier ("__vector"); + C_CPP_HASHNODE (__vector_keyword)->flags |= NODE_CONDITIONAL; + + __pixel_keyword = get_identifier ("__pixel"); + C_CPP_HASHNODE (__pixel_keyword)->flags |= NODE_CONDITIONAL; + + __bool_keyword = get_identifier ("__bool"); + C_CPP_HASHNODE (__bool_keyword)->flags |= NODE_CONDITIONAL; + + vector_keyword = get_identifier ("vector"); + C_CPP_HASHNODE (vector_keyword)->flags |= NODE_CONDITIONAL; + + pixel_keyword = get_identifier ("pixel"); + C_CPP_HASHNODE (pixel_keyword)->flags |= NODE_CONDITIONAL; + + bool_keyword = get_identifier ("bool"); + C_CPP_HASHNODE (bool_keyword)->flags |= NODE_CONDITIONAL; + + _Bool_keyword = get_identifier ("_Bool"); + C_CPP_HASHNODE (_Bool_keyword)->flags |= NODE_CONDITIONAL; + + if (TARGET_VADDUQM) + { + __int128_type = get_identifier ("__int128_t"); + __uint128_type = get_identifier ("__uint128_t"); + } +} + +/* Helper function to find out which RID_INT_N_* code is the one for + __int128, if any. Returns RID_MAX+1 if none apply, which is safe + (for our purposes, since we always expect to have __int128) to + compare against. */ +static int +rid_int128(void) +{ + int i; + + for (i = 0; i < NUM_INT_N_ENTS; i ++) + if (int_n_enabled_p[i] + && int_n_data[i].bitsize == 128) + return RID_INT_N_0 + i; + + return RID_MAX + 1; +} + +/* Called to decide whether a conditional macro should be expanded. + Since we have exactly one such macro (i.e, 'vector'), we do not + need to examine the 'tok' parameter. */ + +static cpp_hashnode * +rs6000_macro_to_expand (cpp_reader *pfile, const cpp_token *tok) +{ + cpp_hashnode *expand_this = tok->val.node.node; + cpp_hashnode *ident; + + /* If the current machine does not have altivec, don't look for the + keywords. */ + if (!TARGET_ALTIVEC) + return NULL; + + ident = altivec_categorize_keyword (tok); + + if (ident != expand_this) + expand_this = NULL; + + if (ident == C_CPP_HASHNODE (__vector_keyword)) + { + int idx = 0; + do + tok = cpp_peek_token (pfile, idx++); + while (tok->type == CPP_PADDING); + ident = altivec_categorize_keyword (tok); + + if (ident == C_CPP_HASHNODE (__pixel_keyword)) + { + expand_this = C_CPP_HASHNODE (__vector_keyword); + expand_bool_pixel = __pixel_keyword; + } + else if (ident == C_CPP_HASHNODE (__bool_keyword)) + { + expand_this = C_CPP_HASHNODE (__vector_keyword); + expand_bool_pixel = __bool_keyword; + } + /* The boost libraries have code with Iterator::vector vector in it. 
If + we allow the normal handling, this module will be called recursively, + and the vector will be skipped.; */ + else if (ident && (ident != C_CPP_HASHNODE (__vector_keyword))) + { + enum rid rid_code = (enum rid)(ident->rid_code); + enum node_type itype = ident->type; + /* If there is a function-like macro, check if it is going to be + invoked with or without arguments. Without following ( treat + it like non-macro, otherwise the following cpp_get_token eats + what should be preserved. */ + if (itype == NT_MACRO && cpp_fun_like_macro_p (ident)) + { + int idx2 = idx; + do + tok = cpp_peek_token (pfile, idx2++); + while (tok->type == CPP_PADDING); + if (tok->type != CPP_OPEN_PAREN) + itype = NT_VOID; + } + if (itype == NT_MACRO) + { + do + (void) cpp_get_token (pfile); + while (--idx > 0); + do + tok = cpp_peek_token (pfile, idx++); + while (tok->type == CPP_PADDING); + ident = altivec_categorize_keyword (tok); + if (ident == C_CPP_HASHNODE (__pixel_keyword)) + { + expand_this = C_CPP_HASHNODE (__vector_keyword); + expand_bool_pixel = __pixel_keyword; + rid_code = RID_MAX; + } + else if (ident == C_CPP_HASHNODE (__bool_keyword)) + { + expand_this = C_CPP_HASHNODE (__vector_keyword); + expand_bool_pixel = __bool_keyword; + rid_code = RID_MAX; + } + else if (ident) + rid_code = (enum rid)(ident->rid_code); + } + + if (rid_code == RID_UNSIGNED || rid_code == RID_LONG + || rid_code == RID_SHORT || rid_code == RID_SIGNED + || rid_code == RID_INT || rid_code == RID_CHAR + || rid_code == RID_FLOAT + || (rid_code == RID_DOUBLE && TARGET_VSX) + || (rid_code == rid_int128 () && TARGET_VADDUQM)) + { + expand_this = C_CPP_HASHNODE (__vector_keyword); + /* If the next keyword is bool or pixel, it + will need to be expanded as well. */ + do + tok = cpp_peek_token (pfile, idx++); + while (tok->type == CPP_PADDING); + ident = altivec_categorize_keyword (tok); + + if (ident == C_CPP_HASHNODE (__pixel_keyword)) + expand_bool_pixel = __pixel_keyword; + else if (ident == C_CPP_HASHNODE (__bool_keyword)) + expand_bool_pixel = __bool_keyword; + else + { + /* Try two tokens down, too. */ + do + tok = cpp_peek_token (pfile, idx++); + while (tok->type == CPP_PADDING); + ident = altivec_categorize_keyword (tok); + if (ident == C_CPP_HASHNODE (__pixel_keyword)) + expand_bool_pixel = __pixel_keyword; + else if (ident == C_CPP_HASHNODE (__bool_keyword)) + expand_bool_pixel = __bool_keyword; + } + } + + /* Support vector __int128_t, but we don't need to worry about bool + or pixel on this type. */ + else if (TARGET_VADDUQM + && (ident == C_CPP_HASHNODE (__int128_type) + || ident == C_CPP_HASHNODE (__uint128_type))) + expand_this = C_CPP_HASHNODE (__vector_keyword); + } + } + else if (expand_bool_pixel && ident == C_CPP_HASHNODE (__pixel_keyword)) + { + expand_this = C_CPP_HASHNODE (__pixel_keyword); + expand_bool_pixel = 0; + } + else if (expand_bool_pixel && ident == C_CPP_HASHNODE (__bool_keyword)) + { + expand_this = C_CPP_HASHNODE (__bool_keyword); + expand_bool_pixel = 0; + } + + return expand_this; +} + + +/* Define or undefine a single macro. */ + +static void +rs6000_define_or_undefine_macro (bool define_p, const char *name) +{ + if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET) + fprintf (stderr, "#%s %s\n", (define_p) ? "define" : "undef", name); + + if (define_p) + cpp_define (parse_in, name); + else + cpp_undef (parse_in, name); +} + +/* Define or undefine macros based on the current target. If the user does + #pragma GCC target, we need to adjust the macros dynamically. 
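The conditional-macro machinery above means "vector" is rewritten to "__vector" only when the next token is a type keyword, so ordinary uses of the identifier survive; a small sketch, assuming -maltivec:

#include <altivec.h>

vector int lanes = { 1, 2, 3, 4 };    /* expands to __vector int */

struct vector { int x; };             /* not followed by a type keyword: untouched */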
Note, some of + the options needed for builtins have been moved to separate variables, so + have both the target flags and the builtin flags as arguments. */ + +void +rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, + HOST_WIDE_INT bu_mask) +{ + if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET) + fprintf (stderr, + "rs6000_target_modify_macros (%s, " HOST_WIDE_INT_PRINT_HEX + ", " HOST_WIDE_INT_PRINT_HEX ")\n", + (define_p) ? "define" : "undef", + flags, bu_mask); + + /* Each of the flags mentioned below controls whether certain + preprocessor macros will be automatically defined when + preprocessing source files for compilation by this compiler. + While most of these flags can be enabled or disabled + explicitly by specifying certain command-line options when + invoking the compiler, there are also many ways in which these + flags are enabled or disabled implicitly, based on compiler + defaults, configuration choices, and on the presence of certain + related command-line options. Many, but not all, of these + implicit behaviors can be found in file "rs6000.c", the + rs6000_option_override_internal() function. + + In general, each of the flags may be automatically enabled in + any of the following conditions: + + 1. If no -mcpu target is specified on the command line and no + --with-cpu target is specified to the configure command line + and the TARGET_DEFAULT macro for this default cpu host + includes the flag, and the flag has not been explicitly disabled + by command-line options. + + 2. If the target specified with -mcpu=target on the command line, or + in the absence of a -mcpu=target command-line option, if the + target specified using --with-cpu=target on the configure + command line, is disqualified because the associated binary + tools (e.g. the assembler) lack support for the requested cpu, + and the TARGET_DEFAULT macro for this default cpu host + includes the flag, and the flag has not been explicitly disabled + by command-line options. + + 3. If either of the above two conditions apply except that the + TARGET_DEFAULT macro is defined to equal zero, and + TARGET_POWERPC64 and + a) BYTES_BIG_ENDIAN and the flag to be enabled is either + MASK_PPC_GFXOPT or MASK_POWERPC64 (flags for "powerpc64" + target), or + b) !BYTES_BIG_ENDIAN and the flag to be enabled is either + MASK_POWERPC64 or it is one of the flags included in + ISA_2_7_MASKS_SERVER (flags for "powerpc64le" target). + + 4. If a cpu has been requested with a -mcpu=target command-line option + and this cpu has not been disqualified due to shortcomings of the + binary tools, and the set of flags associated with the requested cpu + include the flag to be enabled. See rs6000-cpus.def for macro + definitions that represent various ABI standards + (e.g. ISA_2_1_MASKS, ISA_3_0_MASKS_SERVER) and for a list of + the specific flags that are associated with each of the cpu + choices that can be specified as the target of a -mcpu=target + compile option, or as the the target of a --with-cpu=target + configure option. Target flags that are specified in either + of these two ways are considered "implicit" since the flags + are not mentioned specifically by name. + + Additional documentation describing behavior specific to + particular flags is provided below, immediately preceding the + use of each relevant flag. + + 5. 
If there is no -mcpu=target command-line option, and the cpu + requested by a --with-cpu=target command-line option has not + been disqualified due to shortcomings of the binary tools, and + the set of flags associated with the specified target include + the flag to be enabled. See the notes immediately above for a + summary of the flags associated with particular cpu + definitions. */ + + /* rs6000_isa_flags based options. */ + rs6000_define_or_undefine_macro (define_p, "_ARCH_PPC"); + if ((flags & OPTION_MASK_PPC_GPOPT) != 0) + rs6000_define_or_undefine_macro (define_p, "_ARCH_PPCSQ"); + if ((flags & OPTION_MASK_PPC_GFXOPT) != 0) + rs6000_define_or_undefine_macro (define_p, "_ARCH_PPCGR"); + if ((flags & OPTION_MASK_POWERPC64) != 0) + rs6000_define_or_undefine_macro (define_p, "_ARCH_PPC64"); + if ((flags & OPTION_MASK_MFCRF) != 0) + rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR4"); + if ((flags & OPTION_MASK_POPCNTB) != 0) + rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR5"); + if ((flags & OPTION_MASK_FPRND) != 0) + rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR5X"); + if ((flags & OPTION_MASK_CMPB) != 0) + rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR6"); + if ((flags & OPTION_MASK_MFPGPR) != 0) + rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR6X"); + if ((flags & OPTION_MASK_POPCNTD) != 0) + rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR7"); + /* Note that the OPTION_MASK_DIRECT_MOVE flag is automatically + turned on in the following condition: + 1. TARGET_P9_DFORM_SCALAR or TARGET_P9_DFORM_VECTOR are enabled + and OPTION_MASK_DIRECT_MOVE is not explicitly disabled. + Hereafter, the OPTION_MASK_DIRECT_MOVE flag is considered to + have been turned on explicitly. + Note that the OPTION_MASK_DIRECT_MOVE flag is automatically + turned off in any of the following conditions: + 1. TARGET_HARD_FLOAT, TARGET_ALTIVEC, or TARGET_VSX is explicitly + disabled and OPTION_MASK_DIRECT_MOVE was not explicitly + enabled. + 2. TARGET_VSX is off. */ + if ((flags & OPTION_MASK_DIRECT_MOVE) != 0) + rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR8"); + if ((flags & OPTION_MASK_MODULO) != 0) + rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR9"); + if ((flags & OPTION_MASK_SOFT_FLOAT) != 0) + rs6000_define_or_undefine_macro (define_p, "_SOFT_FLOAT"); + if ((flags & OPTION_MASK_RECIP_PRECISION) != 0) + rs6000_define_or_undefine_macro (define_p, "__RECIP_PRECISION__"); + /* Note that the OPTION_MASK_ALTIVEC flag is automatically turned on + in any of the following conditions: + 1. The command line specifies either -maltivec=le or -maltivec=be. + 2. The operating system is Darwin and it is configured for 64 + bit. (See darwin_rs6000_override_options.) + 3. The operating system is Darwin and the operating system + version is 10.5 or higher and the user has not explicitly + disabled ALTIVEC by specifying -mcpu=G3 or -mno-altivec and + the compiler is not producing code for integration within the + kernel. (See darwin_rs6000_override_options.) + Note that the OPTION_MASK_ALTIVEC flag is automatically turned + off in any of the following conditions: + 1. The operating system does not support saving of AltiVec + registers (OS_MISSING_ALTIVEC). + 2. 
If an inner context (as introduced by + __attribute__((__target__())) or #pragma GCC target() + requests a target that normally enables the + OPTION_MASK_ALTIVEC flag but the outer-most "main target" + does not support the rs6000_altivec_abi, this flag is + turned off for the inner context unless OPTION_MASK_ALTIVEC + was explicitly enabled for the inner context. */ + if ((flags & OPTION_MASK_ALTIVEC) != 0) + { + const char *vec_str = (define_p) ? "__VEC__=10206" : "__VEC__"; + rs6000_define_or_undefine_macro (define_p, "__ALTIVEC__"); + rs6000_define_or_undefine_macro (define_p, vec_str); + + /* Define this when supporting context-sensitive keywords. */ + if (!flag_iso) + rs6000_define_or_undefine_macro (define_p, "__APPLE_ALTIVEC__"); + } + /* Note that the OPTION_MASK_VSX flag is automatically turned on in + the following conditions: + 1. TARGET_P8_VECTOR is explicitly turned on and the OPTION_MASK_VSX + was not explicitly turned off. Hereafter, the OPTION_MASK_VSX + flag is considered to have been explicitly turned on. + Note that the OPTION_MASK_VSX flag is automatically turned off in + the following conditions: + 1. The operating system does not support saving of AltiVec + registers (OS_MISSING_ALTIVEC). + 2. If any of the options TARGET_HARD_FLOAT, TARGET_FPRS, + TARGET_SINGLE_FLOAT, or TARGET_DOUBLE_FLOAT are turned off. + Hereafter, the OPTION_MASK_VSX flag is considered to have been + turned off explicitly. + 3. If TARGET_PAIRED_FLOAT was enabled. Hereafter, the + OPTION_MASK_VSX flag is considered to have been turned off + explicitly. + 4. If TARGET_AVOID_XFORM is turned on explicitly at the outermost + compilation context, or if it is turned on by any means in an + inner compilation context. Hereafter, the OPTION_MASK_VSX + flag is considered to have been turned off explicitly. + 5. If TARGET_ALTIVEC was explicitly disabled. Hereafter, the + OPTION_MASK_VSX flag is considered to have been turned off + explicitly. + 6. If an inner context (as introduced by + __attribute__((__target__())) or #pragma GCC target() + requests a target that normally enables the + OPTION_MASK_VSX flag but the outer-most "main target" + does not support the rs6000_altivec_abi, this flag is + turned off for the inner context unless OPTION_MASK_VSX + was explicitly enabled for the inner context. */ + if ((flags & OPTION_MASK_VSX) != 0) + rs6000_define_or_undefine_macro (define_p, "__VSX__"); + if ((flags & OPTION_MASK_HTM) != 0) + { + rs6000_define_or_undefine_macro (define_p, "__HTM__"); + /* Tell the user that our HTM insn patterns act as memory barriers. */ + rs6000_define_or_undefine_macro (define_p, "__TM_FENCE__"); + } + /* Note that the OPTION_MASK_P8_VECTOR flag is automatically turned + on in the following conditions: + 1. TARGET_P9_VECTOR is explicitly turned on and + OPTION_MASK_P8_VECTOR is not explicitly turned off. + Hereafter, the OPTION_MASK_P8_VECTOR flag is considered to + have been turned off explicitly. + Note that the OPTION_MASK_P8_VECTOR flag is automatically turned + off in the following conditions: + 1. If any of TARGET_HARD_FLOAT, TARGET_ALTIVEC, or TARGET_VSX + were turned off explicitly and OPTION_MASK_P8_VECTOR flag was + not turned on explicitly. + 2. If TARGET_ALTIVEC is turned off. Hereafter, the + OPTION_MASK_P8_VECTOR flag is considered to have been turned off + explicitly. + 3. If TARGET_VSX is turned off and OPTION_MASK_P8_VECTOR was not + explicitly enabled. 
If TARGET_VSX is explicitly enabled, the + OPTION_MASK_P8_VECTOR flag is hereafter also considered to + have been turned off explicitly. */ + if ((flags & OPTION_MASK_P8_VECTOR) != 0) + rs6000_define_or_undefine_macro (define_p, "__POWER8_VECTOR__"); + /* Note that the OPTION_MASK_P9_VECTOR flag is automatically turned + off in the following conditions: + 1. If TARGET_P8_VECTOR is turned off and OPTION_MASK_P9_VECTOR is + not turned on explicitly. Hereafter, if OPTION_MASK_P8_VECTOR + was turned on explicitly, the OPTION_MASK_P9_VECTOR flag is + also considered to have been turned off explicitly. + Note that the OPTION_MASK_P9_VECTOR is automatically turned on + in the following conditions: + 1. If TARGET_P9_DFORM_SCALAR or TARGET_P9_DFORM_VECTOR and + OPTION_MASK_P9_VECTOR was not turned off explicitly. + Hereafter, THE OPTION_MASK_P9_VECTOR flag is considered to + have been turned on explicitly. */ + if ((flags & OPTION_MASK_P9_VECTOR) != 0) + rs6000_define_or_undefine_macro (define_p, "__POWER9_VECTOR__"); + /* Note that the OPTION_MASK_QUAD_MEMORY flag is automatically + turned off in the following conditions: + 1. If TARGET_POWERPC64 is turned off. + 2. If WORDS_BIG_ENDIAN is false (non-atomic quad memory + load/store are disabled on little endian). */ + if ((flags & OPTION_MASK_QUAD_MEMORY) != 0) + rs6000_define_or_undefine_macro (define_p, "__QUAD_MEMORY__"); + /* Note that the OPTION_MASK_QUAD_MEMORY_ATOMIC flag is automatically + turned off in the following conditions: + 1. If TARGET_POWERPC64 is turned off. + Note that the OPTION_MASK_QUAD_MEMORY_ATOMIC flag is + automatically turned on in the following conditions: + 1. If TARGET_QUAD_MEMORY and this flag was not explicitly + disabled. */ + if ((flags & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0) + rs6000_define_or_undefine_macro (define_p, "__QUAD_MEMORY_ATOMIC__"); + /* Note that the OPTION_MASK_CRYPTO flag is automatically turned off + in the following conditions: + 1. If any of TARGET_HARD_FLOAT or TARGET_ALTIVEC or TARGET_VSX + are turned off explicitly and OPTION_MASK_CRYPTO is not turned + on explicitly. + 2. If TARGET_ALTIVEC is turned off. */ + if ((flags & OPTION_MASK_CRYPTO) != 0) + rs6000_define_or_undefine_macro (define_p, "__CRYPTO__"); + /* Note that the OPTION_MASK_UPPER_REGS_DF flag is automatically + turned on in the following conditions: + 1. If TARGET_UPPER_REGS is explicitly turned on and + TARGET_VSX is turned on and OPTION_MASK_UPPER_REGS_DF is not + explicitly turned off. Hereafter, the + OPTION_MASK_UPPER_REGS_DF flag is considered to have been + explicitly set. + Note that the OPTION_MASK_UPPER_REGS_DF flag is automatically + turned off in the following conditions: + 1. If TARGET_UPPER_REGS is explicitly turned off and TARGET_VSX + is turned on and OPTION_MASK_UPPER_REGS_DF is not explicitly + turned on. Hereafter, the OPTION_MASK_UPPER_REGS_DF flag is + considered to have been explicitly cleared. + 2. If TARGET_UPPER_REGS_DF is turned on but TARGET_VSX is turned + off. */ + if ((flags & OPTION_MASK_UPPER_REGS_DF) != 0) + rs6000_define_or_undefine_macro (define_p, "__UPPER_REGS_DF__"); + /* Note that the OPTION_MASK_UPPER_REGS_SF flag is automatically + turned on in the following conditions: + 1. If TARGET_UPPER_REGS is explicitly turned on and + TARGET_P8_VECTOR is on and OPTION_MASK_UPPER_REGS_SF is not + turned off explicitly. Hereafter, the + OPTION_MASK_UPPER_REGS_SF flag is considered to have been + explicitly set. 
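(Again purely illustrative, not part of the patch: the macros defined above are ordinary feature-test macros, and because rs6000_target_modify_macros is re-run when the target changes, they track #pragma GCC target / the target attribute. A hedged sketch follows; "cpu=power8" is only an example value and assumes the configured assembler supports that cpu.)

    /* Probing the macros defined by rs6000_target_modify_macros.  */
    #ifdef _ARCH_PWR8
    /* Compiling for at least POWER8 (e.g. -mcpu=power8).  */
    #endif

    #if defined (__ALTIVEC__) && defined (__VSX__)
    /* AltiVec and VSX code paths can be selected here.  */
    #endif

    #pragma GCC push_options
    #pragma GCC target ("cpu=power8")
    /* Within this region the macros are re-evaluated, so _ARCH_PWR8,
       __ALTIVEC__, __VSX__ and __POWER8_VECTOR__ should now be defined
       (unless individually disabled), even if the translation unit as a
       whole targets an older cpu.  */
    #pragma GCC pop_options
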
+ Note that the OPTION_MASK_UPPER_REGS_SF flag is automatically + turned off in the following conditions: + 1. If TARGET_UPPER_REGS is explicitly turned off and + TARGET_P8_VECTOR is on and OPTION_MASK_UPPER_REGS_SF is not + turned off explicitly. Hereafter, the + OPTION_MASK_UPPER_REGS_SF flag is considered to have been + explicitly cleared. + 2. If TARGET_P8_VECTOR is off. */ + if ((flags & OPTION_MASK_UPPER_REGS_SF) != 0) + rs6000_define_or_undefine_macro (define_p, "__UPPER_REGS_SF__"); + + /* options from the builtin masks. */ + /* Note that RS6000_BTM_SPE is enabled only if TARGET_SPE + (e.g. -mspe). */ + if ((bu_mask & RS6000_BTM_SPE) != 0) + rs6000_define_or_undefine_macro (define_p, "__SPE__"); + /* Note that RS6000_BTM_PAIRED is enabled only if + TARGET_PAIRED_FLOAT is enabled (e.g. -mpaired). */ + if ((bu_mask & RS6000_BTM_PAIRED) != 0) + rs6000_define_or_undefine_macro (define_p, "__PAIRED__"); + /* Note that RS6000_BTM_CELL is enabled only if (rs6000_cpu == + PROCESSOR_CELL) (e.g. -mcpu=cell). */ + if ((bu_mask & RS6000_BTM_CELL) != 0) + rs6000_define_or_undefine_macro (define_p, "__PPU__"); +} + +void +rs6000_cpu_cpp_builtins (cpp_reader *pfile) +{ + /* Define all of the common macros. */ + rs6000_target_modify_macros (true, rs6000_isa_flags, + rs6000_builtin_mask_calculate ()); + + if (TARGET_FRE) + builtin_define ("__RECIP__"); + if (TARGET_FRES) + builtin_define ("__RECIPF__"); + if (TARGET_FRSQRTE) + builtin_define ("__RSQRTE__"); + if (TARGET_FRSQRTES) + builtin_define ("__RSQRTEF__"); + if (TARGET_FLOAT128_KEYWORD) + builtin_define ("__FLOAT128__"); + if (TARGET_FLOAT128_TYPE) + builtin_define ("__FLOAT128_TYPE__"); + if (TARGET_FLOAT128_HW) + builtin_define ("__FLOAT128_HARDWARE__"); + if (TARGET_LONG_DOUBLE_128 && FLOAT128_IBM_P (TFmode)) + builtin_define ("__ibm128=long double"); + + /* We needed to create a keyword if -mfloat128-type was used but not -mfloat, + so we used __ieee128. If -mfloat128 was used, create a #define back to + the real keyword in case somebody used it. */ + if (TARGET_FLOAT128_KEYWORD) + builtin_define ("__ieee128=__float128"); + + if (TARGET_EXTRA_BUILTINS && cpp_get_options (pfile)->lang != CLK_ASM) + { + /* Define the AltiVec syntactic elements. */ + builtin_define ("__vector=__attribute__((altivec(vector__)))"); + builtin_define ("__pixel=__attribute__((altivec(pixel__))) unsigned short"); + builtin_define ("__bool=__attribute__((altivec(bool__))) unsigned"); + + if (!flag_iso) + { + builtin_define ("vector=vector"); + builtin_define ("pixel=pixel"); + builtin_define ("bool=bool"); + builtin_define ("_Bool=_Bool"); + init_vector_keywords (); + + /* Enable context-sensitive macros. */ + cpp_get_callbacks (pfile)->macro_to_expand = rs6000_macro_to_expand; + } + } + if ((!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE))) + ||(TARGET_HARD_FLOAT && TARGET_FPRS && !TARGET_DOUBLE_FLOAT)) + builtin_define ("_SOFT_DOUBLE"); + /* Used by lwarx/stwcx. errata work-around. */ + if (rs6000_cpu == PROCESSOR_PPC405) + builtin_define ("__PPC405__"); + /* Used by libstdc++. */ + if (TARGET_NO_LWSYNC) + builtin_define ("__NO_LWSYNC__"); + + if (TARGET_EXTRA_BUILTINS) + { + /* For the VSX builtin functions identical to Altivec functions, just map + the altivec builtin into the vsx version (the altivec functions + generate VSX code if -mvsx). 
*/ + builtin_define ("__builtin_vsx_xxland=__builtin_vec_and"); + builtin_define ("__builtin_vsx_xxlandc=__builtin_vec_andc"); + builtin_define ("__builtin_vsx_xxlnor=__builtin_vec_nor"); + builtin_define ("__builtin_vsx_xxlor=__builtin_vec_or"); + builtin_define ("__builtin_vsx_xxlxor=__builtin_vec_xor"); + builtin_define ("__builtin_vsx_xxsel=__builtin_vec_sel"); + builtin_define ("__builtin_vsx_vperm=__builtin_vec_perm"); + + /* Also map the a and m versions of the multiply/add instructions to the + builtin for people blindly going off the instruction manual. */ + builtin_define ("__builtin_vsx_xvmaddadp=__builtin_vsx_xvmadddp"); + builtin_define ("__builtin_vsx_xvmaddmdp=__builtin_vsx_xvmadddp"); + builtin_define ("__builtin_vsx_xvmaddasp=__builtin_vsx_xvmaddsp"); + builtin_define ("__builtin_vsx_xvmaddmsp=__builtin_vsx_xvmaddsp"); + builtin_define ("__builtin_vsx_xvmsubadp=__builtin_vsx_xvmsubdp"); + builtin_define ("__builtin_vsx_xvmsubmdp=__builtin_vsx_xvmsubdp"); + builtin_define ("__builtin_vsx_xvmsubasp=__builtin_vsx_xvmsubsp"); + builtin_define ("__builtin_vsx_xvmsubmsp=__builtin_vsx_xvmsubsp"); + builtin_define ("__builtin_vsx_xvnmaddadp=__builtin_vsx_xvnmadddp"); + builtin_define ("__builtin_vsx_xvnmaddmdp=__builtin_vsx_xvnmadddp"); + builtin_define ("__builtin_vsx_xvnmaddasp=__builtin_vsx_xvnmaddsp"); + builtin_define ("__builtin_vsx_xvnmaddmsp=__builtin_vsx_xvnmaddsp"); + builtin_define ("__builtin_vsx_xvnmsubadp=__builtin_vsx_xvnmsubdp"); + builtin_define ("__builtin_vsx_xvnmsubmdp=__builtin_vsx_xvnmsubdp"); + builtin_define ("__builtin_vsx_xvnmsubasp=__builtin_vsx_xvnmsubsp"); + builtin_define ("__builtin_vsx_xvnmsubmsp=__builtin_vsx_xvnmsubsp"); + } + + /* Tell users they can use __builtin_bswap{16,64}. */ + builtin_define ("__HAVE_BSWAP__"); + + /* May be overridden by target configuration. */ + RS6000_CPU_CPP_ENDIAN_BUILTINS(); + + if (TARGET_LONG_DOUBLE_128) + { + builtin_define ("__LONG_DOUBLE_128__"); + builtin_define ("__LONGDOUBLE128"); + + if (TARGET_IEEEQUAD) + builtin_define ("__LONG_DOUBLE_IEEE128__"); + else + builtin_define ("__LONG_DOUBLE_IBM128__"); + } + + switch (TARGET_CMODEL) + { + /* Deliberately omit __CMODEL_SMALL__ since that was the default + before --mcmodel support was added. */ + case CMODEL_MEDIUM: + builtin_define ("__CMODEL_MEDIUM__"); + break; + case CMODEL_LARGE: + builtin_define ("__CMODEL_LARGE__"); + break; + default: + break; + } + + switch (rs6000_current_abi) + { + case ABI_V4: + builtin_define ("_CALL_SYSV"); + break; + case ABI_AIX: + builtin_define ("_CALL_AIXDESC"); + builtin_define ("_CALL_AIX"); + builtin_define ("_CALL_ELF=1"); + break; + case ABI_ELFv2: + builtin_define ("_CALL_ELF=2"); + break; + case ABI_DARWIN: + builtin_define ("_CALL_DARWIN"); + break; + default: + break; + } + + /* Vector element order. */ + if (BYTES_BIG_ENDIAN || (rs6000_altivec_element_order == 2)) + builtin_define ("__VEC_ELEMENT_REG_ORDER__=__ORDER_BIG_ENDIAN__"); + else + builtin_define ("__VEC_ELEMENT_REG_ORDER__=__ORDER_LITTLE_ENDIAN__"); + + /* Let the compiled code know if 'f' class registers will not be available. */ + if (TARGET_SOFT_FLOAT || !TARGET_FPRS) + builtin_define ("__NO_FPRS__"); + + /* Whether aggregates passed by value are aligned to a 16 byte boundary + if their alignment is 16 bytes or larger. */ + if ((TARGET_MACHO && rs6000_darwin64_abi) + || DEFAULT_ABI == ABI_ELFv2 + || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)) + builtin_define ("__STRUCT_PARM_ALIGN__=16"); + + /* Generate defines for Xilinx FPU. 
*/ + if (rs6000_xilinx_fpu) + { + builtin_define ("_XFPU"); + if (rs6000_single_float && ! rs6000_double_float) + { + if (rs6000_simple_fpu) + builtin_define ("_XFPU_SP_LITE"); + else + builtin_define ("_XFPU_SP_FULL"); + } + if (rs6000_double_float) + { + if (rs6000_simple_fpu) + builtin_define ("_XFPU_DP_LITE"); + else + builtin_define ("_XFPU_DP_FULL"); + } + } +} + + +struct altivec_builtin_types +{ + enum rs6000_builtins code; + enum rs6000_builtins overloaded_code; + signed char ret_type; + signed char op1; + signed char op2; + signed char op3; +}; + +const struct altivec_builtin_types altivec_overloaded_builtins[] = { + /* Unary AltiVec/VSX builtins. */ + { ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_ABS, P8V_BUILTIN_ABS_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_ABS, VSX_BUILTIN_XVABSDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_ABSS, ALTIVEC_BUILTIN_ABSS_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_ABSS, ALTIVEC_BUILTIN_ABSS_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_ABSS, ALTIVEC_BUILTIN_ABSS_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_CEIL, ALTIVEC_BUILTIN_VRFIP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_CEIL, VSX_BUILTIN_XVRDPIP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_EXPTE, ALTIVEC_BUILTIN_VEXPTEFP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_FLOOR, VSX_BUILTIN_XVRDPIM, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_FLOOR, ALTIVEC_BUILTIN_VRFIM, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_LOGE, ALTIVEC_BUILTIN_VLOGEFP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_MTVSCR, ALTIVEC_BUILTIN_MTVSCR, + RS6000_BTI_void, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_MTVSCR, ALTIVEC_BUILTIN_MTVSCR, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_MTVSCR, ALTIVEC_BUILTIN_MTVSCR, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_MTVSCR, ALTIVEC_BUILTIN_MTVSCR, + RS6000_BTI_void, RS6000_BTI_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_MTVSCR, ALTIVEC_BUILTIN_MTVSCR, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_MTVSCR, ALTIVEC_BUILTIN_MTVSCR, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_MTVSCR, ALTIVEC_BUILTIN_MTVSCR, + RS6000_BTI_void, RS6000_BTI_pixel_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_MTVSCR, ALTIVEC_BUILTIN_MTVSCR, + RS6000_BTI_void, RS6000_BTI_V16QI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_MTVSCR, ALTIVEC_BUILTIN_MTVSCR, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_MTVSCR, ALTIVEC_BUILTIN_MTVSCR, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_RE, ALTIVEC_BUILTIN_VREFP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_RE, VSX_BUILTIN_XVREDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_ROUND, ALTIVEC_BUILTIN_VRFIN, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_ROUND, VSX_BUILTIN_XVRDPI, + RS6000_BTI_V2DF, 
RS6000_BTI_V2DF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_RECIP, ALTIVEC_BUILTIN_VRECIPFP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_RECIP, VSX_BUILTIN_RECIP_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_RSQRT, ALTIVEC_BUILTIN_VRSQRTFP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_RSQRT, VSX_BUILTIN_RSQRT_2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_RSQRTE, ALTIVEC_BUILTIN_VRSQRTEFP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_RSQRTE, VSX_BUILTIN_XVRSQRTEDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_TRUNC, ALTIVEC_BUILTIN_VRFIZ, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_TRUNC, VSX_BUILTIN_XVRDPIZ, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHSB, + RS6000_BTI_V8HI, RS6000_BTI_V16QI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHSB, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V16QI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHSH, + RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHSH, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHPX, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_pixel_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VUPKHSH, ALTIVEC_BUILTIN_VUPKHSH, + RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VUPKHSH, ALTIVEC_BUILTIN_VUPKHSH, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VUPKHSH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VUPKHSH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VUPKHPX, ALTIVEC_BUILTIN_VUPKHPX, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VUPKHPX, ALTIVEC_BUILTIN_VUPKHPX, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_pixel_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VUPKHSB, ALTIVEC_BUILTIN_VUPKHSB, + RS6000_BTI_V8HI, RS6000_BTI_V16QI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VUPKHSB, ALTIVEC_BUILTIN_VUPKHSB, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V16QI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKL, ALTIVEC_BUILTIN_VUPKLSB, + RS6000_BTI_V8HI, RS6000_BTI_V16QI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKL, ALTIVEC_BUILTIN_VUPKLSB, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V16QI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKL, ALTIVEC_BUILTIN_VUPKLPX, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_pixel_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKL, ALTIVEC_BUILTIN_VUPKLSH, + RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKL, ALTIVEC_BUILTIN_VUPKLSH, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKL, P8V_BUILTIN_VUPKLSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKL, P8V_BUILTIN_VUPKLSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VUPKLPX, ALTIVEC_BUILTIN_VUPKLPX, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VUPKLPX, ALTIVEC_BUILTIN_VUPKLPX, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_pixel_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VUPKLSH, ALTIVEC_BUILTIN_VUPKLSH, + RS6000_BTI_V4SI, 
RS6000_BTI_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VUPKLSH, ALTIVEC_BUILTIN_VUPKLSH, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VUPKLSB, ALTIVEC_BUILTIN_VUPKLSB, + RS6000_BTI_V8HI, RS6000_BTI_V16QI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VUPKLSB, ALTIVEC_BUILTIN_VUPKLSB, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V16QI, 0, 0 }, + + /* Binary AltiVec/VSX builtins. */ + { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUBM, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUBM, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUBM, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUBM, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUBM, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUBM, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUHM, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUHM, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUHM, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUHM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUHM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUHM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUWM, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUWM, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUWM, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUWM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUWM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUWM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDFP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, 
VSX_BUILTIN_XVADDDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUQM, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUQM, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDFP, ALTIVEC_BUILTIN_VADDFP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUWM, ALTIVEC_BUILTIN_VADDUWM, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUWM, ALTIVEC_BUILTIN_VADDUWM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUWM, ALTIVEC_BUILTIN_VADDUWM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUWM, ALTIVEC_BUILTIN_VADDUWM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUWM, ALTIVEC_BUILTIN_VADDUWM, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUWM, ALTIVEC_BUILTIN_VADDUWM, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUWM, ALTIVEC_BUILTIN_VADDUWM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUWM, ALTIVEC_BUILTIN_VADDUWM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUHM, ALTIVEC_BUILTIN_VADDUHM, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUHM, ALTIVEC_BUILTIN_VADDUHM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUHM, ALTIVEC_BUILTIN_VADDUHM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUHM, ALTIVEC_BUILTIN_VADDUHM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUHM, ALTIVEC_BUILTIN_VADDUHM, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUHM, ALTIVEC_BUILTIN_VADDUHM, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUHM, ALTIVEC_BUILTIN_VADDUHM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUHM, ALTIVEC_BUILTIN_VADDUHM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUBM, ALTIVEC_BUILTIN_VADDUBM, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUBM, ALTIVEC_BUILTIN_VADDUBM, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUBM, ALTIVEC_BUILTIN_VADDUBM, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUBM, ALTIVEC_BUILTIN_VADDUBM, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUBM, ALTIVEC_BUILTIN_VADDUBM, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUBM, ALTIVEC_BUILTIN_VADDUBM, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUBM, ALTIVEC_BUILTIN_VADDUBM, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUBM, ALTIVEC_BUILTIN_VADDUBM, + RS6000_BTI_unsigned_V16QI, 
RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADDC, ALTIVEC_BUILTIN_VADDCUW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADDC, ALTIVEC_BUILTIN_VADDCUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADDC, P8V_BUILTIN_VADDCUQ, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADDC, P8V_BUILTIN_VADDCUQ, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADDEC, P8V_BUILTIN_VADDECUQ, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, + { ALTIVEC_BUILTIN_VEC_ADDEC, P8V_BUILTIN_VADDECUQ, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI }, + { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDUBS, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDUBS, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDUBS, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDSBS, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDSBS, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDSBS, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDUHS, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDUHS, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDUHS, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDSHS, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDSHS, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDSHS, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDUWS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDUWS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDUWS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDSWS, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDSWS, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDSWS, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDSWS, ALTIVEC_BUILTIN_VADDSWS, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDSWS, ALTIVEC_BUILTIN_VADDSWS, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDSWS, ALTIVEC_BUILTIN_VADDSWS, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUWS, 
ALTIVEC_BUILTIN_VADDUWS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUWS, ALTIVEC_BUILTIN_VADDUWS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUWS, ALTIVEC_BUILTIN_VADDUWS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUWS, ALTIVEC_BUILTIN_VADDUWS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUWS, ALTIVEC_BUILTIN_VADDUWS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDSHS, ALTIVEC_BUILTIN_VADDSHS, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDSHS, ALTIVEC_BUILTIN_VADDSHS, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDSHS, ALTIVEC_BUILTIN_VADDSHS, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUHS, ALTIVEC_BUILTIN_VADDUHS, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUHS, ALTIVEC_BUILTIN_VADDUHS, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUHS, ALTIVEC_BUILTIN_VADDUHS, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUHS, ALTIVEC_BUILTIN_VADDUHS, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUHS, ALTIVEC_BUILTIN_VADDUHS, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDSBS, ALTIVEC_BUILTIN_VADDSBS, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDSBS, ALTIVEC_BUILTIN_VADDSBS, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDSBS, ALTIVEC_BUILTIN_VADDSBS, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUBS, ALTIVEC_BUILTIN_VADDUBS, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUBS, ALTIVEC_BUILTIN_VADDUBS, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUBS, ALTIVEC_BUILTIN_VADDUBS, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUBS, ALTIVEC_BUILTIN_VADDUBS, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VADDUBS, ALTIVEC_BUILTIN_VADDUBS, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 
}, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V4SF, 
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + 
RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_AVG, ALTIVEC_BUILTIN_VAVGUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_AVG, ALTIVEC_BUILTIN_VAVGSB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_AVG, ALTIVEC_BUILTIN_VAVGUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_AVG, ALTIVEC_BUILTIN_VAVGSH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_AVG, ALTIVEC_BUILTIN_VAVGUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_AVG, ALTIVEC_BUILTIN_VAVGSW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VAVGSW, ALTIVEC_BUILTIN_VAVGSW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VAVGUW, ALTIVEC_BUILTIN_VAVGUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VAVGSH, ALTIVEC_BUILTIN_VAVGSH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VAVGUH, ALTIVEC_BUILTIN_VAVGUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VAVGSB, ALTIVEC_BUILTIN_VAVGSB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VAVGUB, ALTIVEC_BUILTIN_VAVGUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPB, ALTIVEC_BUILTIN_VCMPBFP, + RS6000_BTI_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUB, + RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUB, + RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUB, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUH, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUH, + RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUH, + RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUW, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUW, + RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUW, + RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, P8V_BUILTIN_VCMPEQUD, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, P8V_BUILTIN_VCMPEQUD, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, P8V_BUILTIN_VCMPEQUD, + RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { 
ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQFP, + RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, VSX_BUILTIN_XVCMPEQDP, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_VCMPEQFP, ALTIVEC_BUILTIN_VCMPEQFP, + RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + + { ALTIVEC_BUILTIN_VEC_VCMPEQUW, ALTIVEC_BUILTIN_VCMPEQUW, + RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VCMPEQUW, ALTIVEC_BUILTIN_VCMPEQUW, + RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + + { ALTIVEC_BUILTIN_VEC_VCMPEQUH, ALTIVEC_BUILTIN_VCMPEQUH, + RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VCMPEQUH, ALTIVEC_BUILTIN_VCMPEQUH, + RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + + { ALTIVEC_BUILTIN_VEC_VCMPEQUB, ALTIVEC_BUILTIN_VCMPEQUB, + RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VCMPEQUB, ALTIVEC_BUILTIN_VCMPEQUB, + RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + + { ALTIVEC_BUILTIN_VEC_CMPGE, ALTIVEC_BUILTIN_VCMPGEFP, + RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_XVCMPGEDP, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_16QI, + RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_U16QI, + RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_8HI, + RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_U8HI, + RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_4SI, + RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_U4SI, + RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_U2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTUB, + RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTSB, + RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTUH, + RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTSH, + RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTUW, + RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTSW, + RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTUD, + RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTSD, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTFP, + RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, 
RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGT, VSX_BUILTIN_XVCMPGTDP, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_VCMPGTFP, ALTIVEC_BUILTIN_VCMPGTFP, + RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_VCMPGTSW, ALTIVEC_BUILTIN_VCMPGTSW, + RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VCMPGTSW, ALTIVEC_BUILTIN_VCMPGTSW, + RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VCMPGTUW, ALTIVEC_BUILTIN_VCMPGTUW, + RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VCMPGTUW, ALTIVEC_BUILTIN_VCMPGTUW, + RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VCMPGTSH, ALTIVEC_BUILTIN_VCMPGTSH, + RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VCMPGTSH, ALTIVEC_BUILTIN_VCMPGTSH, + RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VCMPGTUH, ALTIVEC_BUILTIN_VCMPGTUH, + RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VCMPGTUH, ALTIVEC_BUILTIN_VCMPGTUH, + RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VCMPGTSB, ALTIVEC_BUILTIN_VCMPGTSB, + RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VCMPGTSB, ALTIVEC_BUILTIN_VCMPGTSB, + RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VCMPGTUB, ALTIVEC_BUILTIN_VCMPGTUB, + RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VCMPGTUB, ALTIVEC_BUILTIN_VCMPGTUB, + RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPLE, ALTIVEC_BUILTIN_VCMPGEFP, + RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_XVCMPGEDP, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_16QI, + RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_U16QI, + RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_8HI, + RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_U8HI, + RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_4SI, + RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_U4SI, + RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_U2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTUB, + RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTSB, + RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTUH, + RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPLT, 
ALTIVEC_BUILTIN_VCMPGTSH,
+    RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+  { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTUW,
+    RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+  { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTSW,
+    RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+  { ALTIVEC_BUILTIN_VEC_CMPLT, P8V_BUILTIN_VCMPGTUD,
+    RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+  { ALTIVEC_BUILTIN_VEC_CMPLT, P8V_BUILTIN_VCMPGTSD,
+    RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+  { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTFP,
+    RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+  { ALTIVEC_BUILTIN_VEC_CMPLT, VSX_BUILTIN_XVCMPGTDP,
+    RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+  { ALTIVEC_BUILTIN_VEC_COPYSIGN, VSX_BUILTIN_CPSGNDP,
+    RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+  { ALTIVEC_BUILTIN_VEC_COPYSIGN, ALTIVEC_BUILTIN_COPYSIGN_V4SF,
+    RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+  { ALTIVEC_BUILTIN_VEC_CTF, ALTIVEC_BUILTIN_VCFUX,
+    RS6000_BTI_V4SF, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 },
+  { ALTIVEC_BUILTIN_VEC_CTF, ALTIVEC_BUILTIN_VCFSX,
+    RS6000_BTI_V4SF, RS6000_BTI_V4SI, RS6000_BTI_INTSI, 0 },
+  { ALTIVEC_BUILTIN_VEC_CTF, VSX_BUILTIN_XVCVSXDDP_SCALE,
+    RS6000_BTI_V2DF, RS6000_BTI_V2DI, RS6000_BTI_INTSI, 0},
+  { ALTIVEC_BUILTIN_VEC_CTF, VSX_BUILTIN_XVCVUXDDP_SCALE,
+    RS6000_BTI_V2DF, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, 0},
+  { ALTIVEC_BUILTIN_VEC_VCFSX, ALTIVEC_BUILTIN_VCFSX,
+    RS6000_BTI_V4SF, RS6000_BTI_V4SI, RS6000_BTI_INTSI, 0 },
+  { ALTIVEC_BUILTIN_VEC_VCFUX, ALTIVEC_BUILTIN_VCFUX,
+    RS6000_BTI_V4SF, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 },
+  { ALTIVEC_BUILTIN_VEC_CTS, ALTIVEC_BUILTIN_VCTSXS,
+    RS6000_BTI_V4SI, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 },
+  { ALTIVEC_BUILTIN_VEC_CTS, VSX_BUILTIN_XVCVDPSXDS_SCALE,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DF, RS6000_BTI_INTSI, 0 },
+  { ALTIVEC_BUILTIN_VEC_CTU, ALTIVEC_BUILTIN_VCTUXS,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 },
+  { ALTIVEC_BUILTIN_VEC_CTU, VSX_BUILTIN_XVCVDPUXDS_SCALE,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_V2DF, RS6000_BTI_INTSI, 0 },
+  { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_XVDIVSP,
+    RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+  { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_XVDIVDP,
+    RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+  { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_DIV_V2DI,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+  { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_UDIV_V2DI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+  { VSX_BUILTIN_VEC_DOUBLE, VSX_BUILTIN_XVCVSXDDP,
+    RS6000_BTI_V2DF, RS6000_BTI_V2DI, 0, 0 },
+  { VSX_BUILTIN_VEC_DOUBLE, VSX_BUILTIN_XVCVUXDDP,
+    RS6000_BTI_V2DF, RS6000_BTI_unsigned_V2DI, 0, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V2DF,
+    RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V2DI,
+    RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V2DI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
+    ~RS6000_BTI_unsigned_V2DI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V2DI,
+    RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SF,
+    RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SF,
+    RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SI,
+    RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SI,
+    RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SI,
+    RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SI,
+    RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V8HI,
+    RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V8HI,
+    RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V8HI,
+    RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V8HI,
+    RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V8HI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V8HI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V16QI,
+    RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V16QI,
+    RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V16QI,
+    RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V16QI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V16QI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LDE, ALTIVEC_BUILTIN_LVEBX,
+    RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LDE, ALTIVEC_BUILTIN_LVEBX,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LDE, ALTIVEC_BUILTIN_LVEHX,
+    RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LDE, ALTIVEC_BUILTIN_LVEHX,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LDE, ALTIVEC_BUILTIN_LVEWX,
+    RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+  { ALTIVEC_BUILTIN_VEC_LDE, ALTIVEC_BUILTIN_LVEWX,
+    RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LDE, ALTIVEC_BUILTIN_LVEWX,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
+  { ALTIVEC_BUILTIN_VEC_LDE, ALTIVEC_BUILTIN_LVEWX,
+    RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 },
+  { ALTIVEC_BUILTIN_VEC_LDE, ALTIVEC_BUILTIN_LVEWX,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long, 0 },
+  { ALTIVEC_BUILTIN_VEC_LVEWX, ALTIVEC_BUILTIN_LVEWX,
+    RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+  { ALTIVEC_BUILTIN_VEC_LVEWX, ALTIVEC_BUILTIN_LVEWX,
+    RS6000_BTI_V4SI, RS6000_BTI_INTSI,
~RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVEWX, ALTIVEC_BUILTIN_LVEWX, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVEWX, ALTIVEC_BUILTIN_LVEWX, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 }, + { ALTIVEC_BUILTIN_VEC_LVEWX, ALTIVEC_BUILTIN_LVEWX, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long, 0 }, + { ALTIVEC_BUILTIN_VEC_LVEHX, ALTIVEC_BUILTIN_LVEHX, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVEHX, ALTIVEC_BUILTIN_LVEHX, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVEBX, ALTIVEC_BUILTIN_LVEBX, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVEBX, ALTIVEC_BUILTIN_LVEBX, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SI, + RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V8HI, + RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V8HI, + RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V16QI, + RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V2DI, + RS6000_BTI_V2DI, 
RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTDI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTDI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_long_long, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTDI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTDI, 0 }, + { 
ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_long_long, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX, + RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX, + RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX, + RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX, + RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX, + RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX, + RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL, + RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL, + RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL, + RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL, + RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL, + RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLXL, 
ALTIVEC_BUILTIN_LVLXL, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL, + RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX, + RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX, + RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX, + RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX, + RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX, + RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX, + RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL, + RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL, + RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL, + RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, 
~RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL, + RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL, + RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL, + RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSB, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSH, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, 
ALTIVEC_BUILTIN_VMAXUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSW, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXFP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, VSX_BUILTIN_XVMAXDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXFP, ALTIVEC_BUILTIN_VMAXFP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXSW, ALTIVEC_BUILTIN_VMAXSW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXSW, ALTIVEC_BUILTIN_VMAXSW, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXSW, ALTIVEC_BUILTIN_VMAXSW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXUW, ALTIVEC_BUILTIN_VMAXUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXUW, ALTIVEC_BUILTIN_VMAXUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXUW, ALTIVEC_BUILTIN_VMAXUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXUW, ALTIVEC_BUILTIN_VMAXUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXUW, ALTIVEC_BUILTIN_VMAXUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXSH, ALTIVEC_BUILTIN_VMAXSH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXSH, ALTIVEC_BUILTIN_VMAXSH, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXSH, ALTIVEC_BUILTIN_VMAXSH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXUH, ALTIVEC_BUILTIN_VMAXUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXUH, ALTIVEC_BUILTIN_VMAXUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXUH, ALTIVEC_BUILTIN_VMAXUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXUH, ALTIVEC_BUILTIN_VMAXUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 
RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXUH, ALTIVEC_BUILTIN_VMAXUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXSB, ALTIVEC_BUILTIN_VMAXSB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXSB, ALTIVEC_BUILTIN_VMAXSB, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXSB, ALTIVEC_BUILTIN_VMAXSB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXUB, ALTIVEC_BUILTIN_VMAXUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXUB, ALTIVEC_BUILTIN_VMAXUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXUB, ALTIVEC_BUILTIN_VMAXUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXUB, ALTIVEC_BUILTIN_VMAXUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMAXUB, ALTIVEC_BUILTIN_VMAXUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, ALTIVEC_BUILTIN_VMRGHB, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, ALTIVEC_BUILTIN_VMRGHB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, ALTIVEC_BUILTIN_VMRGHB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, ALTIVEC_BUILTIN_VMRGHH, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, ALTIVEC_BUILTIN_VMRGHH, + RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, ALTIVEC_BUILTIN_VMRGHH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, ALTIVEC_BUILTIN_VMRGHH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, ALTIVEC_BUILTIN_VMRGHW, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, ALTIVEC_BUILTIN_VMRGHW, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, ALTIVEC_BUILTIN_VMRGHW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, ALTIVEC_BUILTIN_VMRGHW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { 
ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMRGHW, ALTIVEC_BUILTIN_VMRGHW, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_VMRGHW, ALTIVEC_BUILTIN_VMRGHW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMRGHW, ALTIVEC_BUILTIN_VMRGHW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMRGHW, ALTIVEC_BUILTIN_VMRGHW, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMRGHH, ALTIVEC_BUILTIN_VMRGHH, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMRGHH, ALTIVEC_BUILTIN_VMRGHH, + RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMRGHH, ALTIVEC_BUILTIN_VMRGHH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMRGHH, ALTIVEC_BUILTIN_VMRGHH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMRGHB, ALTIVEC_BUILTIN_VMRGHB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMRGHB, ALTIVEC_BUILTIN_VMRGHB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMRGHB, ALTIVEC_BUILTIN_VMRGHB, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, ALTIVEC_BUILTIN_VMRGLB, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, ALTIVEC_BUILTIN_VMRGLB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, ALTIVEC_BUILTIN_VMRGLB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, ALTIVEC_BUILTIN_VMRGLH, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, ALTIVEC_BUILTIN_VMRGLH, + RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, ALTIVEC_BUILTIN_VMRGLH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, ALTIVEC_BUILTIN_VMRGLH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, ALTIVEC_BUILTIN_VMRGLW, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, ALTIVEC_BUILTIN_VMRGLW, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, ALTIVEC_BUILTIN_VMRGLW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, ALTIVEC_BUILTIN_VMRGLW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 
RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMRGLW, ALTIVEC_BUILTIN_VMRGLW, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_VMRGLW, ALTIVEC_BUILTIN_VMRGLW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMRGLW, ALTIVEC_BUILTIN_VMRGLW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMRGLW, ALTIVEC_BUILTIN_VMRGLW, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMRGLH, ALTIVEC_BUILTIN_VMRGLH, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMRGLH, ALTIVEC_BUILTIN_VMRGLH, + RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMRGLH, ALTIVEC_BUILTIN_VMRGLH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMRGLH, ALTIVEC_BUILTIN_VMRGLH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMRGLB, ALTIVEC_BUILTIN_VMRGLB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMRGLB, ALTIVEC_BUILTIN_VMRGLB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMRGLB, ALTIVEC_BUILTIN_VMRGLB, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSB, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSH, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, 
ALTIVEC_BUILTIN_VMINUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSW, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINFP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, VSX_BUILTIN_XVMINDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINFP, ALTIVEC_BUILTIN_VMINFP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINSW, ALTIVEC_BUILTIN_VMINSW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINSW, ALTIVEC_BUILTIN_VMINSW, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINSW, ALTIVEC_BUILTIN_VMINSW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINUW, ALTIVEC_BUILTIN_VMINUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINUW, ALTIVEC_BUILTIN_VMINUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINUW, ALTIVEC_BUILTIN_VMINUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINUW, ALTIVEC_BUILTIN_VMINUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINUW, ALTIVEC_BUILTIN_VMINUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINSH, ALTIVEC_BUILTIN_VMINSH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINSH, ALTIVEC_BUILTIN_VMINSH, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINSH, ALTIVEC_BUILTIN_VMINSH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINSB, ALTIVEC_BUILTIN_VMINSB, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINSB, ALTIVEC_BUILTIN_VMINSB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINSB, ALTIVEC_BUILTIN_VMINSB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINUH, ALTIVEC_BUILTIN_VMINUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINUH, 
ALTIVEC_BUILTIN_VMINUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINUH, ALTIVEC_BUILTIN_VMINUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINUH, ALTIVEC_BUILTIN_VMINUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINUH, ALTIVEC_BUILTIN_VMINUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINUB, ALTIVEC_BUILTIN_VMINUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINUB, ALTIVEC_BUILTIN_VMINUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINUB, ALTIVEC_BUILTIN_VMINUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINUB, ALTIVEC_BUILTIN_VMINUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMINUB, ALTIVEC_BUILTIN_VMINUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULEUB, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESB, + RS6000_BTI_V8HI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULEUH, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESH, + RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESH, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESH, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMULEUB, ALTIVEC_BUILTIN_VMULEUB, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMULESB, ALTIVEC_BUILTIN_VMULESB, + RS6000_BTI_V8HI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMULEUH, ALTIVEC_BUILTIN_VMULEUH, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMULESH, ALTIVEC_BUILTIN_VMULESH, + RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOUB, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSB, + RS6000_BTI_V8HI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOUH, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSH, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSH, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSH, + RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMULOSH, ALTIVEC_BUILTIN_VMULOSH, + RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMULOUH, ALTIVEC_BUILTIN_VMULOUH, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMULOSB, 
ALTIVEC_BUILTIN_VMULOSB, + RS6000_BTI_V8HI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VMULOUB, ALTIVEC_BUILTIN_VMULOUB, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_NABS, ALTIVEC_BUILTIN_NABS_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_NABS, ALTIVEC_BUILTIN_NABS_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_NABS, ALTIVEC_BUILTIN_NABS_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_NABS, ALTIVEC_BUILTIN_NABS_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_NABS, ALTIVEC_BUILTIN_NABS_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_NABS, VSX_BUILTIN_XVNABSDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_NEARBYINT, VSX_BUILTIN_XVRDPI, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_NEARBYINT, VSX_BUILTIN_XVRSPI, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + + { ALTIVEC_BUILTIN_VEC_NEG, ALTIVEC_BUILTIN_NEG_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_NEG, ALTIVEC_BUILTIN_NEG_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_NEG, ALTIVEC_BUILTIN_NEG_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_NEG, ALTIVEC_BUILTIN_NEG_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_NEG, ALTIVEC_BUILTIN_NEG_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_NEG, ALTIVEC_BUILTIN_NEG_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, + + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 
RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + 
RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACK, ALTIVEC_BUILTIN_VPKUHUM, + RS6000_BTI_V16QI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACK, ALTIVEC_BUILTIN_VPKUHUM, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACK, ALTIVEC_BUILTIN_VPKUHUM, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACK, ALTIVEC_BUILTIN_VPKUWUM, + RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACK, ALTIVEC_BUILTIN_VPKUWUM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACK, ALTIVEC_BUILTIN_VPKUWUM, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_V4SF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM, + RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VPKUHUM, ALTIVEC_BUILTIN_VPKUHUM, + RS6000_BTI_V16QI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VPKUHUM, ALTIVEC_BUILTIN_VPKUHUM, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VPKUHUM, ALTIVEC_BUILTIN_VPKUHUM, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKPX, ALTIVEC_BUILTIN_VPKPX, + RS6000_BTI_pixel_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKS, ALTIVEC_BUILTIN_VPKUHUS, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKS, ALTIVEC_BUILTIN_VPKSHSS, + RS6000_BTI_V16QI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKS, ALTIVEC_BUILTIN_VPKUWUS, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKS, ALTIVEC_BUILTIN_VPKSWSS, + RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKS, P8V_BUILTIN_VPKUDUS, + 
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKS, P8V_BUILTIN_VPKSDSS, + RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_VPKSWSS, ALTIVEC_BUILTIN_VPKSWSS, + RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VPKUWUS, ALTIVEC_BUILTIN_VPKUWUS, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VPKSHSS, ALTIVEC_BUILTIN_VPKSHSS, + RS6000_BTI_V16QI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VPKUHUS, ALTIVEC_BUILTIN_VPKUHUS, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKSU, ALTIVEC_BUILTIN_VPKUHUS, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKSU, ALTIVEC_BUILTIN_VPKSHUS, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKSU, ALTIVEC_BUILTIN_VPKUWUS, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKSU, ALTIVEC_BUILTIN_VPKSWUS, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKSU, P8V_BUILTIN_VPKSDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKSU, P8V_BUILTIN_VPKSDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_VPKSWUS, ALTIVEC_BUILTIN_VPKSWUS, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VPKSHUS, ALTIVEC_BUILTIN_VPKSHUS, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_RINT, VSX_BUILTIN_XVRDPIC, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_RINT, VSX_BUILTIN_XVRSPIC, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_RL, P8V_BUILTIN_VRLD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_RL, P8V_BUILTIN_VRLD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VRLH, ALTIVEC_BUILTIN_VRLH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VRLH, ALTIVEC_BUILTIN_VRLH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VRLB, ALTIVEC_BUILTIN_VRLB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 
RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VRLB, ALTIVEC_BUILTIN_VRLB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { P9V_BUILTIN_VEC_RLMI, P9V_BUILTIN_VRLWMI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI }, + { P9V_BUILTIN_VEC_RLMI, P9V_BUILTIN_VRLDMI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { P9V_BUILTIN_VEC_RLNM, P9V_BUILTIN_VRLWNM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P9V_BUILTIN_VEC_RLNM, P9V_BUILTIN_VRLDNM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SL, P8V_BUILTIN_VSLD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SL, P8V_BUILTIN_VSLD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTSP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VSLW, ALTIVEC_BUILTIN_VSLW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSLW, ALTIVEC_BUILTIN_VSLW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSLH, ALTIVEC_BUILTIN_VSLH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSLH, ALTIVEC_BUILTIN_VSLH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSLB, ALTIVEC_BUILTIN_VSLB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSLB, ALTIVEC_BUILTIN_VSLB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_bool_V4SI, 
RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO, + RS6000_BTI_unsigned_V4SI, 
RS6000_BTI_unsigned_V4SI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO, + RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO, + RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTB, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTH, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTH, + RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTW, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTW, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, VSX_BUILTIN_XXSPLTD_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, VSX_BUILTIN_XXSPLTD_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, 
VSX_BUILTIN_XXSPLTD_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, VSX_BUILTIN_XXSPLTD_V2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSPLTW, ALTIVEC_BUILTIN_VSPLTW, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSPLTW, ALTIVEC_BUILTIN_VSPLTW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSPLTW, ALTIVEC_BUILTIN_VSPLTW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSPLTW, ALTIVEC_BUILTIN_VSPLTW, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSPLTH, ALTIVEC_BUILTIN_VSPLTH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSPLTH, ALTIVEC_BUILTIN_VSPLTH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSPLTH, ALTIVEC_BUILTIN_VSPLTH, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSPLTH, ALTIVEC_BUILTIN_VSPLTH, + RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSPLTB, ALTIVEC_BUILTIN_VSPLTB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSPLTB, ALTIVEC_BUILTIN_VSPLTB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSPLTB, ALTIVEC_BUILTIN_VSPLTB, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SR, ALTIVEC_BUILTIN_VSRB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SR, ALTIVEC_BUILTIN_VSRB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SR, ALTIVEC_BUILTIN_VSRH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SR, ALTIVEC_BUILTIN_VSRH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SR, ALTIVEC_BUILTIN_VSRW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SR, ALTIVEC_BUILTIN_VSRW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SR, P8V_BUILTIN_VSRD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SR, P8V_BUILTIN_VSRD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSRH, ALTIVEC_BUILTIN_VSRH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSRH, ALTIVEC_BUILTIN_VSRH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSRB, ALTIVEC_BUILTIN_VSRB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSRB, ALTIVEC_BUILTIN_VSRB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRA, ALTIVEC_BUILTIN_VSRAB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRA, ALTIVEC_BUILTIN_VSRAB, + 
RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRA, ALTIVEC_BUILTIN_VSRAH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRA, ALTIVEC_BUILTIN_VSRAH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRA, ALTIVEC_BUILTIN_VSRAW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRA, ALTIVEC_BUILTIN_VSRAW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRA, P8V_BUILTIN_VSRAD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRA, P8V_BUILTIN_VSRAD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSRAH, ALTIVEC_BUILTIN_VSRAH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSRAH, ALTIVEC_BUILTIN_VSRAH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSRAB, ALTIVEC_BUILTIN_VSRAB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSRAB, ALTIVEC_BUILTIN_VSRAB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, 0 }, + { 
ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO, + RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO, + RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRO, 
ALTIVEC_BUILTIN_VSRO, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUBM, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUBM, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUBM, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUBM, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUBM, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUBM, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUHM, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUHM, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUHM, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUHM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUHM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUHM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUWM, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUWM, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUWM, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUWM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUWM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUWM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBFP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, VSX_BUILTIN_XVSUBDP, + RS6000_BTI_V2DF, 
RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUQM, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUQM, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBFP, ALTIVEC_BUILTIN_VSUBFP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUWM, ALTIVEC_BUILTIN_VSUBUWM, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUWM, ALTIVEC_BUILTIN_VSUBUWM, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUWM, ALTIVEC_BUILTIN_VSUBUWM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUWM, ALTIVEC_BUILTIN_VSUBUWM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUWM, ALTIVEC_BUILTIN_VSUBUWM, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUWM, ALTIVEC_BUILTIN_VSUBUWM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUWM, ALTIVEC_BUILTIN_VSUBUWM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUWM, ALTIVEC_BUILTIN_VSUBUWM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUHM, ALTIVEC_BUILTIN_VSUBUHM, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUHM, ALTIVEC_BUILTIN_VSUBUHM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUHM, ALTIVEC_BUILTIN_VSUBUHM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUHM, ALTIVEC_BUILTIN_VSUBUHM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUHM, ALTIVEC_BUILTIN_VSUBUHM, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUHM, ALTIVEC_BUILTIN_VSUBUHM, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUHM, ALTIVEC_BUILTIN_VSUBUHM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUHM, ALTIVEC_BUILTIN_VSUBUHM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUBM, ALTIVEC_BUILTIN_VSUBUBM, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUBM, ALTIVEC_BUILTIN_VSUBUBM, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUBM, ALTIVEC_BUILTIN_VSUBUBM, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUBM, ALTIVEC_BUILTIN_VSUBUBM, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUBM, ALTIVEC_BUILTIN_VSUBUBM, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUBM, ALTIVEC_BUILTIN_VSUBUBM, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUBM, ALTIVEC_BUILTIN_VSUBUBM, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUBM, ALTIVEC_BUILTIN_VSUBUBM, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 
RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUBC, ALTIVEC_BUILTIN_VSUBCUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBUBS, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBUBS, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBUBS, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBSBS, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBSBS, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBSBS, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBUHS, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBUHS, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBUHS, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBSHS, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBSHS, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBSHS, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBUWS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBUWS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBUWS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBSWS, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBSWS, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBSWS, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBSWS, ALTIVEC_BUILTIN_VSUBSWS, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBSWS, ALTIVEC_BUILTIN_VSUBSWS, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBSWS, ALTIVEC_BUILTIN_VSUBSWS, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUWS, ALTIVEC_BUILTIN_VSUBUWS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUWS, ALTIVEC_BUILTIN_VSUBUWS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUWS, ALTIVEC_BUILTIN_VSUBUWS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUWS, ALTIVEC_BUILTIN_VSUBUWS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUWS, ALTIVEC_BUILTIN_VSUBUWS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBSHS, 
ALTIVEC_BUILTIN_VSUBSHS, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBSHS, ALTIVEC_BUILTIN_VSUBSHS, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBSHS, ALTIVEC_BUILTIN_VSUBSHS, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUHS, ALTIVEC_BUILTIN_VSUBUHS, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUHS, ALTIVEC_BUILTIN_VSUBUHS, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUHS, ALTIVEC_BUILTIN_VSUBUHS, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUHS, ALTIVEC_BUILTIN_VSUBUHS, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUHS, ALTIVEC_BUILTIN_VSUBUHS, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBSBS, ALTIVEC_BUILTIN_VSUBSBS, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBSBS, ALTIVEC_BUILTIN_VSUBSBS, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBSBS, ALTIVEC_BUILTIN_VSUBSBS, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUBS, ALTIVEC_BUILTIN_VSUBUBS, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUBS, ALTIVEC_BUILTIN_VSUBUBS, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUBS, ALTIVEC_BUILTIN_VSUBUBS, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUBS, ALTIVEC_BUILTIN_VSUBUBS, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUBUBS, ALTIVEC_BUILTIN_VSUBUBS, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUM4S, ALTIVEC_BUILTIN_VSUM4UBS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUM4S, ALTIVEC_BUILTIN_VSUM4SBS, + RS6000_BTI_V4SI, RS6000_BTI_V16QI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUM4S, ALTIVEC_BUILTIN_VSUM4SHS, + RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUM4SHS, ALTIVEC_BUILTIN_VSUM4SHS, + RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUM4SBS, ALTIVEC_BUILTIN_VSUM4SBS, + RS6000_BTI_V4SI, RS6000_BTI_V16QI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_VSUM4UBS, ALTIVEC_BUILTIN_VSUM4UBS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUM2S, ALTIVEC_BUILTIN_VSUM2SWS, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUMS, ALTIVEC_BUILTIN_VSUMSWS, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 }, + { VSX_BUILTIN_VEC_XL, 
VSX_BUILTIN_LD_ELEMREV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V2DI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_long_long, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V16QI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_bool_V4SI, 
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, + + /* Ternary AltiVec/VSX builtins. 
*/ + { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST, + RS6000_BTI_void, ~RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST, + RS6000_BTI_void, ~RS6000_BTI_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST, + RS6000_BTI_void, ~RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST, + RS6000_BTI_void, ~RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST, + RS6000_BTI_void, ~RS6000_BTI_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST, + RS6000_BTI_void, ~RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST, + RS6000_BTI_void, ~RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST, + RS6000_BTI_void, ~RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST, + RS6000_BTI_void, ~RS6000_BTI_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST, + RS6000_BTI_void, ~RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST, + RS6000_BTI_void, ~RS6000_BTI_V4SF, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST, + RS6000_BTI_void, ~RS6000_BTI_UINTQI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST, + RS6000_BTI_void, ~RS6000_BTI_INTQI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST, + RS6000_BTI_void, ~RS6000_BTI_UINTHI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST, + RS6000_BTI_void, ~RS6000_BTI_INTHI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST, + RS6000_BTI_void, ~RS6000_BTI_UINTSI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST, + RS6000_BTI_void, ~RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST, + RS6000_BTI_void, ~RS6000_BTI_unsigned_long, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST, + RS6000_BTI_void, ~RS6000_BTI_long, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST, + RS6000_BTI_void, ~RS6000_BTI_float, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST, + RS6000_BTI_void, ~RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST, + RS6000_BTI_void, ~RS6000_BTI_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST, + RS6000_BTI_void, ~RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST, + RS6000_BTI_void, ~RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST, + RS6000_BTI_void, ~RS6000_BTI_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST, + RS6000_BTI_void, ~RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST, + RS6000_BTI_void, ~RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTST, 
ALTIVEC_BUILTIN_DSTST, + RS6000_BTI_void, ~RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST, + RS6000_BTI_void, ~RS6000_BTI_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST, + RS6000_BTI_void, ~RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST, + RS6000_BTI_void, ~RS6000_BTI_V4SF, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST, + RS6000_BTI_void, ~RS6000_BTI_UINTQI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST, + RS6000_BTI_void, ~RS6000_BTI_INTQI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST, + RS6000_BTI_void, ~RS6000_BTI_UINTHI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST, + RS6000_BTI_void, ~RS6000_BTI_INTHI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST, + RS6000_BTI_void, ~RS6000_BTI_UINTSI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST, + RS6000_BTI_void, ~RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST, + RS6000_BTI_void, ~RS6000_BTI_unsigned_long, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST, + RS6000_BTI_void, ~RS6000_BTI_long, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST, + RS6000_BTI_void, ~RS6000_BTI_float, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT, + RS6000_BTI_void, ~RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT, + RS6000_BTI_void, ~RS6000_BTI_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT, + RS6000_BTI_void, ~RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT, + RS6000_BTI_void, ~RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT, + RS6000_BTI_void, ~RS6000_BTI_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT, + RS6000_BTI_void, ~RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT, + RS6000_BTI_void, ~RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT, + RS6000_BTI_void, ~RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT, + RS6000_BTI_void, ~RS6000_BTI_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT, + RS6000_BTI_void, ~RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT, + RS6000_BTI_void, ~RS6000_BTI_V4SF, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT, + RS6000_BTI_void, ~RS6000_BTI_UINTQI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT, + RS6000_BTI_void, ~RS6000_BTI_INTQI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT, + RS6000_BTI_void, ~RS6000_BTI_UINTHI, RS6000_BTI_INTSI, 
RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT, + RS6000_BTI_void, ~RS6000_BTI_INTHI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT, + RS6000_BTI_void, ~RS6000_BTI_UINTSI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT, + RS6000_BTI_void, ~RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT, + RS6000_BTI_void, ~RS6000_BTI_unsigned_long, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT, + RS6000_BTI_void, ~RS6000_BTI_long, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT, + RS6000_BTI_void, ~RS6000_BTI_float, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT, + RS6000_BTI_void, ~RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT, + RS6000_BTI_void, ~RS6000_BTI_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT, + RS6000_BTI_void, ~RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT, + RS6000_BTI_void, ~RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT, + RS6000_BTI_void, ~RS6000_BTI_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT, + RS6000_BTI_void, ~RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT, + RS6000_BTI_void, ~RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT, + RS6000_BTI_void, ~RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT, + RS6000_BTI_void, ~RS6000_BTI_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT, + RS6000_BTI_void, ~RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT, + RS6000_BTI_void, ~RS6000_BTI_V4SF, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT, + RS6000_BTI_void, ~RS6000_BTI_UINTQI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT, + RS6000_BTI_void, ~RS6000_BTI_INTQI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT, + RS6000_BTI_void, ~RS6000_BTI_UINTHI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT, + RS6000_BTI_void, ~RS6000_BTI_INTHI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT, + RS6000_BTI_void, ~RS6000_BTI_UINTSI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT, + RS6000_BTI_void, ~RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT, + RS6000_BTI_void, ~RS6000_BTI_unsigned_long, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT, + RS6000_BTI_void, ~RS6000_BTI_long, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT, + RS6000_BTI_void, ~RS6000_BTI_float, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMADDFP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF 
}, + { ALTIVEC_BUILTIN_VEC_MADD, VSX_BUILTIN_XVMADDDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, + { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, + { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM, + RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, + { ALTIVEC_BUILTIN_VEC_MADDS, ALTIVEC_BUILTIN_VMHADDSHS, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_MLADD, ALTIVEC_BUILTIN_VMLADDUHM, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_MLADD, ALTIVEC_BUILTIN_VMLADDUHM, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, + { ALTIVEC_BUILTIN_VEC_MLADD, ALTIVEC_BUILTIN_VMLADDUHM, + RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_MLADD, ALTIVEC_BUILTIN_VMLADDUHM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, + { ALTIVEC_BUILTIN_VEC_MRADDS, ALTIVEC_BUILTIN_VMHRADDSHS, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI }, + { VSX_BUILTIN_VEC_MSUB, VSX_BUILTIN_XVMSUBSP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, + { VSX_BUILTIN_VEC_MSUB, VSX_BUILTIN_XVMSUBDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, + { ALTIVEC_BUILTIN_VEC_MSUM, ALTIVEC_BUILTIN_VMSUMUBM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V4SI }, + { ALTIVEC_BUILTIN_VEC_MSUM, ALTIVEC_BUILTIN_VMSUMMBM, + RS6000_BTI_V4SI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_MSUM, ALTIVEC_BUILTIN_VMSUMUHM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI }, + { ALTIVEC_BUILTIN_VEC_MSUM, ALTIVEC_BUILTIN_VMSUMSHM, + RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_VMSUMSHM, ALTIVEC_BUILTIN_VMSUMSHM, + RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_VMSUMUHM, ALTIVEC_BUILTIN_VMSUMUHM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI }, + { ALTIVEC_BUILTIN_VEC_VMSUMMBM, ALTIVEC_BUILTIN_VMSUMMBM, + RS6000_BTI_V4SI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_VMSUMUBM, ALTIVEC_BUILTIN_VMSUMUBM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V4SI }, + { ALTIVEC_BUILTIN_VEC_MSUMS, ALTIVEC_BUILTIN_VMSUMUHS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI }, + { ALTIVEC_BUILTIN_VEC_MSUMS, ALTIVEC_BUILTIN_VMSUMSHS, + RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_VMSUMSHS, ALTIVEC_BUILTIN_VMSUMSHS, + RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_VMSUMUHS, ALTIVEC_BUILTIN_VMSUMUHS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 
RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI }, + { VSX_BUILTIN_VEC_NMADD, VSX_BUILTIN_XVNMADDSP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, + { VSX_BUILTIN_VEC_NMADD, VSX_BUILTIN_XVNMADDDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, + { ALTIVEC_BUILTIN_VEC_NMSUB, ALTIVEC_BUILTIN_VNMSUBFP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, + { ALTIVEC_BUILTIN_VEC_NMSUB, VSX_BUILTIN_XVNMSUBDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, + { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_4SI, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_8HI, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_8HI, + RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_16QI, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_16QI, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DI }, + { 
ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_unsigned_V4SI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_8HI, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_8HI, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 
RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_16QI, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_16QI, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_NOT_OPAQUE }, + { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_NOT_OPAQUE }, + { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_4SI, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_NOT_OPAQUE }, + { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_NOT_OPAQUE }, + { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_NOT_OPAQUE }, + { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_NOT_OPAQUE }, + { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_8HI, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_NOT_OPAQUE }, + { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_8HI, + RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_NOT_OPAQUE }, + { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_NOT_OPAQUE }, + { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_NOT_OPAQUE }, + { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_16QI, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_NOT_OPAQUE }, + { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_NOT_OPAQUE }, + { ALTIVEC_BUILTIN_VEC_SLDW, VSX_BUILTIN_XXSLDWI_16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, + RS6000_BTI_NOT_OPAQUE }, + { ALTIVEC_BUILTIN_VEC_SLDW, VSX_BUILTIN_XXSLDWI_16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_NOT_OPAQUE }, + { ALTIVEC_BUILTIN_VEC_SLDW, VSX_BUILTIN_XXSLDWI_8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, + RS6000_BTI_NOT_OPAQUE }, + { ALTIVEC_BUILTIN_VEC_SLDW, VSX_BUILTIN_XXSLDWI_8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_NOT_OPAQUE }, + { ALTIVEC_BUILTIN_VEC_SLDW, VSX_BUILTIN_XXSLDWI_4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, + RS6000_BTI_NOT_OPAQUE }, + { ALTIVEC_BUILTIN_VEC_SLDW, VSX_BUILTIN_XXSLDWI_4SI, + 
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_NOT_OPAQUE }, + { ALTIVEC_BUILTIN_VEC_SLDW, VSX_BUILTIN_XXSLDWI_2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, + RS6000_BTI_NOT_OPAQUE }, + { ALTIVEC_BUILTIN_VEC_SLDW, VSX_BUILTIN_XXSLDWI_2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_NOT_OPAQUE }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V2DF, + RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V2DI, + RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V2DI, + RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V2DI, + RS6000_BTI_void, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SF, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SF, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SI, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SI, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SI, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SI, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SI, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SI, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SI, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V16QI, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V16QI, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V16QI, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI }, + { 
ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V16QI, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V16QI, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V16QI, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V16QI, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI, + RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI }, + { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEBX, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, + { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEBX, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, + { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEBX, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, + { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEBX, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, + { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEHX, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, + { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEHX, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI }, + { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEHX, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, + { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEHX, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI }, + { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEHX, + RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, + { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEHX, + RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI }, + { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEWX, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float }, + { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEWX, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEWX, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI }, + { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEWX, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEWX, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI }, + { ALTIVEC_BUILTIN_VEC_STVEWX, ALTIVEC_BUILTIN_STVEWX, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float }, + { ALTIVEC_BUILTIN_VEC_STVEWX, ALTIVEC_BUILTIN_STVEWX, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_STVEWX, ALTIVEC_BUILTIN_STVEWX, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI }, + { ALTIVEC_BUILTIN_VEC_STVEWX, ALTIVEC_BUILTIN_STVEWX, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_STVEWX, ALTIVEC_BUILTIN_STVEWX, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI }, + { ALTIVEC_BUILTIN_VEC_STVEWX, ALTIVEC_BUILTIN_STVEWX, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_void }, + { ALTIVEC_BUILTIN_VEC_STVEWX, 
ALTIVEC_BUILTIN_STVEWX, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_void }, + { ALTIVEC_BUILTIN_VEC_STVEWX, ALTIVEC_BUILTIN_STVEWX, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_void }, + { ALTIVEC_BUILTIN_VEC_STVEHX, ALTIVEC_BUILTIN_STVEHX, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, + { ALTIVEC_BUILTIN_VEC_STVEHX, ALTIVEC_BUILTIN_STVEHX, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI }, + { ALTIVEC_BUILTIN_VEC_STVEHX, ALTIVEC_BUILTIN_STVEHX, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, + { ALTIVEC_BUILTIN_VEC_STVEHX, ALTIVEC_BUILTIN_STVEHX, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI }, + { ALTIVEC_BUILTIN_VEC_STVEHX, ALTIVEC_BUILTIN_STVEHX, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_void }, + { ALTIVEC_BUILTIN_VEC_STVEHX, ALTIVEC_BUILTIN_STVEHX, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_void }, + { ALTIVEC_BUILTIN_VEC_STVEBX, ALTIVEC_BUILTIN_STVEBX, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, + { ALTIVEC_BUILTIN_VEC_STVEBX, ALTIVEC_BUILTIN_STVEBX, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, + { ALTIVEC_BUILTIN_VEC_STVEBX, ALTIVEC_BUILTIN_STVEBX, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, + { ALTIVEC_BUILTIN_VEC_STVEBX, ALTIVEC_BUILTIN_STVEBX, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, + { ALTIVEC_BUILTIN_VEC_STVEBX, ALTIVEC_BUILTIN_STVEBX, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_void }, + { ALTIVEC_BUILTIN_VEC_STVEBX, ALTIVEC_BUILTIN_STVEBX, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_void }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SF, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SF, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SI, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SI, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SI, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SI, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SI, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SI, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SI, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI }, + { ALTIVEC_BUILTIN_VEC_STL, 
ALTIVEC_BUILTIN_STVXL_V8HI, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V16QI, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V16QI, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V16QI, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V16QI, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V16QI, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V16QI, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V16QI, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI, + RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V2DF, + RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V2DF, + RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V2DI, + RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V2DI, + RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V2DI, + RS6000_BTI_void, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF }, + { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float }, + { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI }, + { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI }, + { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI }, + { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI }, + { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX, + RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, 
~RS6000_BTI_pixel_V8HI }, + { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, + { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI }, + { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI }, + { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI }, + { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI }, + { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, + { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, + { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF }, + { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float }, + { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI }, + { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI }, + { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI }, + { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI }, + { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL, + RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI }, + { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, + { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI }, + { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI }, + { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI }, + { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI }, + { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, + { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, 
RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, + { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF }, + { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float }, + { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI }, + { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI }, + { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI }, + { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI }, + { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX, + RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI }, + { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, + { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI }, + { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI }, + { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI }, + { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI }, + { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, + { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, + { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF }, + { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float }, + { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI }, + { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, + { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI }, + { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, 
RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI }, + { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI }, + { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL, + RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI }, + { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, + { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI }, + { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI }, + { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI }, + { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI }, + { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, + { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DF, + RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DF, + RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DI, + RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DI, + RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_long_long }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DI, + RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V2DI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DI, + RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_long_long }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SF, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SF, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SI, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SI, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SI, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V4SI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SI, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, + ~RS6000_BTI_UINTSI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V8HI, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V8HI, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V8HI, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, 
RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V8HI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V8HI, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, + ~RS6000_BTI_UINTHI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V16QI, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V16QI, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V16QI, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V16QI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V16QI, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_UINTQI }, + { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_16QI, + 
RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_NOT_OPAQUE }, + + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V2DI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI, + RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V4SI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_long, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI, + RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI, + RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V8HI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI, + RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V16QI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, + + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DF, + RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DF, + RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI, + RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI, + RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + 
~RS6000_BTI_unsigned_V2DI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI, + RS6000_BTI_void, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_bool_V2DI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SF, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SF, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V4SI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, + ~RS6000_BTI_UINTSI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, + ~RS6000_BTI_bool_V4SI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, + ~RS6000_BTI_UINTSI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, + ~RS6000_BTI_INTSI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V8HI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, + ~RS6000_BTI_UINTHI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, + ~RS6000_BTI_bool_V8HI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, + ~RS6000_BTI_UINTHI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, + ~RS6000_BTI_INTHI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V16QI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_UINTQI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_bool_V16QI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_UINTQI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_INTQI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, + ~RS6000_BTI_pixel_V8HI }, + + /* Predicates. 
*/ + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUB_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUB_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUB_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTSB_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTSB_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTSB_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V16QI, RS6000_BTI_V16QI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUH_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUH_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUH_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTSH_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V8HI, RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTSH_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTSH_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUW_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUW_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUW_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTSW_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTSW_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTSW_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTFP_P, + 
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, VSX_BUILTIN_XVCMPGTDP_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, + + + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUB_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUB_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUB_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUB_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUB_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUB_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V16QI, RS6000_BTI_V16QI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUB_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUH_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUH_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUH_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUH_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V8HI, RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUH_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUH_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUH_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUH_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUW_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUW_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUW_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUW_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUW_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUW_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUW_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, 
RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQFP_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, VSX_BUILTIN_XVCMPEQDP_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, + + + /* cmpge is the same as cmpgt for all cases except floating point. + There is further code to deal with this special case in + altivec_build_resolved_builtin. */ + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTUB_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTUB_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTUB_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSB_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSB_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSB_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V16QI, RS6000_BTI_V16QI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTUH_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTUH_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTUH_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSH_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V8HI, RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSH_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSH_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTUW_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTUW_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTUW_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, 
ALTIVEC_BUILTIN_VCMPGTSW_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSW_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSW_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGEFP_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, VSX_BUILTIN_XVCMPGEDP_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, + + /* Power8 vector overloaded functions. */ + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_V4SI, 
RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 
RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { 
P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + + { P8V_BUILTIN_VEC_VADDCUQ, P8V_BUILTIN_VADDCUQ, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { P8V_BUILTIN_VEC_VADDCUQ, P8V_BUILTIN_VADDCUQ, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, + + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VADDUQM, P8V_BUILTIN_VADDUQM, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { P8V_BUILTIN_VEC_VADDUQM, P8V_BUILTIN_VADDUQM, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, + + { P9V_BUILTIN_VEC_VBPERM, P9V_BUILTIN_VBPERMD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P9V_BUILTIN_VEC_VBPERM, P8V_BUILTIN_VBPERMQ, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P9V_BUILTIN_VEC_VBPERM, P8V_BUILTIN_VBPERMQ2, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + + { P8V_BUILTIN_VEC_VBPERMQ, P8V_BUILTIN_VBPERMQ, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V16QI, 0 
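A minimal usage sketch of the vec_eqv/vec_nand/vec_orc overloads listed in the entries above, assuming the <altivec.h> spellings GCC provides for these ISA 2.07 operations and a POWER8 target (e.g. -mcpu=power8); the function name is purely illustrative.

#include <altivec.h>

vector unsigned int
p8_logicals (vector unsigned int a, vector unsigned int b)
{
  vector unsigned int e = vec_eqv (a, b);   /* ~(a ^ b), the EQV_V4SI entry   */
  vector unsigned int n = vec_nand (a, b);  /* ~(a & b), the NAND_V4SI entry  */
  vector unsigned int o = vec_orc (a, b);   /* a | ~b,  the ORC_V4SI entry    */
  return vec_xor (e, vec_and (n, o));
}
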
}, + { P8V_BUILTIN_VEC_VBPERMQ, P8V_BUILTIN_VBPERMQ, + RS6000_BTI_V2DI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_VBPERMQ, P8V_BUILTIN_VBPERMQ, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_VBPERMQ, P8V_BUILTIN_VBPERMQ, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V16QI, 0 }, + + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P9_BUILTIN_DFP_TSTSFI_LT, MISC_BUILTIN_TSTSFI_LT_TD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat128, 0 }, + { P9_BUILTIN_DFP_TSTSFI_LT, MISC_BUILTIN_TSTSFI_LT_DD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat64, 0 }, + + { P9_BUILTIN_DFP_TSTSFI_LT_TD, MISC_BUILTIN_TSTSFI_LT_TD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat128, 0 }, + { P9_BUILTIN_DFP_TSTSFI_LT_DD, MISC_BUILTIN_TSTSFI_LT_DD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat64, 0 }, + + { P9_BUILTIN_DFP_TSTSFI_EQ, MISC_BUILTIN_TSTSFI_EQ_TD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat128, 0 }, + { P9_BUILTIN_DFP_TSTSFI_EQ, MISC_BUILTIN_TSTSFI_EQ_DD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat64, 0 }, + + { P9_BUILTIN_DFP_TSTSFI_EQ_TD, MISC_BUILTIN_TSTSFI_EQ_TD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat128, 0 }, + { P9_BUILTIN_DFP_TSTSFI_EQ_DD, MISC_BUILTIN_TSTSFI_EQ_DD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat64, 0 }, + + { P9_BUILTIN_DFP_TSTSFI_GT, MISC_BUILTIN_TSTSFI_GT_TD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat128, 0 }, + { P9_BUILTIN_DFP_TSTSFI_GT, MISC_BUILTIN_TSTSFI_GT_DD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat64, 0 }, + + { P9_BUILTIN_DFP_TSTSFI_GT_TD, MISC_BUILTIN_TSTSFI_GT_TD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat128, 0 }, + { P9_BUILTIN_DFP_TSTSFI_GT_DD, MISC_BUILTIN_TSTSFI_GT_DD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat64, 0 }, + + { P9_BUILTIN_DFP_TSTSFI_OV, MISC_BUILTIN_TSTSFI_OV_TD, + 
RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat128, 0 }, + { P9_BUILTIN_DFP_TSTSFI_OV, MISC_BUILTIN_TSTSFI_OV_DD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat64, 0 }, + + { P9_BUILTIN_DFP_TSTSFI_OV_TD, MISC_BUILTIN_TSTSFI_OV_TD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat128, 0 }, + { P9_BUILTIN_DFP_TSTSFI_OV_DD, MISC_BUILTIN_TSTSFI_OV_DD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat64, 0 }, + + { P9V_BUILTIN_VEC_VCTZ, P9V_BUILTIN_VCTZB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZ, P9V_BUILTIN_VCTZB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZ, P9V_BUILTIN_VCTZH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZ, P9V_BUILTIN_VCTZH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZ, P9V_BUILTIN_VCTZW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZ, P9V_BUILTIN_VCTZW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZ, P9V_BUILTIN_VCTZD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZ, P9V_BUILTIN_VCTZD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P9V_BUILTIN_VEC_VCTZB, P9V_BUILTIN_VCTZB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZB, P9V_BUILTIN_VCTZB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P9V_BUILTIN_VEC_VCTZH, P9V_BUILTIN_VCTZH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZH, P9V_BUILTIN_VCTZH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + + { P9V_BUILTIN_VEC_VCTZW, P9V_BUILTIN_VCTZW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZW, P9V_BUILTIN_VCTZW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + + { P9V_BUILTIN_VEC_VCTZD, P9V_BUILTIN_VCTZD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZD, P9V_BUILTIN_VCTZD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P9V_BUILTIN_VEC_VADU, P9V_BUILTIN_VADUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P9V_BUILTIN_VEC_VADU, P9V_BUILTIN_VADUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P9V_BUILTIN_VEC_VADU, P9V_BUILTIN_VADUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + + { P9V_BUILTIN_VEC_VADUB, P9V_BUILTIN_VADUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + + { P9V_BUILTIN_VEC_VADUH, P9V_BUILTIN_VADUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + + { P9V_BUILTIN_VEC_VADUW, P9V_BUILTIN_VADUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + + { P9V_BUILTIN_VEC_VES, P9V_BUILTIN_VESSP, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SF, 0, 0 }, + { P9V_BUILTIN_VEC_VES, P9V_BUILTIN_VESDP, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_V2DF, 0, 0 }, + + { P9V_BUILTIN_VEC_VESSP, P9V_BUILTIN_VESSP, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SF, 0, 0 }, + { P9V_BUILTIN_VEC_VESDP, P9V_BUILTIN_VESDP, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_V2DF, 0, 0 }, + + { P9V_BUILTIN_VEC_VEE, P9V_BUILTIN_VEESP, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SF, 0, 0 }, + { P9V_BUILTIN_VEC_VEE, P9V_BUILTIN_VEEDP, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_V2DF, 0, 0 }, + + { P9V_BUILTIN_VEC_VEESP, P9V_BUILTIN_VEESP, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SF, 0, 0 }, + { 
P9V_BUILTIN_VEC_VEEDP, P9V_BUILTIN_VEEDP, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_V2DF, 0, 0 }, + + { P9V_BUILTIN_VEC_VTDC, P9V_BUILTIN_VTDCSP, + RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 }, + { P9V_BUILTIN_VEC_VTDC, P9V_BUILTIN_VTDCDP, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_INTSI, 0 }, + + { P9V_BUILTIN_VEC_VTDCSP, P9V_BUILTIN_VTDCSP, + RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 }, + { P9V_BUILTIN_VEC_VTDCDP, P9V_BUILTIN_VTDCDP, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_INTSI, 0 }, + + { P9V_BUILTIN_VEC_VIE, P9V_BUILTIN_VIESP, + RS6000_BTI_V4SF, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { P9V_BUILTIN_VEC_VIE, P9V_BUILTIN_VIESP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_unsigned_V4SI, 0 }, + + { P9V_BUILTIN_VEC_VIE, P9V_BUILTIN_VIEDP, + RS6000_BTI_V2DF, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P9V_BUILTIN_VEC_VIE, P9V_BUILTIN_VIEDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_unsigned_V2DI, 0 }, + + { P9V_BUILTIN_VEC_VIESP, P9V_BUILTIN_VIESP, + RS6000_BTI_V4SF, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { P9V_BUILTIN_VEC_VIESP, P9V_BUILTIN_VIESP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_unsigned_V4SI, 0 }, + + { P9V_BUILTIN_VEC_VIEDP, P9V_BUILTIN_VIEDP, + RS6000_BTI_V2DF, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P9V_BUILTIN_VEC_VIEDP, P9V_BUILTIN_VIEDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_unsigned_V2DI, 0 }, + + { P9V_BUILTIN_VEC_VSTDC, P9V_BUILTIN_VSTDCSP, + RS6000_BTI_bool_int, RS6000_BTI_float, RS6000_BTI_INTSI, 0 }, + { P9V_BUILTIN_VEC_VSTDC, P9V_BUILTIN_VSTDCDP, + RS6000_BTI_bool_int, RS6000_BTI_double, RS6000_BTI_INTSI, 0 }, + + { P9V_BUILTIN_VEC_VSTDCSP, P9V_BUILTIN_VSTDCSP, + RS6000_BTI_bool_int, RS6000_BTI_float, RS6000_BTI_INTSI, 0 }, + { P9V_BUILTIN_VEC_VSTDCDP, P9V_BUILTIN_VSTDCDP, + RS6000_BTI_bool_int, RS6000_BTI_double, RS6000_BTI_INTSI, 0 }, + + { P9V_BUILTIN_VEC_VSTDCN, P9V_BUILTIN_VSTDCNSP, + RS6000_BTI_bool_int, RS6000_BTI_float, 0, 0 }, + { P9V_BUILTIN_VEC_VSTDCN, P9V_BUILTIN_VSTDCNDP, + RS6000_BTI_bool_int, RS6000_BTI_double, 0, 0 }, + + { P9V_BUILTIN_VEC_VSTDCNSP, P9V_BUILTIN_VSTDCNSP, + RS6000_BTI_bool_int, RS6000_BTI_float, 0, 0 }, + { P9V_BUILTIN_VEC_VSTDCNDP, P9V_BUILTIN_VSTDCNDP, + RS6000_BTI_bool_int, RS6000_BTI_double, 0, 0 }, + + { P9V_BUILTIN_VEC_VSEEDP, P9V_BUILTIN_VSEEDP, + RS6000_BTI_UINTSI, RS6000_BTI_double, 0, 0 }, + + { P9V_BUILTIN_VEC_VSESDP, P9V_BUILTIN_VSESDP, + RS6000_BTI_UINTDI, RS6000_BTI_double, 0, 0 }, + + { P9V_BUILTIN_VEC_VSIEDP, P9V_BUILTIN_VSIEDP, + RS6000_BTI_double, RS6000_BTI_UINTDI, RS6000_BTI_UINTDI, 0 }, + { P9V_BUILTIN_VEC_VSIEDP, P9V_BUILTIN_VSIEDPF, + RS6000_BTI_double, RS6000_BTI_double, RS6000_BTI_UINTDI, 0 }, + + { P9V_BUILTIN_VEC_VSCEDPGT, P9V_BUILTIN_VSCEDPGT, + RS6000_BTI_INTSI, RS6000_BTI_double, RS6000_BTI_double, 0 }, + { P9V_BUILTIN_VEC_VSCEDPLT, P9V_BUILTIN_VSCEDPLT, + RS6000_BTI_INTSI, RS6000_BTI_double, RS6000_BTI_double, 0 }, + { P9V_BUILTIN_VEC_VSCEDPEQ, P9V_BUILTIN_VSCEDPEQ, + RS6000_BTI_INTSI, RS6000_BTI_double, RS6000_BTI_double, 0 }, + { P9V_BUILTIN_VEC_VSCEDPUO, P9V_BUILTIN_VSCEDPUO, + RS6000_BTI_INTSI, RS6000_BTI_double, RS6000_BTI_double, 0 }, + + { P9V_BUILTIN_VEC_LXVL, P9V_BUILTIN_LXVL, + RS6000_BTI_V16QI, ~RS6000_BTI_INTQI, + RS6000_BTI_unsigned_long_long, 0 }, + { P9V_BUILTIN_VEC_LXVL, P9V_BUILTIN_LXVL, + RS6000_BTI_unsigned_V16QI, ~RS6000_BTI_UINTQI, + RS6000_BTI_unsigned_long_long, 0 }, + + { P9V_BUILTIN_VEC_LXVL, P9V_BUILTIN_LXVL, + RS6000_BTI_V4SI, 
~RS6000_BTI_INTSI, + RS6000_BTI_unsigned_long_long, 0 }, + { P9V_BUILTIN_VEC_LXVL, P9V_BUILTIN_LXVL, + RS6000_BTI_unsigned_V4SI, ~RS6000_BTI_UINTSI, + RS6000_BTI_unsigned_long_long, 0 }, + + { P9V_BUILTIN_VEC_LXVL, P9V_BUILTIN_LXVL, + RS6000_BTI_V1TI, ~RS6000_BTI_INTTI, + RS6000_BTI_unsigned_long_long, 0 }, + { P9V_BUILTIN_VEC_LXVL, P9V_BUILTIN_LXVL, + RS6000_BTI_unsigned_V1TI, ~RS6000_BTI_UINTTI, + RS6000_BTI_unsigned_long_long, 0 }, + + { P9V_BUILTIN_VEC_LXVL, P9V_BUILTIN_LXVL, + RS6000_BTI_V2DI, ~RS6000_BTI_long_long, + RS6000_BTI_unsigned_long_long, 0 }, + { P9V_BUILTIN_VEC_LXVL, P9V_BUILTIN_LXVL, + RS6000_BTI_unsigned_V2DI, ~RS6000_BTI_unsigned_long_long, + RS6000_BTI_unsigned_long_long, 0 }, + + { P9V_BUILTIN_VEC_LXVL, P9V_BUILTIN_LXVL, + RS6000_BTI_V8HI, ~RS6000_BTI_INTHI, + RS6000_BTI_unsigned_long_long, 0 }, + { P9V_BUILTIN_VEC_LXVL, P9V_BUILTIN_LXVL, + RS6000_BTI_unsigned_V8HI, ~RS6000_BTI_UINTHI, + RS6000_BTI_unsigned_long_long, 0 }, + + { P9V_BUILTIN_VEC_LXVL, P9V_BUILTIN_LXVL, + RS6000_BTI_V2DF, ~RS6000_BTI_double, + RS6000_BTI_unsigned_long_long, 0 }, + { P9V_BUILTIN_VEC_LXVL, P9V_BUILTIN_LXVL, + RS6000_BTI_V4SF, ~RS6000_BTI_float, + RS6000_BTI_unsigned_long_long, 0 }, + /* At an appropriate future time, add support for the + RS6000_BTI_Float16 (exact name to be determined) type here. */ + + { P9V_BUILTIN_VEC_STXVL, P9V_BUILTIN_STXVL, + RS6000_BTI_void, RS6000_BTI_V16QI, ~RS6000_BTI_INTQI, + RS6000_BTI_unsigned_long_long }, + { P9V_BUILTIN_VEC_STXVL, P9V_BUILTIN_STXVL, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, ~RS6000_BTI_UINTQI, + RS6000_BTI_unsigned_long_long }, + + { P9V_BUILTIN_VEC_STXVL, P9V_BUILTIN_STXVL, + RS6000_BTI_void, RS6000_BTI_V4SI, ~RS6000_BTI_INTSI, + RS6000_BTI_unsigned_long_long }, + { P9V_BUILTIN_VEC_STXVL, P9V_BUILTIN_STXVL, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, ~RS6000_BTI_UINTSI, + RS6000_BTI_unsigned_long_long }, + + { P9V_BUILTIN_VEC_STXVL, P9V_BUILTIN_STXVL, + RS6000_BTI_void, RS6000_BTI_V1TI, ~RS6000_BTI_INTTI, + RS6000_BTI_unsigned_long_long }, + { P9V_BUILTIN_VEC_STXVL, P9V_BUILTIN_STXVL, + RS6000_BTI_void, RS6000_BTI_unsigned_V1TI, ~RS6000_BTI_UINTTI, + RS6000_BTI_unsigned_long_long }, + + { P9V_BUILTIN_VEC_STXVL, P9V_BUILTIN_STXVL, + RS6000_BTI_void, RS6000_BTI_V2DI, ~RS6000_BTI_long_long, + RS6000_BTI_unsigned_long_long }, + { P9V_BUILTIN_VEC_STXVL, P9V_BUILTIN_STXVL, + RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, ~RS6000_BTI_unsigned_long_long, + RS6000_BTI_unsigned_long_long }, + + { P9V_BUILTIN_VEC_STXVL, P9V_BUILTIN_STXVL, + RS6000_BTI_void, RS6000_BTI_V8HI, ~RS6000_BTI_INTHI, + RS6000_BTI_unsigned_long_long }, + { P9V_BUILTIN_VEC_STXVL, P9V_BUILTIN_STXVL, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, ~RS6000_BTI_UINTHI, + RS6000_BTI_unsigned_long_long }, + + { P9V_BUILTIN_VEC_STXVL, P9V_BUILTIN_STXVL, + RS6000_BTI_void, RS6000_BTI_V2DF, ~RS6000_BTI_double, + RS6000_BTI_unsigned_long_long }, + { P9V_BUILTIN_VEC_STXVL, P9V_BUILTIN_STXVL, + RS6000_BTI_void, RS6000_BTI_V4SF, ~RS6000_BTI_float, + RS6000_BTI_unsigned_long_long }, + /* At an appropriate future time, add support for the + RS6000_BTI_Float16 (exact name to be determined) type here. 
*/ + + { ALTIVEC_BUILTIN_VEC_CMPNE, P9V_BUILTIN_CMPNEB, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPNE, P9V_BUILTIN_CMPNEB, + RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, + RS6000_BTI_V16QI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPNE, P9V_BUILTIN_CMPNEB, + RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + + { ALTIVEC_BUILTIN_VEC_CMPNE, P9V_BUILTIN_CMPNEH, + RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPNE, P9V_BUILTIN_CMPNEH, + RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, + RS6000_BTI_V8HI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPNE, P9V_BUILTIN_CMPNEH, + RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + + { ALTIVEC_BUILTIN_VEC_CMPNE, P9V_BUILTIN_CMPNEW, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPNE, P9V_BUILTIN_CMPNEW, + RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, + RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPNE, P9V_BUILTIN_CMPNEW, + RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + + /* The following 2 entries have been deprecated. */ + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEB_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEB_P, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEB_P, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + + /* The following 2 entries have been deprecated. */ + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEB_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, + RS6000_BTI_V16QI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEB_P, + RS6000_BTI_INTSI, RS6000_BTI_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEB_P, + RS6000_BTI_INTSI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEB_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + + /* The following 2 entries have been deprecated. */ + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEH_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEH_P, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEH_P, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEH_P, + RS6000_BTI_INTSI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + + /* The following 2 entries have been deprecated. */ + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEH_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V8HI, + RS6000_BTI_V8HI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEH_P, + RS6000_BTI_INTSI, RS6000_BTI_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEH_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEH_P, + RS6000_BTI_INTSI, RS6000_BTI_pixel_V8HI, + RS6000_BTI_pixel_V8HI, 0 }, + + /* The following 2 entries have been deprecated. 
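A short sketch of what the vec_cmpne overload entries above provide, together with the open-coded equivalence the resolver later in this file falls back on when the ISA 3.0 vcmpne instructions are not available; the helper name is hypothetical.

#include <altivec.h>

vector bool int
cmpne_sketch (vector signed int a, vector signed int b)
{
  /* With the overload resolver in this file, vec_cmpne (a, b) is usable on
     any AltiVec target; its pre-POWER9 expansion is equivalent to:  */
  vector bool int eq = vec_cmpeq (a, b);
  return vec_nor (eq, eq);
}
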
*/ + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEW_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEW_P, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEW_P, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + + /* The following 2 entries have been deprecated. */ + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEW_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, + RS6000_BTI_V4SI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEW_P, + RS6000_BTI_INTSI, RS6000_BTI_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEW_P, + RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEW_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + + /* The following 2 entries have been deprecated. */ + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNED_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNED_P, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNED_P, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 + }, + + /* The following 2 entries have been deprecated. */ + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNED_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, + RS6000_BTI_V2DI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNED_P, + RS6000_BTI_INTSI, RS6000_BTI_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNED_P, + RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNED_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEFP_P, + RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEDP_P, + RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + + /* The following 2 entries have been deprecated. */ + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEB_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEB_P, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEB_P, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + + /* The following 2 entries have been deprecated. */ + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEB_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, + RS6000_BTI_V16QI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEB_P, + RS6000_BTI_INTSI, RS6000_BTI_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEB_P, + RS6000_BTI_INTSI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEB_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + + /* The following 2 entries have been deprecated. 
*/ + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEH_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEH_P, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEH_P, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEH_P, + RS6000_BTI_INTSI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + + /* The following 2 entries have been deprecated. */ + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEH_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V8HI, + RS6000_BTI_V8HI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEH_P, + RS6000_BTI_INTSI, RS6000_BTI_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEH_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEH_P, + RS6000_BTI_INTSI, RS6000_BTI_pixel_V8HI, + RS6000_BTI_pixel_V8HI, 0 }, + + /* The following 2 entries have been deprecated. */ + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEW_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEW_P, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEW_P, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + + /* The following 2 entries have been deprecated. */ + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEW_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, + RS6000_BTI_V4SI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEW_P, + RS6000_BTI_INTSI, RS6000_BTI_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEW_P, + RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEW_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + + /* The following 2 entries have been deprecated. */ + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAED_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAED_P, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAED_P, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 + }, + + /* The following 2 entries have been deprecated. 
*/ + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAED_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, + RS6000_BTI_V2DI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAED_P, + RS6000_BTI_INTSI, RS6000_BTI_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAED_P, + RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAED_P, + RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEFP_P, + RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEDP_P, + RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + + { P9V_BUILTIN_VEC_VCMPNEZ_P, P9V_BUILTIN_VCMPNEZB_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI }, + { P9V_BUILTIN_VEC_VCMPNEZ_P, P9V_BUILTIN_VCMPNEZB_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V16QI, RS6000_BTI_V16QI }, + + { P9V_BUILTIN_VEC_VCMPNEZ_P, P9V_BUILTIN_VCMPNEZH_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI }, + { P9V_BUILTIN_VEC_VCMPNEZ_P, P9V_BUILTIN_VCMPNEZH_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V8HI, RS6000_BTI_V8HI }, + + { P9V_BUILTIN_VEC_VCMPNEZ_P, P9V_BUILTIN_VCMPNEZW_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI }, + { P9V_BUILTIN_VEC_VCMPNEZ_P, P9V_BUILTIN_VCMPNEZW_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI }, + + { P9V_BUILTIN_VEC_CMPNEZ, P9V_BUILTIN_CMPNEZB, + RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, + RS6000_BTI_V16QI, 0 }, + { P9V_BUILTIN_VEC_CMPNEZ, P9V_BUILTIN_CMPNEZB, + RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + + { P9V_BUILTIN_VEC_CMPNEZ, P9V_BUILTIN_CMPNEZH, + RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, + RS6000_BTI_V8HI, 0 }, + { P9V_BUILTIN_VEC_CMPNEZ, P9V_BUILTIN_CMPNEZH, + RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + + { P9V_BUILTIN_VEC_CMPNEZ, P9V_BUILTIN_CMPNEZW, + RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, + RS6000_BTI_V4SI, 0 }, + { P9V_BUILTIN_VEC_CMPNEZ, P9V_BUILTIN_CMPNEZW, + RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + + { P9V_BUILTIN_VEC_VCLZLSBB, P9V_BUILTIN_VCLZLSBB, + RS6000_BTI_INTSI, RS6000_BTI_V16QI, 0, 0 }, + { P9V_BUILTIN_VEC_VCLZLSBB, P9V_BUILTIN_VCLZLSBB, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P9V_BUILTIN_VEC_VCTZLSBB, P9V_BUILTIN_VCTZLSBB, + RS6000_BTI_INTSI, RS6000_BTI_V16QI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZLSBB, P9V_BUILTIN_VCTZLSBB, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P9V_BUILTIN_VEC_VEXTRACT4B, P9V_BUILTIN_VEXTRACT4B, + RS6000_BTI_INTDI, RS6000_BTI_V16QI, RS6000_BTI_UINTSI, 0 }, + { P9V_BUILTIN_VEC_VEXTRACT4B, P9V_BUILTIN_VEXTRACT4B, + RS6000_BTI_INTDI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI, 0 }, + + { P9V_BUILTIN_VEC_VEXTULX, P9V_BUILTIN_VEXTUBLX, + RS6000_BTI_INTQI, RS6000_BTI_UINTSI, + RS6000_BTI_V16QI, 0 }, + { P9V_BUILTIN_VEC_VEXTULX, P9V_BUILTIN_VEXTUBLX, + RS6000_BTI_UINTQI, RS6000_BTI_UINTSI, + RS6000_BTI_unsigned_V16QI, 0 }, + + { P9V_BUILTIN_VEC_VEXTULX, P9V_BUILTIN_VEXTUHLX, + RS6000_BTI_INTHI, RS6000_BTI_UINTSI, + RS6000_BTI_V8HI, 0 }, + { P9V_BUILTIN_VEC_VEXTULX, P9V_BUILTIN_VEXTUHLX, + RS6000_BTI_UINTHI, RS6000_BTI_UINTSI, + RS6000_BTI_unsigned_V8HI, 0 }, + + { P9V_BUILTIN_VEC_VEXTULX, P9V_BUILTIN_VEXTUWLX, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, + RS6000_BTI_V4SI, 0 
}, + { P9V_BUILTIN_VEC_VEXTULX, P9V_BUILTIN_VEXTUWLX, + RS6000_BTI_UINTSI, RS6000_BTI_UINTSI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P9V_BUILTIN_VEC_VEXTULX, P9V_BUILTIN_VEXTUWLX, + RS6000_BTI_float, RS6000_BTI_UINTSI, + RS6000_BTI_V4SF, 0 }, + + { P9V_BUILTIN_VEC_VEXTURX, P9V_BUILTIN_VEXTUBRX, + RS6000_BTI_INTQI, RS6000_BTI_UINTSI, + RS6000_BTI_V16QI, 0 }, + { P9V_BUILTIN_VEC_VEXTURX, P9V_BUILTIN_VEXTUBRX, + RS6000_BTI_UINTQI, RS6000_BTI_UINTSI, + RS6000_BTI_unsigned_V16QI, 0 }, + + { P9V_BUILTIN_VEC_VEXTURX, P9V_BUILTIN_VEXTUHRX, + RS6000_BTI_INTHI, RS6000_BTI_UINTSI, + RS6000_BTI_V8HI, 0 }, + { P9V_BUILTIN_VEC_VEXTURX, P9V_BUILTIN_VEXTUHRX, + RS6000_BTI_UINTHI, RS6000_BTI_UINTSI, + RS6000_BTI_unsigned_V8HI, 0 }, + + { P9V_BUILTIN_VEC_VEXTURX, P9V_BUILTIN_VEXTUWRX, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, + RS6000_BTI_V4SI, 0 }, + { P9V_BUILTIN_VEC_VEXTURX, P9V_BUILTIN_VEXTUWRX, + RS6000_BTI_UINTSI, RS6000_BTI_UINTSI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P9V_BUILTIN_VEC_VEXTURX, P9V_BUILTIN_VEXTUWRX, + RS6000_BTI_float, RS6000_BTI_UINTSI, + RS6000_BTI_V4SF, 0 }, + + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B, + RS6000_BTI_V16QI, RS6000_BTI_V4SI, + RS6000_BTI_V16QI, RS6000_BTI_UINTSI }, + { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B, + RS6000_BTI_V16QI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_V16QI, RS6000_BTI_UINTSI }, + { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI }, + { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B_DI, + RS6000_BTI_V16QI, RS6000_BTI_INTDI, + RS6000_BTI_V16QI, RS6000_BTI_UINTDI }, + { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B_DI, + RS6000_BTI_V16QI, RS6000_BTI_UINTDI, + RS6000_BTI_V16QI, RS6000_BTI_UINTDI }, + { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B_DI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTDI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTDI }, + { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B_DI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTDI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTDI }, + + { P8V_BUILTIN_VEC_VADDECUQ, P8V_BUILTIN_VADDECUQ, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI }, + { P8V_BUILTIN_VEC_VADDECUQ, P8V_BUILTIN_VADDECUQ, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, + + { P8V_BUILTIN_VEC_VADDEUQM, P8V_BUILTIN_VADDEUQM, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI }, + { P8V_BUILTIN_VEC_VADDEUQM, P8V_BUILTIN_VADDEUQM, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, + + { P8V_BUILTIN_VEC_VSUBECUQ, P8V_BUILTIN_VSUBECUQ, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI }, + { P8V_BUILTIN_VEC_VSUBECUQ, P8V_BUILTIN_VSUBECUQ, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, + + { P8V_BUILTIN_VEC_VSUBEUQM, P8V_BUILTIN_VSUBEUQM, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI }, + { P8V_BUILTIN_VEC_VSUBEUQM, P8V_BUILTIN_VSUBEUQM, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, + + { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 
RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + + { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMB, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMH, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMW, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMD, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_V16QI, 
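A small sketch of the 64-bit element min/max overloads whose entries appear above (vminsd/vmaxsd and friends), assuming a POWER8-enabled compiler where vec_min and vec_max accept vector signed long long; the clamp helper is purely illustrative.

#include <altivec.h>

vector signed long long
clamp64 (vector signed long long v,
         vector signed long long lo, vector signed long long hi)
{
  /* Elementwise clamp of v to [lo, hi], using the doubleword min/max.  */
  return vec_min (vec_max (v, lo), hi);
}
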
RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTU, P8V_BUILTIN_VPOPCNTUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTU, P8V_BUILTIN_VPOPCNTUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTU, P8V_BUILTIN_VPOPCNTUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTU, P8V_BUILTIN_VPOPCNTUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTU, P8V_BUILTIN_VPOPCNTUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTU, P8V_BUILTIN_VPOPCNTUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTU, P8V_BUILTIN_VPOPCNTUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTU, P8V_BUILTIN_VPOPCNTUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P9V_BUILTIN_VEC_VPRTYB, P9V_BUILTIN_VPRTYBW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYB, P9V_BUILTIN_VPRTYBW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYB, P9V_BUILTIN_VPRTYBD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYB, P9V_BUILTIN_VPRTYBD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYB, P9V_BUILTIN_VPRTYBQ, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYB, P9V_BUILTIN_VPRTYBQ, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYB, P9V_BUILTIN_VPRTYBQ, + RS6000_BTI_INTTI, RS6000_BTI_INTTI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYB, P9V_BUILTIN_VPRTYBQ, + RS6000_BTI_UINTTI, RS6000_BTI_UINTTI, 0, 0 }, + + { P9V_BUILTIN_VEC_VPRTYBW, P9V_BUILTIN_VPRTYBW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYBW, P9V_BUILTIN_VPRTYBW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + + { P9V_BUILTIN_VEC_VPRTYBD, P9V_BUILTIN_VPRTYBD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYBD, P9V_BUILTIN_VPRTYBD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P9V_BUILTIN_VEC_VPRTYBQ, P9V_BUILTIN_VPRTYBQ, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYBQ, P9V_BUILTIN_VPRTYBQ, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYBQ, P9V_BUILTIN_VPRTYBQ, + RS6000_BTI_INTTI, RS6000_BTI_INTTI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYBQ, P9V_BUILTIN_VPRTYBQ, + RS6000_BTI_UINTTI, RS6000_BTI_UINTTI, 0, 0 }, + + { P9_BUILTIN_CMPRB, P9_BUILTIN_SCALAR_CMPRB, + RS6000_BTI_INTSI, RS6000_BTI_UINTQI, RS6000_BTI_UINTSI, 0 }, + { P9_BUILTIN_CMPRB2, P9_BUILTIN_SCALAR_CMPRB2, + RS6000_BTI_INTSI, RS6000_BTI_UINTQI, RS6000_BTI_UINTSI, 0 }, + { P9_BUILTIN_CMPEQB, 
P9_BUILTIN_SCALAR_CMPEQB, + RS6000_BTI_INTSI, RS6000_BTI_UINTQI, RS6000_BTI_UINTDI, 0 }, + + { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VPKSDSS, P8V_BUILTIN_VPKSDSS, + RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VPKUDUS, P8V_BUILTIN_VPKUDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VPKSDUS, P8V_BUILTIN_VPKSDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRAD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRAD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSUBCUQ, P8V_BUILTIN_VSUBCUQ, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { P8V_BUILTIN_VEC_VSUBCUQ, P8V_BUILTIN_VSUBCUQ, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, + + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSUBUQM, P8V_BUILTIN_VSUBUQM, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { P8V_BUILTIN_VEC_VSUBUQM, P8V_BUILTIN_VSUBUQM, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, + + { P6_OV_BUILTIN_CMPB, P6_BUILTIN_CMPB_32, + RS6000_BTI_UINTSI, RS6000_BTI_UINTSI, RS6000_BTI_UINTSI, 0 }, + { P6_OV_BUILTIN_CMPB, P6_BUILTIN_CMPB, + RS6000_BTI_UINTDI, RS6000_BTI_UINTDI, RS6000_BTI_UINTDI, 0 }, + + { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { 
P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, + + { P9V_BUILTIN_VEC_VSLV, P9V_BUILTIN_VSLV, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P9V_BUILTIN_VEC_VSRV, P9V_BUILTIN_VSRV, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + + { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRQ_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRQ_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRQ_V1TI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0, 0 }, + { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRQ_V1TI, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0, 0 }, + { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRD_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRD_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRD_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, + { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRW_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRW_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRW_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRH_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRH_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + + /* Crypto builtins. */ + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI }, + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI }, + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + + { CRYPTO_BUILTIN_VSHASIGMA, CRYPTO_BUILTIN_VSHASIGMAW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { CRYPTO_BUILTIN_VSHASIGMA, CRYPTO_BUILTIN_VSHASIGMAD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + + { (enum rs6000_builtins) 0, (enum rs6000_builtins) 0, 0, 0, 0, 0 } +}; + + +/* Convert a type stored into a struct altivec_builtin_types as ID, + into a tree. The types are in rs6000_builtin_types: negative values + create a pointer type for the type associated to ~ID. Note it is + a logical NOT, rather than a negation, otherwise you cannot represent + a pointer type for ID 0. 
*/ + +static inline tree +rs6000_builtin_type (int id) +{ + tree t; + t = rs6000_builtin_types[id < 0 ? ~id : id]; + return id < 0 ? build_pointer_type (t) : t; +} + +/* Check whether the type of an argument, T, is compatible with a + type ID stored into a struct altivec_builtin_types. Integer + types are considered compatible; otherwise, the language hook + lang_hooks.types_compatible_p makes the decision. */ + +static inline bool +rs6000_builtin_type_compatible (tree t, int id) +{ + tree builtin_type; + builtin_type = rs6000_builtin_type (id); + if (t == error_mark_node) + return false; + if (INTEGRAL_TYPE_P (t) && INTEGRAL_TYPE_P (builtin_type)) + return true; + else + return lang_hooks.types_compatible_p (t, builtin_type); +} + + +/* In addition to calling fold_convert for EXPR of type TYPE, also + call c_fully_fold to remove any C_MAYBE_CONST_EXPRs that could be + hiding there (PR47197). */ + +static tree +fully_fold_convert (tree type, tree expr) +{ + tree result = fold_convert (type, expr); + bool maybe_const = true; + + if (!c_dialect_cxx ()) + result = c_fully_fold (result, false, &maybe_const); + + return result; +} + +/* Build a tree for a function call to an Altivec non-overloaded builtin. + The overloaded builtin that matched the types and args is described + by DESC. The N arguments are given in ARGS, respectively. + + Actually the only thing it does is calling fold_convert on ARGS, with + a small exception for vec_{all,any}_{ge,le} predicates. */ + +static tree +altivec_build_resolved_builtin (tree *args, int n, + const struct altivec_builtin_types *desc) +{ + tree impl_fndecl = rs6000_builtin_decls[desc->overloaded_code]; + tree ret_type = rs6000_builtin_type (desc->ret_type); + tree argtypes = TYPE_ARG_TYPES (TREE_TYPE (impl_fndecl)); + tree arg_type[3]; + tree call; + + int i; + for (i = 0; i < n; i++) + arg_type[i] = TREE_VALUE (argtypes), argtypes = TREE_CHAIN (argtypes); + + /* The AltiVec overloading implementation is overall gross, but this + is particularly disgusting. The vec_{all,any}_{ge,le} builtins + are completely different for floating-point vs. integer vector + types, because the former has vcmpgefp, but the latter should use + vcmpgtXX. + + In practice, the second and third arguments are swapped, and the + condition (LT vs. EQ, which is recognizable by bit 1 of the first + argument) is reversed. Patch the arguments here before building + the resolved CALL_EXPR. 
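A standalone sketch of the ~ID encoding described above: negative entries in the overload table mean "pointer to rs6000_builtin_types[~id]", and bitwise NOT (rather than negation) is used so that type ID 0 can still be wrapped in a pointer. The table of names below is hypothetical and only illustrates the arithmetic, not the GCC tree machinery.

#include <stdio.h>

static const char *type_name[] = { "void", "int", "float" };

static void
print_type (int id)
{
  if (id < 0)
    printf ("pointer to %s\n", type_name[~id]);  /* ~(-1) == 0, ~(-3) == 2 */
  else
    printf ("%s\n", type_name[id]);
}

int
main (void)
{
  print_type (0);    /* void             */
  print_type (~0);   /* pointer to void  */
  print_type (~2);   /* pointer to float */
  return 0;
}
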
*/ + if (desc->code == ALTIVEC_BUILTIN_VEC_VCMPGE_P + && desc->overloaded_code != ALTIVEC_BUILTIN_VCMPGEFP_P + && desc->overloaded_code != VSX_BUILTIN_XVCMPGEDP_P) + { + tree t; + t = args[2], args[2] = args[1], args[1] = t; + t = arg_type[2], arg_type[2] = arg_type[1], arg_type[1] = t; + + args[0] = fold_build2 (BIT_XOR_EXPR, TREE_TYPE (args[0]), args[0], + build_int_cst (NULL_TREE, 2)); + } + + switch (n) + { + case 0: + call = build_call_expr (impl_fndecl, 0); + break; + case 1: + call = build_call_expr (impl_fndecl, 1, + fully_fold_convert (arg_type[0], args[0])); + break; + case 2: + call = build_call_expr (impl_fndecl, 2, + fully_fold_convert (arg_type[0], args[0]), + fully_fold_convert (arg_type[1], args[1])); + break; + case 3: + call = build_call_expr (impl_fndecl, 3, + fully_fold_convert (arg_type[0], args[0]), + fully_fold_convert (arg_type[1], args[1]), + fully_fold_convert (arg_type[2], args[2])); + break; + default: + gcc_unreachable (); + } + return fold_convert (ret_type, call); +} + +/* Implementation of the resolve_overloaded_builtin target hook, to + support Altivec's overloaded builtins. */ + +tree +altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, + void *passed_arglist) +{ + vec<tree, va_gc> *arglist = static_cast<vec<tree, va_gc> *> (passed_arglist); + unsigned int nargs = vec_safe_length (arglist); + enum rs6000_builtins fcode + = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl); + tree fnargs = TYPE_ARG_TYPES (TREE_TYPE (fndecl)); + tree types[3], args[3]; + const struct altivec_builtin_types *desc; + unsigned int n; + + if (!rs6000_overloaded_builtin_p (fcode)) + return NULL_TREE; + + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "altivec_resolve_overloaded_builtin, code = %4d, %s\n", + (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl))); + + /* vec_lvsl and vec_lvsr are deprecated for use with LE element order. */ + if (fcode == ALTIVEC_BUILTIN_VEC_LVSL && !VECTOR_ELT_ORDER_BIG) + warning (OPT_Wdeprecated, + "vec_lvsl is deprecated for little endian; use " + "assignment for unaligned loads and stores"); + else if (fcode == ALTIVEC_BUILTIN_VEC_LVSR && !VECTOR_ELT_ORDER_BIG) + warning (OPT_Wdeprecated, + "vec_lvsr is deprecated for little endian; use " + "assignment for unaligned loads and stores"); + + if (fcode == ALTIVEC_BUILTIN_VEC_MUL) + { + /* vec_mul needs to be special cased because there are no instructions + for it for the {un}signed char, {un}signed short, and {un}signed int + types. */ + if (nargs != 2) + { + error ("vec_mul only accepts 2 arguments"); + return error_mark_node; + } + + tree arg0 = (*arglist)[0]; + tree arg0_type = TREE_TYPE (arg0); + tree arg1 = (*arglist)[1]; + tree arg1_type = TREE_TYPE (arg1); + + /* Both arguments must be vectors and the types must be compatible. */ + if (TREE_CODE (arg0_type) != VECTOR_TYPE) + goto bad; + if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)) + goto bad; + + switch (TYPE_MODE (TREE_TYPE (arg0_type))) + { + case QImode: + case HImode: + case SImode: + case DImode: + case TImode: + { + /* For scalar types just use a multiply expression. */ + return fold_build2_loc (loc, MULT_EXPR, TREE_TYPE (arg0), arg0, + fold_convert (TREE_TYPE (arg0), arg1)); + } + case SFmode: + { + /* For floats use the xvmulsp instruction directly. */ + tree call = rs6000_builtin_decls[VSX_BUILTIN_XVMULSP]; + return build_call_expr (call, 2, arg0, arg1); + } + case DFmode: + { + /* For doubles use the xvmuldp instruction directly. 
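A brief sketch of the vec_mul special case handled here: the result is an elementwise multiply whether the resolver emits a plain MULT_EXPR (integer element types) or the xvmulsp/xvmuldp builtins (float/double). Assumes a VSX-enabled target for the floating-point form; function names are illustrative.

#include <altivec.h>

vector float
scale_f (vector float v, vector float s)
{
  return vec_mul (v, s);   /* v[i] * s[i]; lowered via xvmulsp */
}

vector signed int
scale_i (vector signed int v, vector signed int s)
{
  return vec_mul (v, s);   /* lowered to a vector MULT_EXPR */
}
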
*/ + tree call = rs6000_builtin_decls[VSX_BUILTIN_XVMULDP]; + return build_call_expr (call, 2, arg0, arg1); + } + /* Other types are errors. */ + default: + goto bad; + } + } + + if (fcode == ALTIVEC_BUILTIN_VEC_CMPNE) + { + /* vec_cmpne needs to be special cased because there are no instructions + for it (prior to power 9). */ + if (nargs != 2) + { + error ("vec_cmpne only accepts 2 arguments"); + return error_mark_node; + } + + tree arg0 = (*arglist)[0]; + tree arg0_type = TREE_TYPE (arg0); + tree arg1 = (*arglist)[1]; + tree arg1_type = TREE_TYPE (arg1); + + /* Power9 instructions provide the most efficient implementation of + ALTIVEC_BUILTIN_VEC_CMPNE if the mode is not DImode or TImode + or SFmode or DFmode. */ + if (!TARGET_P9_VECTOR + || (TYPE_MODE (TREE_TYPE (arg0_type)) == DImode) + || (TYPE_MODE (TREE_TYPE (arg0_type)) == TImode) + || (TYPE_MODE (TREE_TYPE (arg0_type)) == SFmode) + || (TYPE_MODE (TREE_TYPE (arg0_type)) == DFmode)) + { + /* Both arguments must be vectors and the types must be compatible. */ + if (TREE_CODE (arg0_type) != VECTOR_TYPE) + goto bad; + if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)) + goto bad; + + switch (TYPE_MODE (TREE_TYPE (arg0_type))) + { + /* vec_cmpneq (va, vb) == vec_nor (vec_cmpeq (va, vb), + vec_cmpeq (va, vb)). */ + /* Note: vec_nand also works but opt changes vec_nand's + to vec_nor's anyway. */ + case QImode: + case HImode: + case SImode: + case DImode: + case TImode: + case SFmode: + case DFmode: + { + /* call = vec_cmpeq (va, vb) + result = vec_nor (call, call). */ + vec<tree, va_gc> *params = make_tree_vector (); + vec_safe_push (params, arg0); + vec_safe_push (params, arg1); + tree call = altivec_resolve_overloaded_builtin + (loc, rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_CMPEQ], + params); + /* Use save_expr to ensure that operands used more than once + that may have side effects (like calls) are only evaluated + once. */ + call = save_expr (call); + params = make_tree_vector (); + vec_safe_push (params, call); + vec_safe_push (params, call); + return altivec_resolve_overloaded_builtin + (loc, rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_NOR], params); + } + /* Other types are errors. */ + default: + goto bad; + } + } + /* else, fall through and process the Power9 alternative below */ + } + + if (fcode == ALTIVEC_BUILTIN_VEC_ADDE) + { + /* vec_adde needs to be special cased because there is no instruction + for the {un}signed int version. */ + if (nargs != 3) + { + error ("vec_adde only accepts 3 arguments"); + return error_mark_node; + } + + tree arg0 = (*arglist)[0]; + tree arg0_type = TREE_TYPE (arg0); + tree arg1 = (*arglist)[1]; + tree arg1_type = TREE_TYPE (arg1); + tree arg2 = (*arglist)[2]; + tree arg2_type = TREE_TYPE (arg2); + + /* All 3 arguments must be vectors of (signed or unsigned) (int or + __int128) and the types must be compatible. */ + if (TREE_CODE (arg0_type) != VECTOR_TYPE) + goto bad; + if (!lang_hooks.types_compatible_p (arg0_type, arg1_type) || + !lang_hooks.types_compatible_p (arg1_type, arg2_type)) + goto bad; + + switch (TYPE_MODE (TREE_TYPE (arg0_type))) + { + /* For {un}signed ints, + vec_adde (va, vb, carryv) == vec_add (vec_add (va, vb), + vec_and (carryv, 0x1)). 
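A minimal sketch of the vec_adde identity stated above: for 32-bit elements the builtin is open-coded from vec_add and vec_and, while vector __int128 maps straight to vaddeuqm. The helper name is hypothetical; it computes the same lanes as vec_adde (a, b, carry).

#include <altivec.h>

vector unsigned int
adde_by_hand (vector unsigned int a, vector unsigned int b,
              vector unsigned int carry)
{
  vector unsigned int one = vec_splats (1U);
  /* Add a and b, then add in bit 0 of each carry lane.  */
  return vec_add (vec_add (a, b), vec_and (carry, one));
}
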
*/ + case SImode: + { + vec<tree, va_gc> *params = make_tree_vector (); + vec_safe_push (params, arg0); + vec_safe_push (params, arg1); + tree add_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD]; + tree call = altivec_resolve_overloaded_builtin (loc, add_builtin, + params); + tree const1 = build_int_cstu (TREE_TYPE (arg0_type), 1); + tree ones_vector = build_vector_from_val (arg0_type, const1); + tree and_expr = fold_build2_loc (loc, BIT_AND_EXPR, arg0_type, + arg2, ones_vector); + params = make_tree_vector (); + vec_safe_push (params, call); + vec_safe_push (params, and_expr); + return altivec_resolve_overloaded_builtin (loc, add_builtin, + params); + } + /* For {un}signed __int128s use the vaddeuqm instruction + directly. */ + case TImode: + { + tree adde_bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VADDEUQM]; + return altivec_resolve_overloaded_builtin (loc, adde_bii, + arglist); + } + + /* Types other than {un}signed int and {un}signed __int128 + are errors. */ + default: + goto bad; + } + } + + if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC) + { + /* vec_addec needs to be special cased because there is no instruction + for the {un}signed int version. */ + if (nargs != 3) + { + error ("vec_addec only accepts 3 arguments"); + return error_mark_node; + } + + tree arg0 = (*arglist)[0]; + tree arg0_type = TREE_TYPE (arg0); + tree arg1 = (*arglist)[1]; + tree arg1_type = TREE_TYPE (arg1); + tree arg2 = (*arglist)[2]; + tree arg2_type = TREE_TYPE (arg2); + + /* All 3 arguments must be vectors of (signed or unsigned) (int or + __int128) and the types must be compatible. */ + if (TREE_CODE (arg0_type) != VECTOR_TYPE) + goto bad; + if (!lang_hooks.types_compatible_p (arg0_type, arg1_type) || + !lang_hooks.types_compatible_p (arg1_type, arg2_type)) + goto bad; + + switch (TYPE_MODE (TREE_TYPE (arg0_type))) + { + /* For {un}signed ints, + vec_addec (va, vb, carryv) == + vec_or (vec_addc (va, vb), + vec_addc (vec_add (va, vb), + vec_and (carryv, 0x1))). */ + case SImode: + { + /* Use save_expr to ensure that operands used more than once + that may have side effects (like calls) are only evaluated + once. */ + arg0 = save_expr (arg0); + arg1 = save_expr (arg1); + vec<tree, va_gc> *params = make_tree_vector (); + vec_safe_push (params, arg0); + vec_safe_push (params, arg1); + tree addc_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADDC]; + tree call1 = altivec_resolve_overloaded_builtin (loc, addc_builtin, + params); + params = make_tree_vector (); + vec_safe_push (params, arg0); + vec_safe_push (params, arg1); + tree add_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD]; + tree call2 = altivec_resolve_overloaded_builtin (loc, add_builtin, + params); + tree const1 = build_int_cstu (TREE_TYPE (arg0_type), 1); + tree ones_vector = build_vector_from_val (arg0_type, const1); + tree and_expr = fold_build2_loc (loc, BIT_AND_EXPR, arg0_type, + arg2, ones_vector); + params = make_tree_vector (); + vec_safe_push (params, call2); + vec_safe_push (params, and_expr); + call2 = altivec_resolve_overloaded_builtin (loc, addc_builtin, + params); + params = make_tree_vector (); + vec_safe_push (params, call1); + vec_safe_push (params, call2); + tree or_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_OR]; + return altivec_resolve_overloaded_builtin (loc, or_builtin, + params); + } + /* For {un}signed __int128s use the vaddecuq instruction. 
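A sketch of the vec_addec expansion above (the carry-out of a carry-in addition), composed from the same intrinsics the resolver chains together for 32-bit elements; the helper name is hypothetical.

#include <altivec.h>

vector unsigned int
addec_by_hand (vector unsigned int a, vector unsigned int b,
               vector unsigned int carry)
{
  vector unsigned int one = vec_splats (1U);
  vector unsigned int c1  = vec_addc (a, b);                      /* carry of a + b      */
  vector unsigned int sum = vec_add (a, b);
  vector unsigned int c2  = vec_addc (sum, vec_and (carry, one)); /* carry of sum + cin  */
  return vec_or (c1, c2);
}
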
*/ + case TImode: + { + tree VADDECUQ_bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VADDECUQ]; + return altivec_resolve_overloaded_builtin (loc, VADDECUQ_bii, + arglist); + } + /* Types other than {un}signed int and {un}signed __int128 + are errors. */ + default: + goto bad; + } + } + + /* For now treat vec_splats and vec_promote as the same. */ + if (fcode == ALTIVEC_BUILTIN_VEC_SPLATS + || fcode == ALTIVEC_BUILTIN_VEC_PROMOTE) + { + tree type, arg; + int size; + int i; + bool unsigned_p; + vec<constructor_elt, va_gc> *vec; + const char *name = fcode == ALTIVEC_BUILTIN_VEC_SPLATS ? "vec_splats": "vec_promote"; + + if (nargs == 0) + { + error ("%s only accepts %d arguments", name, (fcode == ALTIVEC_BUILTIN_VEC_PROMOTE)+1 ); + return error_mark_node; + } + if (fcode == ALTIVEC_BUILTIN_VEC_SPLATS && nargs != 1) + { + error ("%s only accepts 1 argument", name); + return error_mark_node; + } + if (fcode == ALTIVEC_BUILTIN_VEC_PROMOTE && nargs != 2) + { + error ("%s only accepts 2 arguments", name); + return error_mark_node; + } + /* Ignore promote's element argument. */ + if (fcode == ALTIVEC_BUILTIN_VEC_PROMOTE + && !INTEGRAL_TYPE_P (TREE_TYPE ((*arglist)[1]))) + goto bad; + + arg = (*arglist)[0]; + type = TREE_TYPE (arg); + if (!SCALAR_FLOAT_TYPE_P (type) + && !INTEGRAL_TYPE_P (type)) + goto bad; + unsigned_p = TYPE_UNSIGNED (type); + switch (TYPE_MODE (type)) + { + case TImode: + type = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node); + size = 1; + break; + case DImode: + type = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node); + size = 2; + break; + case SImode: + type = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node); + size = 4; + break; + case HImode: + type = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node); + size = 8; + break; + case QImode: + type = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node); + size = 16; + break; + case SFmode: type = V4SF_type_node; size = 4; break; + case DFmode: type = V2DF_type_node; size = 2; break; + default: + goto bad; + } + arg = save_expr (fold_convert (TREE_TYPE (type), arg)); + vec_alloc (vec, size); + for(i = 0; i < size; i++) + { + constructor_elt elt = {NULL_TREE, arg}; + vec->quick_push (elt); + } + return build_constructor (type, vec); + } + + /* For now use pointer tricks to do the extraction, unless we are on VSX + extracting a double from a constant offset. */ + if (fcode == ALTIVEC_BUILTIN_VEC_EXTRACT) + { + tree arg1; + tree arg1_type; + tree arg2; + tree arg1_inner_type; + tree decl, stmt; + tree innerptrtype; + machine_mode mode; + + /* No second argument. */ + if (nargs != 2) + { + error ("vec_extract only accepts 2 arguments"); + return error_mark_node; + } + + arg2 = (*arglist)[1]; + arg1 = (*arglist)[0]; + arg1_type = TREE_TYPE (arg1); + + if (TREE_CODE (arg1_type) != VECTOR_TYPE) + goto bad; + if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2))) + goto bad; + + /* If we are targeting little-endian, but -maltivec=be has been + specified to override the element order, adjust the element + number accordingly. */ + if (!BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 2) + { + unsigned int last_elem = TYPE_VECTOR_SUBPARTS (arg1_type) - 1; + arg2 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (arg2), + build_int_cstu (TREE_TYPE (arg2), last_elem), + arg2); + } + + /* See if we can optimize vec_extracts with the current VSX instruction + set. 
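   (Aside on the vec_splats/vec_promote handling above: a minimal sketch
   of the constructor it builds, assuming <altivec.h> and a four-element
   vector of signed int; the function name is illustrative.)

     #include <altivec.h>

     vector signed int
     splat_by_hand (int x)
     {
       /* vec_splats (x) is resolved into the equivalent of an explicit
          vector constructor: the scalar is converted to the element
          type and repeated once per element.  */
       return (vector signed int) { x, x, x, x };
     }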
*/ + mode = TYPE_MODE (arg1_type); + if (VECTOR_MEM_VSX_P (mode)) + + { + tree call = NULL_TREE; + int nunits = GET_MODE_NUNITS (mode); + + /* If the second argument is an integer constant, if the value is in + the expected range, generate the built-in code if we can. We need + 64-bit and direct move to extract the small integer vectors. */ + if (TREE_CODE (arg2) == INTEGER_CST && wi::ltu_p (arg2, nunits)) + { + switch (mode) + { + default: + break; + + case V1TImode: + call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V1TI]; + break; + + case V2DFmode: + call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DF]; + break; + + case V2DImode: + call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DI]; + break; + + case V4SFmode: + call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SF]; + break; + + case V4SImode: + if (TARGET_DIRECT_MOVE_64BIT) + call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SI]; + break; + + case V8HImode: + if (TARGET_DIRECT_MOVE_64BIT) + call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V8HI]; + break; + + case V16QImode: + if (TARGET_DIRECT_MOVE_64BIT) + call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V16QI]; + break; + } + } + + /* If the second argument is variable, we can optimize it if we are + generating 64-bit code on a machine with direct move. */ + else if (TREE_CODE (arg2) != INTEGER_CST && TARGET_DIRECT_MOVE_64BIT) + { + switch (mode) + { + default: + break; + + case V2DFmode: + call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DF]; + break; + + case V2DImode: + call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DI]; + break; + + case V4SFmode: + call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SF]; + break; + + case V4SImode: + call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SI]; + break; + + case V8HImode: + call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V8HI]; + break; + + case V16QImode: + call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V16QI]; + break; + } + } + + if (call) + return build_call_expr (call, 2, arg1, arg2); + } + + /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2). */ + arg1_inner_type = TREE_TYPE (arg1_type); + arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2, + build_int_cst (TREE_TYPE (arg2), + TYPE_VECTOR_SUBPARTS (arg1_type) + - 1), 0); + decl = build_decl (loc, VAR_DECL, NULL_TREE, arg1_type); + DECL_EXTERNAL (decl) = 0; + TREE_PUBLIC (decl) = 0; + DECL_CONTEXT (decl) = current_function_decl; + TREE_USED (decl) = 1; + TREE_TYPE (decl) = arg1_type; + TREE_READONLY (decl) = TYPE_READONLY (arg1_type); + if (c_dialect_cxx ()) + { + stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1, + NULL_TREE, NULL_TREE); + SET_EXPR_LOCATION (stmt, loc); + } + else + { + DECL_INITIAL (decl) = arg1; + stmt = build1 (DECL_EXPR, arg1_type, decl); + TREE_ADDRESSABLE (decl) = 1; + SET_EXPR_LOCATION (stmt, loc); + stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); + } + + innerptrtype = build_pointer_type (arg1_inner_type); + + stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0); + stmt = convert (innerptrtype, stmt); + stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1); + stmt = build_indirect_ref (loc, stmt, RO_NULL); + + return stmt; + } + + /* For now use pointer tricks to do the insertion, unless we are on VSX + inserting a double to a constant offset.. */ + if (fcode == ALTIVEC_BUILTIN_VEC_INSERT) + { + tree arg0; + tree arg1; + tree arg2; + tree arg1_type; + tree arg1_inner_type; + tree decl, stmt; + tree innerptrtype; + machine_mode mode; + + /* No second or third arguments. 
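   The pointer-based fallback above amounts to spilling the vector to a
   temporary and reading one element back through a scalar pointer, with
   the index masked to the number of elements.  A hedged, self-contained
   sketch, assuming <altivec.h> and vector signed int (four elements):

     #include <altivec.h>

     int
     extract_by_hand (vector signed int v, unsigned int idx)
     {
       vector signed int tmp = v;          /* stands in for the compound literal */
       return *((int *) &tmp + (idx & 3)); /* element index masked to 0..3 */
     }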
*/ + if (nargs != 3) + { + error ("vec_insert only accepts 3 arguments"); + return error_mark_node; + } + + arg0 = (*arglist)[0]; + arg1 = (*arglist)[1]; + arg1_type = TREE_TYPE (arg1); + arg2 = (*arglist)[2]; + + if (TREE_CODE (arg1_type) != VECTOR_TYPE) + goto bad; + if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2))) + goto bad; + + /* If we are targeting little-endian, but -maltivec=be has been + specified to override the element order, adjust the element + number accordingly. */ + if (!BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 2) + { + unsigned int last_elem = TYPE_VECTOR_SUBPARTS (arg1_type) - 1; + arg2 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (arg2), + build_int_cstu (TREE_TYPE (arg2), last_elem), + arg2); + } + + /* If we can use the VSX xxpermdi instruction, use that for insert. */ + mode = TYPE_MODE (arg1_type); + if ((mode == V2DFmode || mode == V2DImode) && VECTOR_UNIT_VSX_P (mode) + && TREE_CODE (arg2) == INTEGER_CST + && wi::ltu_p (arg2, 2)) + { + tree call = NULL_TREE; + + if (mode == V2DFmode) + call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V2DF]; + else if (mode == V2DImode) + call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V2DI]; + + /* Note, __builtin_vec_insert_<xxx> has vector and scalar types + reversed. */ + if (call) + return build_call_expr (call, 3, arg1, arg0, arg2); + } + else if (mode == V1TImode && VECTOR_UNIT_VSX_P (mode) + && TREE_CODE (arg2) == INTEGER_CST + && wi::eq_p (arg2, 0)) + { + tree call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V1TI]; + + /* Note, __builtin_vec_insert_<xxx> has vector and scalar types + reversed. */ + return build_call_expr (call, 3, arg1, arg0, arg2); + } + + /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2) = arg0. */ + arg1_inner_type = TREE_TYPE (arg1_type); + arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2, + build_int_cst (TREE_TYPE (arg2), + TYPE_VECTOR_SUBPARTS (arg1_type) + - 1), 0); + decl = build_decl (loc, VAR_DECL, NULL_TREE, arg1_type); + DECL_EXTERNAL (decl) = 0; + TREE_PUBLIC (decl) = 0; + DECL_CONTEXT (decl) = current_function_decl; + TREE_USED (decl) = 1; + TREE_TYPE (decl) = arg1_type; + TREE_READONLY (decl) = TYPE_READONLY (arg1_type); + if (c_dialect_cxx ()) + { + stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1, + NULL_TREE, NULL_TREE); + SET_EXPR_LOCATION (stmt, loc); + } + else + { + DECL_INITIAL (decl) = arg1; + stmt = build1 (DECL_EXPR, arg1_type, decl); + TREE_ADDRESSABLE (decl) = 1; + SET_EXPR_LOCATION (stmt, loc); + stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); + } + + innerptrtype = build_pointer_type (arg1_inner_type); + + stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0); + stmt = convert (innerptrtype, stmt); + stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1); + stmt = build_indirect_ref (loc, stmt, RO_NULL); + stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt, + convert (TREE_TYPE (stmt), arg0)); + stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); + return stmt; + } + + /* Expand vec_ld into an expression that masks the address and + performs the load. We need to expand this early to allow + the best aliasing, as by the time we get into RTL we no longer + are able to honor __restrict__, for example. We may want to + consider this for all memory access built-ins. + + When -maltivec=be is specified, or the wrong number of arguments + is provided, simply punt to existing built-in processing. 
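   The matching sketch for the insert fallback above (again assuming
   <altivec.h> and vector signed int; illustrative only): copy the
   vector to a temporary, store the scalar through a pointer to the
   masked element, and hand back the modified temporary.

     #include <altivec.h>

     vector signed int
     insert_by_hand (int val, vector signed int v, unsigned int idx)
     {
       vector signed int tmp = v;
       *((int *) &tmp + (idx & 3)) = val;
       return tmp;
     }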
*/ + if (fcode == ALTIVEC_BUILTIN_VEC_LD + && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG) + && nargs == 2) + { + tree arg0 = (*arglist)[0]; + tree arg1 = (*arglist)[1]; + + /* Strip qualifiers like "const" from the pointer arg. */ + tree arg1_type = TREE_TYPE (arg1); + tree inner_type = TREE_TYPE (arg1_type); + if (TYPE_QUALS (TREE_TYPE (arg1_type)) != 0) + { + arg1_type = build_pointer_type (build_qualified_type (inner_type, + 0)); + arg1 = fold_convert (arg1_type, arg1); + } + + /* Construct the masked address. Let existing error handling take + over if we don't have a constant offset. */ + arg0 = fold (arg0); + + if (TREE_CODE (arg0) == INTEGER_CST) + { + if (!ptrofftype_p (TREE_TYPE (arg0))) + arg0 = build1 (NOP_EXPR, sizetype, arg0); + + tree arg1_type = TREE_TYPE (arg1); + if (TREE_CODE (arg1_type) == ARRAY_TYPE) + { + arg1_type = TYPE_POINTER_TO (TREE_TYPE (arg1_type)); + tree const0 = build_int_cstu (sizetype, 0); + tree arg1_elt0 = build_array_ref (loc, arg1, const0); + arg1 = build1 (ADDR_EXPR, arg1_type, arg1_elt0); + } + + tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg1_type, + arg1, arg0); + tree aligned = fold_build2_loc (loc, BIT_AND_EXPR, arg1_type, addr, + build_int_cst (arg1_type, -16)); + + /* Find the built-in to get the return type so we can convert + the result properly (or fall back to default handling if the + arguments aren't compatible). */ + for (desc = altivec_overloaded_builtins; + desc->code && desc->code != fcode; desc++) + continue; + + for (; desc->code == fcode; desc++) + if (rs6000_builtin_type_compatible (TREE_TYPE (arg0), desc->op1) + && (rs6000_builtin_type_compatible (TREE_TYPE (arg1), + desc->op2))) + { + tree ret_type = rs6000_builtin_type (desc->ret_type); + if (TYPE_MODE (ret_type) == V2DImode) + /* Type-based aliasing analysis thinks vector long + and vector long long are different and will put them + in distinct alias classes. Force our return type + to be a may-alias type to avoid this. */ + ret_type + = build_pointer_type_for_mode (ret_type, Pmode, + true/*can_alias_all*/); + else + ret_type = build_pointer_type (ret_type); + aligned = build1 (NOP_EXPR, ret_type, aligned); + tree ret_val = build_indirect_ref (loc, aligned, RO_NULL); + return ret_val; + } + } + } + + /* Similarly for stvx. */ + if (fcode == ALTIVEC_BUILTIN_VEC_ST + && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG) + && nargs == 3) + { + tree arg0 = (*arglist)[0]; + tree arg1 = (*arglist)[1]; + tree arg2 = (*arglist)[2]; + + /* Construct the masked address. Let existing error handling take + over if we don't have a constant offset. */ + arg1 = fold (arg1); + + if (TREE_CODE (arg1) == INTEGER_CST) + { + if (!ptrofftype_p (TREE_TYPE (arg1))) + arg1 = build1 (NOP_EXPR, sizetype, arg1); + + tree arg2_type = TREE_TYPE (arg2); + if (TREE_CODE (arg2_type) == ARRAY_TYPE) + { + arg2_type = TYPE_POINTER_TO (TREE_TYPE (arg2_type)); + tree const0 = build_int_cstu (sizetype, 0); + tree arg2_elt0 = build_array_ref (loc, arg2, const0); + arg2 = build1 (ADDR_EXPR, arg2_type, arg2_elt0); + } + + tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg2_type, + arg2, arg1); + tree aligned = fold_build2_loc (loc, BIT_AND_EXPR, arg2_type, addr, + build_int_cst (arg2_type, -16)); + + /* Find the built-in to make sure a compatible one exists; if not + we fall back to default handling to get the error message. 
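   The address computation built above reflects the usual lvx behaviour:
   the byte offset is added to the pointer and the low four bits of the
   sum are cleared, so the load always comes from a 16-byte-aligned
   address.  A hedged sketch of the equivalent source-level form,
   assuming <altivec.h> and <stdint.h>:

     #include <altivec.h>
     #include <stdint.h>

     vector signed int
     vec_ld_by_hand (long offset, const signed int *p)
     {
       /* addr = (p + offset) & -16, then load the aligned quadword.  */
       uintptr_t addr = ((uintptr_t) p + offset) & ~(uintptr_t) 15;
       return *(const vector signed int *) addr;
     }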
*/ + for (desc = altivec_overloaded_builtins; + desc->code && desc->code != fcode; desc++) + continue; + + for (; desc->code == fcode; desc++) + if (rs6000_builtin_type_compatible (TREE_TYPE (arg0), desc->op1) + && rs6000_builtin_type_compatible (TREE_TYPE (arg1), desc->op2) + && rs6000_builtin_type_compatible (TREE_TYPE (arg2), + desc->op3)) + { + tree arg0_type = TREE_TYPE (arg0); + if (TYPE_MODE (arg0_type) == V2DImode) + /* Type-based aliasing analysis thinks vector long + and vector long long are different and will put them + in distinct alias classes. Force our address type + to be a may-alias type to avoid this. */ + arg0_type + = build_pointer_type_for_mode (arg0_type, Pmode, + true/*can_alias_all*/); + else + arg0_type = build_pointer_type (arg0_type); + aligned = build1 (NOP_EXPR, arg0_type, aligned); + tree stg = build_indirect_ref (loc, aligned, RO_NULL); + tree retval = build2 (MODIFY_EXPR, TREE_TYPE (stg), stg, + convert (TREE_TYPE (stg), arg0)); + return retval; + } + } + } + + for (n = 0; + !VOID_TYPE_P (TREE_VALUE (fnargs)) && n < nargs; + fnargs = TREE_CHAIN (fnargs), n++) + { + tree decl_type = TREE_VALUE (fnargs); + tree arg = (*arglist)[n]; + tree type; + + if (arg == error_mark_node) + return error_mark_node; + + if (n >= 3) + abort (); + + arg = default_conversion (arg); + + /* The C++ front-end converts float * to const void * using + NOP_EXPR<const void *> (NOP_EXPR<void *> (x)). */ + type = TREE_TYPE (arg); + if (POINTER_TYPE_P (type) + && TREE_CODE (arg) == NOP_EXPR + && lang_hooks.types_compatible_p (TREE_TYPE (arg), + const_ptr_type_node) + && lang_hooks.types_compatible_p (TREE_TYPE (TREE_OPERAND (arg, 0)), + ptr_type_node)) + { + arg = TREE_OPERAND (arg, 0); + type = TREE_TYPE (arg); + } + + /* Remove the const from the pointers to simplify the overload + matching further down. */ + if (POINTER_TYPE_P (decl_type) + && POINTER_TYPE_P (type) + && TYPE_QUALS (TREE_TYPE (type)) != 0) + { + if (TYPE_READONLY (TREE_TYPE (type)) + && !TYPE_READONLY (TREE_TYPE (decl_type))) + warning (0, "passing arg %d of %qE discards qualifiers from " + "pointer target type", n + 1, fndecl); + type = build_pointer_type (build_qualified_type (TREE_TYPE (type), + 0)); + arg = fold_convert (type, arg); + } + + args[n] = arg; + types[n] = type; + } + + /* If the number of arguments did not match the prototype, return NULL + and the generic code will issue the appropriate error message. */ + if (!VOID_TYPE_P (TREE_VALUE (fnargs)) || n < nargs) + return NULL; + + if (n == 0) + abort (); + + if (fcode == ALTIVEC_BUILTIN_VEC_STEP) + { + if (TREE_CODE (types[0]) != VECTOR_TYPE) + goto bad; + + return build_int_cst (NULL_TREE, TYPE_VECTOR_SUBPARTS (types[0])); + } + + { + bool unsupported_builtin = false; + for (desc = altivec_overloaded_builtins; + desc->code && desc->code != fcode; desc++) + continue; + + /* Need to special case __builtin_cmp because the overloaded forms + of this function take (unsigned int, unsigned int) or (unsigned + long long int, unsigned long long int). Since C conventions + allow the respective argument types to be implicitly coerced into + each other, the default handling does not provide adequate + discrimination between the desired forms of the function. 
*/ + if (fcode == P6_OV_BUILTIN_CMPB) + { + int overloaded_code; + int arg1_mode = TYPE_MODE (types[0]); + int arg2_mode = TYPE_MODE (types[1]); + + if (nargs != 2) + { + error ("__builtin_cmpb only accepts 2 arguments"); + return error_mark_node; + } + + /* If any supplied arguments are wider than 32 bits, resolve to + 64-bit variant of built-in function. */ + if ((GET_MODE_PRECISION (arg1_mode) > 32) + || (GET_MODE_PRECISION (arg2_mode) > 32)) + { + /* Assure all argument and result types are compatible with + the built-in function represented by P6_BUILTIN_CMPB. */ + overloaded_code = P6_BUILTIN_CMPB; + } + else + { + /* Assure all argument and result types are compatible with + the built-in function represented by P6_BUILTIN_CMPB_32. */ + overloaded_code = P6_BUILTIN_CMPB_32; + } + + while (desc->code && desc->code == fcode && + desc->overloaded_code != overloaded_code) + desc++; + + if (desc->code && (desc->code == fcode) + && rs6000_builtin_type_compatible (types[0], desc->op1) + && rs6000_builtin_type_compatible (types[1], desc->op2)) + { + if (rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE) + return altivec_build_resolved_builtin (args, n, desc); + else + unsupported_builtin = true; + } + } + else + { + /* For arguments after the last, we have RS6000_BTI_NOT_OPAQUE in + the opX fields. */ + for (; desc->code == fcode; desc++) + { + if ((desc->op1 == RS6000_BTI_NOT_OPAQUE + || rs6000_builtin_type_compatible (types[0], desc->op1)) + && (desc->op2 == RS6000_BTI_NOT_OPAQUE + || rs6000_builtin_type_compatible (types[1], desc->op2)) + && (desc->op3 == RS6000_BTI_NOT_OPAQUE + || rs6000_builtin_type_compatible (types[2], desc->op3))) + { + if (rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE) + return altivec_build_resolved_builtin (args, n, desc); + else + unsupported_builtin = true; + } + } + } + + if (unsupported_builtin) + { + const char *name = rs6000_overloaded_builtin_name (fcode); + error ("Builtin function %s not supported in this compiler configuration", + name); + return error_mark_node; + } + } + bad: + { + const char *name = rs6000_overloaded_builtin_name (fcode); + error ("invalid parameter combination for AltiVec intrinsic %s", name); + return error_mark_node; + } +} diff --git a/gcc/config/powerpcspe/powerpcspe-cpus.def b/gcc/config/powerpcspe/powerpcspe-cpus.def new file mode 100644 index 000000000000..cd5c70688d85 --- /dev/null +++ b/gcc/config/powerpcspe/powerpcspe-cpus.def @@ -0,0 +1,264 @@ +/* IBM RS/6000 CPU names.. + Copyright (C) 1991-2017 Free Software Foundation, Inc. + Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* ISA masks. 
*/ +#ifndef ISA_2_1_MASKS +#define ISA_2_1_MASKS OPTION_MASK_MFCRF +#define ISA_2_2_MASKS (ISA_2_1_MASKS | OPTION_MASK_POPCNTB) +#define ISA_2_4_MASKS (ISA_2_2_MASKS | OPTION_MASK_FPRND) + + /* For ISA 2.05, do not add MFPGPR, since it isn't in ISA 2.06, and don't add + ALTIVEC, since in general it isn't a win on power6. In ISA 2.04, fsel, + fre, fsqrt, etc. were no longer documented as optional. Group masks by + server and embedded. */ +#define ISA_2_5_MASKS_EMBEDDED (ISA_2_4_MASKS \ + | OPTION_MASK_CMPB \ + | OPTION_MASK_RECIP_PRECISION \ + | OPTION_MASK_PPC_GFXOPT \ + | OPTION_MASK_PPC_GPOPT) + +#define ISA_2_5_MASKS_SERVER (ISA_2_5_MASKS_EMBEDDED | OPTION_MASK_DFP) + + /* For ISA 2.06, don't add ISEL, since in general it isn't a win, but + altivec is a win so enable it. */ + /* OPTION_MASK_VSX_TIMODE should be set, but disable it for now until + PR 58587 is fixed. */ +#define ISA_2_6_MASKS_EMBEDDED (ISA_2_5_MASKS_EMBEDDED | OPTION_MASK_POPCNTD) +#define ISA_2_6_MASKS_SERVER (ISA_2_5_MASKS_SERVER \ + | OPTION_MASK_POPCNTD \ + | OPTION_MASK_ALTIVEC \ + | OPTION_MASK_VSX \ + | OPTION_MASK_UPPER_REGS_DI \ + | OPTION_MASK_UPPER_REGS_DF) + +/* For now, don't provide an embedded version of ISA 2.07. */ +#define ISA_2_7_MASKS_SERVER (ISA_2_6_MASKS_SERVER \ + | OPTION_MASK_P8_FUSION \ + | OPTION_MASK_P8_VECTOR \ + | OPTION_MASK_CRYPTO \ + | OPTION_MASK_DIRECT_MOVE \ + | OPTION_MASK_EFFICIENT_UNALIGNED_VSX \ + | OPTION_MASK_HTM \ + | OPTION_MASK_QUAD_MEMORY \ + | OPTION_MASK_QUAD_MEMORY_ATOMIC \ + | OPTION_MASK_UPPER_REGS_SF \ + | OPTION_MASK_VSX_SMALL_INTEGER) + +/* Add ISEL back into ISA 3.0, since it is supposed to be a win. Do not add + FLOAT128_HW here until we are ready to make -mfloat128 on by default. */ +#define ISA_3_0_MASKS_SERVER (ISA_2_7_MASKS_SERVER \ + | OPTION_MASK_ISEL \ + | OPTION_MASK_MODULO \ + | OPTION_MASK_P9_FUSION \ + | OPTION_MASK_P9_DFORM_SCALAR \ + | OPTION_MASK_P9_DFORM_VECTOR \ + | OPTION_MASK_P9_MINMAX \ + | OPTION_MASK_P9_MISC \ + | OPTION_MASK_P9_VECTOR) + +/* Support for the IEEE 128-bit floating point hardware requires a lot of the + VSX instructions that are part of ISA 3.0. */ +#define ISA_3_0_MASKS_IEEE (OPTION_MASK_VSX \ + | OPTION_MASK_P8_VECTOR \ + | OPTION_MASK_P9_VECTOR \ + | OPTION_MASK_DIRECT_MOVE \ + | OPTION_MASK_UPPER_REGS_DI \ + | OPTION_MASK_UPPER_REGS_DF \ + | OPTION_MASK_UPPER_REGS_SF \ + | OPTION_MASK_VSX_SMALL_INTEGER) + +/* Flags that need to be turned off if -mno-power9-vector. */ +#define OTHER_P9_VECTOR_MASKS (OPTION_MASK_FLOAT128_HW \ + | OPTION_MASK_P9_DFORM_SCALAR \ + | OPTION_MASK_P9_DFORM_VECTOR \ + | OPTION_MASK_P9_MINMAX) + +/* Flags that need to be turned off if -mno-power8-vector. */ +#define OTHER_P8_VECTOR_MASKS (OTHER_P9_VECTOR_MASKS \ + | OPTION_MASK_P9_VECTOR \ + | OPTION_MASK_DIRECT_MOVE \ + | OPTION_MASK_CRYPTO \ + | OPTION_MASK_UPPER_REGS_SF) \ + +/* Flags that need to be turned off if -mno-vsx. */ +#define OTHER_VSX_VECTOR_MASKS (OTHER_P8_VECTOR_MASKS \ + | OPTION_MASK_EFFICIENT_UNALIGNED_VSX \ + | OPTION_MASK_FLOAT128_KEYWORD \ + | OPTION_MASK_FLOAT128_TYPE \ + | OPTION_MASK_P8_VECTOR \ + | OPTION_MASK_UPPER_REGS_DI \ + | OPTION_MASK_UPPER_REGS_DF \ + | OPTION_MASK_VSX_SMALL_INTEGER \ + | OPTION_MASK_VSX_TIMODE) + +#define POWERPC_7400_MASK (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC) + +/* Deal with ports that do not have -mstrict-align. 
*/ +#ifdef OPTION_MASK_STRICT_ALIGN +#define OPTION_MASK_STRICT_ALIGN_OPTIONAL OPTION_MASK_STRICT_ALIGN +#else +#define OPTION_MASK_STRICT_ALIGN 0 +#define OPTION_MASK_STRICT_ALIGN_OPTIONAL 0 +#ifndef MASK_STRICT_ALIGN +#define MASK_STRICT_ALIGN 0 +#endif +#endif + +/* Mask of all options to set the default isa flags based on -mcpu=<xxx>. */ +#define POWERPC_MASKS (OPTION_MASK_ALTIVEC \ + | OPTION_MASK_CMPB \ + | OPTION_MASK_CRYPTO \ + | OPTION_MASK_DFP \ + | OPTION_MASK_DIRECT_MOVE \ + | OPTION_MASK_DLMZB \ + | OPTION_MASK_EFFICIENT_UNALIGNED_VSX \ + | OPTION_MASK_FLOAT128_HW \ + | OPTION_MASK_FLOAT128_KEYWORD \ + | OPTION_MASK_FLOAT128_TYPE \ + | OPTION_MASK_FPRND \ + | OPTION_MASK_HTM \ + | OPTION_MASK_ISEL \ + | OPTION_MASK_LRA \ + | OPTION_MASK_MFCRF \ + | OPTION_MASK_MFPGPR \ + | OPTION_MASK_MODULO \ + | OPTION_MASK_MULHW \ + | OPTION_MASK_NO_UPDATE \ + | OPTION_MASK_P8_FUSION \ + | OPTION_MASK_P8_VECTOR \ + | OPTION_MASK_P9_DFORM_SCALAR \ + | OPTION_MASK_P9_DFORM_VECTOR \ + | OPTION_MASK_P9_FUSION \ + | OPTION_MASK_P9_MINMAX \ + | OPTION_MASK_P9_MISC \ + | OPTION_MASK_P9_VECTOR \ + | OPTION_MASK_POPCNTB \ + | OPTION_MASK_POPCNTD \ + | OPTION_MASK_POWERPC64 \ + | OPTION_MASK_PPC_GFXOPT \ + | OPTION_MASK_PPC_GPOPT \ + | OPTION_MASK_QUAD_MEMORY \ + | OPTION_MASK_QUAD_MEMORY_ATOMIC \ + | OPTION_MASK_RECIP_PRECISION \ + | OPTION_MASK_SOFT_FLOAT \ + | OPTION_MASK_STRICT_ALIGN_OPTIONAL \ + | OPTION_MASK_TOC_FUSION \ + | OPTION_MASK_UPPER_REGS_DI \ + | OPTION_MASK_UPPER_REGS_DF \ + | OPTION_MASK_UPPER_REGS_SF \ + | OPTION_MASK_VSX \ + | OPTION_MASK_VSX_SMALL_INTEGER \ + | OPTION_MASK_VSX_TIMODE) + +#endif + +/* This table occasionally claims that a processor does not support a + particular feature even though it does, but the feature is slower than the + alternative. Thus, it shouldn't be relied on as a complete description of + the processor's support. + + Please keep this list in order, and don't forget to update the documentation + in invoke.texi when adding a new processor or flag. + + Before including this file, define a macro: + + RS6000_CPU (NAME, CPU, FLAGS) + + where the arguments are the fields of struct rs6000_ptt. 
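   A short sketch of the X-macro consumer pattern this interface assumes:
   the including file defines RS6000_CPU, includes this .def file inside
   a table initializer, and undefines it again, so each entry becomes one
   table row.  The struct and table names below are illustrative rather
   than the exact ones used elsewhere in the port, and the PROCESSOR_*
   enum and MASK_*/HOST_WIDE_INT values come from the port's own headers.

     struct cpu_entry
     {
       const char *name;                /* -mcpu= name */
       enum processor_type processor;   /* scheduling model */
       HOST_WIDE_INT target_enable;     /* default ISA flag mask */
     };

     static const struct cpu_entry cpu_table[] = {
     #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
     #include "powerpcspe-cpus.def"
     #undef RS6000_CPU
     };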
*/ + +RS6000_CPU ("401", PROCESSOR_PPC403, MASK_SOFT_FLOAT) +RS6000_CPU ("403", PROCESSOR_PPC403, MASK_SOFT_FLOAT | MASK_STRICT_ALIGN) +RS6000_CPU ("405", PROCESSOR_PPC405, MASK_SOFT_FLOAT | MASK_MULHW | MASK_DLMZB) +RS6000_CPU ("405fp", PROCESSOR_PPC405, MASK_MULHW | MASK_DLMZB) +RS6000_CPU ("440", PROCESSOR_PPC440, MASK_SOFT_FLOAT | MASK_MULHW | MASK_DLMZB) +RS6000_CPU ("440fp", PROCESSOR_PPC440, MASK_MULHW | MASK_DLMZB) +RS6000_CPU ("464", PROCESSOR_PPC440, MASK_SOFT_FLOAT | MASK_MULHW | MASK_DLMZB) +RS6000_CPU ("464fp", PROCESSOR_PPC440, MASK_MULHW | MASK_DLMZB) +RS6000_CPU ("476", PROCESSOR_PPC476, + MASK_SOFT_FLOAT | MASK_PPC_GFXOPT | MASK_MFCRF | MASK_POPCNTB + | MASK_FPRND | MASK_CMPB | MASK_MULHW | MASK_DLMZB) +RS6000_CPU ("476fp", PROCESSOR_PPC476, + MASK_PPC_GFXOPT | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND + | MASK_CMPB | MASK_MULHW | MASK_DLMZB) +RS6000_CPU ("505", PROCESSOR_MPCCORE, 0) +RS6000_CPU ("601", PROCESSOR_PPC601, MASK_MULTIPLE | MASK_STRING) +RS6000_CPU ("602", PROCESSOR_PPC603, MASK_PPC_GFXOPT) +RS6000_CPU ("603", PROCESSOR_PPC603, MASK_PPC_GFXOPT) +RS6000_CPU ("603e", PROCESSOR_PPC603, MASK_PPC_GFXOPT) +RS6000_CPU ("604", PROCESSOR_PPC604, MASK_PPC_GFXOPT) +RS6000_CPU ("604e", PROCESSOR_PPC604e, MASK_PPC_GFXOPT) +RS6000_CPU ("620", PROCESSOR_PPC620, MASK_PPC_GFXOPT | MASK_POWERPC64) +RS6000_CPU ("630", PROCESSOR_PPC630, MASK_PPC_GFXOPT | MASK_POWERPC64) +RS6000_CPU ("740", PROCESSOR_PPC750, MASK_PPC_GFXOPT) +RS6000_CPU ("7400", PROCESSOR_PPC7400, POWERPC_7400_MASK) +RS6000_CPU ("7450", PROCESSOR_PPC7450, POWERPC_7400_MASK) +RS6000_CPU ("750", PROCESSOR_PPC750, MASK_PPC_GFXOPT) +RS6000_CPU ("801", PROCESSOR_MPCCORE, MASK_SOFT_FLOAT) +RS6000_CPU ("821", PROCESSOR_MPCCORE, MASK_SOFT_FLOAT) +RS6000_CPU ("823", PROCESSOR_MPCCORE, MASK_SOFT_FLOAT) +RS6000_CPU ("8540", PROCESSOR_PPC8540, MASK_STRICT_ALIGN | MASK_ISEL) +RS6000_CPU ("8548", PROCESSOR_PPC8548, MASK_STRICT_ALIGN | MASK_ISEL) +RS6000_CPU ("a2", PROCESSOR_PPCA2, + MASK_PPC_GFXOPT | MASK_POWERPC64 | MASK_POPCNTB | MASK_CMPB + | MASK_NO_UPDATE) +RS6000_CPU ("e300c2", PROCESSOR_PPCE300C2, MASK_SOFT_FLOAT) +RS6000_CPU ("e300c3", PROCESSOR_PPCE300C3, 0) +RS6000_CPU ("e500mc", PROCESSOR_PPCE500MC, MASK_PPC_GFXOPT | MASK_ISEL) +RS6000_CPU ("e500mc64", PROCESSOR_PPCE500MC64, + MASK_POWERPC64 | MASK_PPC_GFXOPT | MASK_ISEL) +RS6000_CPU ("e5500", PROCESSOR_PPCE5500, + MASK_POWERPC64 | MASK_PPC_GFXOPT | MASK_ISEL) +RS6000_CPU ("e6500", PROCESSOR_PPCE6500, POWERPC_7400_MASK | MASK_POWERPC64 + | MASK_MFCRF | MASK_ISEL) +RS6000_CPU ("860", PROCESSOR_MPCCORE, MASK_SOFT_FLOAT) +RS6000_CPU ("970", PROCESSOR_POWER4, + POWERPC_7400_MASK | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64) +RS6000_CPU ("cell", PROCESSOR_CELL, + POWERPC_7400_MASK | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64) +RS6000_CPU ("ec603e", PROCESSOR_PPC603, MASK_SOFT_FLOAT) +RS6000_CPU ("G3", PROCESSOR_PPC750, MASK_PPC_GFXOPT) +RS6000_CPU ("G4", PROCESSOR_PPC7450, POWERPC_7400_MASK) +RS6000_CPU ("G5", PROCESSOR_POWER4, + POWERPC_7400_MASK | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64) +RS6000_CPU ("titan", PROCESSOR_TITAN, MASK_MULHW | MASK_DLMZB) +RS6000_CPU ("power3", PROCESSOR_PPC630, MASK_PPC_GFXOPT | MASK_POWERPC64) +RS6000_CPU ("power4", PROCESSOR_POWER4, MASK_POWERPC64 | MASK_PPC_GPOPT + | MASK_PPC_GFXOPT | MASK_MFCRF) +RS6000_CPU ("power5", PROCESSOR_POWER5, MASK_POWERPC64 | MASK_PPC_GPOPT + | MASK_PPC_GFXOPT | MASK_MFCRF | MASK_POPCNTB) +RS6000_CPU ("power5+", PROCESSOR_POWER5, MASK_POWERPC64 | MASK_PPC_GPOPT + | MASK_PPC_GFXOPT | MASK_MFCRF | 
MASK_POPCNTB | MASK_FPRND) +RS6000_CPU ("power6", PROCESSOR_POWER6, MASK_POWERPC64 | MASK_PPC_GPOPT + | MASK_PPC_GFXOPT | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND + | MASK_CMPB | MASK_DFP | MASK_RECIP_PRECISION) +RS6000_CPU ("power6x", PROCESSOR_POWER6, MASK_POWERPC64 | MASK_PPC_GPOPT + | MASK_PPC_GFXOPT | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND + | MASK_CMPB | MASK_DFP | MASK_MFPGPR | MASK_RECIP_PRECISION) +RS6000_CPU ("power7", PROCESSOR_POWER7, /* Don't add MASK_ISEL by default */ + POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF + | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD + | MASK_VSX | MASK_RECIP_PRECISION | OPTION_MASK_UPPER_REGS_DF + | OPTION_MASK_UPPER_REGS_DI) +RS6000_CPU ("power8", PROCESSOR_POWER8, MASK_POWERPC64 | ISA_2_7_MASKS_SERVER) +RS6000_CPU ("power9", PROCESSOR_POWER9, MASK_POWERPC64 | ISA_3_0_MASKS_SERVER) +RS6000_CPU ("powerpc", PROCESSOR_POWERPC, 0) +RS6000_CPU ("powerpc64", PROCESSOR_POWERPC64, MASK_PPC_GFXOPT | MASK_POWERPC64) +RS6000_CPU ("powerpc64le", PROCESSOR_POWER8, MASK_POWERPC64 | ISA_2_7_MASKS_SERVER) +RS6000_CPU ("rs64", PROCESSOR_RS64A, MASK_PPC_GFXOPT | MASK_POWERPC64) diff --git a/gcc/config/powerpcspe/powerpcspe-linux.c b/gcc/config/powerpcspe/powerpcspe-linux.c new file mode 100644 index 000000000000..4a8d9fad6ea2 --- /dev/null +++ b/gcc/config/powerpcspe/powerpcspe-linux.c @@ -0,0 +1,36 @@ +/* Functions for Linux on PowerPC. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" + +/* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */ + +bool +rs6000_linux_float_exceptions_rounding_supported_p (void) +{ + /* glibc has support for exceptions and rounding modes for software + floating point. */ + if (OPTION_GLIBC) + return true; + else + return TARGET_DF_INSN; +} diff --git a/gcc/config/powerpcspe/powerpcspe-modes.def b/gcc/config/powerpcspe/powerpcspe-modes.def new file mode 100644 index 000000000000..fc66fca7300d --- /dev/null +++ b/gcc/config/powerpcspe/powerpcspe-modes.def @@ -0,0 +1,56 @@ +/* Definitions of target machine for GNU compiler, for IBM RS/6000. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. 
If not see + <http://www.gnu.org/licenses/>. */ + +/* IBM 128-bit floating point. IFmode and KFmode use the fractional float + support in order to declare 3 128-bit floating point types. */ +FRACTIONAL_FLOAT_MODE (IF, 106, 16, ibm_extended_format); + +/* Explicit IEEE 128-bit floating point. */ +FRACTIONAL_FLOAT_MODE (KF, 113, 16, ieee_quad_format); + +/* 128-bit floating point. ABI_V4 uses IEEE quad, AIX/Darwin + adjust this in rs6000_option_override_internal. */ +FLOAT_MODE (TF, 16, ieee_quad_format); + +/* Add any extra modes needed to represent the condition code. + + For the RS/6000, we need separate modes when unsigned (logical) comparisons + are being done and we need a separate mode for floating-point. We also + use a mode for the case when we are comparing the results of two + comparisons, as then only the EQ bit is valid in the register. */ + +CC_MODE (CCUNS); +CC_MODE (CCFP); +CC_MODE (CCEQ); + +/* Vector modes. */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ +VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */ +VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI */ +VECTOR_MODE (INT, DI, 1); +VECTOR_MODE (INT, TI, 1); +VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ +VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ +VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */ + +/* Replacement for TImode that only is allowed in GPRs. We also use PTImode + for quad memory atomic operations to force getting an even/odd register + combination. */ +PARTIAL_INT_MODE (TI, 128, PTI); diff --git a/gcc/config/powerpcspe/powerpcspe-opts.h b/gcc/config/powerpcspe/powerpcspe-opts.h new file mode 100644 index 000000000000..086217a37f74 --- /dev/null +++ b/gcc/config/powerpcspe/powerpcspe-opts.h @@ -0,0 +1,168 @@ +/* Definitions of target machine needed for option handling for GNU compiler, + for IBM RS/6000. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Michael Meissner (meissner@linux.vnet.ibm.com) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef RS6000_OPTS_H +#define RS6000_OPTS_H + +/* Processor type. Order must match cpu attribute in MD file. 
*/ +enum processor_type + { + PROCESSOR_PPC601, + PROCESSOR_PPC603, + PROCESSOR_PPC604, + PROCESSOR_PPC604e, + PROCESSOR_PPC620, + PROCESSOR_PPC630, + + PROCESSOR_PPC750, + PROCESSOR_PPC7400, + PROCESSOR_PPC7450, + + PROCESSOR_PPC403, + PROCESSOR_PPC405, + PROCESSOR_PPC440, + PROCESSOR_PPC476, + + PROCESSOR_PPC8540, + PROCESSOR_PPC8548, + PROCESSOR_PPCE300C2, + PROCESSOR_PPCE300C3, + PROCESSOR_PPCE500MC, + PROCESSOR_PPCE500MC64, + PROCESSOR_PPCE5500, + PROCESSOR_PPCE6500, + + PROCESSOR_POWER4, + PROCESSOR_POWER5, + PROCESSOR_POWER6, + PROCESSOR_POWER7, + PROCESSOR_POWER8, + PROCESSOR_POWER9, + + PROCESSOR_RS64A, + PROCESSOR_MPCCORE, + PROCESSOR_CELL, + PROCESSOR_PPCA2, + PROCESSOR_TITAN +}; + + +/* FP processor type. */ +enum fpu_type_t +{ + FPU_NONE, /* No FPU */ + FPU_SF_LITE, /* Limited Single Precision FPU */ + FPU_DF_LITE, /* Limited Double Precision FPU */ + FPU_SF_FULL, /* Full Single Precision FPU */ + FPU_DF_FULL /* Full Double Single Precision FPU */ +}; + + +/* Types of costly dependences. */ +enum rs6000_dependence_cost +{ + max_dep_latency = 1000, + no_dep_costly, + all_deps_costly, + true_store_to_load_dep_costly, + store_to_load_dep_costly +}; + +/* Types of nop insertion schemes in sched target hook sched_finish. */ +enum rs6000_nop_insertion +{ + sched_finish_regroup_exact = 1000, + sched_finish_pad_groups, + sched_finish_none +}; + +/* Dispatch group termination caused by an insn. */ +enum group_termination +{ + current_group, + previous_group +}; + +/* Enumeration to give which calling sequence to use. */ +enum rs6000_abi { + ABI_NONE, + ABI_AIX, /* IBM's AIX, or Linux ELFv1 */ + ABI_ELFv2, /* Linux ELFv2 ABI */ + ABI_V4, /* System V.4/eabi */ + ABI_DARWIN /* Apple's Darwin (OS X kernel) */ +}; + +/* Small data support types. */ +enum rs6000_sdata_type { + SDATA_NONE, /* No small data support. */ + SDATA_DATA, /* Just put data in .sbss/.sdata, don't use relocs. */ + SDATA_SYSV, /* Use r13 to point to .sdata/.sbss. */ + SDATA_EABI /* Use r13 like above, r2 points to .sdata2/.sbss2. */ +}; + +/* Type of traceback to use. */ +enum rs6000_traceback_type { + traceback_default = 0, + traceback_none, + traceback_part, + traceback_full +}; + +/* Code model for 64-bit linux. + small: 16-bit toc offsets. + medium: 32-bit toc offsets, static data and code within 2G of TOC pointer. + large: 32-bit toc offsets, no limit on static data and code. */ +enum rs6000_cmodel { + CMODEL_SMALL, + CMODEL_MEDIUM, + CMODEL_LARGE +}; + +/* Describe which vector unit to use for a given machine mode. The + VECTOR_MEM_* and VECTOR_UNIT_* macros assume that Altivec, VSX, and + P8_VECTOR are contiguous. */ +enum rs6000_vector { + VECTOR_NONE, /* Type is not a vector or not supported */ + VECTOR_ALTIVEC, /* Use altivec for vector processing */ + VECTOR_VSX, /* Use VSX for vector processing */ + VECTOR_P8_VECTOR, /* Use ISA 2.07 VSX for vector processing */ + VECTOR_PAIRED, /* Use paired floating point for vectors */ + VECTOR_SPE, /* Use SPE for vector processing */ + VECTOR_OTHER /* Some other vector unit */ +}; + +/* Where to get the canary for the stack protector. */ +enum stack_protector_guard { + SSP_TLS, /* per-thread canary in TLS block */ + SSP_GLOBAL /* global canary */ +}; + +/* No enumeration is defined to index the -mcpu= values (entries in + processor_target_table), with the type int being used instead, but + we need to distinguish the special "native" value. 
*/ +#define RS6000_CPU_OPTION_NATIVE -1 + +#endif diff --git a/gcc/config/powerpcspe/powerpcspe-passes.def b/gcc/config/powerpcspe/powerpcspe-passes.def new file mode 100644 index 000000000000..4383554d00ea --- /dev/null +++ b/gcc/config/powerpcspe/powerpcspe-passes.def @@ -0,0 +1,27 @@ +/* Description of target passes for rs6000 + Copyright (C) 2016-2017 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* + Macros that can be used in this file: + INSERT_PASS_AFTER (PASS, INSTANCE, TGT_PASS) + INSERT_PASS_BEFORE (PASS, INSTANCE, TGT_PASS) + REPLACE_PASS (PASS, INSTANCE, TGT_PASS) + */ + + INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps); diff --git a/gcc/config/powerpcspe/powerpcspe-protos.h b/gcc/config/powerpcspe/powerpcspe-protos.h new file mode 100644 index 000000000000..0344823db0a8 --- /dev/null +++ b/gcc/config/powerpcspe/powerpcspe-protos.h @@ -0,0 +1,269 @@ +/* Definitions of target machine for GNU compiler, for IBM RS/6000. + Copyright (C) 2000-2017 Free Software Foundation, Inc. + Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef GCC_RS6000_PROTOS_H +#define GCC_RS6000_PROTOS_H + +/* Declare functions in rs6000.c */ + +#ifdef RTX_CODE + +#ifdef TREE_CODE +extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int, + tree, machine_mode); +#endif /* TREE_CODE */ + +extern bool easy_altivec_constant (rtx, machine_mode); +extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *); +extern int vspltis_shifted (rtx); +extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int); +extern bool macho_lo_sum_memory_operand (rtx, machine_mode); +extern int num_insns_constant (rtx, machine_mode); +extern int num_insns_constant_wide (HOST_WIDE_INT); +extern int small_data_operand (rtx, machine_mode); +extern bool mem_operand_gpr (rtx, machine_mode); +extern bool mem_operand_ds_form (rtx, machine_mode); +extern bool toc_relative_expr_p (const_rtx, bool); +extern bool invalid_e500_subreg (rtx, machine_mode); +extern void validate_condition_mode (enum rtx_code, machine_mode); +extern bool legitimate_constant_pool_address_p (const_rtx, machine_mode, + bool); +extern bool legitimate_indirect_address_p (rtx, int); +extern bool legitimate_indexed_address_p (rtx, int); +extern bool avoiding_indexed_address_p (machine_mode); + +extern rtx rs6000_got_register (rtx); +extern rtx find_addr_reg (rtx); +extern rtx gen_easy_altivec_constant (rtx); +extern const char *output_vec_const_move (rtx *); +extern const char *rs6000_output_move_128bit (rtx *); +extern bool rs6000_move_128bit_ok_p (rtx []); +extern bool rs6000_split_128bit_ok_p (rtx []); +extern void rs6000_expand_float128_convert (rtx, rtx, bool); +extern void rs6000_expand_vector_init (rtx, rtx); +extern void paired_expand_vector_init (rtx, rtx); +extern void rs6000_expand_vector_set (rtx, rtx, int); +extern void rs6000_expand_vector_extract (rtx, rtx, rtx); +extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx); +extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode); +extern void rs6000_split_v4si_init (rtx []); +extern bool altivec_expand_vec_perm_const (rtx op[4]); +extern void altivec_expand_vec_perm_le (rtx op[4]); +extern bool rs6000_expand_vec_perm_const (rtx op[4]); +extern void altivec_expand_lvx_be (rtx, rtx, machine_mode, unsigned); +extern void altivec_expand_stvx_be (rtx, rtx, machine_mode, unsigned); +extern void altivec_expand_stvex_be (rtx, rtx, machine_mode, unsigned); +extern void rs6000_expand_extract_even (rtx, rtx, rtx); +extern void rs6000_expand_interleave (rtx, rtx, rtx, bool); +extern void rs6000_scale_v2df (rtx, rtx, int); +extern int expand_block_clear (rtx[]); +extern int expand_block_move (rtx[]); +extern bool expand_block_compare (rtx[]); +extern bool expand_strn_compare (rtx[], int); +extern const char * rs6000_output_load_multiple (rtx[]); +extern bool rs6000_is_valid_mask (rtx, int *, int *, machine_mode); +extern bool rs6000_is_valid_and_mask (rtx, machine_mode); +extern bool rs6000_is_valid_shift_mask (rtx, rtx, machine_mode); +extern bool rs6000_is_valid_insert_mask (rtx, rtx, machine_mode); +extern const char *rs6000_insn_for_and_mask (machine_mode, rtx *, bool); +extern const char *rs6000_insn_for_shift_mask (machine_mode, rtx *, bool); +extern const char *rs6000_insn_for_insert_mask (machine_mode, rtx *, bool); +extern bool rs6000_is_valid_2insn_and (rtx, machine_mode); +extern void rs6000_emit_2insn_and (machine_mode, rtx *, bool, int); +extern int registers_ok_for_quad_peep (rtx, rtx); +extern int mems_ok_for_quad_peep (rtx, rtx); +extern bool gpr_or_gpr_p (rtx, rtx); 
+extern bool direct_move_p (rtx, rtx); +extern bool quad_address_p (rtx, machine_mode, bool); +extern bool quad_load_store_p (rtx, rtx); +extern bool fusion_gpr_load_p (rtx, rtx, rtx, rtx); +extern void expand_fusion_gpr_load (rtx *); +extern void emit_fusion_addis (rtx, rtx, const char *, const char *); +extern void emit_fusion_load_store (rtx, rtx, rtx, const char *); +extern const char *emit_fusion_gpr_load (rtx, rtx); +extern bool fusion_p9_p (rtx, rtx, rtx, rtx); +extern void expand_fusion_p9_load (rtx *); +extern void expand_fusion_p9_store (rtx *); +extern const char *emit_fusion_p9_load (rtx, rtx, rtx); +extern const char *emit_fusion_p9_store (rtx, rtx, rtx); +extern rtx fusion_wrap_memory_address (rtx); +extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, + enum reg_class); +extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class, + machine_mode, + rtx); +extern bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, + enum reg_class, + machine_mode); +extern bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode, + machine_mode, + enum reg_class); +extern void rs6000_secondary_reload_inner (rtx, rtx, rtx, bool); +extern void rs6000_secondary_reload_gpr (rtx, rtx, rtx, bool); +extern int paired_emit_vector_cond_expr (rtx, rtx, rtx, + rtx, rtx, rtx); +extern void paired_expand_vector_move (rtx operands[]); + + +extern int ccr_bit (rtx, int); +extern void rs6000_output_function_entry (FILE *, const char *); +extern void print_operand (FILE *, rtx, int); +extern void print_operand_address (FILE *, rtx); +extern enum rtx_code rs6000_reverse_condition (machine_mode, + enum rtx_code); +extern rtx rs6000_emit_eqne (machine_mode, rtx, rtx, rtx); +extern void rs6000_emit_sISEL (machine_mode, rtx[]); +extern void rs6000_emit_sCOND (machine_mode, rtx[]); +extern void rs6000_emit_cbranch (machine_mode, rtx[]); +extern char * output_cbranch (rtx, const char *, int, rtx_insn *); +extern char * output_e500_flip_gt_bit (rtx, rtx); +extern const char * output_probe_stack_range (rtx, rtx); +extern bool rs6000_emit_set_const (rtx, rtx); +extern int rs6000_emit_cmove (rtx, rtx, rtx, rtx); +extern int rs6000_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx); +extern void rs6000_emit_minmax (rtx, enum rtx_code, rtx, rtx); +extern void rs6000_split_signbit (rtx, rtx); +extern void rs6000_expand_atomic_compare_and_swap (rtx op[]); +extern void rs6000_expand_atomic_exchange (rtx op[]); +extern void rs6000_expand_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx); +extern void rs6000_emit_swdiv (rtx, rtx, rtx, bool); +extern void rs6000_emit_swsqrt (rtx, rtx, bool); +extern void output_toc (FILE *, rtx, int, machine_mode); +extern rtx rs6000_longcall_ref (rtx); +extern void rs6000_fatal_bad_address (rtx); +extern rtx create_TOC_reference (rtx, rtx); +extern void rs6000_split_multireg_move (rtx, rtx); +extern void rs6000_emit_le_vsx_move (rtx, rtx, machine_mode); +extern bool valid_sf_si_move (rtx, rtx, machine_mode); +extern void rs6000_emit_move (rtx, rtx, machine_mode); +extern rtx rs6000_secondary_memory_needed_rtx (machine_mode); +extern machine_mode rs6000_secondary_memory_needed_mode (machine_mode); +extern rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, + int, int, int, int *); +extern bool rs6000_legitimate_offset_address_p (machine_mode, rtx, + bool, bool); +extern rtx rs6000_find_base_term (rtx); +extern rtx rs6000_return_addr (int, rtx); +extern void rs6000_output_symbol_ref (FILE*, rtx); +extern HOST_WIDE_INT 
rs6000_initial_elimination_offset (int, int); +extern void rs6000_emit_popcount (rtx, rtx); +extern void rs6000_emit_parity (rtx, rtx); + +extern rtx rs6000_machopic_legitimize_pic_address (rtx, machine_mode, + rtx); +extern rtx rs6000_address_for_fpconvert (rtx); +extern rtx rs6000_address_for_altivec (rtx); +extern rtx rs6000_allocate_stack_temp (machine_mode, bool, bool); +extern int rs6000_loop_align (rtx); +extern void rs6000_split_logical (rtx [], enum rtx_code, bool, bool, bool); +#endif /* RTX_CODE */ + +#ifdef TREE_CODE +extern unsigned int rs6000_data_alignment (tree, unsigned int, enum data_align); +extern bool rs6000_special_adjust_field_align_p (tree, unsigned int); +extern unsigned int rs6000_special_round_type_align (tree, unsigned int, + unsigned int); +extern unsigned int darwin_rs6000_special_round_type_align (tree, unsigned int, + unsigned int); +extern tree altivec_resolve_overloaded_builtin (location_t, tree, void *); +extern rtx rs6000_libcall_value (machine_mode); +extern rtx rs6000_va_arg (tree, tree); +extern int function_ok_for_sibcall (tree); +extern int rs6000_reg_parm_stack_space (tree, bool); +extern void rs6000_asm_weaken_decl (FILE *, tree, const char *, const char *); +extern void rs6000_xcoff_declare_function_name (FILE *, const char *, tree); +extern void rs6000_xcoff_declare_object_name (FILE *, const char *, tree); +extern void rs6000_xcoff_asm_output_aligned_decl_common (FILE *, tree, + const char *, + unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT); +extern void rs6000_elf_declare_function_name (FILE *, const char *, tree); +extern bool rs6000_elf_in_small_data_p (const_tree); +#ifdef ARGS_SIZE_RTX +/* expr.h defines ARGS_SIZE_RTX and `enum direction' */ +extern enum direction function_arg_padding (machine_mode, const_tree); +#endif /* ARGS_SIZE_RTX */ + +#endif /* TREE_CODE */ + +extern int direct_return (void); +extern int first_reg_to_save (void); +extern int first_fp_reg_to_save (void); +extern void output_ascii (FILE *, const char *, int); +extern void rs6000_gen_section_name (char **, const char *, const char *); +extern void output_function_profiler (FILE *, int); +extern void output_profile_hook (int); +extern int rs6000_trampoline_size (void); +extern alias_set_type get_TOC_alias_set (void); +extern void rs6000_emit_prologue (void); +extern void rs6000_emit_load_toc_table (int); +extern unsigned int rs6000_dbx_register_number (unsigned int, unsigned int); +extern void rs6000_emit_epilogue (int); +extern void rs6000_expand_split_stack_prologue (void); +extern void rs6000_split_stack_space_check (rtx, rtx); +extern void rs6000_emit_eh_reg_restore (rtx, rtx); +extern const char * output_isel (rtx *); +extern void rs6000_call_aix (rtx, rtx, rtx, rtx); +extern void rs6000_sibcall_aix (rtx, rtx, rtx, rtx); +extern void rs6000_aix_asm_output_dwarf_table_ref (char *); +extern void get_ppc476_thunk_name (char name[32]); +extern bool rs6000_overloaded_builtin_p (enum rs6000_builtins); +extern const char *rs6000_overloaded_builtin_name (enum rs6000_builtins); +extern int rs6000_store_data_bypass_p (rtx_insn *, rtx_insn *); +extern HOST_WIDE_INT rs6000_builtin_mask_calculate (void); +extern void rs6000_asm_output_dwarf_pcrel (FILE *file, int size, + const char *label); +extern void rs6000_asm_output_dwarf_datarel (FILE *file, int size, + const char *label); + +/* Declare functions in rs6000-c.c */ + +extern void rs6000_pragma_longcall (struct cpp_reader *); +extern void rs6000_cpu_cpp_builtins (struct cpp_reader *); +#ifdef TREE_CODE +extern bool 
rs6000_pragma_target_parse (tree, tree); +#endif +extern void rs6000_target_modify_macros (bool, HOST_WIDE_INT, HOST_WIDE_INT); +extern void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, + HOST_WIDE_INT); + +#if TARGET_MACHO +char *output_call (rtx_insn *, rtx *, int, int); +#endif + +#ifdef NO_DOLLAR_IN_LABEL +const char * rs6000_xcoff_strip_dollar (const char *); +#endif + +void rs6000_final_prescan_insn (rtx_insn *, rtx *operand, int num_operands); + +extern bool rs6000_hard_regno_mode_ok_p[][FIRST_PSEUDO_REGISTER]; +extern unsigned char rs6000_class_max_nregs[][LIM_REG_CLASSES]; +extern unsigned char rs6000_hard_regno_nregs[][FIRST_PSEUDO_REGISTER]; + +extern bool rs6000_linux_float_exceptions_rounding_supported_p (void); + +/* Pass management. */ +namespace gcc { class context; } +class rtl_opt_pass; + +extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *); + +#endif /* rs6000-protos.h */ diff --git a/gcc/config/powerpcspe/powerpcspe-tables.opt b/gcc/config/powerpcspe/powerpcspe-tables.opt new file mode 100644 index 000000000000..2ae74f4f4000 --- /dev/null +++ b/gcc/config/powerpcspe/powerpcspe-tables.opt @@ -0,0 +1,196 @@ +; -*- buffer-read-only: t -*- +; Generated automatically by genopt.sh from powerpcspe-cpus.def. + +; Copyright (C) 2011-2017 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. 
+ +Enum +Name(rs6000_cpu_opt_value) Type(int) +Known CPUs (for use with the -mcpu= and -mtune= options): + +EnumValue +Enum(rs6000_cpu_opt_value) String(native) Value(RS6000_CPU_OPTION_NATIVE) DriverOnly + +EnumValue +Enum(rs6000_cpu_opt_value) String(401) Value(0) + +EnumValue +Enum(rs6000_cpu_opt_value) String(403) Value(1) + +EnumValue +Enum(rs6000_cpu_opt_value) String(405) Value(2) + +EnumValue +Enum(rs6000_cpu_opt_value) String(405fp) Value(3) + +EnumValue +Enum(rs6000_cpu_opt_value) String(440) Value(4) + +EnumValue +Enum(rs6000_cpu_opt_value) String(440fp) Value(5) + +EnumValue +Enum(rs6000_cpu_opt_value) String(464) Value(6) + +EnumValue +Enum(rs6000_cpu_opt_value) String(464fp) Value(7) + +EnumValue +Enum(rs6000_cpu_opt_value) String(476) Value(8) + +EnumValue +Enum(rs6000_cpu_opt_value) String(476fp) Value(9) + +EnumValue +Enum(rs6000_cpu_opt_value) String(505) Value(10) + +EnumValue +Enum(rs6000_cpu_opt_value) String(601) Value(11) + +EnumValue +Enum(rs6000_cpu_opt_value) String(602) Value(12) + +EnumValue +Enum(rs6000_cpu_opt_value) String(603) Value(13) + +EnumValue +Enum(rs6000_cpu_opt_value) String(603e) Value(14) + +EnumValue +Enum(rs6000_cpu_opt_value) String(604) Value(15) + +EnumValue +Enum(rs6000_cpu_opt_value) String(604e) Value(16) + +EnumValue +Enum(rs6000_cpu_opt_value) String(620) Value(17) + +EnumValue +Enum(rs6000_cpu_opt_value) String(630) Value(18) + +EnumValue +Enum(rs6000_cpu_opt_value) String(740) Value(19) + +EnumValue +Enum(rs6000_cpu_opt_value) String(7400) Value(20) + +EnumValue +Enum(rs6000_cpu_opt_value) String(7450) Value(21) + +EnumValue +Enum(rs6000_cpu_opt_value) String(750) Value(22) + +EnumValue +Enum(rs6000_cpu_opt_value) String(801) Value(23) + +EnumValue +Enum(rs6000_cpu_opt_value) String(821) Value(24) + +EnumValue +Enum(rs6000_cpu_opt_value) String(823) Value(25) + +EnumValue +Enum(rs6000_cpu_opt_value) String(8540) Value(26) + +EnumValue +Enum(rs6000_cpu_opt_value) String(8548) Value(27) + +EnumValue +Enum(rs6000_cpu_opt_value) String(a2) Value(28) + +EnumValue +Enum(rs6000_cpu_opt_value) String(e300c2) Value(29) + +EnumValue +Enum(rs6000_cpu_opt_value) String(e300c3) Value(30) + +EnumValue +Enum(rs6000_cpu_opt_value) String(e500mc) Value(31) + +EnumValue +Enum(rs6000_cpu_opt_value) String(e500mc64) Value(32) + +EnumValue +Enum(rs6000_cpu_opt_value) String(e5500) Value(33) + +EnumValue +Enum(rs6000_cpu_opt_value) String(e6500) Value(34) + +EnumValue +Enum(rs6000_cpu_opt_value) String(860) Value(35) + +EnumValue +Enum(rs6000_cpu_opt_value) String(970) Value(36) + +EnumValue +Enum(rs6000_cpu_opt_value) String(cell) Value(37) + +EnumValue +Enum(rs6000_cpu_opt_value) String(ec603e) Value(38) + +EnumValue +Enum(rs6000_cpu_opt_value) String(G3) Value(39) + +EnumValue +Enum(rs6000_cpu_opt_value) String(G4) Value(40) + +EnumValue +Enum(rs6000_cpu_opt_value) String(G5) Value(41) + +EnumValue +Enum(rs6000_cpu_opt_value) String(titan) Value(42) + +EnumValue +Enum(rs6000_cpu_opt_value) String(power3) Value(43) + +EnumValue +Enum(rs6000_cpu_opt_value) String(power4) Value(44) + +EnumValue +Enum(rs6000_cpu_opt_value) String(power5) Value(45) + +EnumValue +Enum(rs6000_cpu_opt_value) String(power5+) Value(46) + +EnumValue +Enum(rs6000_cpu_opt_value) String(power6) Value(47) + +EnumValue +Enum(rs6000_cpu_opt_value) String(power6x) Value(48) + +EnumValue +Enum(rs6000_cpu_opt_value) String(power7) Value(49) + +EnumValue +Enum(rs6000_cpu_opt_value) String(power8) Value(50) + +EnumValue +Enum(rs6000_cpu_opt_value) String(power9) Value(51) + +EnumValue 
+Enum(rs6000_cpu_opt_value) String(powerpc) Value(52) + +EnumValue +Enum(rs6000_cpu_opt_value) String(powerpc64) Value(53) + +EnumValue +Enum(rs6000_cpu_opt_value) String(powerpc64le) Value(54) + +EnumValue +Enum(rs6000_cpu_opt_value) String(rs64) Value(55) + diff --git a/gcc/config/powerpcspe/powerpcspe.c b/gcc/config/powerpcspe/powerpcspe.c new file mode 100644 index 000000000000..73d608fd8052 --- /dev/null +++ b/gcc/config/powerpcspe/powerpcspe.c @@ -0,0 +1,43668 @@ +/* Subroutines used for code generation on IBM RS/6000. + Copyright (C) 1991-2017 Free Software Foundation, Inc. + Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "rtl.h" +#include "tree.h" +#include "memmodel.h" +#include "gimple.h" +#include "cfghooks.h" +#include "cfgloop.h" +#include "df.h" +#include "tm_p.h" +#include "stringpool.h" +#include "expmed.h" +#include "optabs.h" +#include "regs.h" +#include "ira.h" +#include "recog.h" +#include "cgraph.h" +#include "diagnostic-core.h" +#include "insn-attr.h" +#include "flags.h" +#include "alias.h" +#include "fold-const.h" +#include "stor-layout.h" +#include "calls.h" +#include "print-tree.h" +#include "varasm.h" +#include "explow.h" +#include "expr.h" +#include "output.h" +#include "dbxout.h" +#include "common/common-target.h" +#include "langhooks.h" +#include "reload.h" +#include "sched-int.h" +#include "gimplify.h" +#include "gimple-fold.h" +#include "gimple-iterator.h" +#include "gimple-ssa.h" +#include "gimple-walk.h" +#include "intl.h" +#include "params.h" +#include "tm-constrs.h" +#include "tree-vectorizer.h" +#include "target-globals.h" +#include "builtins.h" +#include "context.h" +#include "tree-pass.h" +#include "except.h" +#if TARGET_XCOFF +#include "xcoffout.h" /* get declarations of xcoff_*_section_name */ +#endif +#if TARGET_MACHO +#include "gstab.h" /* for N_SLINE */ +#endif +#include "case-cfn-macros.h" +#include "ppc-auxv.h" + +/* This file should be included last. */ +#include "target-def.h" + +#ifndef TARGET_NO_PROTOTYPE +#define TARGET_NO_PROTOTYPE 0 +#endif + +#define min(A,B) ((A) < (B) ? (A) : (B)) +#define max(A,B) ((A) > (B) ? 
(A) : (B)) + +/* Structure used to define the rs6000 stack */ +typedef struct rs6000_stack { + int reload_completed; /* stack info won't change from here on */ + int first_gp_reg_save; /* first callee saved GP register used */ + int first_fp_reg_save; /* first callee saved FP register used */ + int first_altivec_reg_save; /* first callee saved AltiVec register used */ + int lr_save_p; /* true if the link reg needs to be saved */ + int cr_save_p; /* true if the CR reg needs to be saved */ + unsigned int vrsave_mask; /* mask of vec registers to save */ + int push_p; /* true if we need to allocate stack space */ + int calls_p; /* true if the function makes any calls */ + int world_save_p; /* true if we're saving *everything*: + r13-r31, cr, f14-f31, vrsave, v20-v31 */ + enum rs6000_abi abi; /* which ABI to use */ + int gp_save_offset; /* offset to save GP regs from initial SP */ + int fp_save_offset; /* offset to save FP regs from initial SP */ + int altivec_save_offset; /* offset to save AltiVec regs from initial SP */ + int lr_save_offset; /* offset to save LR from initial SP */ + int cr_save_offset; /* offset to save CR from initial SP */ + int vrsave_save_offset; /* offset to save VRSAVE from initial SP */ + int spe_gp_save_offset; /* offset to save spe 64-bit gprs */ + int varargs_save_offset; /* offset to save the varargs registers */ + int ehrd_offset; /* offset to EH return data */ + int ehcr_offset; /* offset to EH CR field data */ + int reg_size; /* register size (4 or 8) */ + HOST_WIDE_INT vars_size; /* variable save area size */ + int parm_size; /* outgoing parameter size */ + int save_size; /* save area size */ + int fixed_size; /* fixed size of stack frame */ + int gp_size; /* size of saved GP registers */ + int fp_size; /* size of saved FP registers */ + int altivec_size; /* size of saved AltiVec registers */ + int cr_size; /* size to hold CR if not in fixed area */ + int vrsave_size; /* size to hold VRSAVE */ + int altivec_padding_size; /* size of altivec alignment padding */ + int spe_gp_size; /* size of 64-bit GPR save size for SPE */ + int spe_padding_size; + HOST_WIDE_INT total_size; /* total bytes allocated for stack */ + int spe_64bit_regs_used; + int savres_strategy; +} rs6000_stack_t; + +/* A C structure for machine-specific, per-function data. + This is added to the cfun structure. */ +typedef struct GTY(()) machine_function +{ + /* Whether the instruction chain has been scanned already. */ + int spe_insn_chain_scanned_p; + /* Flags if __builtin_return_address (n) with n >= 1 was used. */ + int ra_needs_full_frame; + /* Flags if __builtin_return_address (0) was used. */ + int ra_need_lr; + /* Cache lr_save_p after expansion of builtin_eh_return. */ + int lr_save_state; + /* Whether we need to save the TOC to the reserved stack location in the + function prologue. */ + bool save_toc_in_prologue; + /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4 + varargs save area. */ + HOST_WIDE_INT varargs_save_offset; + /* Temporary stack slot to use for SDmode copies. This slot is + 64-bits wide and is allocated early enough so that the offset + does not overflow the 16-bit load/store offset field. */ + rtx sdmode_stack_slot; + /* Alternative internal arg pointer for -fsplit-stack. */ + rtx split_stack_arg_pointer; + bool split_stack_argp_used; + /* Flag if r2 setup is needed with ELFv2 ABI. */ + bool r2_setup_needed; + /* The number of components we use for separate shrink-wrapping. 
*/ + int n_components; + /* The components already handled by separate shrink-wrapping, which should + not be considered by the prologue and epilogue. */ + bool gpr_is_wrapped_separately[32]; + bool fpr_is_wrapped_separately[32]; + bool lr_is_wrapped_separately; +} machine_function; + +/* Support targetm.vectorize.builtin_mask_for_load. */ +static GTY(()) tree altivec_builtin_mask_for_load; + +/* Set to nonzero once AIX common-mode calls have been defined. */ +static GTY(()) int common_mode_defined; + +/* Label number of label created for -mrelocatable, to call to so we can + get the address of the GOT section */ +static int rs6000_pic_labelno; + +#ifdef USING_ELFOS_H +/* Counter for labels which are to be placed in .fixup. */ +int fixuplabelno = 0; +#endif + +/* Whether to use variant of AIX ABI for PowerPC64 Linux. */ +int dot_symbols; + +/* Specify the machine mode that pointers have. After generation of rtl, the + compiler makes no further distinction between pointers and any other objects + of this machine mode. The type is unsigned since not all things that + include powerpcspe.h also include machmode.h. */ +unsigned rs6000_pmode; + +/* Width in bits of a pointer. */ +unsigned rs6000_pointer_size; + +#ifdef HAVE_AS_GNU_ATTRIBUTE +# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE +# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0 +# endif +/* Flag whether floating point values have been passed/returned. + Note that this doesn't say whether fprs are used, since the + Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls + should be set for soft-float values passed in gprs and ieee128 + values passed in vsx registers. */ +static bool rs6000_passes_float; +static bool rs6000_passes_long_double; +/* Flag whether vector values have been passed/returned. */ +static bool rs6000_passes_vector; +/* Flag whether small (<= 8 byte) structures have been returned. */ +static bool rs6000_returns_struct; +#endif + +/* Value is TRUE if register/mode pair is acceptable. */ +bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER]; + +/* Maximum number of registers needed for a given register class and mode. */ +unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES]; + +/* How many registers are needed for a given register and mode. */ +unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER]; + +/* Map register number to register class. */ +enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER]; + +static int dbg_cost_ctrl; + +/* Built in types. */ +tree rs6000_builtin_types[RS6000_BTI_MAX]; +tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT]; + +/* Flag to say the TOC is initialized */ +int toc_initialized, need_toc_init; +char toc_label_name[10]; + +/* Cached value of rs6000_variable_issue. This is cached in + rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */ +static short cached_can_issue_more; + +static GTY(()) section *read_only_data_section; +static GTY(()) section *private_data_section; +static GTY(()) section *tls_data_section; +static GTY(()) section *tls_private_data_section; +static GTY(()) section *read_only_private_data_section; +static GTY(()) section *sdata2_section; +static GTY(()) section *toc_section; + +struct builtin_description +{ + const HOST_WIDE_INT mask; + const enum insn_code icode; + const char *const name; + const enum rs6000_builtins code; +}; + +/* Describe the vector unit used for modes. 
*/ +enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES]; +enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES]; + +/* Register classes for various constraints that are based on the target + switches. */ +enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX]; + +/* Describe the alignment of a vector. */ +int rs6000_vector_align[NUM_MACHINE_MODES]; + +/* Map selected modes to types for builtins. */ +static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2]; + +/* What modes to automatically generate reciprocal divide estimate (fre) and + reciprocal sqrt (frsqrte) for. */ +unsigned char rs6000_recip_bits[MAX_MACHINE_MODE]; + +/* Masks to determine which reciprocal esitmate instructions to generate + automatically. */ +enum rs6000_recip_mask { + RECIP_SF_DIV = 0x001, /* Use divide estimate */ + RECIP_DF_DIV = 0x002, + RECIP_V4SF_DIV = 0x004, + RECIP_V2DF_DIV = 0x008, + + RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */ + RECIP_DF_RSQRT = 0x020, + RECIP_V4SF_RSQRT = 0x040, + RECIP_V2DF_RSQRT = 0x080, + + /* Various combination of flags for -mrecip=xxx. */ + RECIP_NONE = 0, + RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV + | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT + | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT), + + RECIP_HIGH_PRECISION = RECIP_ALL, + + /* On low precision machines like the power5, don't enable double precision + reciprocal square root estimate, since it isn't accurate enough. */ + RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT)) +}; + +/* -mrecip options. */ +static struct +{ + const char *string; /* option name */ + unsigned int mask; /* mask bits to set */ +} recip_options[] = { + { "all", RECIP_ALL }, + { "none", RECIP_NONE }, + { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV + | RECIP_V2DF_DIV) }, + { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) }, + { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) }, + { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT + | RECIP_V2DF_RSQRT) }, + { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) }, + { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) }, +}; + +/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values. */ +static const struct +{ + const char *cpu; + unsigned int cpuid; +} cpu_is_info[] = { + { "power9", PPC_PLATFORM_POWER9 }, + { "power8", PPC_PLATFORM_POWER8 }, + { "power7", PPC_PLATFORM_POWER7 }, + { "power6x", PPC_PLATFORM_POWER6X }, + { "power6", PPC_PLATFORM_POWER6 }, + { "power5+", PPC_PLATFORM_POWER5_PLUS }, + { "power5", PPC_PLATFORM_POWER5 }, + { "ppc970", PPC_PLATFORM_PPC970 }, + { "power4", PPC_PLATFORM_POWER4 }, + { "ppca2", PPC_PLATFORM_PPCA2 }, + { "ppc476", PPC_PLATFORM_PPC476 }, + { "ppc464", PPC_PLATFORM_PPC464 }, + { "ppc440", PPC_PLATFORM_PPC440 }, + { "ppc405", PPC_PLATFORM_PPC405 }, + { "ppc-cell-be", PPC_PLATFORM_CELL_BE } +}; + +/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks. */ +static const struct +{ + const char *hwcap; + int mask; + unsigned int id; +} cpu_supports_info[] = { + /* AT_HWCAP masks. 
*/ + { "4xxmac", PPC_FEATURE_HAS_4xxMAC, 0 }, + { "altivec", PPC_FEATURE_HAS_ALTIVEC, 0 }, + { "arch_2_05", PPC_FEATURE_ARCH_2_05, 0 }, + { "arch_2_06", PPC_FEATURE_ARCH_2_06, 0 }, + { "archpmu", PPC_FEATURE_PERFMON_COMPAT, 0 }, + { "booke", PPC_FEATURE_BOOKE, 0 }, + { "cellbe", PPC_FEATURE_CELL_BE, 0 }, + { "dfp", PPC_FEATURE_HAS_DFP, 0 }, + { "efpdouble", PPC_FEATURE_HAS_EFP_DOUBLE, 0 }, + { "efpsingle", PPC_FEATURE_HAS_EFP_SINGLE, 0 }, + { "fpu", PPC_FEATURE_HAS_FPU, 0 }, + { "ic_snoop", PPC_FEATURE_ICACHE_SNOOP, 0 }, + { "mmu", PPC_FEATURE_HAS_MMU, 0 }, + { "notb", PPC_FEATURE_NO_TB, 0 }, + { "pa6t", PPC_FEATURE_PA6T, 0 }, + { "power4", PPC_FEATURE_POWER4, 0 }, + { "power5", PPC_FEATURE_POWER5, 0 }, + { "power5+", PPC_FEATURE_POWER5_PLUS, 0 }, + { "power6x", PPC_FEATURE_POWER6_EXT, 0 }, + { "ppc32", PPC_FEATURE_32, 0 }, + { "ppc601", PPC_FEATURE_601_INSTR, 0 }, + { "ppc64", PPC_FEATURE_64, 0 }, + { "ppcle", PPC_FEATURE_PPC_LE, 0 }, + { "smt", PPC_FEATURE_SMT, 0 }, + { "spe", PPC_FEATURE_HAS_SPE, 0 }, + { "true_le", PPC_FEATURE_TRUE_LE, 0 }, + { "ucache", PPC_FEATURE_UNIFIED_CACHE, 0 }, + { "vsx", PPC_FEATURE_HAS_VSX, 0 }, + + /* AT_HWCAP2 masks. */ + { "arch_2_07", PPC_FEATURE2_ARCH_2_07, 1 }, + { "dscr", PPC_FEATURE2_HAS_DSCR, 1 }, + { "ebb", PPC_FEATURE2_HAS_EBB, 1 }, + { "htm", PPC_FEATURE2_HAS_HTM, 1 }, + { "htm-nosc", PPC_FEATURE2_HTM_NOSC, 1 }, + { "isel", PPC_FEATURE2_HAS_ISEL, 1 }, + { "tar", PPC_FEATURE2_HAS_TAR, 1 }, + { "vcrypto", PPC_FEATURE2_HAS_VEC_CRYPTO, 1 }, + { "arch_3_00", PPC_FEATURE2_ARCH_3_00, 1 }, + { "ieee128", PPC_FEATURE2_HAS_IEEE128, 1 } +}; + +/* Newer LIBCs explicitly export this symbol to declare that they provide + the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a + reference to this symbol whenever we expand a CPU builtin, so that + we never link against an old LIBC. */ +const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform"; + +/* True if we have expanded a CPU builtin. */ +bool cpu_builtin_p; + +/* Pointer to function (in powerpcspe-c.c) that can define or undefine target + macros that have changed. Languages that don't support the preprocessor + don't link in powerpcspe-c.c, so we can't call it directly. */ +void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT); + +/* Simplfy register classes into simpler classifications. We assume + GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range + check for standard register classes (gpr/floating/altivec/vsx) and + floating/vector classes (float/altivec/vsx). */ + +enum rs6000_reg_type { + NO_REG_TYPE, + PSEUDO_REG_TYPE, + GPR_REG_TYPE, + VSX_REG_TYPE, + ALTIVEC_REG_TYPE, + FPR_REG_TYPE, + SPR_REG_TYPE, + CR_REG_TYPE, + SPE_ACC_TYPE, + SPEFSCR_REG_TYPE +}; + +/* Map register class to register type. */ +static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES]; + +/* First/last register type for the 'normal' register types (i.e. general + purpose, floating point, altivec, and VSX registers). */ +#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE) + +#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE) + + +/* Register classes we care about in secondary reload or go if legitimate + address. We only need to worry about GPR, FPR, and Altivec registers here, + along an ANY field that is the OR of the 3 register classes. */ + +enum rs6000_reload_reg_type { + RELOAD_REG_GPR, /* General purpose registers. */ + RELOAD_REG_FPR, /* Traditional floating point regs. 
*/ + RELOAD_REG_VMX, /* Altivec (VMX) registers. */ + RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */ + N_RELOAD_REG +}; + +/* For setting up register classes, loop through the 3 register classes mapping + into real registers, and skip the ANY class, which is just an OR of the + bits. */ +#define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR +#define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX + +/* Map reload register type to a register in the register class. */ +struct reload_reg_map_type { + const char *name; /* Register class name. */ + int reg; /* Register in the register class. */ +}; + +static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = { + { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */ + { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */ + { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */ + { "Any", -1 }, /* RELOAD_REG_ANY. */ +}; + +/* Mask bits for each register class, indexed per mode. Historically the + compiler has been more restrictive which types can do PRE_MODIFY instead of + PRE_INC and PRE_DEC, so keep track of sepaate bits for these two. */ +typedef unsigned char addr_mask_type; + +#define RELOAD_REG_VALID 0x01 /* Mode valid in register.. */ +#define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */ +#define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */ +#define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */ +#define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */ +#define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */ +#define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */ +#define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited. */ + +/* Register type masks based on the type, of valid addressing modes. */ +struct rs6000_reg_addr { + enum insn_code reload_load; /* INSN to reload for loading. */ + enum insn_code reload_store; /* INSN to reload for storing. */ + enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */ + enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */ + enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */ + enum insn_code fusion_gpr_ld; /* INSN for fusing gpr ADDIS/loads. */ + /* INSNs for fusing addi with loads + or stores for each reg. class. */ + enum insn_code fusion_addi_ld[(int)N_RELOAD_REG]; + enum insn_code fusion_addi_st[(int)N_RELOAD_REG]; + /* INSNs for fusing addis with loads + or stores for each reg. class. */ + enum insn_code fusion_addis_ld[(int)N_RELOAD_REG]; + enum insn_code fusion_addis_st[(int)N_RELOAD_REG]; + addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */ + bool scalar_in_vmx_p; /* Scalar value can go in VMX. */ + bool fused_toc; /* Mode supports TOC fusion. */ +}; + +static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES]; + +/* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */ +static inline bool +mode_supports_pre_incdec_p (machine_mode mode) +{ + return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC) + != 0); +} + +/* Helper function to say whether a mode supports PRE_MODIFY. */ +static inline bool +mode_supports_pre_modify_p (machine_mode mode) +{ + return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY) + != 0); +} + +/* Given that there exists at least one variable that is set (produced) + by OUT_INSN and read (consumed) by IN_INSN, return true iff + IN_INSN represents one or more memory store operations and none of + the variables set by OUT_INSN is used by IN_INSN as the address of a + store operation. 
If either IN_INSN or OUT_INSN does not represent + a "single" RTL SET expression (as loosely defined by the + implementation of the single_set function) or a PARALLEL with only + SETs, CLOBBERs, and USEs inside, this function returns false. + + This rs6000-specific version of store_data_bypass_p checks for + certain conditions that result in assertion failures (and internal + compiler errors) in the generic store_data_bypass_p function and + returns false rather than calling store_data_bypass_p if one of the + problematic conditions is detected. */ + +int +rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn) +{ + rtx out_set, in_set; + rtx out_pat, in_pat; + rtx out_exp, in_exp; + int i, j; + + in_set = single_set (in_insn); + if (in_set) + { + if (MEM_P (SET_DEST (in_set))) + { + out_set = single_set (out_insn); + if (!out_set) + { + out_pat = PATTERN (out_insn); + if (GET_CODE (out_pat) == PARALLEL) + { + for (i = 0; i < XVECLEN (out_pat, 0); i++) + { + out_exp = XVECEXP (out_pat, 0, i); + if ((GET_CODE (out_exp) == CLOBBER) + || (GET_CODE (out_exp) == USE)) + continue; + else if (GET_CODE (out_exp) != SET) + return false; + } + } + } + } + } + else + { + in_pat = PATTERN (in_insn); + if (GET_CODE (in_pat) != PARALLEL) + return false; + + for (i = 0; i < XVECLEN (in_pat, 0); i++) + { + in_exp = XVECEXP (in_pat, 0, i); + if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE)) + continue; + else if (GET_CODE (in_exp) != SET) + return false; + + if (MEM_P (SET_DEST (in_exp))) + { + out_set = single_set (out_insn); + if (!out_set) + { + out_pat = PATTERN (out_insn); + if (GET_CODE (out_pat) != PARALLEL) + return false; + for (j = 0; j < XVECLEN (out_pat, 0); j++) + { + out_exp = XVECEXP (out_pat, 0, j); + if ((GET_CODE (out_exp) == CLOBBER) + || (GET_CODE (out_exp) == USE)) + continue; + else if (GET_CODE (out_exp) != SET) + return false; + } + } + } + } + } + return store_data_bypass_p (out_insn, in_insn); +} + +/* Return true if we have D-form addressing in altivec registers. */ +static inline bool +mode_supports_vmx_dform (machine_mode mode) +{ + return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0); +} + +/* Return true if we have D-form addressing in VSX registers. This addressing + is more limited than normal d-form addressing in that the offset must be + aligned on a 16-byte boundary. */ +static inline bool +mode_supports_vsx_dform_quad (machine_mode mode) +{ + return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET) + != 0); +} + + +/* Target cpu costs. */ + +struct processor_costs { + const int mulsi; /* cost of SImode multiplication. */ + const int mulsi_const; /* cost of SImode multiplication by constant. */ + const int mulsi_const9; /* cost of SImode mult by short constant. */ + const int muldi; /* cost of DImode multiplication. */ + const int divsi; /* cost of SImode division. */ + const int divdi; /* cost of DImode division. */ + const int fp; /* cost of simple SFmode and DFmode insns. */ + const int dmul; /* cost of DFmode multiplication (and fmadd). */ + const int sdiv; /* cost of SFmode division (fdivs). */ + const int ddiv; /* cost of DFmode division (fdiv). */ + const int cache_line_size; /* cache line size in bytes. */ + const int l1_cache_size; /* size of l1 cache, in kilobytes. */ + const int l2_cache_size; /* size of l2 cache, in kilobytes. */ + const int simultaneous_prefetches; /* number of parallel prefetch + operations. */ + const int sfdf_convert; /* cost of SF->DF conversion. 
*/ +}; + +const struct processor_costs *rs6000_cost; + +/* Processor costs (relative to an add) */ + +/* Instruction size costs on 32bit processors. */ +static const +struct processor_costs size32_cost = { + COSTS_N_INSNS (1), /* mulsi */ + COSTS_N_INSNS (1), /* mulsi_const */ + COSTS_N_INSNS (1), /* mulsi_const9 */ + COSTS_N_INSNS (1), /* muldi */ + COSTS_N_INSNS (1), /* divsi */ + COSTS_N_INSNS (1), /* divdi */ + COSTS_N_INSNS (1), /* fp */ + COSTS_N_INSNS (1), /* dmul */ + COSTS_N_INSNS (1), /* sdiv */ + COSTS_N_INSNS (1), /* ddiv */ + 32, /* cache line size */ + 0, /* l1 cache */ + 0, /* l2 cache */ + 0, /* streams */ + 0, /* SF->DF convert */ +}; + +/* Instruction size costs on 64bit processors. */ +static const +struct processor_costs size64_cost = { + COSTS_N_INSNS (1), /* mulsi */ + COSTS_N_INSNS (1), /* mulsi_const */ + COSTS_N_INSNS (1), /* mulsi_const9 */ + COSTS_N_INSNS (1), /* muldi */ + COSTS_N_INSNS (1), /* divsi */ + COSTS_N_INSNS (1), /* divdi */ + COSTS_N_INSNS (1), /* fp */ + COSTS_N_INSNS (1), /* dmul */ + COSTS_N_INSNS (1), /* sdiv */ + COSTS_N_INSNS (1), /* ddiv */ + 128, /* cache line size */ + 0, /* l1 cache */ + 0, /* l2 cache */ + 0, /* streams */ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on RS64A processors. */ +static const +struct processor_costs rs64a_cost = { + COSTS_N_INSNS (20), /* mulsi */ + COSTS_N_INSNS (12), /* mulsi_const */ + COSTS_N_INSNS (8), /* mulsi_const9 */ + COSTS_N_INSNS (34), /* muldi */ + COSTS_N_INSNS (65), /* divsi */ + COSTS_N_INSNS (67), /* divdi */ + COSTS_N_INSNS (4), /* fp */ + COSTS_N_INSNS (4), /* dmul */ + COSTS_N_INSNS (31), /* sdiv */ + COSTS_N_INSNS (31), /* ddiv */ + 128, /* cache line size */ + 128, /* l1 cache */ + 2048, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on MPCCORE processors. */ +static const +struct processor_costs mpccore_cost = { + COSTS_N_INSNS (2), /* mulsi */ + COSTS_N_INSNS (2), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (2), /* muldi */ + COSTS_N_INSNS (6), /* divsi */ + COSTS_N_INSNS (6), /* divdi */ + COSTS_N_INSNS (4), /* fp */ + COSTS_N_INSNS (5), /* dmul */ + COSTS_N_INSNS (10), /* sdiv */ + COSTS_N_INSNS (17), /* ddiv */ + 32, /* cache line size */ + 4, /* l1 cache */ + 16, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on PPC403 processors. */ +static const +struct processor_costs ppc403_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (33), /* divsi */ + COSTS_N_INSNS (33), /* divdi */ + COSTS_N_INSNS (11), /* fp */ + COSTS_N_INSNS (11), /* dmul */ + COSTS_N_INSNS (11), /* sdiv */ + COSTS_N_INSNS (11), /* ddiv */ + 32, /* cache line size */ + 4, /* l1 cache */ + 16, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on PPC405 processors. */ +static const +struct processor_costs ppc405_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (5), /* muldi */ + COSTS_N_INSNS (35), /* divsi */ + COSTS_N_INSNS (35), /* divdi */ + COSTS_N_INSNS (11), /* fp */ + COSTS_N_INSNS (11), /* dmul */ + COSTS_N_INSNS (11), /* sdiv */ + COSTS_N_INSNS (11), /* ddiv */ + 32, /* cache line size */ + 16, /* l1 cache */ + 128, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on PPC440 processors. 
*/ +static const +struct processor_costs ppc440_cost = { + COSTS_N_INSNS (3), /* mulsi */ + COSTS_N_INSNS (2), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (3), /* muldi */ + COSTS_N_INSNS (34), /* divsi */ + COSTS_N_INSNS (34), /* divdi */ + COSTS_N_INSNS (5), /* fp */ + COSTS_N_INSNS (5), /* dmul */ + COSTS_N_INSNS (19), /* sdiv */ + COSTS_N_INSNS (33), /* ddiv */ + 32, /* cache line size */ + 32, /* l1 cache */ + 256, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on PPC476 processors. */ +static const +struct processor_costs ppc476_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (11), /* divsi */ + COSTS_N_INSNS (11), /* divdi */ + COSTS_N_INSNS (6), /* fp */ + COSTS_N_INSNS (6), /* dmul */ + COSTS_N_INSNS (19), /* sdiv */ + COSTS_N_INSNS (33), /* ddiv */ + 32, /* l1 cache line size */ + 32, /* l1 cache */ + 512, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on PPC601 processors. */ +static const +struct processor_costs ppc601_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (5), /* mulsi_const */ + COSTS_N_INSNS (5), /* mulsi_const9 */ + COSTS_N_INSNS (5), /* muldi */ + COSTS_N_INSNS (36), /* divsi */ + COSTS_N_INSNS (36), /* divdi */ + COSTS_N_INSNS (4), /* fp */ + COSTS_N_INSNS (5), /* dmul */ + COSTS_N_INSNS (17), /* sdiv */ + COSTS_N_INSNS (31), /* ddiv */ + 32, /* cache line size */ + 32, /* l1 cache */ + 256, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on PPC603 processors. */ +static const +struct processor_costs ppc603_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (3), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (5), /* muldi */ + COSTS_N_INSNS (37), /* divsi */ + COSTS_N_INSNS (37), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (4), /* dmul */ + COSTS_N_INSNS (18), /* sdiv */ + COSTS_N_INSNS (33), /* ddiv */ + 32, /* cache line size */ + 8, /* l1 cache */ + 64, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on PPC604 processors. */ +static const +struct processor_costs ppc604_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (20), /* divsi */ + COSTS_N_INSNS (20), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (18), /* sdiv */ + COSTS_N_INSNS (32), /* ddiv */ + 32, /* cache line size */ + 16, /* l1 cache */ + 512, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on PPC604e processors. */ +static const +struct processor_costs ppc604e_cost = { + COSTS_N_INSNS (2), /* mulsi */ + COSTS_N_INSNS (2), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (2), /* muldi */ + COSTS_N_INSNS (20), /* divsi */ + COSTS_N_INSNS (20), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (18), /* sdiv */ + COSTS_N_INSNS (32), /* ddiv */ + 32, /* cache line size */ + 32, /* l1 cache */ + 1024, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on PPC620 processors. 
*/ +static const +struct processor_costs ppc620_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (7), /* muldi */ + COSTS_N_INSNS (21), /* divsi */ + COSTS_N_INSNS (37), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (18), /* sdiv */ + COSTS_N_INSNS (32), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 1024, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on PPC630 processors. */ +static const +struct processor_costs ppc630_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (7), /* muldi */ + COSTS_N_INSNS (21), /* divsi */ + COSTS_N_INSNS (37), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (17), /* sdiv */ + COSTS_N_INSNS (21), /* ddiv */ + 128, /* cache line size */ + 64, /* l1 cache */ + 1024, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on Cell processor. */ +/* COSTS_N_INSNS (1) ~ one add. */ +static const +struct processor_costs ppccell_cost = { + COSTS_N_INSNS (9/2)+2, /* mulsi */ + COSTS_N_INSNS (6/2), /* mulsi_const */ + COSTS_N_INSNS (6/2), /* mulsi_const9 */ + COSTS_N_INSNS (15/2)+2, /* muldi */ + COSTS_N_INSNS (38/2), /* divsi */ + COSTS_N_INSNS (70/2), /* divdi */ + COSTS_N_INSNS (10/2), /* fp */ + COSTS_N_INSNS (10/2), /* dmul */ + COSTS_N_INSNS (74/2), /* sdiv */ + COSTS_N_INSNS (74/2), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 512, /* l2 cache */ + 6, /* streams */ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on PPC750 and PPC7400 processors. */ +static const +struct processor_costs ppc750_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (3), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (5), /* muldi */ + COSTS_N_INSNS (17), /* divsi */ + COSTS_N_INSNS (17), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (17), /* sdiv */ + COSTS_N_INSNS (31), /* ddiv */ + 32, /* cache line size */ + 32, /* l1 cache */ + 512, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on PPC7450 processors. */ +static const +struct processor_costs ppc7450_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (3), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (23), /* divsi */ + COSTS_N_INSNS (23), /* divdi */ + COSTS_N_INSNS (5), /* fp */ + COSTS_N_INSNS (5), /* dmul */ + COSTS_N_INSNS (21), /* sdiv */ + COSTS_N_INSNS (35), /* ddiv */ + 32, /* cache line size */ + 32, /* l1 cache */ + 1024, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on PPC8540 processors. */ +static const +struct processor_costs ppc8540_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (19), /* divsi */ + COSTS_N_INSNS (19), /* divdi */ + COSTS_N_INSNS (4), /* fp */ + COSTS_N_INSNS (4), /* dmul */ + COSTS_N_INSNS (29), /* sdiv */ + COSTS_N_INSNS (29), /* ddiv */ + 32, /* cache line size */ + 32, /* l1 cache */ + 256, /* l2 cache */ + 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on E300C2 and E300C3 cores. 
*/ +static const +struct processor_costs ppce300c2c3_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (19), /* divsi */ + COSTS_N_INSNS (19), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (4), /* dmul */ + COSTS_N_INSNS (18), /* sdiv */ + COSTS_N_INSNS (33), /* ddiv */ + 32, + 16, /* l1 cache */ + 16, /* l2 cache */ + 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on PPCE500MC processors. */ +static const +struct processor_costs ppce500mc_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (14), /* divsi */ + COSTS_N_INSNS (14), /* divdi */ + COSTS_N_INSNS (8), /* fp */ + COSTS_N_INSNS (10), /* dmul */ + COSTS_N_INSNS (36), /* sdiv */ + COSTS_N_INSNS (66), /* ddiv */ + 64, /* cache line size */ + 32, /* l1 cache */ + 128, /* l2 cache */ + 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on PPCE500MC64 processors. */ +static const +struct processor_costs ppce500mc64_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (14), /* divsi */ + COSTS_N_INSNS (14), /* divdi */ + COSTS_N_INSNS (4), /* fp */ + COSTS_N_INSNS (10), /* dmul */ + COSTS_N_INSNS (36), /* sdiv */ + COSTS_N_INSNS (66), /* ddiv */ + 64, /* cache line size */ + 32, /* l1 cache */ + 128, /* l2 cache */ + 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on PPCE5500 processors. */ +static const +struct processor_costs ppce5500_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (5), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (5), /* muldi */ + COSTS_N_INSNS (14), /* divsi */ + COSTS_N_INSNS (14), /* divdi */ + COSTS_N_INSNS (7), /* fp */ + COSTS_N_INSNS (10), /* dmul */ + COSTS_N_INSNS (36), /* sdiv */ + COSTS_N_INSNS (66), /* ddiv */ + 64, /* cache line size */ + 32, /* l1 cache */ + 128, /* l2 cache */ + 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on PPCE6500 processors. */ +static const +struct processor_costs ppce6500_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (5), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (5), /* muldi */ + COSTS_N_INSNS (14), /* divsi */ + COSTS_N_INSNS (14), /* divdi */ + COSTS_N_INSNS (7), /* fp */ + COSTS_N_INSNS (10), /* dmul */ + COSTS_N_INSNS (36), /* sdiv */ + COSTS_N_INSNS (66), /* ddiv */ + 64, /* cache line size */ + 32, /* l1 cache */ + 128, /* l2 cache */ + 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on AppliedMicro Titan processors. */ +static const +struct processor_costs titan_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (5), /* mulsi_const */ + COSTS_N_INSNS (5), /* mulsi_const9 */ + COSTS_N_INSNS (5), /* muldi */ + COSTS_N_INSNS (18), /* divsi */ + COSTS_N_INSNS (18), /* divdi */ + COSTS_N_INSNS (10), /* fp */ + COSTS_N_INSNS (10), /* dmul */ + COSTS_N_INSNS (46), /* sdiv */ + COSTS_N_INSNS (72), /* ddiv */ + 32, /* cache line size */ + 32, /* l1 cache */ + 512, /* l2 cache */ + 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on POWER4 and POWER5 processors. 
*/ +static const +struct processor_costs power4_cost = { + COSTS_N_INSNS (3), /* mulsi */ + COSTS_N_INSNS (2), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (18), /* divsi */ + COSTS_N_INSNS (34), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (17), /* sdiv */ + COSTS_N_INSNS (17), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 1024, /* l2 cache */ + 8, /* prefetch streams /*/ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on POWER6 processors. */ +static const +struct processor_costs power6_cost = { + COSTS_N_INSNS (8), /* mulsi */ + COSTS_N_INSNS (8), /* mulsi_const */ + COSTS_N_INSNS (8), /* mulsi_const9 */ + COSTS_N_INSNS (8), /* muldi */ + COSTS_N_INSNS (22), /* divsi */ + COSTS_N_INSNS (28), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (13), /* sdiv */ + COSTS_N_INSNS (16), /* ddiv */ + 128, /* cache line size */ + 64, /* l1 cache */ + 2048, /* l2 cache */ + 16, /* prefetch streams */ + 0, /* SF->DF convert */ +}; + +/* Instruction costs on POWER7 processors. */ +static const +struct processor_costs power7_cost = { + COSTS_N_INSNS (2), /* mulsi */ + COSTS_N_INSNS (2), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (2), /* muldi */ + COSTS_N_INSNS (18), /* divsi */ + COSTS_N_INSNS (34), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (13), /* sdiv */ + COSTS_N_INSNS (16), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 256, /* l2 cache */ + 12, /* prefetch streams */ + COSTS_N_INSNS (3), /* SF->DF convert */ +}; + +/* Instruction costs on POWER8 processors. */ +static const +struct processor_costs power8_cost = { + COSTS_N_INSNS (3), /* mulsi */ + COSTS_N_INSNS (3), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (3), /* muldi */ + COSTS_N_INSNS (19), /* divsi */ + COSTS_N_INSNS (35), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (14), /* sdiv */ + COSTS_N_INSNS (17), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 256, /* l2 cache */ + 12, /* prefetch streams */ + COSTS_N_INSNS (3), /* SF->DF convert */ +}; + +/* Instruction costs on POWER9 processors. */ +static const +struct processor_costs power9_cost = { + COSTS_N_INSNS (3), /* mulsi */ + COSTS_N_INSNS (3), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (3), /* muldi */ + COSTS_N_INSNS (8), /* divsi */ + COSTS_N_INSNS (12), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (13), /* sdiv */ + COSTS_N_INSNS (18), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 512, /* l2 cache */ + 8, /* prefetch streams */ + COSTS_N_INSNS (3), /* SF->DF convert */ +}; + +/* Instruction costs on POWER A2 processors. */ +static const +struct processor_costs ppca2_cost = { + COSTS_N_INSNS (16), /* mulsi */ + COSTS_N_INSNS (16), /* mulsi_const */ + COSTS_N_INSNS (16), /* mulsi_const9 */ + COSTS_N_INSNS (16), /* muldi */ + COSTS_N_INSNS (22), /* divsi */ + COSTS_N_INSNS (28), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (59), /* sdiv */ + COSTS_N_INSNS (72), /* ddiv */ + 64, + 16, /* l1 cache */ + 2048, /* l2 cache */ + 16, /* prefetch streams */ + 0, /* SF->DF convert */ +}; + + +/* Table that classifies rs6000 builtin functions (pure, const, etc.). 
*/ +#undef RS6000_BUILTIN_0 +#undef RS6000_BUILTIN_1 +#undef RS6000_BUILTIN_2 +#undef RS6000_BUILTIN_3 +#undef RS6000_BUILTIN_A +#undef RS6000_BUILTIN_D +#undef RS6000_BUILTIN_E +#undef RS6000_BUILTIN_H +#undef RS6000_BUILTIN_P +#undef RS6000_BUILTIN_Q +#undef RS6000_BUILTIN_S +#undef RS6000_BUILTIN_X + +#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \ + { NAME, ICODE, MASK, ATTR }, + +#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \ + { NAME, ICODE, MASK, ATTR }, + +#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \ + { NAME, ICODE, MASK, ATTR }, + +#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \ + { NAME, ICODE, MASK, ATTR }, + +#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \ + { NAME, ICODE, MASK, ATTR }, + +#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \ + { NAME, ICODE, MASK, ATTR }, + +#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \ + { NAME, ICODE, MASK, ATTR }, + +#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \ + { NAME, ICODE, MASK, ATTR }, + +#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \ + { NAME, ICODE, MASK, ATTR }, + +#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \ + { NAME, ICODE, MASK, ATTR }, + +#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \ + { NAME, ICODE, MASK, ATTR }, + +#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \ + { NAME, ICODE, MASK, ATTR }, + +struct rs6000_builtin_info_type { + const char *name; + const enum insn_code icode; + const HOST_WIDE_INT mask; + const unsigned attr; +}; + +static const struct rs6000_builtin_info_type rs6000_builtin_info[] = +{ +#include "powerpcspe-builtin.def" +}; + +#undef RS6000_BUILTIN_0 +#undef RS6000_BUILTIN_1 +#undef RS6000_BUILTIN_2 +#undef RS6000_BUILTIN_3 +#undef RS6000_BUILTIN_A +#undef RS6000_BUILTIN_D +#undef RS6000_BUILTIN_E +#undef RS6000_BUILTIN_H +#undef RS6000_BUILTIN_P +#undef RS6000_BUILTIN_Q +#undef RS6000_BUILTIN_S +#undef RS6000_BUILTIN_X + +/* Support for -mveclibabi=<xxx> to control which vector library to use. 
*/ +static tree (*rs6000_veclib_handler) (combined_fn, tree, tree); + + +static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool); +static bool spe_func_has_64bit_regs_p (void); +static struct machine_function * rs6000_init_machine_status (void); +static int rs6000_ra_ever_killed (void); +static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *); +static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *); +static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *); +static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree); +static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT); +static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool); +static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool); +static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t, + bool); +static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int, + unsigned int); +static bool is_microcoded_insn (rtx_insn *); +static bool is_nonpipeline_insn (rtx_insn *); +static bool is_cracked_insn (rtx_insn *); +static bool is_load_insn (rtx, rtx *); +static bool is_store_insn (rtx, rtx *); +static bool set_to_load_agen (rtx_insn *,rtx_insn *); +static bool insn_terminates_group_p (rtx_insn *, enum group_termination); +static bool insn_must_be_first_in_group (rtx_insn *); +static bool insn_must_be_last_in_group (rtx_insn *); +static void altivec_init_builtins (void); +static tree builtin_function_type (machine_mode, machine_mode, + machine_mode, machine_mode, + enum rs6000_builtins, const char *name); +static void rs6000_common_init_builtins (void); +static void paired_init_builtins (void); +static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx); +static void spe_init_builtins (void); +static void htm_init_builtins (void); +static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx); +static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx); +static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx); +static rs6000_stack_t *rs6000_stack_info (void); +static void is_altivec_return_reg (rtx, void *); +int easy_vector_constant (rtx, machine_mode); +static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode); +static rtx rs6000_legitimize_tls_address (rtx, enum tls_model); +static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree, + bool, bool); +#if TARGET_MACHO +static void macho_branch_islands (void); +#endif +static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int, + int, int *); +static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int, + int, int, int *); +static bool rs6000_mode_dependent_address (const_rtx); +static bool rs6000_debug_mode_dependent_address (const_rtx); +static enum reg_class rs6000_secondary_reload_class (enum reg_class, + machine_mode, rtx); +static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class, + machine_mode, + rtx); +static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class); +static enum reg_class rs6000_debug_preferred_reload_class (rtx, + enum reg_class); +static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class, + machine_mode); +static bool rs6000_debug_secondary_memory_needed (enum reg_class, + enum reg_class, + machine_mode); +static bool rs6000_cannot_change_mode_class (machine_mode, + machine_mode, + enum reg_class); +static bool rs6000_debug_cannot_change_mode_class (machine_mode, + machine_mode, + enum reg_class); 
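[Editor's note] Several of the address and reload hooks declared above come in pairs, a normal version and an rs6000_debug_ wrapper, and the code that follows reseats a function pointer to choose between them so that -mdebug tracing can be enabled without touching any call site. A minimal, self-contained sketch of that pattern, using hypothetical example_* names rather than the real hooks:

#include <stdbool.h>
#include <stdio.h>

/* Sketch only -- example_* stands in for a paired rs6000_... /
   rs6000_debug_... hook and its *_ptr dispatch pointer.  */
static bool
example_hook (int x)
{
  return x > 0;
}

static bool
example_debug_hook (int x)
{
  bool ret = example_hook (x);                                 /* do the real work ...  */
  fprintf (stderr, "example_hook (%d) = %d\n", x, (int) ret);  /* ... then trace it.  */
  return ret;
}

/* Call sites always go through the pointer; option processing can later do
   "example_hook_ptr = example_debug_hook;" when debugging is requested.  */
static bool (*example_hook_ptr) (int) = example_hook;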
+static bool rs6000_save_toc_in_prologue_p (void); +static rtx rs6000_internal_arg_pointer (void); + +rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int, + int, int *) + = rs6000_legitimize_reload_address; + +static bool (*rs6000_mode_dependent_address_ptr) (const_rtx) + = rs6000_mode_dependent_address; + +enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class, + machine_mode, rtx) + = rs6000_secondary_reload_class; + +enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class) + = rs6000_preferred_reload_class; + +bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class, + machine_mode) + = rs6000_secondary_memory_needed; + +bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode, + machine_mode, + enum reg_class) + = rs6000_cannot_change_mode_class; + +const int INSN_NOT_AVAILABLE = -1; + +static void rs6000_print_isa_options (FILE *, int, const char *, + HOST_WIDE_INT); +static void rs6000_print_builtin_options (FILE *, int, const char *, + HOST_WIDE_INT); +static HOST_WIDE_INT rs6000_disable_incompatible_switches (void); + +static enum rs6000_reg_type register_to_reg_type (rtx, bool *); +static bool rs6000_secondary_reload_move (enum rs6000_reg_type, + enum rs6000_reg_type, + machine_mode, + secondary_reload_info *, + bool); +rtl_opt_pass *make_pass_analyze_swaps (gcc::context*); +static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused)); +static tree rs6000_fold_builtin (tree, int, tree *, bool); + +/* Hash table stuff for keeping track of TOC entries. */ + +struct GTY((for_user)) toc_hash_struct +{ + /* `key' will satisfy CONSTANT_P; in fact, it will satisfy + ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */ + rtx key; + machine_mode key_mode; + int labelno; +}; + +struct toc_hasher : ggc_ptr_hash<toc_hash_struct> +{ + static hashval_t hash (toc_hash_struct *); + static bool equal (toc_hash_struct *, toc_hash_struct *); +}; + +static GTY (()) hash_table<toc_hasher> *toc_hash_table; + +/* Hash table to keep track of the argument types for builtin functions. */ + +struct GTY((for_user)) builtin_hash_struct +{ + tree type; + machine_mode mode[4]; /* return value + 3 arguments. */ + unsigned char uns_p[4]; /* and whether the types are unsigned. */ +}; + +struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct> +{ + static hashval_t hash (builtin_hash_struct *); + static bool equal (builtin_hash_struct *, builtin_hash_struct *); +}; + +static GTY (()) hash_table<builtin_hasher> *builtin_hash_table; + + +/* Default register names. */ +char rs6000_reg_names[][8] = +{ + "0", "1", "2", "3", "4", "5", "6", "7", + "8", "9", "10", "11", "12", "13", "14", "15", + "16", "17", "18", "19", "20", "21", "22", "23", + "24", "25", "26", "27", "28", "29", "30", "31", + "0", "1", "2", "3", "4", "5", "6", "7", + "8", "9", "10", "11", "12", "13", "14", "15", + "16", "17", "18", "19", "20", "21", "22", "23", + "24", "25", "26", "27", "28", "29", "30", "31", + "mq", "lr", "ctr","ap", + "0", "1", "2", "3", "4", "5", "6", "7", + "ca", + /* AltiVec registers. */ + "0", "1", "2", "3", "4", "5", "6", "7", + "8", "9", "10", "11", "12", "13", "14", "15", + "16", "17", "18", "19", "20", "21", "22", "23", + "24", "25", "26", "27", "28", "29", "30", "31", + "vrsave", "vscr", + /* SPE registers. */ + "spe_acc", "spefscr", + /* Soft frame pointer. */ + "sfp", + /* HTM SPR registers. */ + "tfhar", "tfiar", "texasr", + /* SPE High registers. 
*/ + "0", "1", "2", "3", "4", "5", "6", "7", + "8", "9", "10", "11", "12", "13", "14", "15", + "16", "17", "18", "19", "20", "21", "22", "23", + "24", "25", "26", "27", "28", "29", "30", "31" +}; + +#ifdef TARGET_REGNAMES +static const char alt_reg_names[][8] = +{ + "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", + "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", + "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23", + "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31", + "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7", + "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15", + "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23", + "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31", + "mq", "lr", "ctr", "ap", + "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7", + "ca", + /* AltiVec registers. */ + "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7", + "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15", + "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23", + "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31", + "vrsave", "vscr", + /* SPE registers. */ + "spe_acc", "spefscr", + /* Soft frame pointer. */ + "sfp", + /* HTM SPR registers. */ + "tfhar", "tfiar", "texasr", + /* SPE High registers. */ + "%rh0", "%rh1", "%rh2", "%rh3", "%rh4", "%rh5", "%rh6", "%rh7", + "%rh8", "%rh9", "%rh10", "%r11", "%rh12", "%rh13", "%rh14", "%rh15", + "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23", + "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31" +}; +#endif + +/* Table of valid machine attributes. */ + +static const struct attribute_spec rs6000_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, + affects_type_identity } */ + { "altivec", 1, 1, false, true, false, rs6000_handle_altivec_attribute, + false }, + { "longcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute, + false }, + { "shortcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute, + false }, + { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute, + false }, + { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute, + false }, +#ifdef SUBTARGET_ATTRIBUTE_TABLE + SUBTARGET_ATTRIBUTE_TABLE, +#endif + { NULL, 0, 0, false, false, false, NULL, false } +}; + +#ifndef TARGET_PROFILE_KERNEL +#define TARGET_PROFILE_KERNEL 0 +#endif + +/* The VRSAVE bitmask puts bit %v0 as the most significant bit. */ +#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO)) + +/* Initialize the GCC target structure. */ +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table +#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES +#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes +#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P +#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p + +#undef TARGET_ASM_ALIGNED_DI_OP +#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP + +/* Default unaligned ops are only provided for ELF. Find the ops needed + for non-ELF systems. */ +#ifndef OBJECT_FORMAT_ELF +#if TARGET_XCOFF +/* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on + 64-bit targets. 
*/ +#undef TARGET_ASM_UNALIGNED_HI_OP +#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2," +#undef TARGET_ASM_UNALIGNED_SI_OP +#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4," +#undef TARGET_ASM_UNALIGNED_DI_OP +#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8," +#else +/* For Darwin. */ +#undef TARGET_ASM_UNALIGNED_HI_OP +#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t" +#undef TARGET_ASM_UNALIGNED_SI_OP +#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t" +#undef TARGET_ASM_UNALIGNED_DI_OP +#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t" +#undef TARGET_ASM_ALIGNED_DI_OP +#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t" +#endif +#endif + +/* This hook deals with fixups for relocatable code and DI-mode objects + in 64-bit code. */ +#undef TARGET_ASM_INTEGER +#define TARGET_ASM_INTEGER rs6000_assemble_integer + +#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO +#undef TARGET_ASM_ASSEMBLE_VISIBILITY +#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility +#endif + +#undef TARGET_SET_UP_BY_PROLOGUE +#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue + +#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS +#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components +#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB +#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb +#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS +#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components +#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS +#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components +#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS +#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components +#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS +#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components + +#undef TARGET_EXTRA_LIVE_ON_ENTRY +#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry + +#undef TARGET_INTERNAL_ARG_POINTER +#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer + +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS HAVE_AS_TLS + +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem + +#undef TARGET_DELEGITIMIZE_ADDRESS +#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address + +#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P +#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p + +#undef TARGET_LEGITIMATE_COMBINED_INSN +#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn + +#undef TARGET_ASM_FUNCTION_PROLOGUE +#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue +#undef TARGET_ASM_FUNCTION_EPILOGUE +#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue + +#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA +#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra + +#undef TARGET_LEGITIMIZE_ADDRESS +#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address + +#undef TARGET_SCHED_VARIABLE_ISSUE +#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue + +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost +#undef TARGET_SCHED_ADJUST_PRIORITY +#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority +#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE +#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence +#undef TARGET_SCHED_INIT +#define TARGET_SCHED_INIT rs6000_sched_init +#undef 
TARGET_SCHED_FINISH +#define TARGET_SCHED_FINISH rs6000_sched_finish +#undef TARGET_SCHED_REORDER +#define TARGET_SCHED_REORDER rs6000_sched_reorder +#undef TARGET_SCHED_REORDER2 +#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2 + +#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD +#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead + +#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD +#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard + +#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT +#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context +#undef TARGET_SCHED_INIT_SCHED_CONTEXT +#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context +#undef TARGET_SCHED_SET_SCHED_CONTEXT +#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context +#undef TARGET_SCHED_FREE_SCHED_CONTEXT +#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context + +#undef TARGET_SCHED_CAN_SPECULATE_INSN +#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn + +#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD +#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load +#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT +#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \ + rs6000_builtin_support_vector_misalignment +#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE +#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST +#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ + rs6000_builtin_vectorization_cost +#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE +#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \ + rs6000_preferred_simd_mode +#undef TARGET_VECTORIZE_INIT_COST +#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost +#undef TARGET_VECTORIZE_ADD_STMT_COST +#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost +#undef TARGET_VECTORIZE_FINISH_COST +#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost +#undef TARGET_VECTORIZE_DESTROY_COST_DATA +#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS rs6000_init_builtins +#undef TARGET_BUILTIN_DECL +#define TARGET_BUILTIN_DECL rs6000_builtin_decl + +#undef TARGET_FOLD_BUILTIN +#define TARGET_FOLD_BUILTIN rs6000_fold_builtin +#undef TARGET_GIMPLE_FOLD_BUILTIN +#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin + +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin + +#undef TARGET_MANGLE_TYPE +#define TARGET_MANGLE_TYPE rs6000_mangle_type + +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs + +#if TARGET_MACHO +#undef TARGET_BINDS_LOCAL_P +#define TARGET_BINDS_LOCAL_P darwin_binds_local_p +#endif + +#undef TARGET_MS_BITFIELD_LAYOUT_P +#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk + +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall + +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost +#undef TARGET_MEMORY_MOVE_COST +#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost +#undef TARGET_CANNOT_COPY_INSN_P +#define TARGET_CANNOT_COPY_INSN_P 
rs6000_cannot_copy_insn_p +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS rs6000_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 + +#undef TARGET_DWARF_REGISTER_SPAN +#define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span + +#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA +#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra + +#undef TARGET_MEMBER_TYPE_FORCES_BLK +#define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode + +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory + +#undef TARGET_RETURN_IN_MSB +#define TARGET_RETURN_IN_MSB rs6000_return_in_msb + +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs + +/* Always strict argument naming on rs6000. */ +#undef TARGET_STRICT_ARGUMENT_NAMING +#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true +#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED +#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true +#undef TARGET_SPLIT_COMPLEX_ARG +#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG rs6000_function_arg +#undef TARGET_FUNCTION_ARG_BOUNDARY +#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary + +#undef TARGET_BUILD_BUILTIN_VA_LIST +#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list + +#undef TARGET_EXPAND_BUILTIN_VA_START +#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start + +#undef TARGET_GIMPLIFY_VA_ARG_EXPR +#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg + +#undef TARGET_EH_RETURN_FILTER_MODE +#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode + +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p + +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p + +#undef TARGET_FLOATN_MODE +#define TARGET_FLOATN_MODE rs6000_floatn_mode + +#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN +#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn + +#undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP +#define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip + +#undef TARGET_MD_ASM_ADJUST +#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE rs6000_option_override + +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION +#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ + rs6000_builtin_vectorized_function + +#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION +#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \ + rs6000_builtin_md_vectorized_function + +#undef TARGET_STACK_PROTECT_GUARD +#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard + +#if !TARGET_MACHO +#undef TARGET_STACK_PROTECT_FAIL +#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail +#endif + +#ifdef HAVE_AS_TLS +#undef TARGET_ASM_OUTPUT_DWARF_DTPREL +#define 
TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel +#endif + +/* Use a 32-bit anchor range. This leads to sequences like: + + addis tmp,anchor,high + add dest,tmp,low + + where tmp itself acts as an anchor, and can be shared between + accesses to the same 64k page. */ +#undef TARGET_MIN_ANCHOR_OFFSET +#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1 +#undef TARGET_MAX_ANCHOR_OFFSET +#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff +#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P +#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p +#undef TARGET_USE_BLOCKS_FOR_DECL_P +#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p + +#undef TARGET_BUILTIN_RECIPROCAL +#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal + +#undef TARGET_EXPAND_TO_RTL_HOOK +#define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot + +#undef TARGET_INSTANTIATE_DECLS +#define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls + +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p + +#undef TARGET_MODE_DEPENDENT_ADDRESS_P +#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p + +#undef TARGET_LRA_P +#define TARGET_LRA_P rs6000_lra_p + +#undef TARGET_COMPUTE_PRESSURE_CLASSES +#define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes + +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE rs6000_can_eliminate + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage + +#undef TARGET_SCHED_REASSOCIATION_WIDTH +#define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width + +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE rs6000_function_value + +#undef TARGET_OPTION_VALID_ATTRIBUTE_P +#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p + +#undef TARGET_OPTION_SAVE +#define TARGET_OPTION_SAVE rs6000_function_specific_save + +#undef TARGET_OPTION_RESTORE +#define TARGET_OPTION_RESTORE rs6000_function_specific_restore + +#undef TARGET_OPTION_PRINT +#define TARGET_OPTION_PRINT rs6000_function_specific_print + +#undef TARGET_CAN_INLINE_P +#define TARGET_CAN_INLINE_P rs6000_can_inline_p + +#undef TARGET_SET_CURRENT_FUNCTION +#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function + +#undef TARGET_LEGITIMATE_CONSTANT_P +#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p + +#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK +#define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok + +#undef TARGET_CAN_USE_DOLOOP_P +#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost + +#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV +#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv + +#undef TARGET_LIBGCC_CMP_RETURN_MODE +#define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode +#undef TARGET_LIBGCC_SHIFT_COUNT_MODE +#define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode +#undef TARGET_UNWIND_WORD_MODE +#define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode + +#undef TARGET_OFFLOAD_OPTIONS +#define TARGET_OFFLOAD_OPTIONS rs6000_offload_options + +#undef TARGET_C_MODE_FOR_SUFFIX +#define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix + +#undef TARGET_INVALID_BINARY_OP +#define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op + +#undef TARGET_OPTAB_SUPPORTED_P +#define 
TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p + +#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS +#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1 + + +/* Processor table. */ +struct rs6000_ptt +{ + const char *const name; /* Canonical processor name. */ + const enum processor_type processor; /* Processor type enum value. */ + const HOST_WIDE_INT target_enable; /* Target flags to enable. */ +}; + +static struct rs6000_ptt const processor_target_table[] = +{ +#define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS }, +#include "powerpcspe-cpus.def" +#undef RS6000_CPU +}; + +/* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the + name is invalid. */ + +static int +rs6000_cpu_name_lookup (const char *name) +{ + size_t i; + + if (name != NULL) + { + for (i = 0; i < ARRAY_SIZE (processor_target_table); i++) + if (! strcmp (name, processor_target_table[i].name)) + return (int)i; + } + + return -1; +} + + +/* Return number of consecutive hard regs needed starting at reg REGNO + to hold something of mode MODE. + This is ordinarily the length in words of a value of mode MODE + but can be less for certain modes in special long registers. + + For the SPE, GPRs are 64 bits but only 32 bits are visible in + scalar instructions. The upper 32 bits are only available to the + SIMD instructions. + + POWER and PowerPC GPRs hold 32 bits worth; + PowerPC64 GPRs and FPRs point register holds 64 bits worth. */ + +static int +rs6000_hard_regno_nregs_internal (int regno, machine_mode mode) +{ + unsigned HOST_WIDE_INT reg_size; + + /* 128-bit floating point usually takes 2 registers, unless it is IEEE + 128-bit floating point that can go in vector registers, which has VSX + memory addressing. */ + if (FP_REGNO_P (regno)) + reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode) + ? UNITS_PER_VSX_WORD + : UNITS_PER_FP_WORD); + + else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode)) + reg_size = UNITS_PER_SPE_WORD; + + else if (ALTIVEC_REGNO_P (regno)) + reg_size = UNITS_PER_ALTIVEC_WORD; + + /* The value returned for SCmode in the E500 double case is 2 for + ABI compatibility; storing an SCmode value in a single register + would require function_arg and rs6000_spe_function_arg to handle + SCmode so as to pass the value correctly in a pair of + registers. */ + else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode + && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno)) + reg_size = UNITS_PER_FP_WORD; + + else + reg_size = UNITS_PER_WORD; + + return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size; +} + +/* Value is 1 if hard register REGNO can hold a value of machine-mode + MODE. */ +static int +rs6000_hard_regno_mode_ok (int regno, machine_mode mode) +{ + int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1; + + if (COMPLEX_MODE_P (mode)) + mode = GET_MODE_INNER (mode); + + /* PTImode can only go in GPRs. Quad word memory operations require even/odd + register combinations, and use PTImode where we need to deal with quad + word memory operations. Don't allow quad words in the argument or frame + pointer registers, just registers 0..31. */ + if (mode == PTImode) + return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO) + && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO) + && ((regno & 1) == 0)); + + /* VSX registers that overlap the FPR registers are larger than for non-VSX + implementations. Don't allow an item to be split between a FP register + and an Altivec register. 
Allow TImode in all VSX registers if the user + asked for it. */ + if (TARGET_VSX && VSX_REGNO_P (regno) + && (VECTOR_MEM_VSX_P (mode) + || FLOAT128_VECTOR_P (mode) + || reg_addr[mode].scalar_in_vmx_p + || (TARGET_VSX_TIMODE && mode == TImode) + || (TARGET_VADDUQM && mode == V1TImode))) + { + if (FP_REGNO_P (regno)) + return FP_REGNO_P (last_regno); + + if (ALTIVEC_REGNO_P (regno)) + { + if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p) + return 0; + + return ALTIVEC_REGNO_P (last_regno); + } + } + + /* The GPRs can hold any mode, but values bigger than one register + cannot go past R31. */ + if (INT_REGNO_P (regno)) + return INT_REGNO_P (last_regno); + + /* The float registers (except for VSX vector modes) can only hold floating + modes and DImode. */ + if (FP_REGNO_P (regno)) + { + if (FLOAT128_VECTOR_P (mode)) + return false; + + if (SCALAR_FLOAT_MODE_P (mode) + && (mode != TDmode || (regno % 2) == 0) + && FP_REGNO_P (last_regno)) + return 1; + + if (GET_MODE_CLASS (mode) == MODE_INT) + { + if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD) + return 1; + + if (TARGET_VSX_SMALL_INTEGER) + { + if (mode == SImode) + return 1; + + if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode)) + return 1; + } + } + + if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT + && PAIRED_VECTOR_MODE (mode)) + return 1; + + return 0; + } + + /* The CR register can only hold CC modes. */ + if (CR_REGNO_P (regno)) + return GET_MODE_CLASS (mode) == MODE_CC; + + if (CA_REGNO_P (regno)) + return mode == Pmode || mode == SImode; + + /* AltiVec only in AltiVec registers. */ + if (ALTIVEC_REGNO_P (regno)) + return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) + || mode == V1TImode); + + /* ...but GPRs can hold SIMD data on the SPE in one register. */ + if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode)) + return 1; + + /* We cannot put non-VSX TImode or PTImode anywhere except general registers, + and it must be able to fit within the register set. */ + + return GET_MODE_SIZE (mode) <= UNITS_PER_WORD; +} + +/* Print interesting facts about registers. 
*/ +static void +rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name) +{ + int r, m; + + for (r = first_regno; r <= last_regno; ++r) + { + const char *comma = ""; + int len; + + if (first_regno == last_regno) + fprintf (stderr, "%s:\t", reg_name); + else + fprintf (stderr, "%s%d:\t", reg_name, r - first_regno); + + len = 8; + for (m = 0; m < NUM_MACHINE_MODES; ++m) + if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r]) + { + if (len > 70) + { + fprintf (stderr, ",\n\t"); + len = 8; + comma = ""; + } + + if (rs6000_hard_regno_nregs[m][r] > 1) + len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m), + rs6000_hard_regno_nregs[m][r]); + else + len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m)); + + comma = ", "; + } + + if (call_used_regs[r]) + { + if (len > 70) + { + fprintf (stderr, ",\n\t"); + len = 8; + comma = ""; + } + + len += fprintf (stderr, "%s%s", comma, "call-used"); + comma = ", "; + } + + if (fixed_regs[r]) + { + if (len > 70) + { + fprintf (stderr, ",\n\t"); + len = 8; + comma = ""; + } + + len += fprintf (stderr, "%s%s", comma, "fixed"); + comma = ", "; + } + + if (len > 70) + { + fprintf (stderr, ",\n\t"); + len = 8; + comma = ""; + } + + len += fprintf (stderr, "%sreg-class = %s", comma, + reg_class_names[(int)rs6000_regno_regclass[r]]); + comma = ", "; + + if (len > 70) + { + fprintf (stderr, ",\n\t"); + comma = ""; + } + + fprintf (stderr, "%sregno = %d\n", comma, r); + } +} + +static const char * +rs6000_debug_vector_unit (enum rs6000_vector v) +{ + const char *ret; + + switch (v) + { + case VECTOR_NONE: ret = "none"; break; + case VECTOR_ALTIVEC: ret = "altivec"; break; + case VECTOR_VSX: ret = "vsx"; break; + case VECTOR_P8_VECTOR: ret = "p8_vector"; break; + case VECTOR_PAIRED: ret = "paired"; break; + case VECTOR_SPE: ret = "spe"; break; + case VECTOR_OTHER: ret = "other"; break; + default: ret = "unknown"; break; + } + + return ret; +} + +/* Inner function printing just the address mask for a particular reload + register class. */ +DEBUG_FUNCTION char * +rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces) +{ + static char ret[8]; + char *p = ret; + + if ((mask & RELOAD_REG_VALID) != 0) + *p++ = 'v'; + else if (keep_spaces) + *p++ = ' '; + + if ((mask & RELOAD_REG_MULTIPLE) != 0) + *p++ = 'm'; + else if (keep_spaces) + *p++ = ' '; + + if ((mask & RELOAD_REG_INDEXED) != 0) + *p++ = 'i'; + else if (keep_spaces) + *p++ = ' '; + + if ((mask & RELOAD_REG_QUAD_OFFSET) != 0) + *p++ = 'O'; + else if ((mask & RELOAD_REG_OFFSET) != 0) + *p++ = 'o'; + else if (keep_spaces) + *p++ = ' '; + + if ((mask & RELOAD_REG_PRE_INCDEC) != 0) + *p++ = '+'; + else if (keep_spaces) + *p++ = ' '; + + if ((mask & RELOAD_REG_PRE_MODIFY) != 0) + *p++ = '+'; + else if (keep_spaces) + *p++ = ' '; + + if ((mask & RELOAD_REG_AND_M16) != 0) + *p++ = '&'; + else if (keep_spaces) + *p++ = ' '; + + *p = '\0'; + + return ret; +} + +/* Print the address masks in a human readable fashion. */ +DEBUG_FUNCTION void +rs6000_debug_print_mode (ssize_t m) +{ + ssize_t rc; + int spaces = 0; + bool fuse_extra_p; + + fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m)); + for (rc = 0; rc < N_RELOAD_REG; rc++) + fprintf (stderr, " %s: %s", reload_reg_map[rc].name, + rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true)); + + if ((reg_addr[m].reload_store != CODE_FOR_nothing) + || (reg_addr[m].reload_load != CODE_FOR_nothing)) + fprintf (stderr, " Reload=%c%c", + (reg_addr[m].reload_store != CODE_FOR_nothing) ? 
's' : '*', + (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*'); + else + spaces += sizeof (" Reload=sl") - 1; + + if (reg_addr[m].scalar_in_vmx_p) + { + fprintf (stderr, "%*s Upper=y", spaces, ""); + spaces = 0; + } + else + spaces += sizeof (" Upper=y") - 1; + + fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing) + || reg_addr[m].fused_toc); + if (!fuse_extra_p) + { + for (rc = 0; rc < N_RELOAD_REG; rc++) + { + if (rc != RELOAD_REG_ANY) + { + if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing + || reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing + || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing + || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing + || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing) + { + fuse_extra_p = true; + break; + } + } + } + } + + if (fuse_extra_p) + { + fprintf (stderr, "%*s Fuse:", spaces, ""); + spaces = 0; + + for (rc = 0; rc < N_RELOAD_REG; rc++) + { + if (rc != RELOAD_REG_ANY) + { + char load, store; + + if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing) + load = 'l'; + else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing) + load = 'L'; + else + load = '-'; + + if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing) + store = 's'; + else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing) + store = 'S'; + else + store = '-'; + + if (load == '-' && store == '-') + spaces += 5; + else + { + fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "", + reload_reg_map[rc].name[0], load, store); + spaces = 0; + } + } + } + + if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing) + { + fprintf (stderr, "%*sP8gpr", (spaces + 1), ""); + spaces = 0; + } + else + spaces += sizeof (" P8gpr") - 1; + + if (reg_addr[m].fused_toc) + { + fprintf (stderr, "%*sToc", (spaces + 1), ""); + spaces = 0; + } + else + spaces += sizeof (" Toc") - 1; + } + else + spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1; + + if (rs6000_vector_unit[m] != VECTOR_NONE + || rs6000_vector_mem[m] != VECTOR_NONE) + { + fprintf (stderr, "%*s vector: arith=%-10s mem=%s", + spaces, "", + rs6000_debug_vector_unit (rs6000_vector_unit[m]), + rs6000_debug_vector_unit (rs6000_vector_mem[m])); + } + + fputs ("\n", stderr); +} + +#define DEBUG_FMT_ID "%-32s= " +#define DEBUG_FMT_D DEBUG_FMT_ID "%d\n" +#define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: " +#define DEBUG_FMT_S DEBUG_FMT_ID "%s\n" + +/* Print various interesting information with -mdebug=reg. */ +static void +rs6000_debug_reg_global (void) +{ + static const char *const tf[2] = { "false", "true" }; + const char *nl = (const char *)0; + int m; + size_t m1, m2, v; + char costly_num[20]; + char nop_num[20]; + char flags_buffer[40]; + const char *costly_str; + const char *nop_str; + const char *trace_str; + const char *abi_str; + const char *cmodel_str; + struct cl_target_option cl_opts; + + /* Modes we want tieable information on. */ + static const machine_mode print_tieable_modes[] = { + QImode, + HImode, + SImode, + DImode, + TImode, + PTImode, + SFmode, + DFmode, + TFmode, + IFmode, + KFmode, + SDmode, + DDmode, + TDmode, + V8QImode, + V4HImode, + V2SImode, + V16QImode, + V8HImode, + V4SImode, + V2DImode, + V1TImode, + V32QImode, + V16HImode, + V8SImode, + V4DImode, + V2TImode, + V2SFmode, + V4SFmode, + V2DFmode, + V8SFmode, + V4DFmode, + CCmode, + CCUNSmode, + CCEQmode, + }; + + /* Virtual regs we are interested in. */ + const static struct { + int regno; /* register number. */ + const char *name; /* register name. 
*/ + } virtual_regs[] = { + { STACK_POINTER_REGNUM, "stack pointer:" }, + { TOC_REGNUM, "toc: " }, + { STATIC_CHAIN_REGNUM, "static chain: " }, + { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " }, + { HARD_FRAME_POINTER_REGNUM, "hard frame: " }, + { ARG_POINTER_REGNUM, "arg pointer: " }, + { FRAME_POINTER_REGNUM, "frame pointer:" }, + { FIRST_PSEUDO_REGISTER, "first pseudo: " }, + { FIRST_VIRTUAL_REGISTER, "first virtual:" }, + { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" }, + { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " }, + { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" }, + { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" }, + { VIRTUAL_CFA_REGNUM, "cfa (frame): " }, + { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundry:" }, + { LAST_VIRTUAL_REGISTER, "last virtual: " }, + }; + + fputs ("\nHard register information:\n", stderr); + rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr"); + rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp"); + rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO, + LAST_ALTIVEC_REGNO, + "vs"); + rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr"); + rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr"); + rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr"); + rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca"); + rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave"); + rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr"); + rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a"); + rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f"); + + fputs ("\nVirtual/stack/frame registers:\n", stderr); + for (v = 0; v < ARRAY_SIZE (virtual_regs); v++) + fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno); + + fprintf (stderr, + "\n" + "d reg_class = %s\n" + "f reg_class = %s\n" + "v reg_class = %s\n" + "wa reg_class = %s\n" + "wb reg_class = %s\n" + "wd reg_class = %s\n" + "we reg_class = %s\n" + "wf reg_class = %s\n" + "wg reg_class = %s\n" + "wh reg_class = %s\n" + "wi reg_class = %s\n" + "wj reg_class = %s\n" + "wk reg_class = %s\n" + "wl reg_class = %s\n" + "wm reg_class = %s\n" + "wo reg_class = %s\n" + "wp reg_class = %s\n" + "wq reg_class = %s\n" + "wr reg_class = %s\n" + "ws reg_class = %s\n" + "wt reg_class = %s\n" + "wu reg_class = %s\n" + "wv reg_class = %s\n" + "ww reg_class = %s\n" + "wx reg_class = %s\n" + "wy reg_class = %s\n" + "wz reg_class = %s\n" + "wA reg_class = %s\n" + "wH reg_class = %s\n" + "wI reg_class = %s\n" + "wJ reg_class = %s\n" + "wK reg_class = %s\n" + "\n", + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]], + 
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]); + + nl = "\n"; + for (m = 0; m < NUM_MACHINE_MODES; ++m) + rs6000_debug_print_mode (m); + + fputs ("\n", stderr); + + for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++) + { + machine_mode mode1 = print_tieable_modes[m1]; + bool first_time = true; + + nl = (const char *)0; + for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++) + { + machine_mode mode2 = print_tieable_modes[m2]; + if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2)) + { + if (first_time) + { + fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1)); + nl = "\n"; + first_time = false; + } + + fprintf (stderr, " %s", GET_MODE_NAME (mode2)); + } + } + + if (!first_time) + fputs ("\n", stderr); + } + + if (nl) + fputs (nl, stderr); + + if (rs6000_recip_control) + { + fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control); + + for (m = 0; m < NUM_MACHINE_MODES; ++m) + if (rs6000_recip_bits[m]) + { + fprintf (stderr, + "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n", + GET_MODE_NAME (m), + (RS6000_RECIP_AUTO_RE_P (m) + ? "auto" + : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")), + (RS6000_RECIP_AUTO_RSQRTE_P (m) + ? "auto" + : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none"))); + } + + fputs ("\n", stderr); + } + + if (rs6000_cpu_index >= 0) + { + const char *name = processor_target_table[rs6000_cpu_index].name; + HOST_WIDE_INT flags + = processor_target_table[rs6000_cpu_index].target_enable; + + sprintf (flags_buffer, "-mcpu=%s flags", name); + rs6000_print_isa_options (stderr, 0, flags_buffer, flags); + } + else + fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>"); + + if (rs6000_tune_index >= 0) + { + const char *name = processor_target_table[rs6000_tune_index].name; + HOST_WIDE_INT flags + = processor_target_table[rs6000_tune_index].target_enable; + + sprintf (flags_buffer, "-mtune=%s flags", name); + rs6000_print_isa_options (stderr, 0, flags_buffer, flags); + } + else + fprintf (stderr, DEBUG_FMT_S, "tune", "<none>"); + + cl_target_option_save (&cl_opts, &global_options); + rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags", + rs6000_isa_flags); + + rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit", + rs6000_isa_flags_explicit); + + rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask", + rs6000_builtin_mask); + + rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT); + + fprintf (stderr, DEBUG_FMT_S, "--with-cpu default", + OPTION_TARGET_CPU_DEFAULT ? 
OPTION_TARGET_CPU_DEFAULT : "<none>"); + + switch (rs6000_sched_costly_dep) + { + case max_dep_latency: + costly_str = "max_dep_latency"; + break; + + case no_dep_costly: + costly_str = "no_dep_costly"; + break; + + case all_deps_costly: + costly_str = "all_deps_costly"; + break; + + case true_store_to_load_dep_costly: + costly_str = "true_store_to_load_dep_costly"; + break; + + case store_to_load_dep_costly: + costly_str = "store_to_load_dep_costly"; + break; + + default: + costly_str = costly_num; + sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep); + break; + } + + fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str); + + switch (rs6000_sched_insert_nops) + { + case sched_finish_regroup_exact: + nop_str = "sched_finish_regroup_exact"; + break; + + case sched_finish_pad_groups: + nop_str = "sched_finish_pad_groups"; + break; + + case sched_finish_none: + nop_str = "sched_finish_none"; + break; + + default: + nop_str = nop_num; + sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops); + break; + } + + fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str); + + switch (rs6000_sdata) + { + default: + case SDATA_NONE: + break; + + case SDATA_DATA: + fprintf (stderr, DEBUG_FMT_S, "sdata", "data"); + break; + + case SDATA_SYSV: + fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv"); + break; + + case SDATA_EABI: + fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi"); + break; + + } + + switch (rs6000_traceback) + { + case traceback_default: trace_str = "default"; break; + case traceback_none: trace_str = "none"; break; + case traceback_part: trace_str = "part"; break; + case traceback_full: trace_str = "full"; break; + default: trace_str = "unknown"; break; + } + + fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str); + + switch (rs6000_current_cmodel) + { + case CMODEL_SMALL: cmodel_str = "small"; break; + case CMODEL_MEDIUM: cmodel_str = "medium"; break; + case CMODEL_LARGE: cmodel_str = "large"; break; + default: cmodel_str = "unknown"; break; + } + + fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str); + + switch (rs6000_current_abi) + { + case ABI_NONE: abi_str = "none"; break; + case ABI_AIX: abi_str = "aix"; break; + case ABI_ELFv2: abi_str = "ELFv2"; break; + case ABI_V4: abi_str = "V4"; break; + case ABI_DARWIN: abi_str = "darwin"; break; + default: abi_str = "unknown"; break; + } + + fprintf (stderr, DEBUG_FMT_S, "abi", abi_str); + + if (rs6000_altivec_abi) + fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true"); + + if (rs6000_spe_abi) + fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true"); + + if (rs6000_darwin64_abi) + fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true"); + + if (rs6000_float_gprs) + fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true"); + + fprintf (stderr, DEBUG_FMT_S, "fprs", + (TARGET_FPRS ? "true" : "false")); + + fprintf (stderr, DEBUG_FMT_S, "single_float", + (TARGET_SINGLE_FLOAT ? "true" : "false")); + + fprintf (stderr, DEBUG_FMT_S, "double_float", + (TARGET_DOUBLE_FLOAT ? "true" : "false")); + + fprintf (stderr, DEBUG_FMT_S, "soft_float", + (TARGET_SOFT_FLOAT ? "true" : "false")); + + fprintf (stderr, DEBUG_FMT_S, "e500_single", + (TARGET_E500_SINGLE ? "true" : "false")); + + fprintf (stderr, DEBUG_FMT_S, "e500_double", + (TARGET_E500_DOUBLE ? "true" : "false")); + + if (TARGET_LINK_STACK) + fprintf (stderr, DEBUG_FMT_S, "link_stack", "true"); + + fprintf (stderr, DEBUG_FMT_S, "lra", TARGET_LRA ? "true" : "false"); + + if (TARGET_P8_FUSION) + { + char options[80]; + + strcpy (options, (TARGET_P9_FUSION) ? 
"power9" : "power8"); + if (TARGET_TOC_FUSION) + strcat (options, ", toc"); + + if (TARGET_P8_FUSION_SIGN) + strcat (options, ", sign"); + + fprintf (stderr, DEBUG_FMT_S, "fusion", options); + } + + fprintf (stderr, DEBUG_FMT_S, "plt-format", + TARGET_SECURE_PLT ? "secure" : "bss"); + fprintf (stderr, DEBUG_FMT_S, "struct-return", + aix_struct_return ? "aix" : "sysv"); + fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]); + fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]); + fprintf (stderr, DEBUG_FMT_S, "align_branch", + tf[!!rs6000_align_branch_targets]); + fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size); + fprintf (stderr, DEBUG_FMT_D, "long_double_size", + rs6000_long_double_type_size); + fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority", + (int)rs6000_sched_restricted_insns_priority); + fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins", + (int)END_BUILTINS); + fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins", + (int)RS6000_BUILTIN_COUNT); + + fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX", + (int)TARGET_FLOAT128_ENABLE_TYPE); + + if (TARGET_VSX) + fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element", + (int)VECTOR_ELEMENT_SCALAR_64BIT); + + if (TARGET_DIRECT_MOVE_128) + fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element", + (int)VECTOR_ELEMENT_MFVSRLD_64BIT); +} + + +/* Update the addr mask bits in reg_addr to help secondary reload and go if + legitimate address support to figure out the appropriate addressing to + use. */ + +static void +rs6000_setup_reg_addr_masks (void) +{ + ssize_t rc, reg, m, nregs; + addr_mask_type any_addr_mask, addr_mask; + + for (m = 0; m < NUM_MACHINE_MODES; ++m) + { + machine_mode m2 = (machine_mode) m; + bool complex_p = false; + bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode); + size_t msize; + + if (COMPLEX_MODE_P (m2)) + { + complex_p = true; + m2 = GET_MODE_INNER (m2); + } + + msize = GET_MODE_SIZE (m2); + + /* SDmode is special in that we want to access it only via REG+REG + addressing on power7 and above, since we want to use the LFIWZX and + STFIWZX instructions to load it. */ + bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK); + + any_addr_mask = 0; + for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++) + { + addr_mask = 0; + reg = reload_reg_map[rc].reg; + + /* Can mode values go in the GPR/FPR/Altivec registers? */ + if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg]) + { + bool small_int_vsx_p = (small_int_p + && (rc == RELOAD_REG_FPR + || rc == RELOAD_REG_VMX)); + + nregs = rs6000_hard_regno_nregs[m][reg]; + addr_mask |= RELOAD_REG_VALID; + + /* Indicate if the mode takes more than 1 physical register. If + it takes a single register, indicate it can do REG+REG + addressing. Small integers in VSX registers can only do + REG+REG addressing. */ + if (small_int_vsx_p) + addr_mask |= RELOAD_REG_INDEXED; + else if (nregs > 1 || m == BLKmode || complex_p) + addr_mask |= RELOAD_REG_MULTIPLE; + else + addr_mask |= RELOAD_REG_INDEXED; + + /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY + addressing. Restrict addressing on SPE for 64-bit types + because of the SUBREG hackery used to address 64-bit floats in + '32-bit' GPRs. If we allow scalars into Altivec registers, + don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. 
*/ + + if (TARGET_UPDATE + && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR) + && msize <= 8 + && !VECTOR_MODE_P (m2) + && !FLOAT128_VECTOR_P (m2) + && !complex_p + && !small_int_vsx_p + && (m2 != DFmode || !TARGET_UPPER_REGS_DF) + && (m2 != SFmode || !TARGET_UPPER_REGS_SF) + && !(TARGET_E500_DOUBLE && msize == 8)) + { + addr_mask |= RELOAD_REG_PRE_INCDEC; + + /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that + we don't allow PRE_MODIFY for some multi-register + operations. */ + switch (m) + { + default: + addr_mask |= RELOAD_REG_PRE_MODIFY; + break; + + case DImode: + if (TARGET_POWERPC64) + addr_mask |= RELOAD_REG_PRE_MODIFY; + break; + + case DFmode: + case DDmode: + if (TARGET_DF_INSN) + addr_mask |= RELOAD_REG_PRE_MODIFY; + break; + } + } + } + + /* GPR and FPR registers can do REG+OFFSET addressing, except + possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing + for 64-bit scalars and 32-bit SFmode to altivec registers. */ + if ((addr_mask != 0) && !indexed_only_p + && msize <= 8 + && (rc == RELOAD_REG_GPR + || ((msize == 8 || m2 == SFmode) + && (rc == RELOAD_REG_FPR + || (rc == RELOAD_REG_VMX + && TARGET_P9_DFORM_SCALAR))))) + addr_mask |= RELOAD_REG_OFFSET; + + /* VSX registers can do REG+OFFSET addresssing if ISA 3.0 + instructions are enabled. The offset for 128-bit VSX registers is + only 12-bits. While GPRs can handle the full offset range, VSX + registers can only handle the restricted range. */ + else if ((addr_mask != 0) && !indexed_only_p + && msize == 16 && TARGET_P9_DFORM_VECTOR + && (ALTIVEC_OR_VSX_VECTOR_MODE (m2) + || (m2 == TImode && TARGET_VSX_TIMODE))) + { + addr_mask |= RELOAD_REG_OFFSET; + if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX) + addr_mask |= RELOAD_REG_QUAD_OFFSET; + } + + /* VMX registers can do (REG & -16) and ((REG+REG) & -16) + addressing on 128-bit types. */ + if (rc == RELOAD_REG_VMX && msize == 16 + && (addr_mask & RELOAD_REG_VALID) != 0) + addr_mask |= RELOAD_REG_AND_M16; + + reg_addr[m].addr_mask[rc] = addr_mask; + any_addr_mask |= addr_mask; + } + + reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask; + } +} + + +/* Initialize the various global tables that are based on register size. */ +static void +rs6000_init_hard_regno_mode_ok (bool global_init_p) +{ + ssize_t r, m, c; + int align64; + int align32; + + /* Precalculate REGNO_REG_CLASS. */ + rs6000_regno_regclass[0] = GENERAL_REGS; + for (r = 1; r < 32; ++r) + rs6000_regno_regclass[r] = BASE_REGS; + + for (r = 32; r < 64; ++r) + rs6000_regno_regclass[r] = FLOAT_REGS; + + for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r) + rs6000_regno_regclass[r] = NO_REGS; + + for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r) + rs6000_regno_regclass[r] = ALTIVEC_REGS; + + rs6000_regno_regclass[CR0_REGNO] = CR0_REGS; + for (r = CR1_REGNO; r <= CR7_REGNO; ++r) + rs6000_regno_regclass[r] = CR_REGS; + + rs6000_regno_regclass[LR_REGNO] = LINK_REGS; + rs6000_regno_regclass[CTR_REGNO] = CTR_REGS; + rs6000_regno_regclass[CA_REGNO] = NO_REGS; + rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS; + rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS; + rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS; + rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS; + rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS; + rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS; + rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS; + rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS; + rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS; + + /* Precalculate register class to simpler reload register class. 
We don't + need all of the register classes that are combinations of different + classes, just the simple ones that have constraint letters. */ + for (c = 0; c < N_REG_CLASSES; c++) + reg_class_to_reg_type[c] = NO_REG_TYPE; + + reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE; + reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE; + reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE; + reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE; + reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE; + reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE; + reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE; + + if (TARGET_VSX) + { + reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE; + reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE; + } + else + { + reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE; + reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE; + } + + /* Precalculate the valid memory formats as well as the vector information, + this must be set up before the rs6000_hard_regno_nregs_internal calls + below. */ + gcc_assert ((int)VECTOR_NONE == 0); + memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit)); + memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit)); + + gcc_assert ((int)CODE_FOR_nothing == 0); + memset ((void *) ®_addr[0], '\0', sizeof (reg_addr)); + + gcc_assert ((int)NO_REGS == 0); + memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints)); + + /* The VSX hardware allows native alignment for vectors, but control whether the compiler + believes it can use native alignment or still uses 128-bit alignment. */ + if (TARGET_VSX && !TARGET_VSX_ALIGN_128) + { + align64 = 64; + align32 = 32; + } + else + { + align64 = 128; + align32 = 128; + } + + /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so + only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */ + if (TARGET_FLOAT128_TYPE) + { + rs6000_vector_mem[KFmode] = VECTOR_VSX; + rs6000_vector_align[KFmode] = 128; + + if (FLOAT128_IEEE_P (TFmode)) + { + rs6000_vector_mem[TFmode] = VECTOR_VSX; + rs6000_vector_align[TFmode] = 128; + } + } + + /* V2DF mode, VSX only. */ + if (TARGET_VSX) + { + rs6000_vector_unit[V2DFmode] = VECTOR_VSX; + rs6000_vector_mem[V2DFmode] = VECTOR_VSX; + rs6000_vector_align[V2DFmode] = align64; + } + + /* V4SF mode, either VSX or Altivec. */ + if (TARGET_VSX) + { + rs6000_vector_unit[V4SFmode] = VECTOR_VSX; + rs6000_vector_mem[V4SFmode] = VECTOR_VSX; + rs6000_vector_align[V4SFmode] = align32; + } + else if (TARGET_ALTIVEC) + { + rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC; + rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC; + rs6000_vector_align[V4SFmode] = align32; + } + + /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads + and stores. 
*/ + if (TARGET_ALTIVEC) + { + rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC; + rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC; + rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC; + rs6000_vector_align[V4SImode] = align32; + rs6000_vector_align[V8HImode] = align32; + rs6000_vector_align[V16QImode] = align32; + + if (TARGET_VSX) + { + rs6000_vector_mem[V4SImode] = VECTOR_VSX; + rs6000_vector_mem[V8HImode] = VECTOR_VSX; + rs6000_vector_mem[V16QImode] = VECTOR_VSX; + } + else + { + rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC; + rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC; + rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC; + } + } + + /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to + do insert/splat/extract. Altivec doesn't have 64-bit integer support. */ + if (TARGET_VSX) + { + rs6000_vector_mem[V2DImode] = VECTOR_VSX; + rs6000_vector_unit[V2DImode] + = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE; + rs6000_vector_align[V2DImode] = align64; + + rs6000_vector_mem[V1TImode] = VECTOR_VSX; + rs6000_vector_unit[V1TImode] + = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE; + rs6000_vector_align[V1TImode] = 128; + } + + /* DFmode, see if we want to use the VSX unit. Memory is handled + differently, so don't set rs6000_vector_mem. */ + if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE) + { + rs6000_vector_unit[DFmode] = VECTOR_VSX; + rs6000_vector_align[DFmode] = 64; + } + + /* SFmode, see if we want to use the VSX unit. */ + if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT) + { + rs6000_vector_unit[SFmode] = VECTOR_VSX; + rs6000_vector_align[SFmode] = 32; + } + + /* Allow TImode in VSX register and set the VSX memory macros. */ + if (TARGET_VSX && TARGET_VSX_TIMODE) + { + rs6000_vector_mem[TImode] = VECTOR_VSX; + rs6000_vector_align[TImode] = align64; + } + + /* TODO add SPE and paired floating point vector support. */ + + /* Register class constraints for the constraints that depend on compile + switches. When the VSX code was added, different constraints were added + based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all + of the VSX registers are used. The register classes for scalar floating + point types is set, based on whether we allow that type into the upper + (Altivec) registers. GCC has register classes to target the Altivec + registers for load/store operations, to select using a VSX memory + operation instead of the traditional floating point operation. The + constraints are: + + d - Register class to use with traditional DFmode instructions. + f - Register class to use with traditional SFmode instructions. + v - Altivec register. + wa - Any VSX register. + wc - Reserved to represent individual CR bits (used in LLVM). + wd - Preferred register class for V2DFmode. + wf - Preferred register class for V4SFmode. + wg - Float register for power6x move insns. + wh - FP register for direct move instructions. + wi - FP or VSX register to hold 64-bit integers for VSX insns. + wj - FP or VSX register to hold 64-bit integers for direct moves. + wk - FP or VSX register to hold 64-bit doubles for direct moves. + wl - Float register if we can do 32-bit signed int loads. + wm - VSX register for ISA 2.07 direct move operations. + wn - always NO_REGS. + wr - GPR if 64-bit mode is permitted. + ws - Register class to do ISA 2.06 DF operations. + wt - VSX register for TImode in VSX registers. + wu - Altivec register for ISA 2.07 VSX SF/SI load/stores. + wv - Altivec register for ISA 2.06 VSX DF/DI load/stores. 
+ ww - Register class to do SF conversions in with VSX operations. + wx - Float register if we can do 32-bit int stores. + wy - Register class to do ISA 2.07 SF operations. + wz - Float register if we can do 32-bit unsigned int loads. + wH - Altivec register if SImode is allowed in VSX registers. + wI - VSX register if SImode is allowed in VSX registers. + wJ - VSX register if QImode/HImode are allowed in VSX registers. + wK - Altivec register if QImode/HImode are allowed in VSX registers. */ + + if (TARGET_HARD_FLOAT && TARGET_FPRS) + rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */ + + if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) + rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */ + + if (TARGET_VSX) + { + rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS; + rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */ + rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */ + + if (TARGET_VSX_TIMODE) + rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */ + + if (TARGET_UPPER_REGS_DF) /* DFmode */ + { + rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS; + rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; + } + else + rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS; + + if (TARGET_UPPER_REGS_DI) /* DImode */ + rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS; + else + rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS; + } + + /* Add conditional constraints based on various options, to allow us to + collapse multiple insn patterns. */ + if (TARGET_ALTIVEC) + rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS; + + if (TARGET_MFPGPR) /* DFmode */ + rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS; + + if (TARGET_LFIWAX) + rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */ + + if (TARGET_DIRECT_MOVE) + { + rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS; + rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */ + = rs6000_constraints[RS6000_CONSTRAINT_wi]; + rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */ + = rs6000_constraints[RS6000_CONSTRAINT_ws]; + rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS; + } + + if (TARGET_POWERPC64) + { + rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS; + rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS; + } + + if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */ + { + rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS; + rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS; + rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS; + } + else if (TARGET_P8_VECTOR) + { + rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS; + rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS; + } + else if (TARGET_VSX) + rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS; + + if (TARGET_STFIWX) + rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */ + + if (TARGET_LFIWZX) + rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */ + + if (TARGET_FLOAT128_TYPE) + { + rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */ + if (FLOAT128_IEEE_P (TFmode)) + rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */ + } + + /* Support for new D-form instructions. */ + if (TARGET_P9_DFORM_SCALAR) + rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS; + + /* Support for ISA 3.0 (power9) vectors. */ + if (TARGET_P9_VECTOR) + rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS; + + /* Support for new direct moves (ISA 3.0 + 64bit). 
*/ + if (TARGET_DIRECT_MOVE_128) + rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS; + + /* Support small integers in VSX registers. */ + if (TARGET_VSX_SMALL_INTEGER) + { + rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS; + rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS; + if (TARGET_P9_VECTOR) + { + rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS; + rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS; + } + } + + /* Set up the reload helper and direct move functions. */ + if (TARGET_VSX || TARGET_ALTIVEC) + { + if (TARGET_64BIT) + { + reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store; + reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load; + reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store; + reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load; + reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store; + reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load; + reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store; + reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load; + reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store; + reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load; + reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store; + reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load; + reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store; + reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load; + reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store; + reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load; + reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store; + reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load; + reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store; + reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load; + + if (FLOAT128_VECTOR_P (KFmode)) + { + reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store; + reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load; + } + + if (FLOAT128_VECTOR_P (TFmode)) + { + reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store; + reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load; + } + + /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are + available. 
*/ + if (TARGET_NO_SDMODE_STACK) + { + reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store; + reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load; + } + + if (TARGET_VSX_TIMODE) + { + reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store; + reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load; + } + + if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128) + { + reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti; + reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti; + reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df; + reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di; + reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf; + reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si; + reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi; + reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi; + reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf; + + reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti; + reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti; + reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df; + reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di; + reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf; + reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si; + reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi; + reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi; + reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf; + + if (FLOAT128_VECTOR_P (KFmode)) + { + reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf; + reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf; + } + + if (FLOAT128_VECTOR_P (TFmode)) + { + reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf; + reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf; + } + } + } + else + { + reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store; + reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load; + reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store; + reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load; + reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store; + reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load; + reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store; + reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load; + reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store; + reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load; + reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store; + reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load; + reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store; + reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load; + reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store; + reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load; + reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store; + reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load; + reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store; + reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load; + + if (FLOAT128_VECTOR_P (KFmode)) + { + reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store; + reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load; + } + + if (FLOAT128_IEEE_P (TFmode)) + { 
+ reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store; + reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load; + } + + /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are + available. */ + if (TARGET_NO_SDMODE_STACK) + { + reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store; + reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load; + } + + if (TARGET_VSX_TIMODE) + { + reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store; + reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load; + } + + if (TARGET_DIRECT_MOVE) + { + reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi; + reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd; + reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf; + } + } + + if (TARGET_UPPER_REGS_DF) + reg_addr[DFmode].scalar_in_vmx_p = true; + + if (TARGET_UPPER_REGS_DI) + reg_addr[DImode].scalar_in_vmx_p = true; + + if (TARGET_UPPER_REGS_SF) + reg_addr[SFmode].scalar_in_vmx_p = true; + + if (TARGET_VSX_SMALL_INTEGER) + { + reg_addr[SImode].scalar_in_vmx_p = true; + if (TARGET_P9_VECTOR) + { + reg_addr[HImode].scalar_in_vmx_p = true; + reg_addr[QImode].scalar_in_vmx_p = true; + } + } + } + + /* Setup the fusion operations. */ + if (TARGET_P8_FUSION) + { + reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi; + reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi; + reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si; + if (TARGET_64BIT) + reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di; + } + + if (TARGET_P9_FUSION) + { + struct fuse_insns { + enum machine_mode mode; /* mode of the fused type. */ + enum machine_mode pmode; /* pointer mode. */ + enum rs6000_reload_reg_type rtype; /* register type. */ + enum insn_code load; /* load insn. */ + enum insn_code store; /* store insn. 
*/ + }; + + static const struct fuse_insns addis_insns[] = { + { SFmode, DImode, RELOAD_REG_FPR, + CODE_FOR_fusion_vsx_di_sf_load, + CODE_FOR_fusion_vsx_di_sf_store }, + + { SFmode, SImode, RELOAD_REG_FPR, + CODE_FOR_fusion_vsx_si_sf_load, + CODE_FOR_fusion_vsx_si_sf_store }, + + { DFmode, DImode, RELOAD_REG_FPR, + CODE_FOR_fusion_vsx_di_df_load, + CODE_FOR_fusion_vsx_di_df_store }, + + { DFmode, SImode, RELOAD_REG_FPR, + CODE_FOR_fusion_vsx_si_df_load, + CODE_FOR_fusion_vsx_si_df_store }, + + { DImode, DImode, RELOAD_REG_FPR, + CODE_FOR_fusion_vsx_di_di_load, + CODE_FOR_fusion_vsx_di_di_store }, + + { DImode, SImode, RELOAD_REG_FPR, + CODE_FOR_fusion_vsx_si_di_load, + CODE_FOR_fusion_vsx_si_di_store }, + + { QImode, DImode, RELOAD_REG_GPR, + CODE_FOR_fusion_gpr_di_qi_load, + CODE_FOR_fusion_gpr_di_qi_store }, + + { QImode, SImode, RELOAD_REG_GPR, + CODE_FOR_fusion_gpr_si_qi_load, + CODE_FOR_fusion_gpr_si_qi_store }, + + { HImode, DImode, RELOAD_REG_GPR, + CODE_FOR_fusion_gpr_di_hi_load, + CODE_FOR_fusion_gpr_di_hi_store }, + + { HImode, SImode, RELOAD_REG_GPR, + CODE_FOR_fusion_gpr_si_hi_load, + CODE_FOR_fusion_gpr_si_hi_store }, + + { SImode, DImode, RELOAD_REG_GPR, + CODE_FOR_fusion_gpr_di_si_load, + CODE_FOR_fusion_gpr_di_si_store }, + + { SImode, SImode, RELOAD_REG_GPR, + CODE_FOR_fusion_gpr_si_si_load, + CODE_FOR_fusion_gpr_si_si_store }, + + { SFmode, DImode, RELOAD_REG_GPR, + CODE_FOR_fusion_gpr_di_sf_load, + CODE_FOR_fusion_gpr_di_sf_store }, + + { SFmode, SImode, RELOAD_REG_GPR, + CODE_FOR_fusion_gpr_si_sf_load, + CODE_FOR_fusion_gpr_si_sf_store }, + + { DImode, DImode, RELOAD_REG_GPR, + CODE_FOR_fusion_gpr_di_di_load, + CODE_FOR_fusion_gpr_di_di_store }, + + { DFmode, DImode, RELOAD_REG_GPR, + CODE_FOR_fusion_gpr_di_df_load, + CODE_FOR_fusion_gpr_di_df_store }, + }; + + enum machine_mode cur_pmode = Pmode; + size_t i; + + for (i = 0; i < ARRAY_SIZE (addis_insns); i++) + { + enum machine_mode xmode = addis_insns[i].mode; + enum rs6000_reload_reg_type rtype = addis_insns[i].rtype; + + if (addis_insns[i].pmode != cur_pmode) + continue; + + if (rtype == RELOAD_REG_FPR + && (!TARGET_HARD_FLOAT || !TARGET_FPRS)) + continue; + + reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load; + reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store; + + if (rtype == RELOAD_REG_FPR && TARGET_P9_DFORM_SCALAR) + { + reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX] + = addis_insns[i].load; + reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX] + = addis_insns[i].store; + } + } + } + + /* Note which types we support fusing TOC setup plus memory insn. We only do + fused TOCs for medium/large code models. */ + if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64 + && (TARGET_CMODEL != CMODEL_SMALL)) + { + reg_addr[QImode].fused_toc = true; + reg_addr[HImode].fused_toc = true; + reg_addr[SImode].fused_toc = true; + reg_addr[DImode].fused_toc = true; + if (TARGET_HARD_FLOAT && TARGET_FPRS) + { + if (TARGET_SINGLE_FLOAT) + reg_addr[SFmode].fused_toc = true; + if (TARGET_DOUBLE_FLOAT) + reg_addr[DFmode].fused_toc = true; + } + } + + /* Precalculate HARD_REGNO_NREGS. */ + for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r) + for (m = 0; m < NUM_MACHINE_MODES; ++m) + rs6000_hard_regno_nregs[m][r] + = rs6000_hard_regno_nregs_internal (r, (machine_mode)m); + + /* Precalculate HARD_REGNO_MODE_OK. 
*/ + for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r) + for (m = 0; m < NUM_MACHINE_MODES; ++m) + if (rs6000_hard_regno_mode_ok (r, (machine_mode)m)) + rs6000_hard_regno_mode_ok_p[m][r] = true; + + /* Precalculate CLASS_MAX_NREGS sizes. */ + for (c = 0; c < LIM_REG_CLASSES; ++c) + { + int reg_size; + + if (TARGET_VSX && VSX_REG_CLASS_P (c)) + reg_size = UNITS_PER_VSX_WORD; + + else if (c == ALTIVEC_REGS) + reg_size = UNITS_PER_ALTIVEC_WORD; + + else if (c == FLOAT_REGS) + reg_size = UNITS_PER_FP_WORD; + + else + reg_size = UNITS_PER_WORD; + + for (m = 0; m < NUM_MACHINE_MODES; ++m) + { + machine_mode m2 = (machine_mode)m; + int reg_size2 = reg_size; + + /* TDmode & IBM 128-bit floating point always takes 2 registers, even + in VSX. */ + if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m)) + reg_size2 = UNITS_PER_FP_WORD; + + rs6000_class_max_nregs[m][c] + = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2; + } + } + + if (TARGET_E500_DOUBLE) + rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1; + + /* Calculate which modes to automatically generate code to use a the + reciprocal divide and square root instructions. In the future, possibly + automatically generate the instructions even if the user did not specify + -mrecip. The older machines double precision reciprocal sqrt estimate is + not accurate enough. */ + memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits)); + if (TARGET_FRES) + rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE; + if (TARGET_FRE) + rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE; + if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)) + rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE; + if (VECTOR_UNIT_VSX_P (V2DFmode)) + rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE; + + if (TARGET_FRSQRTES) + rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; + if (TARGET_FRSQRTE) + rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; + if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)) + rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; + if (VECTOR_UNIT_VSX_P (V2DFmode)) + rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; + + if (rs6000_recip_control) + { + if (!flag_finite_math_only) + warning (0, "-mrecip requires -ffinite-math or -ffast-math"); + if (flag_trapping_math) + warning (0, "-mrecip requires -fno-trapping-math or -ffast-math"); + if (!flag_reciprocal_math) + warning (0, "-mrecip requires -freciprocal-math or -ffast-math"); + if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math) + { + if (RS6000_RECIP_HAVE_RE_P (SFmode) + && (rs6000_recip_control & RECIP_SF_DIV) != 0) + rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE; + + if (RS6000_RECIP_HAVE_RE_P (DFmode) + && (rs6000_recip_control & RECIP_DF_DIV) != 0) + rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE; + + if (RS6000_RECIP_HAVE_RE_P (V4SFmode) + && (rs6000_recip_control & RECIP_V4SF_DIV) != 0) + rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE; + + if (RS6000_RECIP_HAVE_RE_P (V2DFmode) + && (rs6000_recip_control & RECIP_V2DF_DIV) != 0) + rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE; + + if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode) + && (rs6000_recip_control & RECIP_SF_RSQRT) != 0) + rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; + + if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode) + && (rs6000_recip_control & RECIP_DF_RSQRT) != 0) + rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; + + if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode) + && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0) + 
rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; + + if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode) + && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0) + rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; + } + } + + /* Update the addr mask bits in reg_addr to help secondary reload and go if + legitimate address support to figure out the appropriate addressing to + use. */ + rs6000_setup_reg_addr_masks (); + + if (global_init_p || TARGET_DEBUG_TARGET) + { + if (TARGET_DEBUG_REG) + rs6000_debug_reg_global (); + + if (TARGET_DEBUG_COST || TARGET_DEBUG_REG) + fprintf (stderr, + "SImode variable mult cost = %d\n" + "SImode constant mult cost = %d\n" + "SImode short constant mult cost = %d\n" + "DImode multipliciation cost = %d\n" + "SImode division cost = %d\n" + "DImode division cost = %d\n" + "Simple fp operation cost = %d\n" + "DFmode multiplication cost = %d\n" + "SFmode division cost = %d\n" + "DFmode division cost = %d\n" + "cache line size = %d\n" + "l1 cache size = %d\n" + "l2 cache size = %d\n" + "simultaneous prefetches = %d\n" + "\n", + rs6000_cost->mulsi, + rs6000_cost->mulsi_const, + rs6000_cost->mulsi_const9, + rs6000_cost->muldi, + rs6000_cost->divsi, + rs6000_cost->divdi, + rs6000_cost->fp, + rs6000_cost->dmul, + rs6000_cost->sdiv, + rs6000_cost->ddiv, + rs6000_cost->cache_line_size, + rs6000_cost->l1_cache_size, + rs6000_cost->l2_cache_size, + rs6000_cost->simultaneous_prefetches); + } +} + +#if TARGET_MACHO +/* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */ + +static void +darwin_rs6000_override_options (void) +{ + /* The Darwin ABI always includes AltiVec, can't be (validly) turned + off. */ + rs6000_altivec_abi = 1; + TARGET_ALTIVEC_VRSAVE = 1; + rs6000_current_abi = ABI_DARWIN; + + if (DEFAULT_ABI == ABI_DARWIN + && TARGET_64BIT) + darwin_one_byte_bool = 1; + + if (TARGET_64BIT && ! TARGET_POWERPC64) + { + rs6000_isa_flags |= OPTION_MASK_POWERPC64; + warning (0, "-m64 requires PowerPC64 architecture, enabling"); + } + if (flag_mkernel) + { + rs6000_default_long_calls = 1; + rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT; + } + + /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes + Altivec. */ + if (!flag_mkernel && !flag_apple_kext + && TARGET_64BIT + && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)) + rs6000_isa_flags |= OPTION_MASK_ALTIVEC; + + /* Unless the user (not the configurer) has explicitly overridden + it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to + G4 unless targeting the kernel. */ + if (!flag_mkernel + && !flag_apple_kext + && strverscmp (darwin_macosx_version_min, "10.5") >= 0 + && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC) + && ! global_options_set.x_rs6000_cpu_index) + { + rs6000_isa_flags |= OPTION_MASK_ALTIVEC; + } +} +#endif + +/* If not otherwise specified by a target, make 'long double' equivalent to + 'double'. */ + +#ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE +#define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64 +#endif + +/* Return the builtin mask of the various options used that could affect which + builtins were used. In the past we used target_flags, but we've run out of + bits, and some options like SPE and PAIRED are no longer in + target_flags. */ + +HOST_WIDE_INT +rs6000_builtin_mask_calculate (void) +{ + return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0) + | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0) + | ((TARGET_VSX) ? RS6000_BTM_VSX : 0) + | ((TARGET_SPE) ? RS6000_BTM_SPE : 0) + | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0) + | ((TARGET_FRE) ? RS6000_BTM_FRE : 0) + | ((TARGET_FRES) ? 
RS6000_BTM_FRES : 0) + | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0) + | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0) + | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0) + | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0) + | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0) + | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0) + | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0) + | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0) + | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0) + | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0) + | ((TARGET_HTM) ? RS6000_BTM_HTM : 0) + | ((TARGET_DFP) ? RS6000_BTM_DFP : 0) + | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0) + | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0) + | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)); +} + +/* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered + to clobber the XER[CA] bit because clobbering that bit without telling + the compiler worked just fine with versions of GCC before GCC 5, and + breaking a lot of older code in ways that are hard to track down is + not such a great idea. */ + +static rtx_insn * +rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/, + vec<const char *> &/*constraints*/, + vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs) +{ + clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO)); + SET_HARD_REG_BIT (clobbered_regs, CA_REGNO); + return NULL; +} + +/* Override command line options. + + Combine build-specific configuration information with options + specified on the command line to set various state variables which + influence code generation, optimization, and expansion of built-in + functions. Assure that command-line configuration preferences are + compatible with each other and with the build configuration; issue + warnings while adjusting configuration or error messages while + rejecting configuration. + + Upon entry to this function: + + This function is called once at the beginning of + compilation, and then again at the start and end of compiling + each section of code that has a different configuration, as + indicated, for example, by adding the + + __attribute__((__target__("cpu=power9"))) + + qualifier to a function definition or, for example, by bracketing + code between + + #pragma GCC target("altivec") + + and + + #pragma GCC reset_options + + directives. Parameter global_init_p is true for the initial + invocation, which initializes global variables, and false for all + subsequent invocations. + + + Various global state information is assumed to be valid. This + includes OPTION_TARGET_CPU_DEFAULT, representing the name of the + default CPU specified at build configure time, TARGET_DEFAULT, + representing the default set of option flags for the default + target, and global_options_set.x_rs6000_isa_flags, representing + which options were requested on the command line. + + Upon return from this function: + + rs6000_isa_flags_explicit has a non-zero bit for each flag that + was set by name on the command line. Additionally, if certain + attributes are automatically enabled or disabled by this function + in order to assure compatibility between options and + configuration, the flags associated with those attributes are + also set. By setting these "explicit bits", we avoid the risk + that other code might accidentally overwrite these particular + attributes with "default values". + + The various bits of rs6000_isa_flags are set to indicate the + target options that have been selected for the most current + compilation efforts. 
This has the effect of also turning on the + associated TARGET_XXX values since these are macros which are + generally defined to test the corresponding bit of the + rs6000_isa_flags variable. + + The variable rs6000_builtin_mask is set to represent the target + options for the most current compilation efforts, consistent with + the current contents of rs6000_isa_flags. This variable controls + expansion of built-in functions. + + Various other global variables and fields of global structures + (over 50 in all) are initialized to reflect the desired options + for the most current compilation efforts. */ + +static bool +rs6000_option_override_internal (bool global_init_p) +{ + bool ret = true; + bool have_cpu = false; + + /* The default cpu requested at configure time, if any. */ + const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT; + + HOST_WIDE_INT set_masks; + HOST_WIDE_INT ignore_masks; + int cpu_index; + int tune_index; + struct cl_target_option *main_target_opt + = ((global_init_p || target_option_default_node == NULL) + ? NULL : TREE_TARGET_OPTION (target_option_default_node)); + + /* Print defaults. */ + if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p) + rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT); + + /* Remember the explicit arguments. */ + if (global_init_p) + rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags; + + /* On 64-bit Darwin, power alignment is ABI-incompatible with some C + library functions, so warn about it. The flag may be useful for + performance studies from time to time though, so don't disable it + entirely. */ + if (global_options_set.x_rs6000_alignment_flags + && rs6000_alignment_flags == MASK_ALIGN_POWER + && DEFAULT_ABI == ABI_DARWIN + && TARGET_64BIT) + warning (0, "-malign-power is not supported for 64-bit Darwin;" + " it is incompatible with the installed C and C++ libraries"); + + /* Numerous experiment shows that IRA based loop pressure + calculation works better for RTL loop invariant motion on targets + with enough (>= 32) registers. It is an expensive optimization. + So it is on only for peak performance. */ + if (optimize >= 3 && global_init_p + && !global_options_set.x_flag_ira_loop_pressure) + flag_ira_loop_pressure = 1; + + /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order + for tracebacks to be complete but not if any -fasynchronous-unwind-tables + options were already specified. */ + if (flag_sanitize & SANITIZE_USER_ADDRESS + && !global_options_set.x_flag_asynchronous_unwind_tables) + flag_asynchronous_unwind_tables = 1; + + /* Set the pointer size. */ + if (TARGET_64BIT) + { + rs6000_pmode = (int)DImode; + rs6000_pointer_size = 64; + } + else + { + rs6000_pmode = (int)SImode; + rs6000_pointer_size = 32; + } + + /* Some OSs don't support saving the high part of 64-bit registers on context + switch. Other OSs don't support saving Altivec registers. On those OSs, + we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings; + if the user wants either, the user must explicitly specify them and we + won't interfere with the user's specification. */ + + set_masks = POWERPC_MASKS; +#ifdef OS_MISSING_POWERPC64 + if (OS_MISSING_POWERPC64) + set_masks &= ~OPTION_MASK_POWERPC64; +#endif +#ifdef OS_MISSING_ALTIVEC + if (OS_MISSING_ALTIVEC) + set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX + | OTHER_VSX_VECTOR_MASKS); +#endif + + /* Don't override by the processor default if given explicitly. 
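+ Clearing the explicit bits from set_masks below keeps any flag the user
+ named on the command line from being overridden by the -mcpu default.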
*/ + set_masks &= ~rs6000_isa_flags_explicit; + + /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed + the cpu in a target attribute or pragma, but did not specify a tuning + option, use the cpu for the tuning option rather than the option specified + with -mtune on the command line. Process a '--with-cpu' configuration + request as an implicit --cpu. */ + if (rs6000_cpu_index >= 0) + { + cpu_index = rs6000_cpu_index; + have_cpu = true; + } + else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0) + { + rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index; + have_cpu = true; + } + else if (implicit_cpu) + { + rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu); + have_cpu = true; + } + else + { + /* PowerPC 64-bit LE requires at least ISA 2.07. */ + const char *default_cpu = ((!TARGET_POWERPC64) + ? "powerpc" + : ((BYTES_BIG_ENDIAN) + ? "powerpc64" + : "powerpc64le")); + + rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu); + have_cpu = false; + } + + gcc_assert (cpu_index >= 0); + + if (have_cpu) + { +#ifndef HAVE_AS_POWER9 + if (processor_target_table[rs6000_cpu_index].processor + == PROCESSOR_POWER9) + { + have_cpu = false; + warning (0, "will not generate power9 instructions because " + "assembler lacks power9 support"); + } +#endif +#ifndef HAVE_AS_POWER8 + if (processor_target_table[rs6000_cpu_index].processor + == PROCESSOR_POWER8) + { + have_cpu = false; + warning (0, "will not generate power8 instructions because " + "assembler lacks power8 support"); + } +#endif +#ifndef HAVE_AS_POPCNTD + if (processor_target_table[rs6000_cpu_index].processor + == PROCESSOR_POWER7) + { + have_cpu = false; + warning (0, "will not generate power7 instructions because " + "assembler lacks power7 support"); + } +#endif +#ifndef HAVE_AS_DFP + if (processor_target_table[rs6000_cpu_index].processor + == PROCESSOR_POWER6) + { + have_cpu = false; + warning (0, "will not generate power6 instructions because " + "assembler lacks power6 support"); + } +#endif +#ifndef HAVE_AS_POPCNTB + if (processor_target_table[rs6000_cpu_index].processor + == PROCESSOR_POWER5) + { + have_cpu = false; + warning (0, "will not generate power5 instructions because " + "assembler lacks power5 support"); + } +#endif + + if (!have_cpu) + { + /* PowerPC 64-bit LE requires at least ISA 2.07. */ + const char *default_cpu = (!TARGET_POWERPC64 + ? "powerpc" + : (BYTES_BIG_ENDIAN + ? "powerpc64" + : "powerpc64le")); + + rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu); + } + } + + /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the + compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits + with those from the cpu, except for options that were explicitly set. If + we don't have a cpu, do not override the target bits set in + TARGET_DEFAULT. */ + if (have_cpu) + { + rs6000_isa_flags &= ~set_masks; + rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable + & set_masks); + } + else + { + /* If no -mcpu=<xxx>, inherit any default options that were cleared via + POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize + target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched + to using rs6000_isa_flags, we need to do the initialization here. + + If there is a TARGET_DEFAULT, use that. Otherwise fall back to using + -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */ + HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? 
TARGET_DEFAULT + : processor_target_table[cpu_index].target_enable); + rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit); + } + + if (rs6000_tune_index >= 0) + tune_index = rs6000_tune_index; + else if (have_cpu) + rs6000_tune_index = tune_index = cpu_index; + else + { + size_t i; + enum processor_type tune_proc + = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT); + + tune_index = -1; + for (i = 0; i < ARRAY_SIZE (processor_target_table); i++) + if (processor_target_table[i].processor == tune_proc) + { + rs6000_tune_index = tune_index = i; + break; + } + } + + gcc_assert (tune_index >= 0); + rs6000_cpu = processor_target_table[tune_index].processor; + + /* Pick defaults for SPE related control flags. Do this early to make sure + that the TARGET_ macros are representative ASAP. */ + { + int spe_capable_cpu = + (rs6000_cpu == PROCESSOR_PPC8540 + || rs6000_cpu == PROCESSOR_PPC8548); + + if (!global_options_set.x_rs6000_spe_abi) + rs6000_spe_abi = spe_capable_cpu; + + if (!global_options_set.x_rs6000_spe) + rs6000_spe = spe_capable_cpu; + + if (!global_options_set.x_rs6000_float_gprs) + rs6000_float_gprs = + (rs6000_cpu == PROCESSOR_PPC8540 ? 1 + : rs6000_cpu == PROCESSOR_PPC8548 ? 2 + : 0); + } + + if (global_options_set.x_rs6000_spe_abi + && rs6000_spe_abi + && !TARGET_SPE_ABI) + error ("not configured for SPE ABI"); + + if (global_options_set.x_rs6000_spe + && rs6000_spe + && !TARGET_SPE) + error ("not configured for SPE instruction set"); + + if (main_target_opt != NULL + && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi) + || (main_target_opt->x_rs6000_spe != rs6000_spe) + || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs))) + error ("target attribute or pragma changes SPE ABI"); + + if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3 + || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64 + || rs6000_cpu == PROCESSOR_PPCE5500) + { + if (TARGET_ALTIVEC) + error ("AltiVec not supported in this target"); + if (TARGET_SPE) + error ("SPE not supported in this target"); + } + if (rs6000_cpu == PROCESSOR_PPCE6500) + { + if (TARGET_SPE) + error ("SPE not supported in this target"); + } + + /* Disable Cell microcode if we are optimizing for the Cell + and not optimizing for size. */ + if (rs6000_gen_cell_microcode == -1) + rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL + && !optimize_size); + + /* If we are optimizing big endian systems for space and it's OK to + use instructions that would be microcoded on the Cell, use the + load/store multiple and string instructions. */ + if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode) + rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE + | OPTION_MASK_STRING); + + /* Don't allow -mmultiple or -mstring on little endian systems + unless the cpu is a 750, because the hardware doesn't support the + instructions used in little endian mode, and causes an alignment + trap. The 750 does not cause an alignment trap (except when the + target is unaligned). 
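+ Accordingly, the code below clears OPTION_MASK_MULTIPLE and
+ OPTION_MASK_STRING on little-endian targets other than the 750, and warns
+ only when the user requested those options explicitly.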
*/ + + if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750) + { + if (TARGET_MULTIPLE) + { + rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE; + if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0) + warning (0, "-mmultiple is not supported on little endian systems"); + } + + if (TARGET_STRING) + { + rs6000_isa_flags &= ~OPTION_MASK_STRING; + if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0) + warning (0, "-mstring is not supported on little endian systems"); + } + } + + /* If little-endian, default to -mstrict-align on older processors. + Testing for htm matches power8 and later. */ + if (!BYTES_BIG_ENDIAN + && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM)) + rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN; + + /* -maltivec={le,be} implies -maltivec. */ + if (rs6000_altivec_element_order != 0) + rs6000_isa_flags |= OPTION_MASK_ALTIVEC; + + /* Disallow -maltivec=le in big endian mode for now. This is not + known to be useful for anyone. */ + if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1) + { + warning (0, N_("-maltivec=le not allowed for big-endian targets")); + rs6000_altivec_element_order = 0; + } + + /* Add some warnings for VSX. */ + if (TARGET_VSX) + { + const char *msg = NULL; + if (!TARGET_HARD_FLOAT || !TARGET_FPRS + || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_VSX) + msg = N_("-mvsx requires hardware floating point"); + else + { + rs6000_isa_flags &= ~ OPTION_MASK_VSX; + rs6000_isa_flags_explicit |= OPTION_MASK_VSX; + } + } + else if (TARGET_PAIRED_FLOAT) + msg = N_("-mvsx and -mpaired are incompatible"); + else if (TARGET_AVOID_XFORM > 0) + msg = N_("-mvsx needs indexed addressing"); + else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit + & OPTION_MASK_ALTIVEC)) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_VSX) + msg = N_("-mvsx and -mno-altivec are incompatible"); + else + msg = N_("-mno-altivec disables vsx"); + } + + if (msg) + { + warning (0, msg); + rs6000_isa_flags &= ~ OPTION_MASK_VSX; + rs6000_isa_flags_explicit |= OPTION_MASK_VSX; + } + } + + /* If hard-float/altivec/vsx were explicitly turned off then don't allow + the -mcpu setting to enable options that conflict. */ + if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX) + && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT + | OPTION_MASK_ALTIVEC + | OPTION_MASK_VSX)) != 0) + rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO + | OPTION_MASK_DIRECT_MOVE) + & ~rs6000_isa_flags_explicit); + + if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) + rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags); + + /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn + off all of the options that depend on those flags. */ + ignore_masks = rs6000_disable_incompatible_switches (); + + /* For the newer switches (vsx, dfp, etc.) set some of the older options, + unless the user explicitly used the -mno-<option> to disable the code. */ + if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM_SCALAR + || TARGET_P9_DFORM_VECTOR || TARGET_P9_DFORM_BOTH > 0) + rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks); + else if (TARGET_P9_MINMAX) + { + if (have_cpu) + { + if (cpu_index == PROCESSOR_POWER9) + { + /* legacy behavior: allow -mcpu-power9 with certain + capabilities explicitly disabled. 
*/ + rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks); + /* However, reject this automatic fix if certain + capabilities required for TARGET_P9_MINMAX support + have been explicitly disabled. */ + if (((OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF + | OPTION_MASK_UPPER_REGS_DF) & rs6000_isa_flags) + != (OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF + | OPTION_MASK_UPPER_REGS_DF)) + error ("-mpower9-minmax incompatible with explicitly disabled options"); + } + else + error ("Power9 target option is incompatible with -mcpu=<xxx> for " + "<xxx> less than power9"); + } + else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit) + != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags + & rs6000_isa_flags_explicit)) + /* Enforce that none of the ISA_3_0_MASKS_SERVER flags + were explicitly cleared. */ + error ("-mpower9-minmax incompatible with explicitly disabled options"); + else + rs6000_isa_flags |= ISA_3_0_MASKS_SERVER; + } + else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO) + rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks); + else if (TARGET_VSX) + rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks); + else if (TARGET_POPCNTD) + rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks); + else if (TARGET_DFP) + rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks); + else if (TARGET_CMPB) + rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks); + else if (TARGET_FPRND) + rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks); + else if (TARGET_POPCNTB) + rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks); + else if (TARGET_ALTIVEC) + rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks); + + if (TARGET_CRYPTO && !TARGET_ALTIVEC) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO) + error ("-mcrypto requires -maltivec"); + rs6000_isa_flags &= ~OPTION_MASK_CRYPTO; + } + + if (TARGET_DIRECT_MOVE && !TARGET_VSX) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) + error ("-mdirect-move requires -mvsx"); + rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE; + } + + if (TARGET_P8_VECTOR && !TARGET_ALTIVEC) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) + error ("-mpower8-vector requires -maltivec"); + rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR; + } + + if (TARGET_P8_VECTOR && !TARGET_VSX) + { + if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) + && (rs6000_isa_flags_explicit & OPTION_MASK_VSX)) + error ("-mpower8-vector requires -mvsx"); + else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0) + { + rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR; + if (rs6000_isa_flags_explicit & OPTION_MASK_VSX) + rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR; + } + else + { + /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is + not explicit. */ + rs6000_isa_flags |= OPTION_MASK_VSX; + rs6000_isa_flags_explicit |= OPTION_MASK_VSX; + } + } + + if (TARGET_VSX_TIMODE && !TARGET_VSX) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) + error ("-mvsx-timode requires -mvsx"); + rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE; + } + + if (TARGET_DFP && !TARGET_HARD_FLOAT) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_DFP) + error ("-mhard-dfp requires -mhard-float"); + rs6000_isa_flags &= ~OPTION_MASK_DFP; + } + + /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di, + and -mupper-regs-sf, depending on the cpu, unless the user explicitly also + set the individual option. 
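+ The -mupper-regs-df and -mupper-regs-di forms are gated on VSX, the
+ -mupper-regs-sf form on power8-vector, and each is only changed when the
+ corresponding individual option was not given explicitly.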
*/ + if (TARGET_UPPER_REGS > 0) + { + if (TARGET_VSX + && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)) + { + rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF; + rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF; + } + if (TARGET_VSX + && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)) + { + rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DI; + rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI; + } + if (TARGET_P8_VECTOR + && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)) + { + rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF; + rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF; + } + } + else if (TARGET_UPPER_REGS == 0) + { + if (TARGET_VSX + && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)) + { + rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF; + rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF; + } + if (TARGET_VSX + && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)) + { + rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI; + rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI; + } + if (TARGET_P8_VECTOR + && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)) + { + rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF; + rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF; + } + } + + if (TARGET_UPPER_REGS_DF && !TARGET_VSX) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF) + error ("-mupper-regs-df requires -mvsx"); + rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF; + } + + if (TARGET_UPPER_REGS_DI && !TARGET_VSX) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI) + error ("-mupper-regs-di requires -mvsx"); + rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI; + } + + if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF) + error ("-mupper-regs-sf requires -mpower8-vector"); + rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF; + } + + /* The quad memory instructions only works in 64-bit mode. In 32-bit mode, + silently turn off quad memory mode. */ + if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64) + { + if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0) + warning (0, N_("-mquad-memory requires 64-bit mode")); + + if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0) + warning (0, N_("-mquad-memory-atomic requires 64-bit mode")); + + rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY + | OPTION_MASK_QUAD_MEMORY_ATOMIC); + } + + /* Non-atomic quad memory load/store are disabled for little endian, since + the words are reversed, but atomic operations can still be done by + swapping the words. */ + if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN) + { + if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0) + warning (0, N_("-mquad-memory is not available in little endian mode")); + + rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY; + } + + /* Assume if the user asked for normal quad memory instructions, they want + the atomic versions as well, unless they explicity told us not to use quad + word atomic instructions. */ + if (TARGET_QUAD_MEMORY + && !TARGET_QUAD_MEMORY_ATOMIC + && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0)) + rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC; + + /* Enable power8 fusion if we are tuning for power8, even if we aren't + generating power8 instructions. 
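+ The default comes from the tuning target (processor_target_table for
+ tune_index) and is applied only when -mpower8-fusion was not given
+ explicitly.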
*/ + if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)) + rs6000_isa_flags |= (processor_target_table[tune_index].target_enable + & OPTION_MASK_P8_FUSION); + + /* Setting additional fusion flags turns on base fusion. */ + if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION)) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION) + { + if (TARGET_P8_FUSION_SIGN) + error ("-mpower8-fusion-sign requires -mpower8-fusion"); + + if (TARGET_TOC_FUSION) + error ("-mtoc-fusion requires -mpower8-fusion"); + + rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION; + } + else + rs6000_isa_flags |= OPTION_MASK_P8_FUSION; + } + + /* Power9 fusion is a superset over power8 fusion. */ + if (TARGET_P9_FUSION && !TARGET_P8_FUSION) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION) + { + /* We prefer to not mention undocumented options in + error messages. However, if users have managed to select + power9-fusion without selecting power8-fusion, they + already know about undocumented flags. */ + error ("-mpower9-fusion requires -mpower8-fusion"); + rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION; + } + else + rs6000_isa_flags |= OPTION_MASK_P8_FUSION; + } + + /* Enable power9 fusion if we are tuning for power9, even if we aren't + generating power9 instructions. */ + if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION)) + rs6000_isa_flags |= (processor_target_table[tune_index].target_enable + & OPTION_MASK_P9_FUSION); + + /* Power8 does not fuse sign extended loads with the addis. If we are + optimizing at high levels for speed, convert a sign extended load into a + zero extending load, and an explicit sign extension. */ + if (TARGET_P8_FUSION + && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN) + && optimize_function_for_speed_p (cfun) + && optimize >= 3) + rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN; + + /* TOC fusion requires 64-bit and medium/large code model. */ + if (TARGET_TOC_FUSION && !TARGET_POWERPC64) + { + rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION; + if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0) + warning (0, N_("-mtoc-fusion requires 64-bit")); + } + + if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL)) + { + rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION; + if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0) + warning (0, N_("-mtoc-fusion requires medium/large code model")); + } + + /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code + model. */ + if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64 + && (TARGET_CMODEL != CMODEL_SMALL) + && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION)) + rs6000_isa_flags |= OPTION_MASK_TOC_FUSION; + + /* ISA 3.0 vector instructions include ISA 2.07. */ + if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR) + { + /* We prefer to not mention undocumented options in + error messages. However, if users have managed to select + power9-vector without selecting power8-vector, they + already know about undocumented flags. */ + if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) && + (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)) + error ("-mpower9-vector requires -mpower8-vector"); + else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0) + { + rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR; + if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) + rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR; + } + else + { + /* OPTION_MASK_P9_VECTOR is explicit and + OPTION_MASK_P8_VECTOR is not explicit. 
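+ In that case -mpower9-vector quietly turns on -mpower8-vector as well,
+ since the ISA 3.0 vector instructions include the ISA 2.07 set.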
*/ + rs6000_isa_flags |= OPTION_MASK_P8_VECTOR; + rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR; + } + } + + /* -mpower9-dform turns on both -mpower9-dform-scalar and + -mpower9-dform-vector. */ + if (TARGET_P9_DFORM_BOTH > 0) + { + if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR)) + rs6000_isa_flags |= OPTION_MASK_P9_DFORM_VECTOR; + + if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR)) + rs6000_isa_flags |= OPTION_MASK_P9_DFORM_SCALAR; + } + else if (TARGET_P9_DFORM_BOTH == 0) + { + if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR)) + rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_VECTOR; + + if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR)) + rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR; + } + + /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */ + if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) && !TARGET_P9_VECTOR) + { + /* We prefer to not mention undocumented options in + error messages. However, if users have managed to select + power9-dform without selecting power9-vector, they + already know about undocumented flags. */ + if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) + && (rs6000_isa_flags_explicit & (OPTION_MASK_P9_DFORM_SCALAR + | OPTION_MASK_P9_DFORM_VECTOR))) + error ("-mpower9-dform requires -mpower9-vector"); + else if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) + { + rs6000_isa_flags &= + ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR); + rs6000_isa_flags_explicit |= + (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR); + } + else + { + /* We know that OPTION_MASK_P9_VECTOR is not explicit and + OPTION_MASK_P9_DFORM_SCALAR or OPTION_MASK_P9_DORM_VECTOR + may be explicit. */ + rs6000_isa_flags |= OPTION_MASK_P9_VECTOR; + rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR; + } + } + + if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) + && !TARGET_DIRECT_MOVE) + { + /* We prefer to not mention undocumented options in + error messages. However, if users have managed to select + power9-dform without selecting direct-move, they + already know about undocumented flags. */ + if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) + && ((rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR) || + (rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR) || + (TARGET_P9_DFORM_BOTH == 1))) + error ("-mpower9-dform, -mpower9-dform-vector, -mpower9-dform-scalar" + " require -mdirect-move"); + else if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) == 0) + { + rs6000_isa_flags |= OPTION_MASK_DIRECT_MOVE; + rs6000_isa_flags_explicit |= OPTION_MASK_DIRECT_MOVE; + } + else + { + rs6000_isa_flags &= + ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR); + rs6000_isa_flags_explicit |= + (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR); + } + } + + if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_DF) + { + /* We prefer to not mention undocumented options in + error messages. However, if users have managed to select + power9-dform without selecting upper-regs-df, they + already know about undocumented flags. */ + if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF) + error ("-mpower9-dform requires -mupper-regs-df"); + rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR; + } + + if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_SF) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF) + error ("-mpower9-dform requires -mupper-regs-sf"); + rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR; + } + + /* Enable LRA by default. 
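+ LRA (the local register allocator) is turned on here only when neither
+ -mlra nor -mno-lra was given explicitly.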
*/ + if ((rs6000_isa_flags_explicit & OPTION_MASK_LRA) == 0) + rs6000_isa_flags |= OPTION_MASK_LRA; + + /* There have been bugs with -mvsx-timode that don't show up with -mlra, + but do show up with -mno-lra. Given -mlra will become the default once + PR 69847 is fixed, turn off the options with problems by default if + -mno-lra was used, and warn if the user explicitly asked for the option. + + Enable -mpower9-dform-vector by default if LRA and other power9 options. + Enable -mvsx-timode by default if LRA and VSX. */ + if (!TARGET_LRA) + { + if (TARGET_VSX_TIMODE) + { + if ((rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) != 0) + warning (0, "-mvsx-timode might need -mlra"); + + else + rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE; + } + } + + else + { + if (TARGET_VSX && !TARGET_VSX_TIMODE + && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0) + rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE; + } + + /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07 + support. If we only have ISA 2.06 support, and the user did not specify + the switch, leave it set to -1 so the movmisalign patterns are enabled, + but we don't enable the full vectorization support */ + if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE) + TARGET_ALLOW_MOVMISALIGN = 1; + + else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX) + { + if (TARGET_ALLOW_MOVMISALIGN > 0 + && global_options_set.x_TARGET_ALLOW_MOVMISALIGN) + error ("-mallow-movmisalign requires -mvsx"); + + TARGET_ALLOW_MOVMISALIGN = 0; + } + + /* Determine when unaligned vector accesses are permitted, and when + they are preferred over masked Altivec loads. Note that if + TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then + TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is + not true. */ + if (TARGET_EFFICIENT_UNALIGNED_VSX) + { + if (!TARGET_VSX) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX) + error ("-mefficient-unaligned-vsx requires -mvsx"); + + rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX; + } + + else if (!TARGET_ALLOW_MOVMISALIGN) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX) + error ("-mefficient-unaligned-vsx requires -mallow-movmisalign"); + + rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX; + } + } + + /* Check whether we should allow small integers into VSX registers. We + require direct move to prevent the register allocator from having to move + variables through memory to do moves. SImode can be used on ISA 2.07, + while HImode and QImode require ISA 3.0. */ + if (TARGET_VSX_SMALL_INTEGER + && (!TARGET_DIRECT_MOVE || !TARGET_P8_VECTOR || !TARGET_UPPER_REGS_DI)) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_SMALL_INTEGER) + error ("-mvsx-small-integer requires -mpower8-vector, " + "-mupper-regs-di, and -mdirect-move"); + + rs6000_isa_flags &= ~OPTION_MASK_VSX_SMALL_INTEGER; + } + + /* Set long double size before the IEEE 128-bit tests. */ + if (!global_options_set.x_rs6000_long_double_type_size) + { + if (main_target_opt != NULL + && (main_target_opt->x_rs6000_long_double_type_size + != RS6000_DEFAULT_LONG_DOUBLE_SIZE)) + error ("target attribute or pragma changes long double size"); + else + rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE; + } + + /* Set -mabi=ieeelongdouble on some old targets. Note, AIX and Darwin + explicitly redefine TARGET_IEEEQUAD to 0, so those systems will not + pick up this default. 
*/ +#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD) + if (!global_options_set.x_rs6000_ieeequad) + rs6000_ieeequad = 1; +#endif + + /* Enable the default support for IEEE 128-bit floating point on Linux VSX + sytems, but don't enable the __float128 keyword. */ + if (TARGET_VSX && TARGET_LONG_DOUBLE_128 + && (TARGET_FLOAT128_ENABLE_TYPE || TARGET_IEEEQUAD) + && ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) == 0)) + rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE; + + /* IEEE 128-bit floating point requires VSX support. */ + if (!TARGET_VSX) + { + if (TARGET_FLOAT128_KEYWORD) + { + if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0) + error ("-mfloat128 requires VSX support"); + + rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE + | OPTION_MASK_FLOAT128_KEYWORD + | OPTION_MASK_FLOAT128_HW); + } + + else if (TARGET_FLOAT128_TYPE) + { + if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) != 0) + error ("-mfloat128-type requires VSX support"); + + rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE + | OPTION_MASK_FLOAT128_KEYWORD + | OPTION_MASK_FLOAT128_HW); + } + } + + /* -mfloat128 and -mfloat128-hardware internally require the underlying IEEE + 128-bit floating point support to be enabled. */ + if (!TARGET_FLOAT128_TYPE) + { + if (TARGET_FLOAT128_KEYWORD) + { + if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0) + { + error ("-mfloat128 requires -mfloat128-type"); + rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE + | OPTION_MASK_FLOAT128_KEYWORD + | OPTION_MASK_FLOAT128_HW); + } + else + rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE; + } + + if (TARGET_FLOAT128_HW) + { + if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0) + { + error ("-mfloat128-hardware requires -mfloat128-type"); + rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW; + } + else + rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE + | OPTION_MASK_FLOAT128_KEYWORD + | OPTION_MASK_FLOAT128_HW); + } + } + + /* If we have -mfloat128-type and full ISA 3.0 support, enable + -mfloat128-hardware by default. However, don't enable the __float128 + keyword. If the user explicitly turned on -mfloat128-hardware, enable the + -mfloat128 option as well if it was not already set. */ + if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW + && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE + && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW)) + rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW; + + if (TARGET_FLOAT128_HW + && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE) + { + if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0) + error ("-mfloat128-hardware requires full ISA 3.0 support"); + + rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW; + } + + if (TARGET_FLOAT128_HW && !TARGET_64BIT) + { + if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0) + error ("-mfloat128-hardware requires -m64"); + + rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW; + } + + if (TARGET_FLOAT128_HW && !TARGET_FLOAT128_KEYWORD + && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0 + && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0) + rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD; + + /* Print the options after updating the defaults. */ + if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) + rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags); + + /* E500mc does "better" if we inline more aggressively. Respect the + user's opinion, though. 
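+ The inline block-move limit is raised to 128 only while
+ rs6000_block_move_inline_limit is still 0 (its default), i.e. when it was
+ not set on the command line.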
*/ + if (rs6000_block_move_inline_limit == 0 + && (rs6000_cpu == PROCESSOR_PPCE500MC + || rs6000_cpu == PROCESSOR_PPCE500MC64 + || rs6000_cpu == PROCESSOR_PPCE5500 + || rs6000_cpu == PROCESSOR_PPCE6500)) + rs6000_block_move_inline_limit = 128; + + /* store_one_arg depends on expand_block_move to handle at least the + size of reg_parm_stack_space. */ + if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32)) + rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32); + + if (global_init_p) + { + /* If the appropriate debug option is enabled, replace the target hooks + with debug versions that call the real version and then prints + debugging information. */ + if (TARGET_DEBUG_COST) + { + targetm.rtx_costs = rs6000_debug_rtx_costs; + targetm.address_cost = rs6000_debug_address_cost; + targetm.sched.adjust_cost = rs6000_debug_adjust_cost; + } + + if (TARGET_DEBUG_ADDR) + { + targetm.legitimate_address_p = rs6000_debug_legitimate_address_p; + targetm.legitimize_address = rs6000_debug_legitimize_address; + rs6000_secondary_reload_class_ptr + = rs6000_debug_secondary_reload_class; + rs6000_secondary_memory_needed_ptr + = rs6000_debug_secondary_memory_needed; + rs6000_cannot_change_mode_class_ptr + = rs6000_debug_cannot_change_mode_class; + rs6000_preferred_reload_class_ptr + = rs6000_debug_preferred_reload_class; + rs6000_legitimize_reload_address_ptr + = rs6000_debug_legitimize_reload_address; + rs6000_mode_dependent_address_ptr + = rs6000_debug_mode_dependent_address; + } + + if (rs6000_veclibabi_name) + { + if (strcmp (rs6000_veclibabi_name, "mass") == 0) + rs6000_veclib_handler = rs6000_builtin_vectorized_libmass; + else + { + error ("unknown vectorization library ABI type (%s) for " + "-mveclibabi= switch", rs6000_veclibabi_name); + ret = false; + } + } + } + + /* Disable VSX and Altivec silently if the user switched cpus to power7 in a + target attribute or pragma which automatically enables both options, + unless the altivec ABI was set. This is set by default for 64-bit, but + not for 32-bit. */ + if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi) + rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC + | OPTION_MASK_FLOAT128_TYPE + | OPTION_MASK_FLOAT128_KEYWORD) + & ~rs6000_isa_flags_explicit); + + /* Enable Altivec ABI for AIX -maltivec. */ + if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX)) + { + if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi) + error ("target attribute or pragma changes AltiVec ABI"); + else + rs6000_altivec_abi = 1; + } + + /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For + PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can + be explicitly overridden in either case. */ + if (TARGET_ELF) + { + if (!global_options_set.x_rs6000_altivec_abi + && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX)) + { + if (main_target_opt != NULL && + !main_target_opt->x_rs6000_altivec_abi) + error ("target attribute or pragma changes AltiVec ABI"); + else + rs6000_altivec_abi = 1; + } + } + + /* Set the Darwin64 ABI as default for 64-bit Darwin. + So far, the only darwin64 targets are also MACH-O. */ + if (TARGET_MACHO + && DEFAULT_ABI == ABI_DARWIN + && TARGET_64BIT) + { + if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi) + error ("target attribute or pragma changes darwin64 ABI"); + else + { + rs6000_darwin64_abi = 1; + /* Default to natural alignment, for better performance. 
*/ + rs6000_alignment_flags = MASK_ALIGN_NATURAL; + } + } + + /* Place FP constants in the constant pool instead of TOC + if section anchors enabled. */ + if (flag_section_anchors + && !global_options_set.x_TARGET_NO_FP_IN_TOC) + TARGET_NO_FP_IN_TOC = 1; + + if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) + rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags); + +#ifdef SUBTARGET_OVERRIDE_OPTIONS + SUBTARGET_OVERRIDE_OPTIONS; +#endif +#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS + SUBSUBTARGET_OVERRIDE_OPTIONS; +#endif +#ifdef SUB3TARGET_OVERRIDE_OPTIONS + SUB3TARGET_OVERRIDE_OPTIONS; +#endif + + if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) + rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags); + + /* For the E500 family of cores, reset the single/double FP flags to let us + check that they remain constant across attributes or pragmas. Also, + clear a possible request for string instructions, not supported and which + we might have silently queried above for -Os. + + For other families, clear ISEL in case it was set implicitly. + */ + + switch (rs6000_cpu) + { + case PROCESSOR_PPC8540: + case PROCESSOR_PPC8548: + case PROCESSOR_PPCE500MC: + case PROCESSOR_PPCE500MC64: + case PROCESSOR_PPCE5500: + case PROCESSOR_PPCE6500: + + rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE; + rs6000_double_float = TARGET_E500_DOUBLE; + + rs6000_isa_flags &= ~OPTION_MASK_STRING; + + break; + + default: + + if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL)) + rs6000_isa_flags &= ~OPTION_MASK_ISEL; + + break; + } + + if (main_target_opt) + { + if (main_target_opt->x_rs6000_single_float != rs6000_single_float) + error ("target attribute or pragma changes single precision floating " + "point"); + if (main_target_opt->x_rs6000_double_float != rs6000_double_float) + error ("target attribute or pragma changes double precision floating " + "point"); + } + + /* Detect invalid option combinations with E500. */ + CHECK_E500_OPTIONS; + + rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4 + && rs6000_cpu != PROCESSOR_POWER5 + && rs6000_cpu != PROCESSOR_POWER6 + && rs6000_cpu != PROCESSOR_POWER7 + && rs6000_cpu != PROCESSOR_POWER8 + && rs6000_cpu != PROCESSOR_POWER9 + && rs6000_cpu != PROCESSOR_PPCA2 + && rs6000_cpu != PROCESSOR_CELL + && rs6000_cpu != PROCESSOR_PPC476); + rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4 + || rs6000_cpu == PROCESSOR_POWER5 + || rs6000_cpu == PROCESSOR_POWER7 + || rs6000_cpu == PROCESSOR_POWER8); + rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4 + || rs6000_cpu == PROCESSOR_POWER5 + || rs6000_cpu == PROCESSOR_POWER6 + || rs6000_cpu == PROCESSOR_POWER7 + || rs6000_cpu == PROCESSOR_POWER8 + || rs6000_cpu == PROCESSOR_POWER9 + || rs6000_cpu == PROCESSOR_PPCE500MC + || rs6000_cpu == PROCESSOR_PPCE500MC64 + || rs6000_cpu == PROCESSOR_PPCE5500 + || rs6000_cpu == PROCESSOR_PPCE6500); + + /* Allow debug switches to override the above settings. These are set to -1 + in powerpcspe.opt to indicate the user hasn't directly set the switch. */ + if (TARGET_ALWAYS_HINT >= 0) + rs6000_always_hint = TARGET_ALWAYS_HINT; + + if (TARGET_SCHED_GROUPS >= 0) + rs6000_sched_groups = TARGET_SCHED_GROUPS; + + if (TARGET_ALIGN_BRANCH_TARGETS >= 0) + rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS; + + rs6000_sched_restricted_insns_priority + = (rs6000_sched_groups ? 1 : 0); + + /* Handle -msched-costly-dep option. */ + rs6000_sched_costly_dep + = (rs6000_sched_groups ? 
true_store_to_load_dep_costly : no_dep_costly); + + if (rs6000_sched_costly_dep_str) + { + if (! strcmp (rs6000_sched_costly_dep_str, "no")) + rs6000_sched_costly_dep = no_dep_costly; + else if (! strcmp (rs6000_sched_costly_dep_str, "all")) + rs6000_sched_costly_dep = all_deps_costly; + else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load")) + rs6000_sched_costly_dep = true_store_to_load_dep_costly; + else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load")) + rs6000_sched_costly_dep = store_to_load_dep_costly; + else + rs6000_sched_costly_dep = ((enum rs6000_dependence_cost) + atoi (rs6000_sched_costly_dep_str)); + } + + /* Handle -minsert-sched-nops option. */ + rs6000_sched_insert_nops + = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none); + + if (rs6000_sched_insert_nops_str) + { + if (! strcmp (rs6000_sched_insert_nops_str, "no")) + rs6000_sched_insert_nops = sched_finish_none; + else if (! strcmp (rs6000_sched_insert_nops_str, "pad")) + rs6000_sched_insert_nops = sched_finish_pad_groups; + else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact")) + rs6000_sched_insert_nops = sched_finish_regroup_exact; + else + rs6000_sched_insert_nops = ((enum rs6000_nop_insertion) + atoi (rs6000_sched_insert_nops_str)); + } + + /* Handle stack protector */ + if (!global_options_set.x_rs6000_stack_protector_guard) +#ifdef TARGET_THREAD_SSP_OFFSET + rs6000_stack_protector_guard = SSP_TLS; +#else + rs6000_stack_protector_guard = SSP_GLOBAL; +#endif + +#ifdef TARGET_THREAD_SSP_OFFSET + rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET; + rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2; +#endif + + if (global_options_set.x_rs6000_stack_protector_guard_offset_str) + { + char *endp; + const char *str = rs6000_stack_protector_guard_offset_str; + + errno = 0; + long offset = strtol (str, &endp, 0); + if (!*str || *endp || errno) + error ("%qs is not a valid number " + "in -mstack-protector-guard-offset=", str); + + if (!IN_RANGE (offset, -0x8000, 0x7fff) + || (TARGET_64BIT && (offset & 3))) + error ("%qs is not a valid offset " + "in -mstack-protector-guard-offset=", str); + + rs6000_stack_protector_guard_offset = offset; + } + + if (global_options_set.x_rs6000_stack_protector_guard_reg_str) + { + const char *str = rs6000_stack_protector_guard_reg_str; + int reg = decode_reg_name (str); + + if (!IN_RANGE (reg, 1, 31)) + error ("%qs is not a valid base register " + "in -mstack-protector-guard-reg=", str); + + rs6000_stack_protector_guard_reg = reg; + } + + if (rs6000_stack_protector_guard == SSP_TLS + && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31)) + error ("-mstack-protector-guard=tls needs a valid base register"); + + if (global_init_p) + { +#ifdef TARGET_REGNAMES + /* If the user desires alternate register names, copy in the + alternate names now. */ + if (TARGET_REGNAMES) + memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names)); +#endif + + /* Set aix_struct_return last, after the ABI is determined. + If -maix-struct-return or -msvr4-struct-return was explicitly + used, don't override with the ABI default. */ + if (!global_options_set.x_aix_struct_return) + aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET); + +#if 0 + /* IBM XL compiler defaults to unsigned bitfields. 
*/ + if (TARGET_XL_COMPAT) + flag_signed_bitfields = 0; +#endif + + if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD) + REAL_MODE_FORMAT (TFmode) = &ibm_extended_format; + + ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1); + + /* We can only guarantee the availability of DI pseudo-ops when + assembling for 64-bit targets. */ + if (!TARGET_64BIT) + { + targetm.asm_out.aligned_op.di = NULL; + targetm.asm_out.unaligned_op.di = NULL; + } + + + /* Set branch target alignment, if not optimizing for size. */ + if (!optimize_size) + { + /* Cell wants to be aligned 8byte for dual issue. Titan wants to be + aligned 8byte to avoid misprediction by the branch predictor. */ + if (rs6000_cpu == PROCESSOR_TITAN + || rs6000_cpu == PROCESSOR_CELL) + { + if (align_functions <= 0) + align_functions = 8; + if (align_jumps <= 0) + align_jumps = 8; + if (align_loops <= 0) + align_loops = 8; + } + if (rs6000_align_branch_targets) + { + if (align_functions <= 0) + align_functions = 16; + if (align_jumps <= 0) + align_jumps = 16; + if (align_loops <= 0) + { + can_override_loop_align = 1; + align_loops = 16; + } + } + if (align_jumps_max_skip <= 0) + align_jumps_max_skip = 15; + if (align_loops_max_skip <= 0) + align_loops_max_skip = 15; + } + + /* Arrange to save and restore machine status around nested functions. */ + init_machine_status = rs6000_init_machine_status; + + /* We should always be splitting complex arguments, but we can't break + Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */ + if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) + targetm.calls.split_complex_arg = NULL; + + /* The AIX and ELFv1 ABIs define standard function descriptors. */ + if (DEFAULT_ABI == ABI_AIX) + targetm.calls.custom_function_descriptors = 0; + } + + /* Initialize rs6000_cost with the appropriate target costs. */ + if (optimize_size) + rs6000_cost = TARGET_POWERPC64 ? 
&size64_cost : &size32_cost; + else + switch (rs6000_cpu) + { + case PROCESSOR_RS64A: + rs6000_cost = &rs64a_cost; + break; + + case PROCESSOR_MPCCORE: + rs6000_cost = &mpccore_cost; + break; + + case PROCESSOR_PPC403: + rs6000_cost = &ppc403_cost; + break; + + case PROCESSOR_PPC405: + rs6000_cost = &ppc405_cost; + break; + + case PROCESSOR_PPC440: + rs6000_cost = &ppc440_cost; + break; + + case PROCESSOR_PPC476: + rs6000_cost = &ppc476_cost; + break; + + case PROCESSOR_PPC601: + rs6000_cost = &ppc601_cost; + break; + + case PROCESSOR_PPC603: + rs6000_cost = &ppc603_cost; + break; + + case PROCESSOR_PPC604: + rs6000_cost = &ppc604_cost; + break; + + case PROCESSOR_PPC604e: + rs6000_cost = &ppc604e_cost; + break; + + case PROCESSOR_PPC620: + rs6000_cost = &ppc620_cost; + break; + + case PROCESSOR_PPC630: + rs6000_cost = &ppc630_cost; + break; + + case PROCESSOR_CELL: + rs6000_cost = &ppccell_cost; + break; + + case PROCESSOR_PPC750: + case PROCESSOR_PPC7400: + rs6000_cost = &ppc750_cost; + break; + + case PROCESSOR_PPC7450: + rs6000_cost = &ppc7450_cost; + break; + + case PROCESSOR_PPC8540: + case PROCESSOR_PPC8548: + rs6000_cost = &ppc8540_cost; + break; + + case PROCESSOR_PPCE300C2: + case PROCESSOR_PPCE300C3: + rs6000_cost = &ppce300c2c3_cost; + break; + + case PROCESSOR_PPCE500MC: + rs6000_cost = &ppce500mc_cost; + break; + + case PROCESSOR_PPCE500MC64: + rs6000_cost = &ppce500mc64_cost; + break; + + case PROCESSOR_PPCE5500: + rs6000_cost = &ppce5500_cost; + break; + + case PROCESSOR_PPCE6500: + rs6000_cost = &ppce6500_cost; + break; + + case PROCESSOR_TITAN: + rs6000_cost = &titan_cost; + break; + + case PROCESSOR_POWER4: + case PROCESSOR_POWER5: + rs6000_cost = &power4_cost; + break; + + case PROCESSOR_POWER6: + rs6000_cost = &power6_cost; + break; + + case PROCESSOR_POWER7: + rs6000_cost = &power7_cost; + break; + + case PROCESSOR_POWER8: + rs6000_cost = &power8_cost; + break; + + case PROCESSOR_POWER9: + rs6000_cost = &power9_cost; + break; + + case PROCESSOR_PPCA2: + rs6000_cost = &ppca2_cost; + break; + + default: + gcc_unreachable (); + } + + if (global_init_p) + { + maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, + rs6000_cost->simultaneous_prefetches, + global_options.x_param_values, + global_options_set.x_param_values); + maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size, + global_options.x_param_values, + global_options_set.x_param_values); + maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, + rs6000_cost->cache_line_size, + global_options.x_param_values, + global_options_set.x_param_values); + maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size, + global_options.x_param_values, + global_options_set.x_param_values); + + /* Increase loop peeling limits based on performance analysis. */ + maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400, + global_options.x_param_values, + global_options_set.x_param_values); + maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400, + global_options.x_param_values, + global_options_set.x_param_values); + + /* Use the 'model' -fsched-pressure algorithm by default. */ + maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, + SCHED_PRESSURE_MODEL, + global_options.x_param_values, + global_options_set.x_param_values); + + /* If using typedef char *va_list, signal that + __builtin_va_start (&ap, 0) can be optimized to + ap = __builtin_next_arg (0). */ + if (DEFAULT_ABI != ABI_V4) + targetm.expand_builtin_va_start = NULL; + } + + /* Set up single/double float flags. 
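
The switch above picks one read-only cost structure per processor and then feeds its cache geometry into the tuning parameters. A minimal sketch of that pattern follows; the enum values and numbers are invented for the example and are not taken from the port:

    #include <stdio.h>

    enum cpu { CPU_GENERIC, CPU_POWER8, CPU_POWER9 };

    struct costs
    {
      int l1_cache_size;            /* in KiB */
      int cache_line_size;          /* in bytes */
      int simultaneous_prefetches;
    };

    static const struct costs generic_cost = { 32,  32,  4 };
    static const struct costs power8_cost  = { 64, 128, 12 };
    static const struct costs power9_cost  = { 32, 128,  8 };

    /* Select the cost table for a processor, falling back to a generic one.  */
    static const struct costs *
    select_cost (enum cpu cpu)
    {
      switch (cpu)
        {
        case CPU_POWER8: return &power8_cost;
        case CPU_POWER9: return &power9_cost;
        default:         return &generic_cost;
        }
    }

    int
    main (void)
    {
      const struct costs *c = select_cost (CPU_POWER9);
      printf ("line size %d, prefetches %d\n",
              c->cache_line_size, c->simultaneous_prefetches);
      return 0;
    }
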
+ If TARGET_HARD_FLOAT is set, but neither single or double is set, + then set both flags. */ + if (TARGET_HARD_FLOAT && TARGET_FPRS + && rs6000_single_float == 0 && rs6000_double_float == 0) + rs6000_single_float = rs6000_double_float = 1; + + /* If not explicitly specified via option, decide whether to generate indexed + load/store instructions. A value of -1 indicates that the + initial value of this variable has not been overwritten. During + compilation, TARGET_AVOID_XFORM is either 0 or 1. */ + if (TARGET_AVOID_XFORM == -1) + /* Avoid indexed addressing when targeting Power6 in order to avoid the + DERAT mispredict penalty. However the LVE and STVE altivec instructions + need indexed accesses and the type used is the scalar type of the element + being loaded or stored. */ + TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB + && !TARGET_ALTIVEC); + + /* Set the -mrecip options. */ + if (rs6000_recip_name) + { + char *p = ASTRDUP (rs6000_recip_name); + char *q; + unsigned int mask, i; + bool invert; + + while ((q = strtok (p, ",")) != NULL) + { + p = NULL; + if (*q == '!') + { + invert = true; + q++; + } + else + invert = false; + + if (!strcmp (q, "default")) + mask = ((TARGET_RECIP_PRECISION) + ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION); + else + { + for (i = 0; i < ARRAY_SIZE (recip_options); i++) + if (!strcmp (q, recip_options[i].string)) + { + mask = recip_options[i].mask; + break; + } + + if (i == ARRAY_SIZE (recip_options)) + { + error ("unknown option for -mrecip=%s", q); + invert = false; + mask = 0; + ret = false; + } + } + + if (invert) + rs6000_recip_control &= ~mask; + else + rs6000_recip_control |= mask; + } + } + + /* Set the builtin mask of the various options used that could affect which + builtins were used. In the past we used target_flags, but we've run out + of bits, and some options like SPE and PAIRED are no longer in + target_flags. */ + rs6000_builtin_mask = rs6000_builtin_mask_calculate (); + if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET) + rs6000_print_builtin_options (stderr, 0, "builtin mask", + rs6000_builtin_mask); + + /* Initialize all of the registers. */ + rs6000_init_hard_regno_mode_ok (global_init_p); + + /* Save the initial options in case the user does function specific options */ + if (global_init_p) + target_option_default_node = target_option_current_node + = build_target_option_node (&global_options); + + /* If not explicitly specified via option, decide whether to generate the + extra blr's required to preserve the link stack on some cpus (eg, 476). */ + if (TARGET_LINK_STACK == -1) + SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic); + + return ret; +} + +/* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to + define the target cpu type. */ + +static void +rs6000_option_override (void) +{ + (void) rs6000_option_override_internal (true); +} + + +/* Implement targetm.vectorize.builtin_mask_for_load. */ +static tree +rs6000_builtin_mask_for_load (void) +{ + /* Don't use lvsl/vperm for P8 and similarly efficient machines. */ + if ((TARGET_ALTIVEC && !TARGET_VSX) + || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX)) + return altivec_builtin_mask_for_load; + else + return 0; +} + +/* Implement LOOP_ALIGN. */ +int +rs6000_loop_align (rtx label) +{ + basic_block bb; + int ninsns; + + /* Don't override loop alignment if -falign-loops was specified. 
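
The -mrecip= handling above walks a comma-separated list with strtok, where a leading '!' clears the corresponding mask bits instead of setting them. A self-contained sketch of that parsing loop, with made-up option names and mask values:

    #include <stdio.h>
    #include <string.h>

    #define RECIP_DIV  0x1
    #define RECIP_SQRT 0x2

    /* Accumulate a control mask from a list like "all,!sqrt".  */
    static unsigned
    parse_recip (char *list)
    {
      unsigned control = 0;
      char *p = list, *q;

      while ((q = strtok (p, ",")) != NULL)
        {
          int invert = 0;
          unsigned mask = 0;

          p = NULL;              /* passing NULL continues the same string */
          if (*q == '!')
            {
              invert = 1;
              q++;
            }

          if (!strcmp (q, "div"))
            mask = RECIP_DIV;
          else if (!strcmp (q, "sqrt"))
            mask = RECIP_SQRT;
          else if (!strcmp (q, "all"))
            mask = RECIP_DIV | RECIP_SQRT;

          if (invert)
            control &= ~mask;
          else
            control |= mask;
        }
      return control;
    }

    int
    main (void)
    {
      char opt[] = "all,!sqrt";
      printf ("%#x\n", parse_recip (opt));   /* prints 0x1 */
      return 0;
    }
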
*/ + if (!can_override_loop_align) + return align_loops_log; + + bb = BLOCK_FOR_INSN (label); + ninsns = num_loop_insns(bb->loop_father); + + /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */ + if (ninsns > 4 && ninsns <= 8 + && (rs6000_cpu == PROCESSOR_POWER4 + || rs6000_cpu == PROCESSOR_POWER5 + || rs6000_cpu == PROCESSOR_POWER6 + || rs6000_cpu == PROCESSOR_POWER7 + || rs6000_cpu == PROCESSOR_POWER8 + || rs6000_cpu == PROCESSOR_POWER9)) + return 5; + else + return align_loops_log; +} + +/* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */ +static int +rs6000_loop_align_max_skip (rtx_insn *label) +{ + return (1 << rs6000_loop_align (label)) - 1; +} + +/* Return true iff, data reference of TYPE can reach vector alignment (16) + after applying N number of iterations. This routine does not determine + how may iterations are required to reach desired alignment. */ + +static bool +rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed) +{ + if (is_packed) + return false; + + if (TARGET_32BIT) + { + if (rs6000_alignment_flags == MASK_ALIGN_NATURAL) + return true; + + if (rs6000_alignment_flags == MASK_ALIGN_POWER) + return true; + + return false; + } + else + { + if (TARGET_MACHO) + return false; + + /* Assuming that all other types are naturally aligned. CHECKME! */ + return true; + } +} + +/* Return true if the vector misalignment factor is supported by the + target. */ +static bool +rs6000_builtin_support_vector_misalignment (machine_mode mode, + const_tree type, + int misalignment, + bool is_packed) +{ + if (TARGET_VSX) + { + if (TARGET_EFFICIENT_UNALIGNED_VSX) + return true; + + /* Return if movmisalign pattern is not supported for this mode. */ + if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing) + return false; + + if (misalignment == -1) + { + /* Misalignment factor is unknown at compile time but we know + it's word aligned. */ + if (rs6000_vector_alignment_reachable (type, is_packed)) + { + int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type)); + + if (element_size == 64 || element_size == 32) + return true; + } + + return false; + } + + /* VSX supports word-aligned vector. */ + if (misalignment % 4 == 0) + return true; + } + return false; +} + +/* Implement targetm.vectorize.builtin_vectorization_cost. */ +static int +rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + tree vectype, int misalign) +{ + unsigned elements; + tree elem_type; + + switch (type_of_cost) + { + case scalar_stmt: + case scalar_load: + case scalar_store: + case vector_stmt: + case vector_load: + case vector_store: + case vec_to_scalar: + case scalar_to_vec: + case cond_branch_not_taken: + return 1; + + case vec_perm: + if (TARGET_VSX) + return 3; + else + return 1; + + case vec_promote_demote: + if (TARGET_VSX) + return 4; + else + return 1; + + case cond_branch_taken: + return 3; + + case unaligned_load: + if (TARGET_P9_VECTOR) + return 3; + + if (TARGET_EFFICIENT_UNALIGNED_VSX) + return 1; + + if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN) + { + elements = TYPE_VECTOR_SUBPARTS (vectype); + if (elements == 2) + /* Double word aligned. */ + return 2; + + if (elements == 4) + { + switch (misalign) + { + case 8: + /* Double word aligned. */ + return 2; + + case -1: + /* Unknown misalignment. */ + case 4: + case 12: + /* Word aligned. */ + return 22; + + default: + gcc_unreachable (); + } + } + } + + if (TARGET_ALTIVEC) + /* Misaligned loads are not supported. 
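
The loop-alignment hooks above work in log2 units: returning 5 from the LOOP_ALIGN hook means "align to 32 bytes", and the max-skip hook derives its value as one less than that alignment. A tiny sketch of the arithmetic:

    #include <stdio.h>

    static int
    align_bytes (int align_log)
    {
      return 1 << align_log;            /* log2 value to byte alignment */
    }

    static int
    align_max_skip (int align_log)
    {
      return (1 << align_log) - 1;      /* bytes of padding worth skipping */
    }

    int
    main (void)
    {
      printf ("align %d bytes, max skip %d\n",
              align_bytes (5), align_max_skip (5));   /* 32 and 31 */
      return 0;
    }
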
*/ + gcc_unreachable (); + + return 2; + + case unaligned_store: + if (TARGET_EFFICIENT_UNALIGNED_VSX) + return 1; + + if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN) + { + elements = TYPE_VECTOR_SUBPARTS (vectype); + if (elements == 2) + /* Double word aligned. */ + return 2; + + if (elements == 4) + { + switch (misalign) + { + case 8: + /* Double word aligned. */ + return 2; + + case -1: + /* Unknown misalignment. */ + case 4: + case 12: + /* Word aligned. */ + return 23; + + default: + gcc_unreachable (); + } + } + } + + if (TARGET_ALTIVEC) + /* Misaligned stores are not supported. */ + gcc_unreachable (); + + return 2; + + case vec_construct: + /* This is a rough approximation assuming non-constant elements + constructed into a vector via element insertion. FIXME: + vec_construct is not granular enough for uniformly good + decisions. If the initialization is a splat, this is + cheaper than we estimate. Improve this someday. */ + elem_type = TREE_TYPE (vectype); + /* 32-bit vectors loaded into registers are stored as double + precision, so we need 2 permutes, 2 converts, and 1 merge + to construct a vector of short floats from them. */ + if (SCALAR_FLOAT_TYPE_P (elem_type) + && TYPE_PRECISION (elem_type) == 32) + return 5; + /* On POWER9, integer vector types are built up in GPRs and then + use a direct move (2 cycles). For POWER8 this is even worse, + as we need two direct moves and a merge, and the direct moves + are five cycles. */ + else if (INTEGRAL_TYPE_P (elem_type)) + { + if (TARGET_P9_VECTOR) + return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2; + else + return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 11; + } + else + /* V2DFmode doesn't need a direct move. */ + return 2; + + default: + gcc_unreachable (); + } +} + +/* Implement targetm.vectorize.preferred_simd_mode. */ + +static machine_mode +rs6000_preferred_simd_mode (machine_mode mode) +{ + if (TARGET_VSX) + switch (mode) + { + case DFmode: + return V2DFmode; + default:; + } + if (TARGET_ALTIVEC || TARGET_VSX) + switch (mode) + { + case SFmode: + return V4SFmode; + case TImode: + return V1TImode; + case DImode: + return V2DImode; + case SImode: + return V4SImode; + case HImode: + return V8HImode; + case QImode: + return V16QImode; + default:; + } + if (TARGET_SPE) + switch (mode) + { + case SFmode: + return V2SFmode; + case SImode: + return V2SImode; + default:; + } + if (TARGET_PAIRED_FLOAT + && mode == SFmode) + return V2SFmode; + return word_mode; +} + +typedef struct _rs6000_cost_data +{ + struct loop *loop_info; + unsigned cost[3]; +} rs6000_cost_data; + +/* Test for likely overcommitment of vector hardware resources. If a + loop iteration is relatively large, and too large a percentage of + instructions in the loop are vectorized, the cost model may not + adequately reflect delays from unavailable vector resources. + Penalize the loop body cost for this case. 
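
The vec_construct estimate above charges one operation per merge (nelts - 1) plus the cost of moving the values from GPRs into the vector unit: cheap with ISA 3.0 direct moves, much more expensive on the POWER8-style path. A sketch of the arithmetic (the function name is invented):

    #include <stdio.h>

    static int
    vec_construct_cost (int nelts, int has_p9_vector)
    {
      /* nelts - 1 merges, plus 2 for a direct move on ISA 3.0, or 11 for
         two direct moves and a merge on the older path.  */
      return nelts - 1 + (has_p9_vector ? 2 : 11);
    }

    int
    main (void)
    {
      printf ("V4SI with P9 vectors: %d\n", vec_construct_cost (4, 1));  /* 5 */
      printf ("V4SI without:         %d\n", vec_construct_cost (4, 0));  /* 14 */
      return 0;
    }
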
*/ + +static void +rs6000_density_test (rs6000_cost_data *data) +{ + const int DENSITY_PCT_THRESHOLD = 85; + const int DENSITY_SIZE_THRESHOLD = 70; + const int DENSITY_PENALTY = 10; + struct loop *loop = data->loop_info; + basic_block *bbs = get_loop_body (loop); + int nbbs = loop->num_nodes; + int vec_cost = data->cost[vect_body], not_vec_cost = 0; + int i, density_pct; + + for (i = 0; i < nbbs; i++) + { + basic_block bb = bbs[i]; + gimple_stmt_iterator gsi; + + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + + if (!STMT_VINFO_RELEVANT_P (stmt_info) + && !STMT_VINFO_IN_PATTERN_P (stmt_info)) + not_vec_cost++; + } + } + + free (bbs); + density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost); + + if (density_pct > DENSITY_PCT_THRESHOLD + && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD) + { + data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100; + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "density %d%%, cost %d exceeds threshold, penalizing " + "loop body cost by %d%%", density_pct, + vec_cost + not_vec_cost, DENSITY_PENALTY); + } +} + +/* Implement targetm.vectorize.init_cost. */ + +/* For each vectorized loop, this var holds TRUE iff a non-memory vector + instruction is needed by the vectorization. */ +static bool rs6000_vect_nonmem; + +static void * +rs6000_init_cost (struct loop *loop_info) +{ + rs6000_cost_data *data = XNEW (struct _rs6000_cost_data); + data->loop_info = loop_info; + data->cost[vect_prologue] = 0; + data->cost[vect_body] = 0; + data->cost[vect_epilogue] = 0; + rs6000_vect_nonmem = false; + return data; +} + +/* Implement targetm.vectorize.add_stmt_cost. */ + +static unsigned +rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, + struct _stmt_vec_info *stmt_info, int misalign, + enum vect_cost_model_location where) +{ + rs6000_cost_data *cost_data = (rs6000_cost_data*) data; + unsigned retval = 0; + + if (flag_vect_cost_model) + { + tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; + int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype, + misalign); + /* Statements in an inner loop relative to the loop being + vectorized are weighted more heavily. The value here is + arbitrary and could potentially be improved with analysis. */ + if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) + count *= 50; /* FIXME. */ + + retval = (unsigned) (count * stmt_cost); + cost_data->cost[where] += retval; + + /* Check whether we're doing something other than just a copy loop. + Not all such loops may be profitably vectorized; see + rs6000_finish_cost. */ + if ((kind == vec_to_scalar || kind == vec_perm + || kind == vec_promote_demote || kind == vec_construct + || kind == scalar_to_vec) + || (where == vect_body && kind == vector_stmt)) + rs6000_vect_nonmem = true; + } + + return retval; +} + +/* Implement targetm.vectorize.finish_cost. */ + +static void +rs6000_finish_cost (void *data, unsigned *prologue_cost, + unsigned *body_cost, unsigned *epilogue_cost) +{ + rs6000_cost_data *cost_data = (rs6000_cost_data*) data; + + if (cost_data->loop_info) + rs6000_density_test (cost_data); + + /* Don't vectorize minimum-vectorization-factor, simple copy loops + that require versioning for any reason. The vectorization is at + best a wash inside the loop, and the versioning checks make + profitability highly unlikely and potentially quite harmful. 
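
The density heuristic above inflates the loop-body cost by 10% when vectorized statements make up more than 85% of a sufficiently large body, to account for vector-resource pressure that the per-statement costs do not see. A standalone sketch of the same computation:

    #include <stdio.h>

    static int
    density_adjust (int vec_cost, int not_vec_cost)
    {
      const int DENSITY_PCT_THRESHOLD = 85;
      const int DENSITY_SIZE_THRESHOLD = 70;
      const int DENSITY_PENALTY = 10;
      int density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);

      if (density_pct > DENSITY_PCT_THRESHOLD
          && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
        return vec_cost * (100 + DENSITY_PENALTY) / 100;
      return vec_cost;
    }

    int
    main (void)
    {
      printf ("%d\n", density_adjust (90, 5));   /* 94% dense, size 95: penalized to 99 */
      printf ("%d\n", density_adjust (40, 20));  /* 66% dense: unchanged, 40 */
      return 0;
    }
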
*/ + if (cost_data->loop_info) + { + loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info); + if (!rs6000_vect_nonmem + && LOOP_VINFO_VECT_FACTOR (vec_info) == 2 + && LOOP_REQUIRES_VERSIONING (vec_info)) + cost_data->cost[vect_body] += 10000; + } + + *prologue_cost = cost_data->cost[vect_prologue]; + *body_cost = cost_data->cost[vect_body]; + *epilogue_cost = cost_data->cost[vect_epilogue]; +} + +/* Implement targetm.vectorize.destroy_cost_data. */ + +static void +rs6000_destroy_cost_data (void *data) +{ + free (data); +} + +/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a + library with vectorized intrinsics. */ + +static tree +rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out, + tree type_in) +{ + char name[32]; + const char *suffix = NULL; + tree fntype, new_fndecl, bdecl = NULL_TREE; + int n_args = 1; + const char *bname; + machine_mode el_mode, in_mode; + int n, in_n; + + /* Libmass is suitable for unsafe math only as it does not correctly support + parts of IEEE with the required precision such as denormals. Only support + it if we have VSX to use the simd d2 or f4 functions. + XXX: Add variable length support. */ + if (!flag_unsafe_math_optimizations || !TARGET_VSX) + return NULL_TREE; + + el_mode = TYPE_MODE (TREE_TYPE (type_out)); + n = TYPE_VECTOR_SUBPARTS (type_out); + in_mode = TYPE_MODE (TREE_TYPE (type_in)); + in_n = TYPE_VECTOR_SUBPARTS (type_in); + if (el_mode != in_mode + || n != in_n) + return NULL_TREE; + + switch (fn) + { + CASE_CFN_ATAN2: + CASE_CFN_HYPOT: + CASE_CFN_POW: + n_args = 2; + gcc_fallthrough (); + + CASE_CFN_ACOS: + CASE_CFN_ACOSH: + CASE_CFN_ASIN: + CASE_CFN_ASINH: + CASE_CFN_ATAN: + CASE_CFN_ATANH: + CASE_CFN_CBRT: + CASE_CFN_COS: + CASE_CFN_COSH: + CASE_CFN_ERF: + CASE_CFN_ERFC: + CASE_CFN_EXP2: + CASE_CFN_EXP: + CASE_CFN_EXPM1: + CASE_CFN_LGAMMA: + CASE_CFN_LOG10: + CASE_CFN_LOG1P: + CASE_CFN_LOG2: + CASE_CFN_LOG: + CASE_CFN_SIN: + CASE_CFN_SINH: + CASE_CFN_SQRT: + CASE_CFN_TAN: + CASE_CFN_TANH: + if (el_mode == DFmode && n == 2) + { + bdecl = mathfn_built_in (double_type_node, fn); + suffix = "d2"; /* pow -> powd2 */ + } + else if (el_mode == SFmode && n == 4) + { + bdecl = mathfn_built_in (float_type_node, fn); + suffix = "4"; /* powf -> powf4 */ + } + else + return NULL_TREE; + if (!bdecl) + return NULL_TREE; + break; + + default: + return NULL_TREE; + } + + gcc_assert (suffix != NULL); + bname = IDENTIFIER_POINTER (DECL_NAME (bdecl)); + if (!bname) + return NULL_TREE; + + strcpy (name, bname + sizeof ("__builtin_") - 1); + strcat (name, suffix); + + if (n_args == 1) + fntype = build_function_type_list (type_out, type_in, NULL); + else if (n_args == 2) + fntype = build_function_type_list (type_out, type_in, type_in, NULL); + else + gcc_unreachable (); + + /* Build a function declaration for the vectorized function. */ + new_fndecl = build_decl (BUILTINS_LOCATION, + FUNCTION_DECL, get_identifier (name), fntype); + TREE_PUBLIC (new_fndecl) = 1; + DECL_EXTERNAL (new_fndecl) = 1; + DECL_IS_NOVOPS (new_fndecl) = 1; + TREE_READONLY (new_fndecl) = 1; + + return new_fndecl; +} + +/* Returns a function decl for a vectorized version of the builtin function + with builtin function code FN and the result vector type TYPE, or NULL_TREE + if it is not available. 
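
The MASS lookup above derives the vector routine's name from the scalar builtin by stripping the "__builtin_" prefix and appending a suffix, so __builtin_pow becomes powd2 for V2DF and __builtin_powf becomes powf4 for V4SF. A sketch of the string handling:

    #include <stdio.h>
    #include <string.h>

    /* Build "powd2" from "__builtin_pow" and "d2".  */
    static void
    mass_name (char *out, const char *builtin_name, const char *suffix)
    {
      strcpy (out, builtin_name + sizeof ("__builtin_") - 1);
      strcat (out, suffix);
    }

    int
    main (void)
    {
      char name[32];
      mass_name (name, "__builtin_pow", "d2");
      printf ("%s\n", name);               /* prints "powd2" */
      return 0;
    }
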
*/ + +static tree +rs6000_builtin_vectorized_function (unsigned int fn, tree type_out, + tree type_in) +{ + machine_mode in_mode, out_mode; + int in_n, out_n; + + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n", + combined_fn_name (combined_fn (fn)), + GET_MODE_NAME (TYPE_MODE (type_out)), + GET_MODE_NAME (TYPE_MODE (type_in))); + + if (TREE_CODE (type_out) != VECTOR_TYPE + || TREE_CODE (type_in) != VECTOR_TYPE + || !TARGET_VECTORIZE_BUILTINS) + return NULL_TREE; + + out_mode = TYPE_MODE (TREE_TYPE (type_out)); + out_n = TYPE_VECTOR_SUBPARTS (type_out); + in_mode = TYPE_MODE (TREE_TYPE (type_in)); + in_n = TYPE_VECTOR_SUBPARTS (type_in); + + switch (fn) + { + CASE_CFN_COPYSIGN: + if (VECTOR_UNIT_VSX_P (V2DFmode) + && out_mode == DFmode && out_n == 2 + && in_mode == DFmode && in_n == 2) + return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP]; + if (VECTOR_UNIT_VSX_P (V4SFmode) + && out_mode == SFmode && out_n == 4 + && in_mode == SFmode && in_n == 4) + return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP]; + if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) + && out_mode == SFmode && out_n == 4 + && in_mode == SFmode && in_n == 4) + return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF]; + break; + CASE_CFN_CEIL: + if (VECTOR_UNIT_VSX_P (V2DFmode) + && out_mode == DFmode && out_n == 2 + && in_mode == DFmode && in_n == 2) + return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP]; + if (VECTOR_UNIT_VSX_P (V4SFmode) + && out_mode == SFmode && out_n == 4 + && in_mode == SFmode && in_n == 4) + return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP]; + if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) + && out_mode == SFmode && out_n == 4 + && in_mode == SFmode && in_n == 4) + return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP]; + break; + CASE_CFN_FLOOR: + if (VECTOR_UNIT_VSX_P (V2DFmode) + && out_mode == DFmode && out_n == 2 + && in_mode == DFmode && in_n == 2) + return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM]; + if (VECTOR_UNIT_VSX_P (V4SFmode) + && out_mode == SFmode && out_n == 4 + && in_mode == SFmode && in_n == 4) + return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM]; + if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) + && out_mode == SFmode && out_n == 4 + && in_mode == SFmode && in_n == 4) + return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM]; + break; + CASE_CFN_FMA: + if (VECTOR_UNIT_VSX_P (V2DFmode) + && out_mode == DFmode && out_n == 2 + && in_mode == DFmode && in_n == 2) + return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP]; + if (VECTOR_UNIT_VSX_P (V4SFmode) + && out_mode == SFmode && out_n == 4 + && in_mode == SFmode && in_n == 4) + return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP]; + if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) + && out_mode == SFmode && out_n == 4 + && in_mode == SFmode && in_n == 4) + return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP]; + break; + CASE_CFN_TRUNC: + if (VECTOR_UNIT_VSX_P (V2DFmode) + && out_mode == DFmode && out_n == 2 + && in_mode == DFmode && in_n == 2) + return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ]; + if (VECTOR_UNIT_VSX_P (V4SFmode) + && out_mode == SFmode && out_n == 4 + && in_mode == SFmode && in_n == 4) + return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ]; + if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) + && out_mode == SFmode && out_n == 4 + && in_mode == SFmode && in_n == 4) + return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ]; + break; + CASE_CFN_NEARBYINT: + if (VECTOR_UNIT_VSX_P (V2DFmode) + && flag_unsafe_math_optimizations + && out_mode == DFmode && out_n == 2 + && in_mode == DFmode && in_n == 2) + return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI]; + if 
(VECTOR_UNIT_VSX_P (V4SFmode) + && flag_unsafe_math_optimizations + && out_mode == SFmode && out_n == 4 + && in_mode == SFmode && in_n == 4) + return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI]; + break; + CASE_CFN_RINT: + if (VECTOR_UNIT_VSX_P (V2DFmode) + && !flag_trapping_math + && out_mode == DFmode && out_n == 2 + && in_mode == DFmode && in_n == 2) + return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC]; + if (VECTOR_UNIT_VSX_P (V4SFmode) + && !flag_trapping_math + && out_mode == SFmode && out_n == 4 + && in_mode == SFmode && in_n == 4) + return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC]; + break; + default: + break; + } + + /* Generate calls to libmass if appropriate. */ + if (rs6000_veclib_handler) + return rs6000_veclib_handler (combined_fn (fn), type_out, type_in); + + return NULL_TREE; +} + +/* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */ + +static tree +rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out, + tree type_in) +{ + machine_mode in_mode, out_mode; + int in_n, out_n; + + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n", + IDENTIFIER_POINTER (DECL_NAME (fndecl)), + GET_MODE_NAME (TYPE_MODE (type_out)), + GET_MODE_NAME (TYPE_MODE (type_in))); + + if (TREE_CODE (type_out) != VECTOR_TYPE + || TREE_CODE (type_in) != VECTOR_TYPE + || !TARGET_VECTORIZE_BUILTINS) + return NULL_TREE; + + out_mode = TYPE_MODE (TREE_TYPE (type_out)); + out_n = TYPE_VECTOR_SUBPARTS (type_out); + in_mode = TYPE_MODE (TREE_TYPE (type_in)); + in_n = TYPE_VECTOR_SUBPARTS (type_in); + + enum rs6000_builtins fn + = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); + switch (fn) + { + case RS6000_BUILTIN_RSQRTF: + if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode) + && out_mode == SFmode && out_n == 4 + && in_mode == SFmode && in_n == 4) + return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP]; + break; + case RS6000_BUILTIN_RSQRT: + if (VECTOR_UNIT_VSX_P (V2DFmode) + && out_mode == DFmode && out_n == 2 + && in_mode == DFmode && in_n == 2) + return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF]; + break; + case RS6000_BUILTIN_RECIPF: + if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode) + && out_mode == SFmode && out_n == 4 + && in_mode == SFmode && in_n == 4) + return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP]; + break; + case RS6000_BUILTIN_RECIP: + if (VECTOR_UNIT_VSX_P (V2DFmode) + && out_mode == DFmode && out_n == 2 + && in_mode == DFmode && in_n == 2) + return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF]; + break; + default: + break; + } + return NULL_TREE; +} + +/* Default CPU string for rs6000*_file_start functions. */ +static const char *rs6000_default_cpu; + +/* Do anything needed at the start of the asm file. 
*/ + +static void +rs6000_file_start (void) +{ + char buffer[80]; + const char *start = buffer; + FILE *file = asm_out_file; + + rs6000_default_cpu = TARGET_CPU_DEFAULT; + + default_file_start (); + + if (flag_verbose_asm) + { + sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START); + + if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0') + { + fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu); + start = ""; + } + + if (global_options_set.x_rs6000_cpu_index) + { + fprintf (file, "%s -mcpu=%s", start, + processor_target_table[rs6000_cpu_index].name); + start = ""; + } + + if (global_options_set.x_rs6000_tune_index) + { + fprintf (file, "%s -mtune=%s", start, + processor_target_table[rs6000_tune_index].name); + start = ""; + } + + if (PPC405_ERRATUM77) + { + fprintf (file, "%s PPC405CR_ERRATUM77", start); + start = ""; + } + +#ifdef USING_ELFOS_H + switch (rs6000_sdata) + { + case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break; + case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break; + case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break; + case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break; + } + + if (rs6000_sdata && g_switch_value) + { + fprintf (file, "%s -G %d", start, + g_switch_value); + start = ""; + } +#endif + + if (*start == '\0') + putc ('\n', file); + } + +#ifdef USING_ELFOS_H + if (!(rs6000_default_cpu && rs6000_default_cpu[0]) + && !global_options_set.x_rs6000_cpu_index) + { + fputs ("\t.machine ", asm_out_file); + if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0) + fputs ("power9\n", asm_out_file); + else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0) + fputs ("power8\n", asm_out_file); + else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0) + fputs ("power7\n", asm_out_file); + else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0) + fputs ("power6\n", asm_out_file); + else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0) + fputs ("power5\n", asm_out_file); + else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0) + fputs ("power4\n", asm_out_file); + else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0) + fputs ("ppc64\n", asm_out_file); + else + fputs ("ppc\n", asm_out_file); + } +#endif + + if (DEFAULT_ABI == ABI_ELFv2) + fprintf (file, "\t.abiversion 2\n"); +} + + +/* Return nonzero if this function is known to have a null epilogue. */ + +int +direct_return (void) +{ + if (reload_completed) + { + rs6000_stack_t *info = rs6000_stack_info (); + + if (info->first_gp_reg_save == 32 + && info->first_fp_reg_save == 64 + && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1 + && ! info->lr_save_p + && ! info->cr_save_p + && info->vrsave_size == 0 + && ! info->push_p) + return 1; + } + + return 0; +} + +/* Return the number of instructions it takes to form a constant in an + integer register. 
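
The .machine selection above tests ISA feature flags from newest to oldest so that the newest enabled feature decides the assembler target. A sketch of that fall-through ordering; the flag names and bit values here are invented and only stand in for the real option masks:

    #include <stdio.h>

    #define FLAG_MODULO      0x1   /* stands in for a POWER9 feature bit */
    #define FLAG_DIRECT_MOVE 0x2   /* POWER8 */
    #define FLAG_POPCNTD     0x4   /* POWER7 */

    static const char *
    machine_string (unsigned isa_flags)
    {
      if (isa_flags & FLAG_MODULO)
        return "power9";
      if (isa_flags & FLAG_DIRECT_MOVE)
        return "power8";
      if (isa_flags & FLAG_POPCNTD)
        return "power7";
      return "ppc";
    }

    int
    main (void)
    {
      printf (".machine %s\n", machine_string (FLAG_POPCNTD | FLAG_DIRECT_MOVE));
      /* prints ".machine power8": the newer feature wins */
      return 0;
    }
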
*/ + +int +num_insns_constant_wide (HOST_WIDE_INT value) +{ + /* signed constant loadable with addi */ + if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000) + return 1; + + /* constant loadable with addis */ + else if ((value & 0xffff) == 0 + && (value >> 31 == -1 || value >> 31 == 0)) + return 1; + + else if (TARGET_POWERPC64) + { + HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000; + HOST_WIDE_INT high = value >> 31; + + if (high == 0 || high == -1) + return 2; + + high >>= 1; + + if (low == 0) + return num_insns_constant_wide (high) + 1; + else if (high == 0) + return num_insns_constant_wide (low) + 1; + else + return (num_insns_constant_wide (high) + + num_insns_constant_wide (low) + 1); + } + + else + return 2; +} + +int +num_insns_constant (rtx op, machine_mode mode) +{ + HOST_WIDE_INT low, high; + + switch (GET_CODE (op)) + { + case CONST_INT: + if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1 + && rs6000_is_valid_and_mask (op, mode)) + return 2; + else + return num_insns_constant_wide (INTVAL (op)); + + case CONST_WIDE_INT: + { + int i; + int ins = CONST_WIDE_INT_NUNITS (op) - 1; + for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++) + ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i)); + return ins; + } + + case CONST_DOUBLE: + if (mode == SFmode || mode == SDmode) + { + long l; + + if (DECIMAL_FLOAT_MODE_P (mode)) + REAL_VALUE_TO_TARGET_DECIMAL32 + (*CONST_DOUBLE_REAL_VALUE (op), l); + else + REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l); + return num_insns_constant_wide ((HOST_WIDE_INT) l); + } + + long l[2]; + if (DECIMAL_FLOAT_MODE_P (mode)) + REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l); + else + REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l); + high = l[WORDS_BIG_ENDIAN == 0]; + low = l[WORDS_BIG_ENDIAN != 0]; + + if (TARGET_32BIT) + return (num_insns_constant_wide (low) + + num_insns_constant_wide (high)); + else + { + if ((high == 0 && low >= 0) + || (high == -1 && low < 0)) + return num_insns_constant_wide (low); + + else if (rs6000_is_valid_and_mask (op, mode)) + return 2; + + else if (low == 0) + return num_insns_constant_wide (high) + 1; + + else + return (num_insns_constant_wide (high) + + num_insns_constant_wide (low) + 1); + } + + default: + gcc_unreachable (); + } +} + +/* Interpret element ELT of the CONST_VECTOR OP as an integer value. + If the mode of OP is MODE_VECTOR_INT, this simply returns the + corresponding element of the vector, but for V4SFmode and V2SFmode, + the corresponding "float" is interpreted as an SImode integer. */ + +HOST_WIDE_INT +const_vector_elt_as_int (rtx op, unsigned int elt) +{ + rtx tmp; + + /* We can't handle V2DImode and V2DFmode vector constants here yet. */ + gcc_assert (GET_MODE (op) != V2DImode + && GET_MODE (op) != V2DFmode); + + tmp = CONST_VECTOR_ELT (op, elt); + if (GET_MODE (op) == V4SFmode + || GET_MODE (op) == V2SFmode) + tmp = gen_lowpart (SImode, tmp); + return INTVAL (tmp); +} + +/* Return true if OP can be synthesized with a particular vspltisb, vspltish + or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used + depends on STEP and COPIES, one of which will be 1. If COPIES > 1, + all items are set to the same value and contain COPIES replicas of the + vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's + operand and the others are set to the value of the operand's msb. 
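
The single-instruction tests above encode two facts: li/addi can materialize any value that fits a signed 16-bit immediate, and lis/addis any value whose low 16 bits are zero and which is a sign-extended 32-bit value. A standalone sketch of both predicates:

    #include <stdio.h>

    static int
    fits_addi (long long value)
    {
      /* Same trick as above: shift the signed range onto an unsigned one.  */
      return (unsigned long long) value + 0x8000 < 0x10000;
    }

    static int
    fits_addis (long long value)
    {
      return (value & 0xffff) == 0
             && (value >> 31 == 0 || value >> 31 == -1);
    }

    int
    main (void)
    {
      printf ("%d %d\n", fits_addi (-32768), fits_addis (-32768));   /* 1 0 */
      printf ("%d %d\n", fits_addi (0x10000), fits_addis (0x10000)); /* 0 1 */
      return 0;
    }
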
*/ + +static bool +vspltis_constant (rtx op, unsigned step, unsigned copies) +{ + machine_mode mode = GET_MODE (op); + machine_mode inner = GET_MODE_INNER (mode); + + unsigned i; + unsigned nunits; + unsigned bitsize; + unsigned mask; + + HOST_WIDE_INT val; + HOST_WIDE_INT splat_val; + HOST_WIDE_INT msb_val; + + if (mode == V2DImode || mode == V2DFmode || mode == V1TImode) + return false; + + nunits = GET_MODE_NUNITS (mode); + bitsize = GET_MODE_BITSIZE (inner); + mask = GET_MODE_MASK (inner); + + val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0); + splat_val = val; + msb_val = val >= 0 ? 0 : -1; + + /* Construct the value to be splatted, if possible. If not, return 0. */ + for (i = 2; i <= copies; i *= 2) + { + HOST_WIDE_INT small_val; + bitsize /= 2; + small_val = splat_val >> bitsize; + mask >>= bitsize; + if (splat_val != ((HOST_WIDE_INT) + ((unsigned HOST_WIDE_INT) small_val << bitsize) + | (small_val & mask))) + return false; + splat_val = small_val; + } + + /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */ + if (EASY_VECTOR_15 (splat_val)) + ; + + /* Also check if we can splat, and then add the result to itself. Do so if + the value is positive, of if the splat instruction is using OP's mode; + for splat_val < 0, the splat and the add should use the same mode. */ + else if (EASY_VECTOR_15_ADD_SELF (splat_val) + && (splat_val >= 0 || (step == 1 && copies == 1))) + ; + + /* Also check if are loading up the most significant bit which can be done by + loading up -1 and shifting the value left by -1. */ + else if (EASY_VECTOR_MSB (splat_val, inner)) + ; + + else + return false; + + /* Check if VAL is present in every STEP-th element, and the + other elements are filled with its most significant bit. */ + for (i = 1; i < nunits; ++i) + { + HOST_WIDE_INT desired_val; + unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i; + if ((i & (step - 1)) == 0) + desired_val = val; + else + desired_val = msb_val; + + if (desired_val != const_vector_elt_as_int (op, elt)) + return false; + } + + return true; +} + +/* Like vsplitis_constant, but allow the value to be shifted left with a VSLDOI + instruction, filling in the bottom elements with 0 or -1. + + Return 0 if the constant cannot be generated with VSLDOI. Return positive + for the number of zeroes to shift in, or negative for the number of 0xff + bytes to shift in. + + OP is a CONST_VECTOR. */ + +int +vspltis_shifted (rtx op) +{ + machine_mode mode = GET_MODE (op); + machine_mode inner = GET_MODE_INNER (mode); + + unsigned i, j; + unsigned nunits; + unsigned mask; + + HOST_WIDE_INT val; + + if (mode != V16QImode && mode != V8HImode && mode != V4SImode) + return false; + + /* We need to create pseudo registers to do the shift, so don't recognize + shift vector constants after reload. */ + if (!can_create_pseudo_p ()) + return false; + + nunits = GET_MODE_NUNITS (mode); + mask = GET_MODE_MASK (inner); + + val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1); + + /* Check if the value can really be the operand of a vspltis[bhw]. */ + if (EASY_VECTOR_15 (val)) + ; + + /* Also check if we are loading up the most significant bit which can be done + by loading up -1 and shifting the value left by -1. */ + else if (EASY_VECTOR_MSB (val, inner)) + ; + + else + return 0; + + /* Check if VAL is present in every STEP-th element until we find elements + that are 0 or all 1 bits. */ + for (i = 1; i < nunits; ++i) + { + unsigned elt = BYTES_BIG_ENDIAN ? 
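
A vspltis[bhw] constant is "easy" when every element carries the same value and that value fits the signed 5-bit splat immediate, which is what the halving loop above verifies. A simplified sketch that tests the same property for a single 32-bit word treated as four byte lanes:

    #include <stdint.h>
    #include <stdio.h>

    static int
    easy_vspltisb_word (uint32_t word)
    {
      uint8_t b = word & 0xff;
      int8_t val = (int8_t) b;

      if (word != (uint32_t) b * 0x01010101u)
        return 0;                        /* not a uniform byte splat */
      return val >= -16 && val <= 15;    /* signed 5-bit immediate range */
    }

    int
    main (void)
    {
      printf ("%d\n", easy_vspltisb_word (0xf0f0f0f0));  /* -16 splatted: 1 */
      printf ("%d\n", easy_vspltisb_word (0x20202020));  /* 32 is out of range: 0 */
      return 0;
    }
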
i : nunits - 1 - i; + HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt); + + /* If the value isn't the splat value, check for the remaining elements + being 0/-1. */ + if (val != elt_val) + { + if (elt_val == 0) + { + for (j = i+1; j < nunits; ++j) + { + unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j; + if (const_vector_elt_as_int (op, elt2) != 0) + return 0; + } + + return (nunits - i) * GET_MODE_SIZE (inner); + } + + else if ((elt_val & mask) == mask) + { + for (j = i+1; j < nunits; ++j) + { + unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j; + if ((const_vector_elt_as_int (op, elt2) & mask) != mask) + return 0; + } + + return -((nunits - i) * GET_MODE_SIZE (inner)); + } + + else + return 0; + } + } + + /* If all elements are equal, we don't need to do VLSDOI. */ + return 0; +} + + +/* Return true if OP is of the given MODE and can be synthesized + with a vspltisb, vspltish or vspltisw. */ + +bool +easy_altivec_constant (rtx op, machine_mode mode) +{ + unsigned step, copies; + + if (mode == VOIDmode) + mode = GET_MODE (op); + else if (mode != GET_MODE (op)) + return false; + + /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy + constants. */ + if (mode == V2DFmode) + return zero_constant (op, mode); + + else if (mode == V2DImode) + { + if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT + || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT) + return false; + + if (zero_constant (op, mode)) + return true; + + if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1 + && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1) + return true; + + return false; + } + + /* V1TImode is a special container for TImode. Ignore for now. */ + else if (mode == V1TImode) + return false; + + /* Start with a vspltisw. */ + step = GET_MODE_NUNITS (mode) / 4; + copies = 1; + + if (vspltis_constant (op, step, copies)) + return true; + + /* Then try with a vspltish. */ + if (step == 1) + copies <<= 1; + else + step >>= 1; + + if (vspltis_constant (op, step, copies)) + return true; + + /* And finally a vspltisb. */ + if (step == 1) + copies <<= 1; + else + step >>= 1; + + if (vspltis_constant (op, step, copies)) + return true; + + if (vspltis_shifted (op) != 0) + return true; + + return false; +} + +/* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose + result is OP. Abort if it is not possible. */ + +rtx +gen_easy_altivec_constant (rtx op) +{ + machine_mode mode = GET_MODE (op); + int nunits = GET_MODE_NUNITS (mode); + rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0); + unsigned step = nunits / 4; + unsigned copies = 1; + + /* Start with a vspltisw. */ + if (vspltis_constant (op, step, copies)) + return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val)); + + /* Then try with a vspltish. */ + if (step == 1) + copies <<= 1; + else + step >>= 1; + + if (vspltis_constant (op, step, copies)) + return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val)); + + /* And finally a vspltisb. */ + if (step == 1) + copies <<= 1; + else + step >>= 1; + + if (vspltis_constant (op, step, copies)) + return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val)); + + gcc_unreachable (); +} + +/* Return true if OP is of the given MODE and can be synthesized with ISA 3.0 + instructions (xxspltib, vupkhsb/vextsb2w/vextb2d). + + Return the number of instructions needed (1 or 2) into the address pointed + via NUM_INSNS_PTR. + + Return the constant that is being split via CONSTANT_PTR. 
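
easy_altivec_constant above probes the three splat instructions in a fixed order: start with the vspltisw shape (step = nunits / 4, copies = 1) and then either halve STEP or, once it reaches 1, double COPIES, which yields the vspltish and vspltisb shapes in turn. A sketch printing that progression for a 16-element vector:

    #include <stdio.h>

    int
    main (void)
    {
      int nunits = 16;                   /* e.g. a V16QI constant */
      unsigned step = nunits / 4, copies = 1;
      const char *insn[3] = { "vspltisw", "vspltish", "vspltisb" };

      for (int i = 0; i < 3; i++)
        {
          printf ("%s: step=%u copies=%u\n", insn[i], step, copies);
          if (step == 1)
            copies <<= 1;
          else
            step >>= 1;
        }
      return 0;
    }
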
*/ + +bool +xxspltib_constant_p (rtx op, + machine_mode mode, + int *num_insns_ptr, + int *constant_ptr) +{ + size_t nunits = GET_MODE_NUNITS (mode); + size_t i; + HOST_WIDE_INT value; + rtx element; + + /* Set the returned values to out of bound values. */ + *num_insns_ptr = -1; + *constant_ptr = 256; + + if (!TARGET_P9_VECTOR) + return false; + + if (mode == VOIDmode) + mode = GET_MODE (op); + + else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode) + return false; + + /* Handle (vec_duplicate <constant>). */ + if (GET_CODE (op) == VEC_DUPLICATE) + { + if (mode != V16QImode && mode != V8HImode && mode != V4SImode + && mode != V2DImode) + return false; + + element = XEXP (op, 0); + if (!CONST_INT_P (element)) + return false; + + value = INTVAL (element); + if (!IN_RANGE (value, -128, 127)) + return false; + } + + /* Handle (const_vector [...]). */ + else if (GET_CODE (op) == CONST_VECTOR) + { + if (mode != V16QImode && mode != V8HImode && mode != V4SImode + && mode != V2DImode) + return false; + + element = CONST_VECTOR_ELT (op, 0); + if (!CONST_INT_P (element)) + return false; + + value = INTVAL (element); + if (!IN_RANGE (value, -128, 127)) + return false; + + for (i = 1; i < nunits; i++) + { + element = CONST_VECTOR_ELT (op, i); + if (!CONST_INT_P (element)) + return false; + + if (value != INTVAL (element)) + return false; + } + } + + /* Handle integer constants being loaded into the upper part of the VSX + register as a scalar. If the value isn't 0/-1, only allow it if the mode + can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLITIB. */ + else if (CONST_INT_P (op)) + { + if (!SCALAR_INT_MODE_P (mode)) + return false; + + value = INTVAL (op); + if (!IN_RANGE (value, -128, 127)) + return false; + + if (!IN_RANGE (value, -1, 0)) + { + if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID)) + return false; + + if (EASY_VECTOR_15 (value)) + return false; + } + } + + else + return false; + + /* See if we could generate vspltisw/vspltish directly instead of xxspltib + + sign extend. Special case 0/-1 to allow getting any VSX register instead + of an Altivec register. */ + if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0) + && EASY_VECTOR_15 (value)) + return false; + + /* Return # of instructions and the constant byte for XXSPLTIB. 
*/ + if (mode == V16QImode) + *num_insns_ptr = 1; + + else if (IN_RANGE (value, -1, 0)) + *num_insns_ptr = 1; + + else + *num_insns_ptr = 2; + + *constant_ptr = (int) value; + return true; +} + +const char * +output_vec_const_move (rtx *operands) +{ + int cst, cst2, shift; + machine_mode mode; + rtx dest, vec; + + dest = operands[0]; + vec = operands[1]; + mode = GET_MODE (dest); + + if (TARGET_VSX) + { + bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest)); + int xxspltib_value = 256; + int num_insns = -1; + + if (zero_constant (vec, mode)) + { + if (TARGET_P9_VECTOR) + return "xxspltib %x0,0"; + + else if (dest_vmx_p) + return "vspltisw %0,0"; + + else + return "xxlxor %x0,%x0,%x0"; + } + + if (all_ones_constant (vec, mode)) + { + if (TARGET_P9_VECTOR) + return "xxspltib %x0,255"; + + else if (dest_vmx_p) + return "vspltisw %0,-1"; + + else if (TARGET_P8_VECTOR) + return "xxlorc %x0,%x0,%x0"; + + else + gcc_unreachable (); + } + + if (TARGET_P9_VECTOR + && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value)) + { + if (num_insns == 1) + { + operands[2] = GEN_INT (xxspltib_value & 0xff); + return "xxspltib %x0,%2"; + } + + return "#"; + } + } + + if (TARGET_ALTIVEC) + { + rtx splat_vec; + + gcc_assert (ALTIVEC_REGNO_P (REGNO (dest))); + if (zero_constant (vec, mode)) + return "vspltisw %0,0"; + + if (all_ones_constant (vec, mode)) + return "vspltisw %0,-1"; + + /* Do we need to construct a value using VSLDOI? */ + shift = vspltis_shifted (vec); + if (shift != 0) + return "#"; + + splat_vec = gen_easy_altivec_constant (vec); + gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE); + operands[1] = XEXP (splat_vec, 0); + if (!EASY_VECTOR_15 (INTVAL (operands[1]))) + return "#"; + + switch (GET_MODE (splat_vec)) + { + case V4SImode: + return "vspltisw %0,%1"; + + case V8HImode: + return "vspltish %0,%1"; + + case V16QImode: + return "vspltisb %0,%1"; + + default: + gcc_unreachable (); + } + } + + gcc_assert (TARGET_SPE); + + /* Vector constant 0 is handled as a splitter of V2SI, and in the + pattern of V1DI, V4HI, and V2SF. + + FIXME: We should probably return # and add post reload + splitters for these, but this way is so easy ;-). */ + cst = INTVAL (CONST_VECTOR_ELT (vec, 0)); + cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1)); + operands[1] = CONST_VECTOR_ELT (vec, 0); + operands[2] = CONST_VECTOR_ELT (vec, 1); + if (cst == cst2) + return "li %0,%1\n\tevmergelo %0,%0,%0"; + else if (WORDS_BIG_ENDIAN) + return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2"; + else + return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1"; +} + +/* Initialize TARGET of vector PAIRED to VALS. */ + +void +paired_expand_vector_init (rtx target, rtx vals) +{ + machine_mode mode = GET_MODE (target); + int n_elts = GET_MODE_NUNITS (mode); + int n_var = 0; + rtx x, new_rtx, tmp, constant_op, op1, op2; + int i; + + for (i = 0; i < n_elts; ++i) + { + x = XVECEXP (vals, 0, i); + if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x))) + ++n_var; + } + if (n_var == 0) + { + /* Load from constant pool. */ + emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); + return; + } + + if (n_var == 2) + { + /* The vector is initialized only with non-constants. */ + new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0), + XVECEXP (vals, 0, 1)); + + emit_move_insn (target, new_rtx); + return; + } + + /* One field is non-constant and the other one is a constant. Load the + constant from the constant pool and use ps_merge instruction to + construct the whole vector. 
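
xxspltib_constant_p above reports how many instructions the splat needs: any value in -128..127 fits one xxspltib, which is already enough for V16QImode or for the 0/-1 special cases, while wider element types also need a sign-extension instruction. A sketch of that decision (the helper name and the -1 failure value are invented):

    #include <stdio.h>

    static int
    xxspltib_num_insns (int value, int is_v16qi)
    {
      if (value < -128 || value > 127)
        return -1;                       /* not representable by xxspltib */
      if (is_v16qi || (value >= -1 && value <= 0))
        return 1;                        /* xxspltib alone is enough */
      return 2;                          /* xxspltib plus a sign extend */
    }

    int
    main (void)
    {
      printf ("%d\n", xxspltib_num_insns (-1, 0));   /* 1 */
      printf ("%d\n", xxspltib_num_insns (37, 0));   /* 2 */
      printf ("%d\n", xxspltib_num_insns (300, 1));  /* -1 */
      return 0;
    }
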
*/ + op1 = XVECEXP (vals, 0, 0); + op2 = XVECEXP (vals, 0, 1); + + constant_op = (CONSTANT_P (op1)) ? op1 : op2; + + tmp = gen_reg_rtx (GET_MODE (constant_op)); + emit_move_insn (tmp, constant_op); + + if (CONSTANT_P (op1)) + new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2); + else + new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp); + + emit_move_insn (target, new_rtx); +} + +void +paired_expand_vector_move (rtx operands[]) +{ + rtx op0 = operands[0], op1 = operands[1]; + + emit_move_insn (op0, op1); +} + +/* Emit vector compare for code RCODE. DEST is destination, OP1 and + OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two + operands for the relation operation COND. This is a recursive + function. */ + +static void +paired_emit_vector_compare (enum rtx_code rcode, + rtx dest, rtx op0, rtx op1, + rtx cc_op0, rtx cc_op1) +{ + rtx tmp = gen_reg_rtx (V2SFmode); + rtx tmp1, max, min; + + gcc_assert (TARGET_PAIRED_FLOAT); + gcc_assert (GET_MODE (op0) == GET_MODE (op1)); + + switch (rcode) + { + case LT: + case LTU: + paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1); + return; + case GE: + case GEU: + emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1)); + emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode))); + return; + case LE: + case LEU: + paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0); + return; + case GT: + paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1); + return; + case EQ: + tmp1 = gen_reg_rtx (V2SFmode); + max = gen_reg_rtx (V2SFmode); + min = gen_reg_rtx (V2SFmode); + gen_reg_rtx (V2SFmode); + + emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1)); + emit_insn (gen_selv2sf4 + (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode))); + emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0)); + emit_insn (gen_selv2sf4 + (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode))); + emit_insn (gen_subv2sf3 (tmp1, min, max)); + emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode))); + return; + case NE: + paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1); + return; + case UNLE: + paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1); + return; + case UNLT: + paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1); + return; + case UNGE: + paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1); + return; + case UNGT: + paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1); + return; + default: + gcc_unreachable (); + } + + return; +} + +/* Emit vector conditional expression. + DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands. + CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */ + +int +paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2, + rtx cond, rtx cc_op0, rtx cc_op1) +{ + enum rtx_code rcode = GET_CODE (cond); + + if (!TARGET_PAIRED_FLOAT) + return 0; + + paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1); + + return 1; +} + +/* Initialize vector TARGET to VALS. 
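
paired_emit_vector_compare above only knows how to emit one primitive, "select on a >= b", and rewrites every other comparison in terms of it by swapping either the selected values or the compared values. The same idea on scalars, as a sketch:

    #include <stdio.h>

    /* The GE primitive: pick IF_TRUE when CMP_A - CMP_B >= 0.  */
    static double
    sel_ge (double cmp_a, double cmp_b, double if_true, double if_false)
    {
      return cmp_a - cmp_b >= 0.0 ? if_true : if_false;
    }

    static double
    sel_lt (double cmp_a, double cmp_b, double if_true, double if_false)
    {
      /* a < b selects the opposite of a >= b, so keep the compare operands
         and swap the selected values (as the LT case does with op0/op1).  */
      return sel_ge (cmp_a, cmp_b, if_false, if_true);
    }

    static double
    sel_le (double cmp_a, double cmp_b, double if_true, double if_false)
    {
      /* a <= b is b >= a, so swap the compare operands instead.  */
      return sel_ge (cmp_b, cmp_a, if_true, if_false);
    }

    int
    main (void)
    {
      printf ("%g %g\n",
              sel_lt (1.0, 2.0, 10.0, 20.0),    /* 1 < 2, so 10 */
              sel_le (2.0, 2.0, 10.0, 20.0));   /* 2 <= 2, so 10 */
      return 0;
    }
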
*/ + +void +rs6000_expand_vector_init (rtx target, rtx vals) +{ + machine_mode mode = GET_MODE (target); + machine_mode inner_mode = GET_MODE_INNER (mode); + int n_elts = GET_MODE_NUNITS (mode); + int n_var = 0, one_var = -1; + bool all_same = true, all_const_zero = true; + rtx x, mem; + int i; + + for (i = 0; i < n_elts; ++i) + { + x = XVECEXP (vals, 0, i); + if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x))) + ++n_var, one_var = i; + else if (x != CONST0_RTX (inner_mode)) + all_const_zero = false; + + if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) + all_same = false; + } + + if (n_var == 0) + { + rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); + bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT); + if ((int_vector_p || TARGET_VSX) && all_const_zero) + { + /* Zero register. */ + emit_move_insn (target, CONST0_RTX (mode)); + return; + } + else if (int_vector_p && easy_vector_constant (const_vec, mode)) + { + /* Splat immediate. */ + emit_insn (gen_rtx_SET (target, const_vec)); + return; + } + else + { + /* Load from constant pool. */ + emit_move_insn (target, const_vec); + return; + } + } + + /* Double word values on VSX can use xxpermdi or lxvdsx. */ + if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode)) + { + rtx op[2]; + size_t i; + size_t num_elements = all_same ? 1 : 2; + for (i = 0; i < num_elements; i++) + { + op[i] = XVECEXP (vals, 0, i); + /* Just in case there is a SUBREG with a smaller mode, do a + conversion. */ + if (GET_MODE (op[i]) != inner_mode) + { + rtx tmp = gen_reg_rtx (inner_mode); + convert_move (tmp, op[i], 0); + op[i] = tmp; + } + /* Allow load with splat double word. */ + else if (MEM_P (op[i])) + { + if (!all_same) + op[i] = force_reg (inner_mode, op[i]); + } + else if (!REG_P (op[i])) + op[i] = force_reg (inner_mode, op[i]); + } + + if (all_same) + { + if (mode == V2DFmode) + emit_insn (gen_vsx_splat_v2df (target, op[0])); + else + emit_insn (gen_vsx_splat_v2di (target, op[0])); + } + else + { + if (mode == V2DFmode) + emit_insn (gen_vsx_concat_v2df (target, op[0], op[1])); + else + emit_insn (gen_vsx_concat_v2di (target, op[0], op[1])); + } + return; + } + + /* Special case initializing vector int if we are on 64-bit systems with + direct move or we have the ISA 3.0 instructions. */ + if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode) + && TARGET_DIRECT_MOVE_64BIT) + { + if (all_same) + { + rtx element0 = XVECEXP (vals, 0, 0); + if (MEM_P (element0)) + element0 = rs6000_address_for_fpconvert (element0); + else + element0 = force_reg (SImode, element0); + + if (TARGET_P9_VECTOR) + emit_insn (gen_vsx_splat_v4si (target, element0)); + else + { + rtx tmp = gen_reg_rtx (DImode); + emit_insn (gen_zero_extendsidi2 (tmp, element0)); + emit_insn (gen_vsx_splat_v4si_di (target, tmp)); + } + return; + } + else + { + rtx elements[4]; + size_t i; + + for (i = 0; i < 4; i++) + { + elements[i] = XVECEXP (vals, 0, i); + if (!CONST_INT_P (elements[i]) && !REG_P (elements[i])) + elements[i] = copy_to_mode_reg (SImode, elements[i]); + } + + emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1], + elements[2], elements[3])); + return; + } + } + + /* With single precision floating point on VSX, know that internally single + precision is actually represented as a double, and either make 2 V2DF + vectors, and convert these vectors to single precision, or do one + conversion, and splat the result to the other elements. 
*/ + if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode)) + { + if (all_same) + { + rtx element0 = XVECEXP (vals, 0, 0); + + if (TARGET_P9_VECTOR) + { + if (MEM_P (element0)) + element0 = rs6000_address_for_fpconvert (element0); + + emit_insn (gen_vsx_splat_v4sf (target, element0)); + } + + else + { + rtx freg = gen_reg_rtx (V4SFmode); + rtx sreg = force_reg (SFmode, element0); + rtx cvt = (TARGET_XSCVDPSPN + ? gen_vsx_xscvdpspn_scalar (freg, sreg) + : gen_vsx_xscvdpsp_scalar (freg, sreg)); + + emit_insn (cvt); + emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, + const0_rtx)); + } + } + else + { + rtx dbl_even = gen_reg_rtx (V2DFmode); + rtx dbl_odd = gen_reg_rtx (V2DFmode); + rtx flt_even = gen_reg_rtx (V4SFmode); + rtx flt_odd = gen_reg_rtx (V4SFmode); + rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0)); + rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1)); + rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2)); + rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3)); + + /* Use VMRGEW if we can instead of doing a permute. */ + if (TARGET_P8_VECTOR) + { + emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2)); + emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3)); + emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even)); + emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd)); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd)); + else + emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even)); + } + else + { + emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1)); + emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3)); + emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even)); + emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd)); + rs6000_expand_extract_even (target, flt_even, flt_odd); + } + } + return; + } + + /* Special case initializing vector short/char that are splats if we are on + 64-bit systems with direct move. */ + if (all_same && TARGET_DIRECT_MOVE_64BIT + && (mode == V16QImode || mode == V8HImode)) + { + rtx op0 = XVECEXP (vals, 0, 0); + rtx di_tmp = gen_reg_rtx (DImode); + + if (!REG_P (op0)) + op0 = force_reg (GET_MODE_INNER (mode), op0); + + if (mode == V16QImode) + { + emit_insn (gen_zero_extendqidi2 (di_tmp, op0)); + emit_insn (gen_vsx_vspltb_di (target, di_tmp)); + return; + } + + if (mode == V8HImode) + { + emit_insn (gen_zero_extendhidi2 (di_tmp, op0)); + emit_insn (gen_vsx_vsplth_di (target, di_tmp)); + return; + } + } + + /* Store value to stack temp. Load vector element. Splat. However, splat + of 64-bit items is not supported on Altivec. */ + if (all_same && GET_MODE_SIZE (inner_mode) <= 4) + { + mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode)); + emit_move_insn (adjust_address_nv (mem, inner_mode, 0), + XVECEXP (vals, 0, 0)); + x = gen_rtx_UNSPEC (VOIDmode, + gen_rtvec (1, const0_rtx), UNSPEC_LVE); + emit_insn (gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (2, + gen_rtx_SET (target, mem), + x))); + x = gen_rtx_VEC_SELECT (inner_mode, target, + gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (1, const0_rtx))); + emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x))); + return; + } + + /* One field is non-constant. Load constant then overwrite + varying field. */ + if (n_var == 1) + { + rtx copy = copy_rtx (vals); + + /* Load constant part of vector, substitute neighboring value for + varying element. */ + XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts); + rs6000_expand_vector_init (target, copy); + + /* Insert variable. 
*/ + rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var); + return; + } + + /* Construct the vector in memory one field at a time + and load the whole vector. */ + mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); + for (i = 0; i < n_elts; i++) + emit_move_insn (adjust_address_nv (mem, inner_mode, + i * GET_MODE_SIZE (inner_mode)), + XVECEXP (vals, 0, i)); + emit_move_insn (target, mem); +} + +/* Set field ELT of TARGET to VAL. */ + +void +rs6000_expand_vector_set (rtx target, rtx val, int elt) +{ + machine_mode mode = GET_MODE (target); + machine_mode inner_mode = GET_MODE_INNER (mode); + rtx reg = gen_reg_rtx (mode); + rtx mask, mem, x; + int width = GET_MODE_SIZE (inner_mode); + int i; + + val = force_reg (GET_MODE (val), val); + + if (VECTOR_MEM_VSX_P (mode)) + { + rtx insn = NULL_RTX; + rtx elt_rtx = GEN_INT (elt); + + if (mode == V2DFmode) + insn = gen_vsx_set_v2df (target, target, val, elt_rtx); + + else if (mode == V2DImode) + insn = gen_vsx_set_v2di (target, target, val, elt_rtx); + + else if (TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER + && TARGET_UPPER_REGS_DI && TARGET_POWERPC64) + { + if (mode == V4SImode) + insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx); + else if (mode == V8HImode) + insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx); + else if (mode == V16QImode) + insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx); + } + + if (insn) + { + emit_insn (insn); + return; + } + } + + /* Simplify setting single element vectors like V1TImode. */ + if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0) + { + emit_move_insn (target, gen_lowpart (mode, val)); + return; + } + + /* Load single variable value. */ + mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode)); + emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val); + x = gen_rtx_UNSPEC (VOIDmode, + gen_rtvec (1, const0_rtx), UNSPEC_LVE); + emit_insn (gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (2, + gen_rtx_SET (reg, mem), + x))); + + /* Linear sequence. */ + mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16)); + for (i = 0; i < 16; ++i) + XVECEXP (mask, 0, i) = GEN_INT (i); + + /* Set permute mask to insert element into target. */ + for (i = 0; i < width; ++i) + XVECEXP (mask, 0, elt*width + i) + = GEN_INT (i + 0x10); + x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0)); + + if (BYTES_BIG_ENDIAN) + x = gen_rtx_UNSPEC (mode, + gen_rtvec (3, target, reg, + force_reg (V16QImode, x)), + UNSPEC_VPERM); + else + { + if (TARGET_P9_VECTOR) + x = gen_rtx_UNSPEC (mode, + gen_rtvec (3, target, reg, + force_reg (V16QImode, x)), + UNSPEC_VPERMR); + else + { + /* Invert selector. We prefer to generate VNAND on P8 so + that future fusion opportunities can kick in, but must + generate VNOR elsewhere. */ + rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x)); + rtx iorx = (TARGET_P8_VECTOR + ? gen_rtx_IOR (V16QImode, notx, notx) + : gen_rtx_AND (V16QImode, notx, notx)); + rtx tmp = gen_reg_rtx (V16QImode); + emit_insn (gen_rtx_SET (tmp, iorx)); + + /* Permute with operands reversed and adjusted selector. */ + x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp), + UNSPEC_VPERM); + } + } + + emit_insn (gen_rtx_SET (target, x)); +} + +/* Extract field ELT from VEC into TARGET. 
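
The insert path above builds a vperm byte selector (before the little-endian adjustments): start from the identity selector 0..15, which keeps the old vector, then redirect the WIDTH bytes of element ELT to bytes 0x10.. of the second operand, the newly loaded value, so a single permute performs the insertion. A sketch that just computes such a mask:

    #include <stdio.h>

    static void
    build_insert_mask (unsigned char mask[16], int elt, int width)
    {
      for (int i = 0; i < 16; i++)
        mask[i] = i;                       /* identity: keep byte i of the target */
      for (int i = 0; i < width; i++)
        mask[elt * width + i] = 0x10 + i;  /* take byte i of the new value */
    }

    int
    main (void)
    {
      unsigned char mask[16];
      build_insert_mask (mask, 2, 4);      /* insert a 4-byte element at slot 2 */
      for (int i = 0; i < 16; i++)
        printf ("%02x ", mask[i]);
      printf ("\n");
      return 0;
    }
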
*/ + +void +rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt) +{ + machine_mode mode = GET_MODE (vec); + machine_mode inner_mode = GET_MODE_INNER (mode); + rtx mem; + + if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt)) + { + switch (mode) + { + default: + break; + case V1TImode: + gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode); + emit_move_insn (target, gen_lowpart (TImode, vec)); + break; + case V2DFmode: + emit_insn (gen_vsx_extract_v2df (target, vec, elt)); + return; + case V2DImode: + emit_insn (gen_vsx_extract_v2di (target, vec, elt)); + return; + case V4SFmode: + emit_insn (gen_vsx_extract_v4sf (target, vec, elt)); + return; + case V16QImode: + if (TARGET_DIRECT_MOVE_64BIT) + { + emit_insn (gen_vsx_extract_v16qi (target, vec, elt)); + return; + } + else + break; + case V8HImode: + if (TARGET_DIRECT_MOVE_64BIT) + { + emit_insn (gen_vsx_extract_v8hi (target, vec, elt)); + return; + } + else + break; + case V4SImode: + if (TARGET_DIRECT_MOVE_64BIT) + { + emit_insn (gen_vsx_extract_v4si (target, vec, elt)); + return; + } + break; + } + } + else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt) + && TARGET_DIRECT_MOVE_64BIT) + { + if (GET_MODE (elt) != DImode) + { + rtx tmp = gen_reg_rtx (DImode); + convert_move (tmp, elt, 0); + elt = tmp; + } + else if (!REG_P (elt)) + elt = force_reg (DImode, elt); + + switch (mode) + { + case V2DFmode: + emit_insn (gen_vsx_extract_v2df_var (target, vec, elt)); + return; + + case V2DImode: + emit_insn (gen_vsx_extract_v2di_var (target, vec, elt)); + return; + + case V4SFmode: + emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt)); + return; + + case V4SImode: + emit_insn (gen_vsx_extract_v4si_var (target, vec, elt)); + return; + + case V8HImode: + emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt)); + return; + + case V16QImode: + emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt)); + return; + + default: + gcc_unreachable (); + } + } + + gcc_assert (CONST_INT_P (elt)); + + /* Allocate mode-sized buffer. */ + mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); + + emit_move_insn (mem, vec); + + /* Add offset to field within buffer matching vector element. */ + mem = adjust_address_nv (mem, inner_mode, + INTVAL (elt) * GET_MODE_SIZE (inner_mode)); + + emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0)); +} + +/* Helper function to return the register number of a RTX. */ +static inline int +regno_or_subregno (rtx op) +{ + if (REG_P (op)) + return REGNO (op); + else if (SUBREG_P (op)) + return subreg_regno (op); + else + gcc_unreachable (); +} + +/* Adjust a memory address (MEM) of a vector type to point to a scalar field + within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register + temporary (BASE_TMP) to fixup the address. Return the new memory address + that is valid for reads or writes to a given register (SCALAR_REG). */ + +rtx +rs6000_adjust_vec_address (rtx scalar_reg, + rtx mem, + rtx element, + rtx base_tmp, + machine_mode scalar_mode) +{ + unsigned scalar_size = GET_MODE_SIZE (scalar_mode); + rtx addr = XEXP (mem, 0); + rtx element_offset; + rtx new_addr; + bool valid_addr_p; + + /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */ + gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC); + + /* Calculate what we need to add to the address to get the element + address. 
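+     E.g. for V4SImode (4-byte elements) a constant element number of 3
+     gives a byte offset of 12, while a variable element number is shifted
+     left by 2 instead.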
*/ + if (CONST_INT_P (element)) + element_offset = GEN_INT (INTVAL (element) * scalar_size); + else + { + int byte_shift = exact_log2 (scalar_size); + gcc_assert (byte_shift >= 0); + + if (byte_shift == 0) + element_offset = element; + + else + { + if (TARGET_POWERPC64) + emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift))); + else + emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift))); + + element_offset = base_tmp; + } + } + + /* Create the new address pointing to the element within the vector. If we + are adding 0, we don't have to change the address. */ + if (element_offset == const0_rtx) + new_addr = addr; + + /* A simple indirect address can be converted into a reg + offset + address. */ + else if (REG_P (addr) || SUBREG_P (addr)) + new_addr = gen_rtx_PLUS (Pmode, addr, element_offset); + + /* Optimize D-FORM addresses with constant offset with a constant element, to + include the element offset in the address directly. */ + else if (GET_CODE (addr) == PLUS) + { + rtx op0 = XEXP (addr, 0); + rtx op1 = XEXP (addr, 1); + rtx insn; + + gcc_assert (REG_P (op0) || SUBREG_P (op0)); + if (CONST_INT_P (op1) && CONST_INT_P (element_offset)) + { + HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset); + rtx offset_rtx = GEN_INT (offset); + + if (IN_RANGE (offset, -32768, 32767) + && (scalar_size < 8 || (offset & 0x3) == 0)) + new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx); + else + { + emit_move_insn (base_tmp, offset_rtx); + new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp); + } + } + else + { + bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1)); + bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset)); + + /* Note, ADDI requires the register being added to be a base + register. If the register was R0, load it up into the temporary + and do the add. */ + if (op1_reg_p + && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO)) + { + insn = gen_add3_insn (base_tmp, op1, element_offset); + gcc_assert (insn != NULL_RTX); + emit_insn (insn); + } + + else if (ele_reg_p + && reg_or_subregno (element_offset) != FIRST_GPR_REGNO) + { + insn = gen_add3_insn (base_tmp, element_offset, op1); + gcc_assert (insn != NULL_RTX); + emit_insn (insn); + } + + else + { + emit_move_insn (base_tmp, op1); + emit_insn (gen_add2_insn (base_tmp, element_offset)); + } + + new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp); + } + } + + else + { + emit_move_insn (base_tmp, addr); + new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset); + } + + /* If we have a PLUS, we need to see whether the particular register class + allows for D-FORM or X-FORM addressing. 
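+     D-FORM here is a register plus 16-bit displacement address and X-FORM a
+     register plus register (indexed) address; the traditional Altivec loads
+     and stores only have the indexed form, so an offset address may have to
+     be rebuilt in BASE_TMP below.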
*/ + if (GET_CODE (new_addr) == PLUS) + { + rtx op1 = XEXP (new_addr, 1); + addr_mask_type addr_mask; + int scalar_regno = regno_or_subregno (scalar_reg); + + gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER); + if (INT_REGNO_P (scalar_regno)) + addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR]; + + else if (FP_REGNO_P (scalar_regno)) + addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR]; + + else if (ALTIVEC_REGNO_P (scalar_regno)) + addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX]; + + else + gcc_unreachable (); + + if (REG_P (op1) || SUBREG_P (op1)) + valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0; + else + valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0; + } + + else if (REG_P (new_addr) || SUBREG_P (new_addr)) + valid_addr_p = true; + + else + valid_addr_p = false; + + if (!valid_addr_p) + { + emit_move_insn (base_tmp, new_addr); + new_addr = base_tmp; + } + + return change_address (mem, scalar_mode, new_addr); +} + +/* Split a variable vec_extract operation into the component instructions. */ + +void +rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr, + rtx tmp_altivec) +{ + machine_mode mode = GET_MODE (src); + machine_mode scalar_mode = GET_MODE (dest); + unsigned scalar_size = GET_MODE_SIZE (scalar_mode); + int byte_shift = exact_log2 (scalar_size); + + gcc_assert (byte_shift >= 0); + + /* If we are given a memory address, optimize to load just the element. We + don't have to adjust the vector element number on little endian + systems. */ + if (MEM_P (src)) + { + gcc_assert (REG_P (tmp_gpr)); + emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element, + tmp_gpr, scalar_mode)); + return; + } + + else if (REG_P (src) || SUBREG_P (src)) + { + int bit_shift = byte_shift + 3; + rtx element2; + int dest_regno = regno_or_subregno (dest); + int src_regno = regno_or_subregno (src); + int element_regno = regno_or_subregno (element); + + gcc_assert (REG_P (tmp_gpr)); + + /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in + a general purpose register. */ + if (TARGET_P9_VECTOR + && (mode == V16QImode || mode == V8HImode || mode == V4SImode) + && INT_REGNO_P (dest_regno) + && ALTIVEC_REGNO_P (src_regno) + && INT_REGNO_P (element_regno)) + { + rtx dest_si = gen_rtx_REG (SImode, dest_regno); + rtx element_si = gen_rtx_REG (SImode, element_regno); + + if (mode == V16QImode) + emit_insn (VECTOR_ELT_ORDER_BIG + ? gen_vextublx (dest_si, element_si, src) + : gen_vextubrx (dest_si, element_si, src)); + + else if (mode == V8HImode) + { + rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr)); + emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx)); + emit_insn (VECTOR_ELT_ORDER_BIG + ? gen_vextuhlx (dest_si, tmp_gpr_si, src) + : gen_vextuhrx (dest_si, tmp_gpr_si, src)); + } + + + else + { + rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr)); + emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx)); + emit_insn (VECTOR_ELT_ORDER_BIG + ? gen_vextuwlx (dest_si, tmp_gpr_si, src) + : gen_vextuwrx (dest_si, tmp_gpr_si, src)); + } + + return; + } + + + gcc_assert (REG_P (tmp_altivec)); + + /* For little endian, adjust element ordering. For V2DI/V2DF, we can use + an XOR, otherwise we need to subtract. The shift amount is so VSLO + will shift the element into the upper position (adding 3 to convert a + byte shift into a bit shift). 
*/ + if (scalar_size == 8) + { + if (!VECTOR_ELT_ORDER_BIG) + { + emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx)); + element2 = tmp_gpr; + } + else + element2 = element; + + /* Generate RLDIC directly to shift left 6 bits and retrieve 1 + bit. */ + emit_insn (gen_rtx_SET (tmp_gpr, + gen_rtx_AND (DImode, + gen_rtx_ASHIFT (DImode, + element2, + GEN_INT (6)), + GEN_INT (64)))); + } + else + { + if (!VECTOR_ELT_ORDER_BIG) + { + rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1); + + emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1)); + emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr)); + element2 = tmp_gpr; + } + else + element2 = element; + + emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift))); + } + + /* Get the value into the lower byte of the Altivec register where VSLO + expects it. */ + if (TARGET_P9_VECTOR) + emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr)); + else if (can_create_pseudo_p ()) + emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr)); + else + { + rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec)); + emit_move_insn (tmp_di, tmp_gpr); + emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di)); + } + + /* Do the VSLO to get the value into the final location. */ + switch (mode) + { + case V2DFmode: + emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec)); + return; + + case V2DImode: + emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec)); + return; + + case V4SFmode: + { + rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec)); + rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec)); + rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src)); + emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di, + tmp_altivec)); + + emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf)); + return; + } + + case V4SImode: + case V8HImode: + case V16QImode: + { + rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec)); + rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src)); + rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest)); + emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di, + tmp_altivec)); + emit_move_insn (tmp_gpr_di, tmp_altivec_di); + emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di, + GEN_INT (64 - (8 * scalar_size)))); + return; + } + + default: + gcc_unreachable (); + } + + return; + } + else + gcc_unreachable (); + } + +/* Helper function for rs6000_split_v4si_init to build up a DImode value from + two SImode values. */ + +static void +rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp) +{ + const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff); + + if (CONST_INT_P (si1) && CONST_INT_P (si2)) + { + unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32; + unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit; + + emit_move_insn (dest, GEN_INT (const1 | const2)); + return; + } + + /* Put si1 into upper 32-bits of dest. */ + if (CONST_INT_P (si1)) + emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32)); + else + { + /* Generate RLDIC. */ + rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1)); + rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32)); + rtx mask_rtx = GEN_INT (mask_32bit << 32); + rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx); + gcc_assert (!reg_overlap_mentioned_p (dest, si1)); + emit_insn (gen_rtx_SET (dest, and_rtx)); + } + + /* Put si2 into the temporary. 
*/ + gcc_assert (!reg_overlap_mentioned_p (dest, tmp)); + if (CONST_INT_P (si2)) + emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit)); + else + emit_insn (gen_zero_extendsidi2 (tmp, si2)); + + /* Combine the two parts. */ + emit_insn (gen_iordi3 (dest, dest, tmp)); + return; +} + +/* Split a V4SI initialization. */ + +void +rs6000_split_v4si_init (rtx operands[]) +{ + rtx dest = operands[0]; + + /* Destination is a GPR, build up the two DImode parts in place. */ + if (REG_P (dest) || SUBREG_P (dest)) + { + int d_regno = regno_or_subregno (dest); + rtx scalar1 = operands[1]; + rtx scalar2 = operands[2]; + rtx scalar3 = operands[3]; + rtx scalar4 = operands[4]; + rtx tmp1 = operands[5]; + rtx tmp2 = operands[6]; + + /* Even though we only need one temporary (plus the destination, which + has an early clobber constraint, try to use two temporaries, one for + each double word created. That way the 2nd insn scheduling pass can + rearrange things so the two parts are done in parallel. */ + if (BYTES_BIG_ENDIAN) + { + rtx di_lo = gen_rtx_REG (DImode, d_regno); + rtx di_hi = gen_rtx_REG (DImode, d_regno + 1); + rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1); + rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2); + } + else + { + rtx di_lo = gen_rtx_REG (DImode, d_regno + 1); + rtx di_hi = gen_rtx_REG (DImode, d_regno); + gcc_assert (!VECTOR_ELT_ORDER_BIG); + rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1); + rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2); + } + return; + } + + else + gcc_unreachable (); +} + +/* Return TRUE if OP is an invalid SUBREG operation on the e500. */ + +bool +invalid_e500_subreg (rtx op, machine_mode mode) +{ + if (TARGET_E500_DOUBLE) + { + /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or + subreg:TI and reg:TF. Decimal float modes are like integer + modes (only low part of each register used) for this + purpose. */ + if (GET_CODE (op) == SUBREG + && (mode == SImode || mode == DImode || mode == TImode + || mode == DDmode || mode == TDmode || mode == PTImode) + && REG_P (SUBREG_REG (op)) + && (GET_MODE (SUBREG_REG (op)) == DFmode + || GET_MODE (SUBREG_REG (op)) == TFmode + || GET_MODE (SUBREG_REG (op)) == IFmode + || GET_MODE (SUBREG_REG (op)) == KFmode)) + return true; + + /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and + reg:TI. */ + if (GET_CODE (op) == SUBREG + && (mode == DFmode || mode == TFmode || mode == IFmode + || mode == KFmode) + && REG_P (SUBREG_REG (op)) + && (GET_MODE (SUBREG_REG (op)) == DImode + || GET_MODE (SUBREG_REG (op)) == TImode + || GET_MODE (SUBREG_REG (op)) == PTImode + || GET_MODE (SUBREG_REG (op)) == DDmode + || GET_MODE (SUBREG_REG (op)) == TDmode)) + return true; + } + + if (TARGET_SPE + && GET_CODE (op) == SUBREG + && mode == SImode + && REG_P (SUBREG_REG (op)) + && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op)))) + return true; + + return false; +} + +/* Return alignment of TYPE. Existing alignment is ALIGN. HOW + selects whether the alignment is abi mandated, optional, or + both abi and optional alignment. 
*/ + +unsigned int +rs6000_data_alignment (tree type, unsigned int align, enum data_align how) +{ + if (how != align_opt) + { + if (TREE_CODE (type) == VECTOR_TYPE) + { + if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type))) + || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type)))) + { + if (align < 64) + align = 64; + } + else if (align < 128) + align = 128; + } + else if (TARGET_E500_DOUBLE + && TREE_CODE (type) == REAL_TYPE + && TYPE_MODE (type) == DFmode) + { + if (align < 64) + align = 64; + } + } + + if (how != align_abi) + { + if (TREE_CODE (type) == ARRAY_TYPE + && TYPE_MODE (TREE_TYPE (type)) == QImode) + { + if (align < BITS_PER_WORD) + align = BITS_PER_WORD; + } + } + + return align; +} + +/* Previous GCC releases forced all vector types to have 16-byte alignment. */ + +bool +rs6000_special_adjust_field_align_p (tree type, unsigned int computed) +{ + if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE) + { + if (computed != 128) + { + static bool warned; + if (!warned && warn_psabi) + { + warned = true; + inform (input_location, + "the layout of aggregates containing vectors with" + " %d-byte alignment has changed in GCC 5", + computed / BITS_PER_UNIT); + } + } + /* In current GCC there is no special case. */ + return false; + } + + return false; +} + +/* AIX increases natural record alignment to doubleword if the first + field is an FP double while the FP fields remain word aligned. */ + +unsigned int +rs6000_special_round_type_align (tree type, unsigned int computed, + unsigned int specified) +{ + unsigned int align = MAX (computed, specified); + tree field = TYPE_FIELDS (type); + + /* Skip all non field decls */ + while (field != NULL && TREE_CODE (field) != FIELD_DECL) + field = DECL_CHAIN (field); + + if (field != NULL && field != type) + { + type = TREE_TYPE (field); + while (TREE_CODE (type) == ARRAY_TYPE) + type = TREE_TYPE (type); + + if (type != error_mark_node && TYPE_MODE (type) == DFmode) + align = MAX (align, 64); + } + + return align; +} + +/* Darwin increases record alignment to the natural alignment of + the first field. */ + +unsigned int +darwin_rs6000_special_round_type_align (tree type, unsigned int computed, + unsigned int specified) +{ + unsigned int align = MAX (computed, specified); + + if (TYPE_PACKED (type)) + return align; + + /* Find the first field, looking down into aggregates. */ + do { + tree field = TYPE_FIELDS (type); + /* Skip all non field decls */ + while (field != NULL && TREE_CODE (field) != FIELD_DECL) + field = DECL_CHAIN (field); + if (! field) + break; + /* A packed field does not contribute any extra alignment. */ + if (DECL_PACKED (field)) + return align; + type = TREE_TYPE (field); + while (TREE_CODE (type) == ARRAY_TYPE) + type = TREE_TYPE (type); + } while (AGGREGATE_TYPE_P (type)); + + if (! AGGREGATE_TYPE_P (type) && type != error_mark_node) + align = MAX (align, TYPE_ALIGN (type)); + + return align; +} + +/* Return 1 for an operand in small memory on V.4/eabi. */ + +int +small_data_operand (rtx op ATTRIBUTE_UNUSED, + machine_mode mode ATTRIBUTE_UNUSED) +{ +#if TARGET_ELF + rtx sym_ref; + + if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA) + return 0; + + if (DEFAULT_ABI != ABI_V4) + return 0; + + /* Vector and float memory instructions have a limited offset on the + SPE, so using a vector or float variable directly as an operand is + not useful. 
*/ + if (TARGET_SPE + && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode))) + return 0; + + if (GET_CODE (op) == SYMBOL_REF) + sym_ref = op; + + else if (GET_CODE (op) != CONST + || GET_CODE (XEXP (op, 0)) != PLUS + || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF + || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT) + return 0; + + else + { + rtx sum = XEXP (op, 0); + HOST_WIDE_INT summand; + + /* We have to be careful here, because it is the referenced address + that must be 32k from _SDA_BASE_, not just the symbol. */ + summand = INTVAL (XEXP (sum, 1)); + if (summand < 0 || summand > g_switch_value) + return 0; + + sym_ref = XEXP (sum, 0); + } + + return SYMBOL_REF_SMALL_P (sym_ref); +#else + return 0; +#endif +} + +/* Return true if either operand is a general purpose register. */ + +bool +gpr_or_gpr_p (rtx op0, rtx op1) +{ + return ((REG_P (op0) && INT_REGNO_P (REGNO (op0))) + || (REG_P (op1) && INT_REGNO_P (REGNO (op1)))); +} + +/* Return true if this is a move direct operation between GPR registers and + floating point/VSX registers. */ + +bool +direct_move_p (rtx op0, rtx op1) +{ + int regno0, regno1; + + if (!REG_P (op0) || !REG_P (op1)) + return false; + + if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR) + return false; + + regno0 = REGNO (op0); + regno1 = REGNO (op1); + if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER) + return false; + + if (INT_REGNO_P (regno0)) + return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1); + + else if (INT_REGNO_P (regno1)) + { + if (TARGET_MFPGPR && FP_REGNO_P (regno0)) + return true; + + else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0)) + return true; + } + + return false; +} + +/* Return true if the OFFSET is valid for the quad address instructions that + use d-form (register + offset) addressing. */ + +static inline bool +quad_address_offset_p (HOST_WIDE_INT offset) +{ + return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0); +} + +/* Return true if the ADDR is an acceptable address for a quad memory + operation of mode MODE (either LQ/STQ for general purpose registers, or + LXV/STXV for vector registers under ISA 3.0. GPR_P is true if this address + is intended for LQ/STQ. If it is false, the address is intended for the ISA + 3.0 LXV/STXV instruction. */ + +bool +quad_address_p (rtx addr, machine_mode mode, bool strict) +{ + rtx op0, op1; + + if (GET_MODE_SIZE (mode) != 16) + return false; + + if (legitimate_indirect_address_p (addr, strict)) + return true; + + if (VECTOR_MODE_P (mode) && !mode_supports_vsx_dform_quad (mode)) + return false; + + if (GET_CODE (addr) != PLUS) + return false; + + op0 = XEXP (addr, 0); + if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict)) + return false; + + op1 = XEXP (addr, 1); + if (!CONST_INT_P (op1)) + return false; + + return quad_address_offset_p (INTVAL (op1)); +} + +/* Return true if this is a load or store quad operation. This function does + not handle the atomic quad memory instructions. */ + +bool +quad_load_store_p (rtx op0, rtx op1) +{ + bool ret; + + if (!TARGET_QUAD_MEMORY) + ret = false; + + else if (REG_P (op0) && MEM_P (op1)) + ret = (quad_int_reg_operand (op0, GET_MODE (op0)) + && quad_memory_operand (op1, GET_MODE (op1)) + && !reg_overlap_mentioned_p (op0, op1)); + + else if (MEM_P (op0) && REG_P (op1)) + ret = (quad_memory_operand (op0, GET_MODE (op0)) + && quad_int_reg_operand (op1, GET_MODE (op1))); + + else + ret = false; + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\n========== quad_load_store, return %s\n", + ret ? 
"true" : "false"); + debug_rtx (gen_rtx_SET (op0, op1)); + } + + return ret; +} + +/* Given an address, return a constant offset term if one exists. */ + +static rtx +address_offset (rtx op) +{ + if (GET_CODE (op) == PRE_INC + || GET_CODE (op) == PRE_DEC) + op = XEXP (op, 0); + else if (GET_CODE (op) == PRE_MODIFY + || GET_CODE (op) == LO_SUM) + op = XEXP (op, 1); + + if (GET_CODE (op) == CONST) + op = XEXP (op, 0); + + if (GET_CODE (op) == PLUS) + op = XEXP (op, 1); + + if (CONST_INT_P (op)) + return op; + + return NULL_RTX; +} + +/* Return true if the MEM operand is a memory operand suitable for use + with a (full width, possibly multiple) gpr load/store. On + powerpc64 this means the offset must be divisible by 4. + Implements 'Y' constraint. + + Accept direct, indexed, offset, lo_sum and tocref. Since this is + a constraint function we know the operand has satisfied a suitable + memory predicate. Also accept some odd rtl generated by reload + (see rs6000_legitimize_reload_address for various forms). It is + important that reload rtl be accepted by appropriate constraints + but not by the operand predicate. + + Offsetting a lo_sum should not be allowed, except where we know by + alignment that a 32k boundary is not crossed, but see the ??? + comment in rs6000_legitimize_reload_address. Note that by + "offsetting" here we mean a further offset to access parts of the + MEM. It's fine to have a lo_sum where the inner address is offset + from a sym, since the same sym+offset will appear in the high part + of the address calculation. */ + +bool +mem_operand_gpr (rtx op, machine_mode mode) +{ + unsigned HOST_WIDE_INT offset; + int extra; + rtx addr = XEXP (op, 0); + + op = address_offset (addr); + if (op == NULL_RTX) + return true; + + offset = INTVAL (op); + if (TARGET_POWERPC64 && (offset & 3) != 0) + return false; + + extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD; + if (extra < 0) + extra = 0; + + if (GET_CODE (addr) == LO_SUM) + /* For lo_sum addresses, we must allow any offset except one that + causes a wrap, so test only the low 16 bits. */ + offset = ((offset & 0xffff) ^ 0x8000) - 0x8000; + + return offset + 0x8000 < 0x10000u - extra; +} + +/* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr, + enforce an offset divisible by 4 even for 32-bit. */ + +bool +mem_operand_ds_form (rtx op, machine_mode mode) +{ + unsigned HOST_WIDE_INT offset; + int extra; + rtx addr = XEXP (op, 0); + + if (!offsettable_address_p (false, mode, addr)) + return false; + + op = address_offset (addr); + if (op == NULL_RTX) + return true; + + offset = INTVAL (op); + if ((offset & 3) != 0) + return false; + + extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD; + if (extra < 0) + extra = 0; + + if (GET_CODE (addr) == LO_SUM) + /* For lo_sum addresses, we must allow any offset except one that + causes a wrap, so test only the low 16 bits. */ + offset = ((offset & 0xffff) ^ 0x8000) - 0x8000; + + return offset + 0x8000 < 0x10000u - extra; +} + +/* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */ + +static bool +reg_offset_addressing_ok_p (machine_mode mode) +{ + switch (mode) + { + case V16QImode: + case V8HImode: + case V4SFmode: + case V4SImode: + case V2DFmode: + case V2DImode: + case V1TImode: + case TImode: + case TFmode: + case KFmode: + /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the + ISA 3.0 vector d-form addressing mode was added. 
While TImode is not + a vector mode, if we want to use the VSX registers to move it around, + we need to restrict ourselves to reg+reg addressing. Similarly for + IEEE 128-bit floating point that is passed in a single vector + register. */ + if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)) + return mode_supports_vsx_dform_quad (mode); + break; + + case V4HImode: + case V2SImode: + case V1DImode: + case V2SFmode: + /* Paired vector modes. Only reg+reg addressing is valid. */ + if (TARGET_PAIRED_FLOAT) + return false; + break; + + case SDmode: + /* If we can do direct load/stores of SDmode, restrict it to reg+reg + addressing for the LFIWZX and STFIWX instructions. */ + if (TARGET_NO_SDMODE_STACK) + return false; + break; + + default: + break; + } + + return true; +} + +static bool +virtual_stack_registers_memory_p (rtx op) +{ + int regnum; + + if (GET_CODE (op) == REG) + regnum = REGNO (op); + + else if (GET_CODE (op) == PLUS + && GET_CODE (XEXP (op, 0)) == REG + && GET_CODE (XEXP (op, 1)) == CONST_INT) + regnum = REGNO (XEXP (op, 0)); + + else + return false; + + return (regnum >= FIRST_VIRTUAL_REGISTER + && regnum <= LAST_VIRTUAL_POINTER_REGISTER); +} + +/* Return true if a MODE sized memory accesses to OP plus OFFSET + is known to not straddle a 32k boundary. This function is used + to determine whether -mcmodel=medium code can use TOC pointer + relative addressing for OP. This means the alignment of the TOC + pointer must also be taken into account, and unfortunately that is + only 8 bytes. */ + +#ifndef POWERPC64_TOC_POINTER_ALIGNMENT +#define POWERPC64_TOC_POINTER_ALIGNMENT 8 +#endif + +static bool +offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset, + machine_mode mode) +{ + tree decl; + unsigned HOST_WIDE_INT dsize, dalign, lsb, mask; + + if (GET_CODE (op) != SYMBOL_REF) + return false; + + /* ISA 3.0 vector d-form addressing is restricted, don't allow + SYMBOL_REF. */ + if (mode_supports_vsx_dform_quad (mode)) + return false; + + dsize = GET_MODE_SIZE (mode); + decl = SYMBOL_REF_DECL (op); + if (!decl) + { + if (dsize == 0) + return false; + + /* -fsection-anchors loses the original SYMBOL_REF_DECL when + replacing memory addresses with an anchor plus offset. We + could find the decl by rummaging around in the block->objects + VEC for the given offset but that seems like too much work. */ + dalign = BITS_PER_UNIT; + if (SYMBOL_REF_HAS_BLOCK_INFO_P (op) + && SYMBOL_REF_ANCHOR_P (op) + && SYMBOL_REF_BLOCK (op) != NULL) + { + struct object_block *block = SYMBOL_REF_BLOCK (op); + + dalign = block->alignment; + offset += SYMBOL_REF_BLOCK_OFFSET (op); + } + else if (CONSTANT_POOL_ADDRESS_P (op)) + { + /* It would be nice to have get_pool_align().. */ + machine_mode cmode = get_pool_mode (op); + + dalign = GET_MODE_ALIGNMENT (cmode); + } + } + else if (DECL_P (decl)) + { + dalign = DECL_ALIGN (decl); + + if (dsize == 0) + { + /* Allow BLKmode when the entire object is known to not + cross a 32k boundary. */ + if (!DECL_SIZE_UNIT (decl)) + return false; + + if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl))) + return false; + + dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl)); + if (dsize > 32768) + return false; + + dalign /= BITS_PER_UNIT; + if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT) + dalign = POWERPC64_TOC_POINTER_ALIGNMENT; + return dalign >= dsize; + } + } + else + gcc_unreachable (); + + /* Find how many bits of the alignment we know for this access. 
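+     E.g. a decl with 8-byte alignment accessed at offset 4 only guarantees
+     4-byte alignment for the access, so only accesses of at most 4 bytes
+     are accepted there.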
*/ + dalign /= BITS_PER_UNIT; + if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT) + dalign = POWERPC64_TOC_POINTER_ALIGNMENT; + mask = dalign - 1; + lsb = offset & -offset; + mask &= lsb - 1; + dalign = mask + 1; + + return dalign >= dsize; +} + +static bool +constant_pool_expr_p (rtx op) +{ + rtx base, offset; + + split_const (op, &base, &offset); + return (GET_CODE (base) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (base) + && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode)); +} + +static const_rtx tocrel_base, tocrel_offset; + +/* Return true if OP is a toc pointer relative address (the output + of create_TOC_reference). If STRICT, do not match non-split + -mcmodel=large/medium toc pointer relative addresses. */ + +bool +toc_relative_expr_p (const_rtx op, bool strict) +{ + if (!TARGET_TOC) + return false; + + if (TARGET_CMODEL != CMODEL_SMALL) + { + /* When strict ensure we have everything tidy. */ + if (strict + && !(GET_CODE (op) == LO_SUM + && REG_P (XEXP (op, 0)) + && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))) + return false; + + /* When not strict, allow non-split TOC addresses and also allow + (lo_sum (high ..)) TOC addresses created during reload. */ + if (GET_CODE (op) == LO_SUM) + op = XEXP (op, 1); + } + + tocrel_base = op; + tocrel_offset = const0_rtx; + if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op))) + { + tocrel_base = XEXP (op, 0); + tocrel_offset = XEXP (op, 1); + } + + return (GET_CODE (tocrel_base) == UNSPEC + && XINT (tocrel_base, 1) == UNSPEC_TOCREL); +} + +/* Return true if X is a constant pool address, and also for cmodel=medium + if X is a toc-relative address known to be offsettable within MODE. */ + +bool +legitimate_constant_pool_address_p (const_rtx x, machine_mode mode, + bool strict) +{ + return (toc_relative_expr_p (x, strict) + && (TARGET_CMODEL != CMODEL_MEDIUM + || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0)) + || mode == QImode + || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0), + INTVAL (tocrel_offset), mode))); +} + +static bool +legitimate_small_data_p (machine_mode mode, rtx x) +{ + return (DEFAULT_ABI == ABI_V4 + && !flag_pic && !TARGET_TOC + && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST) + && small_data_operand (x, mode)); +} + +/* SPE offset addressing is limited to 5-bits worth of double words. */ +#define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0) + +bool +rs6000_legitimate_offset_address_p (machine_mode mode, rtx x, + bool strict, bool worst_case) +{ + unsigned HOST_WIDE_INT offset; + unsigned int extra; + + if (GET_CODE (x) != PLUS) + return false; + if (!REG_P (XEXP (x, 0))) + return false; + if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict)) + return false; + if (mode_supports_vsx_dform_quad (mode)) + return quad_address_p (x, mode, strict); + if (!reg_offset_addressing_ok_p (mode)) + return virtual_stack_registers_memory_p (x); + if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress)) + return true; + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + return false; + + offset = INTVAL (XEXP (x, 1)); + extra = 0; + switch (mode) + { + case V4HImode: + case V2SImode: + case V1DImode: + case V2SFmode: + /* SPE vector modes. */ + return SPE_CONST_OFFSET_OK (offset); + + case DFmode: + case DDmode: + case DImode: + /* On e500v2, we may have: + + (subreg:DF (mem:DI (plus (reg) (const_int))) 0). + + Which gets addressed with evldd instructions. 
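+     Such offsets must satisfy SPE_CONST_OFFSET_OK, i.e. be a multiple of 8
+     no larger than 248; the 16-byte cases below also require offset + 8 to
+     be in range.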
*/ + if (TARGET_E500_DOUBLE) + return SPE_CONST_OFFSET_OK (offset); + + /* If we are using VSX scalar loads, restrict ourselves to reg+reg + addressing. */ + if (VECTOR_MEM_VSX_P (mode)) + return false; + + if (!worst_case) + break; + if (!TARGET_POWERPC64) + extra = 4; + else if (offset & 3) + return false; + break; + + case TFmode: + case IFmode: + case KFmode: + case TDmode: + case TImode: + case PTImode: + if (TARGET_E500_DOUBLE) + return (SPE_CONST_OFFSET_OK (offset) + && SPE_CONST_OFFSET_OK (offset + 8)); + + extra = 8; + if (!worst_case) + break; + if (!TARGET_POWERPC64) + extra = 12; + else if (offset & 3) + return false; + break; + + default: + break; + } + + offset += 0x8000; + return offset < 0x10000 - extra; +} + +bool +legitimate_indexed_address_p (rtx x, int strict) +{ + rtx op0, op1; + + if (GET_CODE (x) != PLUS) + return false; + + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + /* Recognize the rtl generated by reload which we know will later be + replaced with proper base and index regs. */ + if (!strict + && reload_in_progress + && (REG_P (op0) || GET_CODE (op0) == PLUS) + && REG_P (op1)) + return true; + + return (REG_P (op0) && REG_P (op1) + && ((INT_REG_OK_FOR_BASE_P (op0, strict) + && INT_REG_OK_FOR_INDEX_P (op1, strict)) + || (INT_REG_OK_FOR_BASE_P (op1, strict) + && INT_REG_OK_FOR_INDEX_P (op0, strict)))); +} + +bool +avoiding_indexed_address_p (machine_mode mode) +{ + /* Avoid indexed addressing for modes that have non-indexed + load/store instruction forms. */ + return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode)); +} + +bool +legitimate_indirect_address_p (rtx x, int strict) +{ + return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict); +} + +bool +macho_lo_sum_memory_operand (rtx x, machine_mode mode) +{ + if (!TARGET_MACHO || !flag_pic + || mode != SImode || GET_CODE (x) != MEM) + return false; + x = XEXP (x, 0); + + if (GET_CODE (x) != LO_SUM) + return false; + if (GET_CODE (XEXP (x, 0)) != REG) + return false; + if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0)) + return false; + x = XEXP (x, 1); + + return CONSTANT_P (x); +} + +static bool +legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict) +{ + if (GET_CODE (x) != LO_SUM) + return false; + if (GET_CODE (XEXP (x, 0)) != REG) + return false; + if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict)) + return false; + /* quad word addresses are restricted, and we can't use LO_SUM. */ + if (mode_supports_vsx_dform_quad (mode)) + return false; + /* Restrict addressing for DI because of our SUBREG hackery. */ + if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD) + return false; + x = XEXP (x, 1); + + if (TARGET_ELF || TARGET_MACHO) + { + bool large_toc_ok; + + if (DEFAULT_ABI == ABI_V4 && flag_pic) + return false; + /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls + push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS + recognizes some LO_SUM addresses as valid although this + function says opposite. In most cases, LRA through different + transformations can generate correct code for address reloads. + It can not manage only some LO_SUM cases. So we need to add + code analogous to one in rs6000_legitimize_reload_address for + LOW_SUM here saying that some addresses are still valid. */ + large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL + && small_toc_ref (x, VOIDmode)); + if (TARGET_TOC && ! large_toc_ok) + return false; + if (GET_MODE_NUNITS (mode) != 1) + return false; + if (GET_MODE_SIZE (mode) > UNITS_PER_WORD + && !(/* ??? 
Assume floating point reg based on mode? */ + TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && (mode == DFmode || mode == DDmode))) + return false; + + return CONSTANT_P (x) || large_toc_ok; + } + + return false; +} + + +/* Try machine-dependent ways of modifying an illegitimate address + to be legitimate. If we find one, return the new, valid address. + This is used from only one place: `memory_address' in explow.c. + + OLDX is the address as it was before break_out_memory_refs was + called. In some cases it is useful to look at this to decide what + needs to be done. + + It is always safe for this function to do nothing. It exists to + recognize opportunities to optimize the output. + + On RS/6000, first check for the sum of a register with a constant + integer that is out of range. If so, generate code to add the + constant with the low-order 16 bits masked to the register and force + this result into another register (this can be done with `cau'). + Then generate an address of REG+(CONST&0xffff), allowing for the + possibility of bit 16 being a one. + + Then check for the sum of a register and something not constant, try to + load the other things into a register and return the sum. */ + +static rtx +rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, + machine_mode mode) +{ + unsigned int extra; + + if (!reg_offset_addressing_ok_p (mode) + || mode_supports_vsx_dform_quad (mode)) + { + if (virtual_stack_registers_memory_p (x)) + return x; + + /* In theory we should not be seeing addresses of the form reg+0, + but just in case it is generated, optimize it away. */ + if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx) + return force_reg (Pmode, XEXP (x, 0)); + + /* For TImode with load/store quad, restrict addresses to just a single + pointer, so it works with both GPRs and VSX registers. */ + /* Make sure both operands are registers. */ + else if (GET_CODE (x) == PLUS + && (mode != TImode || !TARGET_VSX_TIMODE)) + return gen_rtx_PLUS (Pmode, + force_reg (Pmode, XEXP (x, 0)), + force_reg (Pmode, XEXP (x, 1))); + else + return force_reg (Pmode, x); + } + if (GET_CODE (x) == SYMBOL_REF) + { + enum tls_model model = SYMBOL_REF_TLS_MODEL (x); + if (model != 0) + return rs6000_legitimize_tls_address (x, model); + } + + extra = 0; + switch (mode) + { + case TFmode: + case TDmode: + case TImode: + case PTImode: + case IFmode: + case KFmode: + /* As in legitimate_offset_address_p we do not assume + worst-case. The mode here is just a hint as to the registers + used. A TImode is usually in gprs, but may actually be in + fprs. Leave worst-case scenario for reload to handle via + insn constraints. PTImode is only GPRs. 
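+     With EXTRA set to 8 here, an offset such as 0x7ffc is rewritten below,
+     since the second doubleword of a 16-byte access would otherwise need
+     the out-of-range displacement 0x8004.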
*/ + extra = 8; + break; + default: + break; + } + + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 0)) == REG + && GET_CODE (XEXP (x, 1)) == CONST_INT + && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000) + >= 0x10000 - extra) + && !(SPE_VECTOR_MODE (mode) + || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))) + { + HOST_WIDE_INT high_int, low_int; + rtx sum; + low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000; + if (low_int >= 0x8000 - extra) + low_int = 0; + high_int = INTVAL (XEXP (x, 1)) - low_int; + sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0), + GEN_INT (high_int)), 0); + return plus_constant (Pmode, sum, low_int); + } + else if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 0)) == REG + && GET_CODE (XEXP (x, 1)) != CONST_INT + && GET_MODE_NUNITS (mode) == 1 + && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD + || (/* ??? Assume floating point reg based on mode? */ + (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) + && (mode == DFmode || mode == DDmode))) + && !avoiding_indexed_address_p (mode)) + { + return gen_rtx_PLUS (Pmode, XEXP (x, 0), + force_reg (Pmode, force_operand (XEXP (x, 1), 0))); + } + else if (SPE_VECTOR_MODE (mode) + || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)) + { + if (mode == DImode) + return x; + /* We accept [reg + reg] and [reg + OFFSET]. */ + + if (GET_CODE (x) == PLUS) + { + rtx op1 = XEXP (x, 0); + rtx op2 = XEXP (x, 1); + rtx y; + + op1 = force_reg (Pmode, op1); + + if (GET_CODE (op2) != REG + && (GET_CODE (op2) != CONST_INT + || !SPE_CONST_OFFSET_OK (INTVAL (op2)) + || (GET_MODE_SIZE (mode) > 8 + && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8)))) + op2 = force_reg (Pmode, op2); + + /* We can't always do [reg + reg] for these, because [reg + + reg + offset] is not a legitimate addressing mode. */ + y = gen_rtx_PLUS (Pmode, op1, op2); + + if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2)) + return force_reg (Pmode, y); + else + return y; + } + + return force_reg (Pmode, x); + } + else if ((TARGET_ELF +#if TARGET_MACHO + || !MACHO_DYNAMIC_NO_PIC_P +#endif + ) + && TARGET_32BIT + && TARGET_NO_TOC + && ! flag_pic + && GET_CODE (x) != CONST_INT + && GET_CODE (x) != CONST_WIDE_INT + && GET_CODE (x) != CONST_DOUBLE + && CONSTANT_P (x) + && GET_MODE_NUNITS (mode) == 1 + && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD + || (/* ??? Assume floating point reg based on mode? */ + (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) + && (mode == DFmode || mode == DDmode)))) + { + rtx reg = gen_reg_rtx (Pmode); + if (TARGET_ELF) + emit_insn (gen_elf_high (reg, x)); + else + emit_insn (gen_macho_high (reg, x)); + return gen_rtx_LO_SUM (Pmode, reg, x); + } + else if (TARGET_TOC + && GET_CODE (x) == SYMBOL_REF + && constant_pool_expr_p (x) + && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode)) + return create_TOC_reference (x, NULL_RTX); + else + return x; +} + +/* Debug version of rs6000_legitimize_address. 
*/ +static rtx +rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode) +{ + rtx ret; + rtx_insn *insns; + + start_sequence (); + ret = rs6000_legitimize_address (x, oldx, mode); + insns = get_insns (); + end_sequence (); + + if (ret != x) + { + fprintf (stderr, + "\nrs6000_legitimize_address: mode %s, old code %s, " + "new code %s, modified\n", + GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)), + GET_RTX_NAME (GET_CODE (ret))); + + fprintf (stderr, "Original address:\n"); + debug_rtx (x); + + fprintf (stderr, "oldx:\n"); + debug_rtx (oldx); + + fprintf (stderr, "New address:\n"); + debug_rtx (ret); + + if (insns) + { + fprintf (stderr, "Insns added:\n"); + debug_rtx_list (insns, 20); + } + } + else + { + fprintf (stderr, + "\nrs6000_legitimize_address: mode %s, code %s, no change:\n", + GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x))); + + debug_rtx (x); + } + + if (insns) + emit_insn (insns); + + return ret; +} + +/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. + We need to emit DTP-relative relocations. */ + +static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; +static void +rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x) +{ + switch (size) + { + case 4: + fputs ("\t.long\t", file); + break; + case 8: + fputs (DOUBLE_INT_ASM_OP, file); + break; + default: + gcc_unreachable (); + } + output_addr_const (file, x); + if (TARGET_ELF) + fputs ("@dtprel+0x8000", file); + else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF) + { + switch (SYMBOL_REF_TLS_MODEL (x)) + { + case 0: + break; + case TLS_MODEL_LOCAL_EXEC: + fputs ("@le", file); + break; + case TLS_MODEL_INITIAL_EXEC: + fputs ("@ie", file); + break; + case TLS_MODEL_GLOBAL_DYNAMIC: + case TLS_MODEL_LOCAL_DYNAMIC: + fputs ("@m", file); + break; + default: + gcc_unreachable (); + } + } +} + +/* Return true if X is a symbol that refers to real (rather than emulated) + TLS. */ + +static bool +rs6000_real_tls_symbol_ref_p (rtx x) +{ + return (GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL); +} + +/* In the name of slightly smaller debug output, and to cater to + general assembler lossage, recognize various UNSPEC sequences + and turn them back into a direct symbol reference. */ + +static rtx +rs6000_delegitimize_address (rtx orig_x) +{ + rtx x, y, offset; + + orig_x = delegitimize_mem_from_attrs (orig_x); + x = orig_x; + if (MEM_P (x)) + x = XEXP (x, 0); + + y = x; + if (TARGET_CMODEL != CMODEL_SMALL + && GET_CODE (y) == LO_SUM) + y = XEXP (y, 1); + + offset = NULL_RTX; + if (GET_CODE (y) == PLUS + && GET_MODE (y) == Pmode + && CONST_INT_P (XEXP (y, 1))) + { + offset = XEXP (y, 1); + y = XEXP (y, 0); + } + + if (GET_CODE (y) == UNSPEC + && XINT (y, 1) == UNSPEC_TOCREL) + { + y = XVECEXP (y, 0, 0); + +#ifdef HAVE_AS_TLS + /* Do not associate thread-local symbols with the original + constant pool symbol. 
*/ + if (TARGET_XCOFF + && GET_CODE (y) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (y) + && rs6000_real_tls_symbol_ref_p (get_pool_constant (y))) + return orig_x; +#endif + + if (offset != NULL_RTX) + y = gen_rtx_PLUS (Pmode, y, offset); + if (!MEM_P (orig_x)) + return y; + else + return replace_equiv_address_nv (orig_x, y); + } + + if (TARGET_MACHO + && GET_CODE (orig_x) == LO_SUM + && GET_CODE (XEXP (orig_x, 1)) == CONST) + { + y = XEXP (XEXP (orig_x, 1), 0); + if (GET_CODE (y) == UNSPEC + && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET) + return XVECEXP (y, 0, 0); + } + + return orig_x; +} + +/* Return true if X shouldn't be emitted into the debug info. + The linker doesn't like .toc section references from + .debug_* sections, so reject .toc section symbols. */ + +static bool +rs6000_const_not_ok_for_debug_p (rtx x) +{ + if (GET_CODE (x) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (x)) + { + rtx c = get_pool_constant (x); + machine_mode cmode = get_pool_mode (x); + if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode)) + return true; + } + + return false; +} + + +/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */ + +static bool +rs6000_legitimate_combined_insn (rtx_insn *insn) +{ + int icode = INSN_CODE (insn); + + /* Reject creating doloop insns. Combine should not be allowed + to create these for a number of reasons: + 1) In a nested loop, if combine creates one of these in an + outer loop and the register allocator happens to allocate ctr + to the outer loop insn, then the inner loop can't use ctr. + Inner loops ought to be more highly optimized. + 2) Combine often wants to create one of these from what was + originally a three insn sequence, first combining the three + insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not + allocated ctr, the splitter takes use back to the three insn + sequence. It's better to stop combine at the two insn + sequence. + 3) Faced with not being able to allocate ctr for ctrsi/crtdi + insns, the register allocator sometimes uses floating point + or vector registers for the pseudo. Since ctrsi/ctrdi is a + jump insn and output reloads are not implemented for jumps, + the ctrsi/ctrdi splitters need to handle all possible cases. + That's a pain, and it gets to be seriously difficult when a + splitter that runs after reload needs memory to transfer from + a gpr to fpr. See PR70098 and PR71763 which are not fixed + for the difficult case. It's better to not create problems + in the first place. */ + if (icode != CODE_FOR_nothing + && (icode == CODE_FOR_ctrsi_internal1 + || icode == CODE_FOR_ctrdi_internal1 + || icode == CODE_FOR_ctrsi_internal2 + || icode == CODE_FOR_ctrdi_internal2 + || icode == CODE_FOR_ctrsi_internal3 + || icode == CODE_FOR_ctrdi_internal3 + || icode == CODE_FOR_ctrsi_internal4 + || icode == CODE_FOR_ctrdi_internal4)) + return false; + + return true; +} + +/* Construct the SYMBOL_REF for the tls_get_addr function. */ + +static GTY(()) rtx rs6000_tls_symbol; +static rtx +rs6000_tls_get_addr (void) +{ + if (!rs6000_tls_symbol) + rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr"); + + return rs6000_tls_symbol; +} + +/* Construct the SYMBOL_REF for TLS GOT references. 
*/ + +static GTY(()) rtx rs6000_got_symbol; +static rtx +rs6000_got_sym (void) +{ + if (!rs6000_got_symbol) + { + rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); + SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL; + SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL; + } + + return rs6000_got_symbol; +} + +/* AIX Thread-Local Address support. */ + +static rtx +rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model) +{ + rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr; + const char *name; + char *tlsname; + + name = XSTR (addr, 0); + /* Append TLS CSECT qualifier, unless the symbol already is qualified + or the symbol will be in TLS private data section. */ + if (name[strlen (name) - 1] != ']' + && (TREE_PUBLIC (SYMBOL_REF_DECL (addr)) + || bss_initializer_p (SYMBOL_REF_DECL (addr)))) + { + tlsname = XALLOCAVEC (char, strlen (name) + 4); + strcpy (tlsname, name); + strcat (tlsname, + bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]"); + tlsaddr = copy_rtx (addr); + XSTR (tlsaddr, 0) = ggc_strdup (tlsname); + } + else + tlsaddr = addr; + + /* Place addr into TOC constant pool. */ + sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr); + + /* Output the TOC entry and create the MEM referencing the value. */ + if (constant_pool_expr_p (XEXP (sym, 0)) + && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode)) + { + tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX); + mem = gen_const_mem (Pmode, tocref); + set_mem_alias_set (mem, get_TOC_alias_set ()); + } + else + return sym; + + /* Use global-dynamic for local-dynamic. */ + if (model == TLS_MODEL_GLOBAL_DYNAMIC + || model == TLS_MODEL_LOCAL_DYNAMIC) + { + /* Create new TOC reference for @m symbol. */ + name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0); + tlsname = XALLOCAVEC (char, strlen (name) + 1); + strcpy (tlsname, "*LCM"); + strcat (tlsname, name + 3); + rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname)); + SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL; + tocref = create_TOC_reference (modaddr, NULL_RTX); + rtx modmem = gen_const_mem (Pmode, tocref); + set_mem_alias_set (modmem, get_TOC_alias_set ()); + + rtx modreg = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (modreg, modmem)); + + tmpreg = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (tmpreg, mem)); + + dest = gen_reg_rtx (Pmode); + if (TARGET_32BIT) + emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg)); + else + emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg)); + return dest; + } + /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */ + else if (TARGET_32BIT) + { + tlsreg = gen_reg_rtx (SImode); + emit_insn (gen_tls_get_tpointer (tlsreg)); + } + else + tlsreg = gen_rtx_REG (DImode, 13); + + /* Load the TOC value into temporary register. */ + tmpreg = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (tmpreg, mem)); + set_unique_reg_note (get_last_insn (), REG_EQUAL, + gen_rtx_MINUS (Pmode, addr, tlsreg)); + + /* Add TOC symbol value to TLS pointer. */ + dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg)); + + return dest; +} + +/* ADDR contains a thread-local SYMBOL_REF. Generate code to compute + this (thread-local) address. 
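+   Local-exec uses a tprel offset from the thread pointer (16-bit or 32-bit
+   depending on rs6000_tls_size), initial-exec loads the offset from the
+   GOT, and global- and local-dynamic call __tls_get_addr.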
*/ + +static rtx +rs6000_legitimize_tls_address (rtx addr, enum tls_model model) +{ + rtx dest, insn; + + if (TARGET_XCOFF) + return rs6000_legitimize_tls_address_aix (addr, model); + + dest = gen_reg_rtx (Pmode); + if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16) + { + rtx tlsreg; + + if (TARGET_64BIT) + { + tlsreg = gen_rtx_REG (Pmode, 13); + insn = gen_tls_tprel_64 (dest, tlsreg, addr); + } + else + { + tlsreg = gen_rtx_REG (Pmode, 2); + insn = gen_tls_tprel_32 (dest, tlsreg, addr); + } + emit_insn (insn); + } + else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32) + { + rtx tlsreg, tmp; + + tmp = gen_reg_rtx (Pmode); + if (TARGET_64BIT) + { + tlsreg = gen_rtx_REG (Pmode, 13); + insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr); + } + else + { + tlsreg = gen_rtx_REG (Pmode, 2); + insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr); + } + emit_insn (insn); + if (TARGET_64BIT) + insn = gen_tls_tprel_lo_64 (dest, tmp, addr); + else + insn = gen_tls_tprel_lo_32 (dest, tmp, addr); + emit_insn (insn); + } + else + { + rtx r3, got, tga, tmp1, tmp2, call_insn; + + /* We currently use relocations like @got@tlsgd for tls, which + means the linker will handle allocation of tls entries, placing + them in the .got section. So use a pointer to the .got section, + not one to secondary TOC sections used by 64-bit -mminimal-toc, + or to secondary GOT sections used by 32-bit -fPIC. */ + if (TARGET_64BIT) + got = gen_rtx_REG (Pmode, 2); + else + { + if (flag_pic == 1) + got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM); + else + { + rtx gsym = rs6000_got_sym (); + got = gen_reg_rtx (Pmode); + if (flag_pic == 0) + rs6000_emit_move (got, gsym, Pmode); + else + { + rtx mem, lab; + + tmp1 = gen_reg_rtx (Pmode); + tmp2 = gen_reg_rtx (Pmode); + mem = gen_const_mem (Pmode, tmp1); + lab = gen_label_rtx (); + emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab)); + emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO)); + if (TARGET_LINK_STACK) + emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4))); + emit_move_insn (tmp2, mem); + rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2)); + set_unique_reg_note (last, REG_EQUAL, gsym); + } + } + } + + if (model == TLS_MODEL_GLOBAL_DYNAMIC) + { + tga = rs6000_tls_get_addr (); + emit_library_call_value (tga, dest, LCT_CONST, Pmode, + 1, const0_rtx, Pmode); + + r3 = gen_rtx_REG (Pmode, 3); + if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + { + if (TARGET_64BIT) + insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx); + else + insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx); + } + else if (DEFAULT_ABI == ABI_V4) + insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx); + else + gcc_unreachable (); + call_insn = last_call_insn (); + PATTERN (call_insn) = insn; + if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic) + use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), + pic_offset_table_rtx); + } + else if (model == TLS_MODEL_LOCAL_DYNAMIC) + { + tga = rs6000_tls_get_addr (); + tmp1 = gen_reg_rtx (Pmode); + emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, + 1, const0_rtx, Pmode); + + r3 = gen_rtx_REG (Pmode, 3); + if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + { + if (TARGET_64BIT) + insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx); + else + insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx); + } + else if (DEFAULT_ABI == ABI_V4) + insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx); + else + gcc_unreachable (); + call_insn = last_call_insn (); + PATTERN (call_insn) = insn; + if (DEFAULT_ABI == ABI_V4 && 
TARGET_SECURE_PLT && flag_pic) + use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), + pic_offset_table_rtx); + + if (rs6000_tls_size == 16) + { + if (TARGET_64BIT) + insn = gen_tls_dtprel_64 (dest, tmp1, addr); + else + insn = gen_tls_dtprel_32 (dest, tmp1, addr); + } + else if (rs6000_tls_size == 32) + { + tmp2 = gen_reg_rtx (Pmode); + if (TARGET_64BIT) + insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr); + else + insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr); + emit_insn (insn); + if (TARGET_64BIT) + insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr); + else + insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr); + } + else + { + tmp2 = gen_reg_rtx (Pmode); + if (TARGET_64BIT) + insn = gen_tls_got_dtprel_64 (tmp2, got, addr); + else + insn = gen_tls_got_dtprel_32 (tmp2, got, addr); + emit_insn (insn); + insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1)); + } + emit_insn (insn); + } + else + { + /* IE, or 64-bit offset LE. */ + tmp2 = gen_reg_rtx (Pmode); + if (TARGET_64BIT) + insn = gen_tls_got_tprel_64 (tmp2, got, addr); + else + insn = gen_tls_got_tprel_32 (tmp2, got, addr); + emit_insn (insn); + if (TARGET_64BIT) + insn = gen_tls_tls_64 (dest, tmp2, addr); + else + insn = gen_tls_tls_32 (dest, tmp2, addr); + emit_insn (insn); + } + } + + return dest; +} + +/* Only create the global variable for the stack protect guard if we are using + the global flavor of that guard. */ +static tree +rs6000_init_stack_protect_guard (void) +{ + if (rs6000_stack_protector_guard == SSP_GLOBAL) + return default_stack_protect_guard (); + + return NULL_TREE; +} + +/* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ + +static bool +rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) +{ + if (GET_CODE (x) == HIGH + && GET_CODE (XEXP (x, 0)) == UNSPEC) + return true; + + /* A TLS symbol in the TOC cannot contain a sum. */ + if (GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0) + return true; + + /* Do not place an ELF TLS symbol in the constant pool. */ + return TARGET_ELF && tls_referenced_p (x); +} + +/* Return true iff the given SYMBOL_REF refers to a constant pool entry + that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF + can be addressed relative to the toc pointer. */ + +static bool +use_toc_relative_ref (rtx sym, machine_mode mode) +{ + return ((constant_pool_expr_p (sym) + && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym), + get_pool_mode (sym))) + || (TARGET_CMODEL == CMODEL_MEDIUM + && SYMBOL_REF_LOCAL_P (sym) + && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT)); +} + +/* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to + replace the input X, or the original X if no replacement is called for. + The output parameter *WIN is 1 if the calling macro should goto WIN, + 0 if it should not. + + For RS/6000, we wish to handle large displacements off a base + register by splitting the addend across an addiu/addis and the mem insn. + This cuts number of extra insns needed from 3 to 1. + + On Darwin, we use this to generate code for floating point constants. + A movsf_low is generated so we wind up with 2 instructions rather than 3. + The Darwin code is inside #if TARGET_MACHO because only then are the + machopic_* functions defined. 
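+   For example, a displacement of 0x12345 is split into an addis of 1
+   (adding 0x10000 to the base register) with the remaining 0x2345 kept in
+   the memory insn.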
*/ +static rtx +rs6000_legitimize_reload_address (rtx x, machine_mode mode, + int opnum, int type, + int ind_levels ATTRIBUTE_UNUSED, int *win) +{ + bool reg_offset_p = reg_offset_addressing_ok_p (mode); + bool quad_offset_p = mode_supports_vsx_dform_quad (mode); + + /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a + DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */ + if (reg_offset_p + && opnum == 1 + && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode) + || (mode == DImode && recog_data.operand_mode[0] == V2DImode) + || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode + && TARGET_P9_VECTOR) + || (mode == SImode && recog_data.operand_mode[0] == V4SImode + && TARGET_P9_VECTOR))) + reg_offset_p = false; + + /* We must recognize output that we have already generated ourselves. */ + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT + && GET_CODE (XEXP (x, 1)) == CONST_INT) + { + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n"); + debug_rtx (x); + } + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, + BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, + opnum, (enum reload_type) type); + *win = 1; + return x; + } + + /* Likewise for (lo_sum (high ...) ...) output we have generated. */ + if (GET_CODE (x) == LO_SUM + && GET_CODE (XEXP (x, 0)) == HIGH) + { + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n"); + debug_rtx (x); + } + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, + BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, + opnum, (enum reload_type) type); + *win = 1; + return x; + } + +#if TARGET_MACHO + if (DEFAULT_ABI == ABI_DARWIN && flag_pic + && GET_CODE (x) == LO_SUM + && GET_CODE (XEXP (x, 0)) == PLUS + && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx + && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH + && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1) + && machopic_operand_p (XEXP (x, 1))) + { + /* Result of previous invocation of this function on Darwin + floating point constant. */ + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, + BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, + opnum, (enum reload_type) type); + *win = 1; + return x; + } +#endif + + if (TARGET_CMODEL != CMODEL_SMALL + && reg_offset_p + && !quad_offset_p + && small_toc_ref (x, VOIDmode)) + { + rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x)); + x = gen_rtx_LO_SUM (Pmode, hi, x); + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n"); + debug_rtx (x); + } + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, + BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, + opnum, (enum reload_type) type); + *win = 1; + return x; + } + + if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER + && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1) + && CONST_INT_P (XEXP (x, 1)) + && reg_offset_p + && !SPE_VECTOR_MODE (mode) + && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD) + && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))) + { + HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); + HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT high + = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000; + + /* Check for 32-bit overflow or quad addresses with one of the + four least significant bits set. 
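+	 As a worked illustration (example values only): for
+	 val = 0x1234abcd the computation above gives
+	 low = ((0xabcd ^ 0x8000) - 0x8000) = -0x5433 and
+	 high = 0x12350000, so high + low == val and the high part can
+	 be reloaded into the base register (an addis) while the low
+	 part stays as the memory displacement.  A 64-bit value such as
+	 0x100008000 gives high = 0x10000 and low = -0x8000, so
+	 high + low != val and the split is refused below; a quad
+	 (DQ-form) address is likewise refused unless the bottom four
+	 bits of the low part are zero.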
*/ + if (high + low != val + || (quad_offset_p && (low & 0xf))) + { + *win = 0; + return x; + } + + /* Reload the high part into a base reg; leave the low part + in the mem directly. */ + + x = gen_rtx_PLUS (GET_MODE (x), + gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0), + GEN_INT (high)), + GEN_INT (low)); + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n"); + debug_rtx (x); + } + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, + BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, + opnum, (enum reload_type) type); + *win = 1; + return x; + } + + if (GET_CODE (x) == SYMBOL_REF + && reg_offset_p + && !quad_offset_p + && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)) + && !SPE_VECTOR_MODE (mode) +#if TARGET_MACHO + && DEFAULT_ABI == ABI_DARWIN + && (flag_pic || MACHO_DYNAMIC_NO_PIC_P) + && machopic_symbol_defined_p (x) +#else + && DEFAULT_ABI == ABI_V4 + && !flag_pic +#endif + /* Don't do this for TFmode or TDmode, since the result isn't offsettable. + The same goes for DImode without 64-bit gprs and DFmode and DDmode + without fprs. + ??? Assume floating point reg based on mode? This assumption is + violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c + where reload ends up doing a DFmode load of a constant from + mem using two gprs. Unfortunately, at this point reload + hasn't yet selected regs so poking around in reload data + won't help and even if we could figure out the regs reliably, + we'd still want to allow this transformation when the mem is + naturally aligned. Since we say the address is good here, we + can't disable offsets from LO_SUMs in mem_operand_gpr. + FIXME: Allow offset from lo_sum for other modes too, when + mem is sufficiently aligned. + + Also disallow this if the type can go in VMX/Altivec registers, since + those registers do not have d-form (reg+offset) address modes. */ + && !reg_addr[mode].scalar_in_vmx_p + && mode != TFmode + && mode != TDmode + && mode != IFmode + && mode != KFmode + && (mode != TImode || !TARGET_VSX_TIMODE) + && mode != PTImode + && (mode != DImode || TARGET_POWERPC64) + && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64 + || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT))) + { +#if TARGET_MACHO + if (flag_pic) + { + rtx offset = machopic_gen_offset (x); + x = gen_rtx_LO_SUM (GET_MODE (x), + gen_rtx_PLUS (Pmode, pic_offset_table_rtx, + gen_rtx_HIGH (Pmode, offset)), offset); + } + else +#endif + x = gen_rtx_LO_SUM (GET_MODE (x), + gen_rtx_HIGH (Pmode, x), x); + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n"); + debug_rtx (x); + } + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, + BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, + opnum, (enum reload_type) type); + *win = 1; + return x; + } + + /* Reload an offset address wrapped by an AND that represents the + masking of the lower bits. Strip the outer AND and let reload + convert the offset address into an indirect address. For VSX, + force reload to create the address with an AND in a separate + register, because we can't guarantee an altivec register will + be used. 
*/ + if (VECTOR_MEM_ALTIVEC_P (mode) + && GET_CODE (x) == AND + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT + && GET_CODE (XEXP (x, 1)) == CONST_INT + && INTVAL (XEXP (x, 1)) == -16) + { + x = XEXP (x, 0); + *win = 1; + return x; + } + + if (TARGET_TOC + && reg_offset_p + && !quad_offset_p + && GET_CODE (x) == SYMBOL_REF + && use_toc_relative_ref (x, mode)) + { + x = create_TOC_reference (x, NULL_RTX); + if (TARGET_CMODEL != CMODEL_SMALL) + { + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n"); + debug_rtx (x); + } + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, + BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, + opnum, (enum reload_type) type); + } + *win = 1; + return x; + } + *win = 0; + return x; +} + +/* Debug version of rs6000_legitimize_reload_address. */ +static rtx +rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode, + int opnum, int type, + int ind_levels, int *win) +{ + rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type, + ind_levels, win); + fprintf (stderr, + "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, " + "type = %d, ind_levels = %d, win = %d, original addr:\n", + GET_MODE_NAME (mode), opnum, type, ind_levels, *win); + debug_rtx (x); + + if (x == ret) + fprintf (stderr, "Same address returned\n"); + else if (!ret) + fprintf (stderr, "NULL returned\n"); + else + { + fprintf (stderr, "New address:\n"); + debug_rtx (ret); + } + + return ret; +} + +/* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression + that is a valid memory address for an instruction. + The MODE argument is the machine mode for the MEM expression + that wants to use this address. + + On the RS/6000, there are four valid address: a SYMBOL_REF that + refers to a constant pool entry of an address (or the sum of it + plus a constant), a short (16-bit signed) constant plus a register, + the sum of two registers, or a register indirect, possibly with an + auto-increment. For DFmode, DDmode and DImode with a constant plus + register, we must ensure that both words are addressable or PowerPC64 + with offset word aligned. + + For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs, + 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used + because adjacent memory cells are accessed by adding word-sized offsets + during assembly output. */ +static bool +rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict) +{ + bool reg_offset_p = reg_offset_addressing_ok_p (mode); + bool quad_offset_p = mode_supports_vsx_dform_quad (mode); + + /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */ + if (VECTOR_MEM_ALTIVEC_P (mode) + && GET_CODE (x) == AND + && GET_CODE (XEXP (x, 1)) == CONST_INT + && INTVAL (XEXP (x, 1)) == -16) + x = XEXP (x, 0); + + if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x)) + return 0; + if (legitimate_indirect_address_p (x, reg_ok_strict)) + return 1; + if (TARGET_UPDATE + && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC) + && mode_supports_pre_incdec_p (mode) + && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)) + return 1; + /* Handle restricted vector d-form offsets in ISA 3.0. 
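+     These are the DQ-form lxv/stxv addresses, whose displacement is a
+     signed 16-bit value with the bottom four bits required to be zero,
+     so (as an example) an offset of 48 can be encoded directly while an
+     offset of 20 cannot and must be formed in a register first.
+     quad_address_p below is expected to enforce exactly that
+     restriction.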
*/ + if (quad_offset_p) + { + if (quad_address_p (x, mode, reg_ok_strict)) + return 1; + } + else if (virtual_stack_registers_memory_p (x)) + return 1; + + else if (reg_offset_p) + { + if (legitimate_small_data_p (mode, x)) + return 1; + if (legitimate_constant_pool_address_p (x, mode, + reg_ok_strict || lra_in_progress)) + return 1; + if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC + && XINT (x, 1) == UNSPEC_FUSION_ADDIS) + return 1; + } + + /* For TImode, if we have TImode in VSX registers, only allow register + indirect addresses. This will allow the values to go in either GPRs + or VSX registers without reloading. The vector types would tend to + go into VSX registers, so we allow REG+REG, while TImode seems + somewhat split, in that some uses are GPR based, and some VSX based. */ + /* FIXME: We could loosen this by changing the following to + if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE) + but currently we cannot allow REG+REG addressing for TImode. See + PR72827 for complete details on how this ends up hoodwinking DSE. */ + if (mode == TImode && TARGET_VSX_TIMODE) + return 0; + /* If not REG_OK_STRICT (before reload) let pass any stack offset. */ + if (! reg_ok_strict + && reg_offset_p + && GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 0)) == REG + && (XEXP (x, 0) == virtual_stack_vars_rtx + || XEXP (x, 0) == arg_pointer_rtx) + && GET_CODE (XEXP (x, 1)) == CONST_INT) + return 1; + if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false)) + return 1; + if (!FLOAT128_2REG_P (mode) + && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) + || TARGET_POWERPC64 + || (mode != DFmode && mode != DDmode) + || (TARGET_E500_DOUBLE && mode != DDmode)) + && (TARGET_POWERPC64 || mode != DImode) + && (mode != TImode || VECTOR_MEM_VSX_P (TImode)) + && mode != PTImode + && !avoiding_indexed_address_p (mode) + && legitimate_indexed_address_p (x, reg_ok_strict)) + return 1; + if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY + && mode_supports_pre_modify_p (mode) + && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict) + && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1), + reg_ok_strict, false) + || (!avoiding_indexed_address_p (mode) + && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict))) + && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0))) + return 1; + if (reg_offset_p && !quad_offset_p + && legitimate_lo_sum_address_p (mode, x, reg_ok_strict)) + return 1; + return 0; +} + +/* Debug version of rs6000_legitimate_address_p. */ +static bool +rs6000_debug_legitimate_address_p (machine_mode mode, rtx x, + bool reg_ok_strict) +{ + bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict); + fprintf (stderr, + "\nrs6000_legitimate_address_p: return = %s, mode = %s, " + "strict = %d, reload = %s, code = %s\n", + ret ? "true" : "false", + GET_MODE_NAME (mode), + reg_ok_strict, + (reload_completed + ? "after" + : (reload_in_progress ? "progress" : "before")), + GET_RTX_NAME (GET_CODE (x))); + debug_rtx (x); + + return ret; +} + +/* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */ + +static bool +rs6000_mode_dependent_address_p (const_rtx addr, + addr_space_t as ATTRIBUTE_UNUSED) +{ + return rs6000_mode_dependent_address_ptr (addr); +} + +/* Go to LABEL if ADDR (a legitimate address expression) + has an effect that depends on the machine mode it is used for. + + On the RS/6000 this is true of all integral offsets (since AltiVec + and VSX modes don't allow them) or is a pre-increment or decrement. + + ??? 
Except that due to conceptual problems in offsettable_address_p + we can't really report the problems of integral offsets. So leave + this assuming that the adjustable offset must be valid for the + sub-words of a TFmode operand, which is what we had before. */ + +static bool +rs6000_mode_dependent_address (const_rtx addr) +{ + switch (GET_CODE (addr)) + { + case PLUS: + /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx + is considered a legitimate address before reload, so there + are no offset restrictions in that case. Note that this + condition is safe in strict mode because any address involving + virtual_stack_vars_rtx or arg_pointer_rtx would already have + been rejected as illegitimate. */ + if (XEXP (addr, 0) != virtual_stack_vars_rtx + && XEXP (addr, 0) != arg_pointer_rtx + && GET_CODE (XEXP (addr, 1)) == CONST_INT) + { + unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1)); + return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12); + } + break; + + case LO_SUM: + /* Anything in the constant pool is sufficiently aligned that + all bytes have the same high part address. */ + return !legitimate_constant_pool_address_p (addr, QImode, false); + + /* Auto-increment cases are now treated generically in recog.c. */ + case PRE_MODIFY: + return TARGET_UPDATE; + + /* AND is only allowed in Altivec loads. */ + case AND: + return true; + + default: + break; + } + + return false; +} + +/* Debug version of rs6000_mode_dependent_address. */ +static bool +rs6000_debug_mode_dependent_address (const_rtx addr) +{ + bool ret = rs6000_mode_dependent_address (addr); + + fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n", + ret ? "true" : "false"); + debug_rtx (addr); + + return ret; +} + +/* Implement FIND_BASE_TERM. */ + +rtx +rs6000_find_base_term (rtx op) +{ + rtx base; + + base = op; + if (GET_CODE (base) == CONST) + base = XEXP (base, 0); + if (GET_CODE (base) == PLUS) + base = XEXP (base, 0); + if (GET_CODE (base) == UNSPEC) + switch (XINT (base, 1)) + { + case UNSPEC_TOCREL: + case UNSPEC_MACHOPIC_OFFSET: + /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term + for aliasing purposes. */ + return XVECEXP (base, 0, 0); + } + + return op; +} + +/* More elaborate version of recog's offsettable_memref_p predicate + that works around the ??? note of rs6000_mode_dependent_address. + In particular it accepts + + (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8]))) + + in 32-bit mode, that the recog predicate rejects. */ + +static bool +rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode) +{ + bool worst_case; + + if (!MEM_P (op)) + return false; + + /* First mimic offsettable_memref_p. */ + if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0))) + return true; + + /* offsettable_address_p invokes rs6000_mode_dependent_address, but + the latter predicate knows nothing about the mode of the memory + reference and, therefore, assumes that it is the largest supported + mode (TFmode). As a consequence, legitimate offsettable memory + references are rejected. rs6000_legitimate_offset_address_p contains + the correct logic for the PLUS case of rs6000_mode_dependent_address, + at least with a little bit of help here given that we know the + actual registers used. */ + worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT) + || GET_MODE_SIZE (reg_mode) == 4); + return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0), + true, worst_case); +} + +/* Determine the reassociation width to be used in reassociate_bb. 
+ This takes into account how many parallel operations we + can actually do of a given type, and also the latency. + P8: + int add/sub 6/cycle + mul 2/cycle + vect add/sub/mul 2/cycle + fp add/sub/mul 2/cycle + dfp 1/cycle +*/ + +static int +rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED, + enum machine_mode mode) +{ + switch (rs6000_cpu) + { + case PROCESSOR_POWER8: + case PROCESSOR_POWER9: + if (DECIMAL_FLOAT_MODE_P (mode)) + return 1; + if (VECTOR_MODE_P (mode)) + return 4; + if (INTEGRAL_MODE_P (mode)) + return opc == MULT_EXPR ? 4 : 6; + if (FLOAT_MODE_P (mode)) + return 4; + break; + default: + break; + } + return 1; +} + +/* Change register usage conditional on target flags. */ +static void +rs6000_conditional_register_usage (void) +{ + int i; + + if (TARGET_DEBUG_TARGET) + fprintf (stderr, "rs6000_conditional_register_usage called\n"); + + /* Set MQ register fixed (already call_used) so that it will not be + allocated. */ + fixed_regs[64] = 1; + + /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */ + if (TARGET_64BIT) + fixed_regs[13] = call_used_regs[13] + = call_really_used_regs[13] = 1; + + /* Conditionally disable FPRs. */ + if (TARGET_SOFT_FLOAT || !TARGET_FPRS) + for (i = 32; i < 64; i++) + fixed_regs[i] = call_used_regs[i] + = call_really_used_regs[i] = 1; + + /* The TOC register is not killed across calls in a way that is + visible to the compiler. */ + if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + call_really_used_regs[2] = 0; + + if (DEFAULT_ABI == ABI_V4 && flag_pic == 2) + fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1; + + if (DEFAULT_ABI == ABI_V4 && flag_pic == 1) + fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] + = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] + = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1; + + if (DEFAULT_ABI == ABI_DARWIN && flag_pic) + fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] + = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] + = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1; + + if (TARGET_TOC && TARGET_MINIMAL_TOC) + fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] + = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1; + + if (TARGET_SPE) + { + global_regs[SPEFSCR_REGNO] = 1; + /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit + registers in prologues and epilogues. We no longer use r14 + for FIXED_SCRATCH, but we're keeping r14 out of the allocation + pool for link-compatibility with older versions of GCC. Once + "old" code has died out, we can return r14 to the allocation + pool. */ + fixed_regs[14] + = call_used_regs[14] + = call_really_used_regs[14] = 1; + } + + if (!TARGET_ALTIVEC && !TARGET_VSX) + { + for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i) + fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1; + call_really_used_regs[VRSAVE_REGNO] = 1; + } + + if (TARGET_ALTIVEC || TARGET_VSX) + global_regs[VSCR_REGNO] = 1; + + if (TARGET_ALTIVEC_ABI) + { + for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i) + call_used_regs[i] = call_really_used_regs[i] = 1; + + /* AIX reserves VR20:31 in non-extended ABI mode. */ + if (TARGET_XCOFF) + for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i) + fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1; + } +} + + +/* Output insns to set DEST equal to the constant SOURCE as a series of + lis, ori and shl instructions and return TRUE. 
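+   For instance (illustrative values only), the SImode constant
+   0x1234abcd is emitted as a load of 0x12340000 (lis) followed by an
+   IOR with 0xabcd (ori).  A full DImode constant such as
+   0x123456789abcdef0 is handed to rs6000_emit_set_long_const below,
+   which typically builds the high half with lis/ori, shifts it left
+   32 bits, and merges the low half with oris/ori.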
*/ + +bool +rs6000_emit_set_const (rtx dest, rtx source) +{ + machine_mode mode = GET_MODE (dest); + rtx temp, set; + rtx_insn *insn; + HOST_WIDE_INT c; + + gcc_checking_assert (CONST_INT_P (source)); + c = INTVAL (source); + switch (mode) + { + case QImode: + case HImode: + emit_insn (gen_rtx_SET (dest, source)); + return true; + + case SImode: + temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode); + + emit_insn (gen_rtx_SET (copy_rtx (temp), + GEN_INT (c & ~(HOST_WIDE_INT) 0xffff))); + emit_insn (gen_rtx_SET (dest, + gen_rtx_IOR (SImode, copy_rtx (temp), + GEN_INT (c & 0xffff)))); + break; + + case DImode: + if (!TARGET_POWERPC64) + { + rtx hi, lo; + + hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0, + DImode); + lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0, + DImode); + emit_move_insn (hi, GEN_INT (c >> 32)); + c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000; + emit_move_insn (lo, GEN_INT (c)); + } + else + rs6000_emit_set_long_const (dest, c); + break; + + default: + gcc_unreachable (); + } + + insn = get_last_insn (); + set = single_set (insn); + if (! CONSTANT_P (SET_SRC (set))) + set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c)); + + return true; +} + +/* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode. + Output insns to set DEST equal to the constant C as a series of + lis, ori and shl instructions. */ + +static void +rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) +{ + rtx temp; + HOST_WIDE_INT ud1, ud2, ud3, ud4; + + ud1 = c & 0xffff; + c = c >> 16; + ud2 = c & 0xffff; + c = c >> 16; + ud3 = c & 0xffff; + c = c >> 16; + ud4 = c & 0xffff; + + if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) + || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000))) + emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000)); + + else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000)) + || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000))) + { + temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); + + emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest, + GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); + if (ud1 != 0) + emit_move_insn (dest, + gen_rtx_IOR (DImode, copy_rtx (temp), + GEN_INT (ud1))); + } + else if (ud3 == 0 && ud4 == 0) + { + temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); + + gcc_assert (ud2 & 0x8000); + emit_move_insn (copy_rtx (temp), + GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); + if (ud1 != 0) + emit_move_insn (copy_rtx (temp), + gen_rtx_IOR (DImode, copy_rtx (temp), + GEN_INT (ud1))); + emit_move_insn (dest, + gen_rtx_ZERO_EXTEND (DImode, + gen_lowpart (SImode, + copy_rtx (temp)))); + } + else if ((ud4 == 0xffff && (ud3 & 0x8000)) + || (ud4 == 0 && ! (ud3 & 0x8000))) + { + temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); + + emit_move_insn (copy_rtx (temp), + GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000)); + if (ud2 != 0) + emit_move_insn (copy_rtx (temp), + gen_rtx_IOR (DImode, copy_rtx (temp), + GEN_INT (ud2))); + emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest, + gen_rtx_ASHIFT (DImode, copy_rtx (temp), + GEN_INT (16))); + if (ud1 != 0) + emit_move_insn (dest, + gen_rtx_IOR (DImode, copy_rtx (temp), + GEN_INT (ud1))); + } + else + { + temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); + + emit_move_insn (copy_rtx (temp), + GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000)); + if (ud3 != 0) + emit_move_insn (copy_rtx (temp), + gen_rtx_IOR (DImode, copy_rtx (temp), + GEN_INT (ud3))); + + emit_move_insn (ud2 != 0 || ud1 != 0 ? 
copy_rtx (temp) : dest, + gen_rtx_ASHIFT (DImode, copy_rtx (temp), + GEN_INT (32))); + if (ud2 != 0) + emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest, + gen_rtx_IOR (DImode, copy_rtx (temp), + GEN_INT (ud2 << 16))); + if (ud1 != 0) + emit_move_insn (dest, + gen_rtx_IOR (DImode, copy_rtx (temp), + GEN_INT (ud1))); + } +} + +/* Helper for the following. Get rid of [r+r] memory refs + in cases where it won't work (TImode, TFmode, TDmode, PTImode). */ + +static void +rs6000_eliminate_indexed_memrefs (rtx operands[2]) +{ + if (reload_in_progress) + return; + + if (GET_CODE (operands[0]) == MEM + && GET_CODE (XEXP (operands[0], 0)) != REG + && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0), + GET_MODE (operands[0]), false)) + operands[0] + = replace_equiv_address (operands[0], + copy_addr_to_reg (XEXP (operands[0], 0))); + + if (GET_CODE (operands[1]) == MEM + && GET_CODE (XEXP (operands[1], 0)) != REG + && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0), + GET_MODE (operands[1]), false)) + operands[1] + = replace_equiv_address (operands[1], + copy_addr_to_reg (XEXP (operands[1], 0))); +} + +/* Generate a vector of constants to permute MODE for a little-endian + storage operation by swapping the two halves of a vector. */ +static rtvec +rs6000_const_vec (machine_mode mode) +{ + int i, subparts; + rtvec v; + + switch (mode) + { + case V1TImode: + subparts = 1; + break; + case V2DFmode: + case V2DImode: + subparts = 2; + break; + case V4SFmode: + case V4SImode: + subparts = 4; + break; + case V8HImode: + subparts = 8; + break; + case V16QImode: + subparts = 16; + break; + default: + gcc_unreachable(); + } + + v = rtvec_alloc (subparts); + + for (i = 0; i < subparts / 2; ++i) + RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2); + for (i = subparts / 2; i < subparts; ++i) + RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2); + + return v; +} + +/* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi + for a VSX load or store operation. */ +rtx +rs6000_gen_le_vsx_permute (rtx source, machine_mode mode) +{ + /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and + 128-bit integers if they are allowed in VSX registers. */ + if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode) + return gen_rtx_ROTATE (mode, source, GEN_INT (64)); + else + { + rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode)); + return gen_rtx_VEC_SELECT (mode, source, par); + } +} + +/* Emit a little-endian load from vector memory location SOURCE to VSX + register DEST in mode MODE. The load is done with two permuting + insn's that represent an lxvd2x and xxpermdi. */ +void +rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode) +{ + rtx tmp, permute_mem, permute_reg; + + /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode, + V1TImode). */ + if (mode == TImode || mode == V1TImode) + { + mode = V2DImode; + dest = gen_lowpart (V2DImode, dest); + source = adjust_address (source, V2DImode, 0); + } + + tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest; + permute_mem = rs6000_gen_le_vsx_permute (source, mode); + permute_reg = rs6000_gen_le_vsx_permute (tmp, mode); + emit_insn (gen_rtx_SET (tmp, permute_mem)); + emit_insn (gen_rtx_SET (dest, permute_reg)); +} + +/* Emit a little-endian store to vector memory location DEST from VSX + register SOURCE in mode MODE. The store is done with two permuting + insn's that represent an xxpermdi and an stxvd2x. 
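+   The idea, sketched for V4SImode: stxvd2x writes the two doubleword
+   halves of the register in the opposite order from the little-endian
+   element layout, so the value is first run through an xxpermdi (the
+   VEC_SELECT with selector { 2, 3, 0, 1 } built by
+   rs6000_gen_le_vsx_permute / rs6000_const_vec above) and the permuted
+   copy is what gets stored; the load path mirrors this.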
*/ +void +rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode) +{ + rtx tmp, permute_src, permute_tmp; + + /* This should never be called during or after reload, because it does + not re-permute the source register. It is intended only for use + during expand. */ + gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed); + + /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode, + V1TImode). */ + if (mode == TImode || mode == V1TImode) + { + mode = V2DImode; + dest = adjust_address (dest, V2DImode, 0); + source = gen_lowpart (V2DImode, source); + } + + tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source; + permute_src = rs6000_gen_le_vsx_permute (source, mode); + permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode); + emit_insn (gen_rtx_SET (tmp, permute_src)); + emit_insn (gen_rtx_SET (dest, permute_tmp)); +} + +/* Emit a sequence representing a little-endian VSX load or store, + moving data from SOURCE to DEST in mode MODE. This is done + separately from rs6000_emit_move to ensure it is called only + during expand. LE VSX loads and stores introduced later are + handled with a split. The expand-time RTL generation allows + us to optimize away redundant pairs of register-permutes. */ +void +rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode) +{ + gcc_assert (!BYTES_BIG_ENDIAN + && VECTOR_MEM_VSX_P (mode) + && !TARGET_P9_VECTOR + && !gpr_or_gpr_p (dest, source) + && (MEM_P (source) ^ MEM_P (dest))); + + if (MEM_P (source)) + { + gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG); + rs6000_emit_le_vsx_load (dest, source, mode); + } + else + { + if (!REG_P (source)) + source = force_reg (mode, source); + rs6000_emit_le_vsx_store (dest, source, mode); + } +} + +/* Return whether a SFmode or SImode move can be done without converting one + mode to another. This arrises when we have: + + (SUBREG:SF (REG:SI ...)) + (SUBREG:SI (REG:SF ...)) + + and one of the values is in a floating point/vector register, where SFmode + scalars are stored in DFmode format. */ + +bool +valid_sf_si_move (rtx dest, rtx src, machine_mode mode) +{ + if (TARGET_ALLOW_SF_SUBREG) + return true; + + if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT) + return true; + + if (!SUBREG_P (src) || !sf_subreg_operand (src, mode)) + return true; + + /*. Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */ + if (SUBREG_P (dest)) + { + rtx dest_subreg = SUBREG_REG (dest); + rtx src_subreg = SUBREG_REG (src); + return GET_MODE (dest_subreg) == GET_MODE (src_subreg); + } + + return false; +} + + +/* Helper function to change moves with: + + (SUBREG:SF (REG:SI)) and + (SUBREG:SI (REG:SF)) + + into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode + values are stored as DFmode values in the VSX registers. We need to convert + the bits before we can use a direct move or operate on the bits in the + vector register as an integer type. + + Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)). 
*/ + +static bool +rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode) +{ + if (TARGET_DIRECT_MOVE_64BIT && !reload_in_progress && !reload_completed + && !lra_in_progress + && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode)) + && SUBREG_P (source) && sf_subreg_operand (source, mode)) + { + rtx inner_source = SUBREG_REG (source); + machine_mode inner_mode = GET_MODE (inner_source); + + if (mode == SImode && inner_mode == SFmode) + { + emit_insn (gen_movsi_from_sf (dest, inner_source)); + return true; + } + + if (mode == SFmode && inner_mode == SImode) + { + emit_insn (gen_movsf_from_si (dest, inner_source)); + return true; + } + } + + return false; +} + +/* Emit a move from SOURCE to DEST in mode MODE. */ +void +rs6000_emit_move (rtx dest, rtx source, machine_mode mode) +{ + rtx operands[2]; + operands[0] = dest; + operands[1] = source; + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, + "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, " + "reload_completed = %d, can_create_pseudos = %d.\ndest:\n", + GET_MODE_NAME (mode), + reload_in_progress, + reload_completed, + can_create_pseudo_p ()); + debug_rtx (dest); + fprintf (stderr, "source:\n"); + debug_rtx (source); + } + + /* Sanity checks. Check that we get CONST_DOUBLE only when we should. */ + if (CONST_WIDE_INT_P (operands[1]) + && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT) + { + /* This should be fixed with the introduction of CONST_WIDE_INT. */ + gcc_unreachable (); + } + + /* See if we need to special case SImode/SFmode SUBREG moves. */ + if ((mode == SImode || mode == SFmode) && SUBREG_P (source) + && rs6000_emit_move_si_sf_subreg (dest, source, mode)) + return; + + /* Check if GCC is setting up a block move that will end up using FP + registers as temporaries. We must make sure this is acceptable. */ + if (GET_CODE (operands[0]) == MEM + && GET_CODE (operands[1]) == MEM + && mode == DImode + && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0])) + || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1]))) + && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32 + ? 32 : MEM_ALIGN (operands[0]))) + || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32 + ? 32 + : MEM_ALIGN (operands[1])))) + && ! MEM_VOLATILE_P (operands [0]) + && ! MEM_VOLATILE_P (operands [1])) + { + emit_move_insn (adjust_address (operands[0], SImode, 0), + adjust_address (operands[1], SImode, 0)); + emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4), + adjust_address (copy_rtx (operands[1]), SImode, 4)); + return; + } + + if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM + && !gpc_reg_operand (operands[1], mode)) + operands[1] = force_reg (mode, operands[1]); + + /* Recognize the case where operand[1] is a reference to thread-local + data and load its address to a register. */ + if (tls_referenced_p (operands[1])) + { + enum tls_model model; + rtx tmp = operands[1]; + rtx addend = NULL; + + if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS) + { + addend = XEXP (XEXP (tmp, 0), 1); + tmp = XEXP (XEXP (tmp, 0), 0); + } + + gcc_assert (GET_CODE (tmp) == SYMBOL_REF); + model = SYMBOL_REF_TLS_MODEL (tmp); + gcc_assert (model != 0); + + tmp = rs6000_legitimize_tls_address (tmp, model); + if (addend) + { + tmp = gen_rtx_PLUS (mode, tmp, addend); + tmp = force_operand (tmp, operands[0]); + } + operands[1] = tmp; + } + + /* Handle the case where reload calls us with an invalid address. */ + if (reload_in_progress && mode == Pmode + && (! 
general_operand (operands[1], mode) + || ! nonimmediate_operand (operands[0], mode))) + goto emit_set; + + /* 128-bit constant floating-point values on Darwin should really be loaded + as two parts. However, this premature splitting is a problem when DFmode + values can go into Altivec registers. */ + if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p + && GET_CODE (operands[1]) == CONST_DOUBLE) + { + rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0), + simplify_gen_subreg (DFmode, operands[1], mode, 0), + DFmode); + rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, + GET_MODE_SIZE (DFmode)), + simplify_gen_subreg (DFmode, operands[1], mode, + GET_MODE_SIZE (DFmode)), + DFmode); + return; + } + + if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX) + cfun->machine->sdmode_stack_slot = + eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX); + + + /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD), + p1:SD) if p1 is not of floating point class and p0 is spilled as + we can have no analogous movsd_store for this. */ + if (lra_in_progress && mode == DDmode + && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER + && reg_preferred_class (REGNO (operands[0])) == NO_REGS + && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1])) + && GET_MODE (SUBREG_REG (operands[1])) == SDmode) + { + enum reg_class cl; + int regno = REGNO (SUBREG_REG (operands[1])); + + if (regno >= FIRST_PSEUDO_REGISTER) + { + cl = reg_preferred_class (regno); + regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1]; + } + if (regno >= 0 && ! FP_REGNO_P (regno)) + { + mode = SDmode; + operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]); + operands[1] = SUBREG_REG (operands[1]); + } + } + if (lra_in_progress + && mode == SDmode + && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER + && reg_preferred_class (REGNO (operands[0])) == NO_REGS + && (REG_P (operands[1]) + || (GET_CODE (operands[1]) == SUBREG + && REG_P (SUBREG_REG (operands[1]))))) + { + int regno = REGNO (GET_CODE (operands[1]) == SUBREG + ? SUBREG_REG (operands[1]) : operands[1]); + enum reg_class cl; + + if (regno >= FIRST_PSEUDO_REGISTER) + { + cl = reg_preferred_class (regno); + gcc_assert (cl != NO_REGS); + regno = ira_class_hard_regs[cl][0]; + } + if (FP_REGNO_P (regno)) + { + if (GET_MODE (operands[0]) != DDmode) + operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0); + emit_insn (gen_movsd_store (operands[0], operands[1])); + } + else if (INT_REGNO_P (regno)) + emit_insn (gen_movsd_hardfloat (operands[0], operands[1])); + else + gcc_unreachable(); + return; + } + /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD + p:DD)) if p0 is not of floating point class and p1 is spilled as + we can have no analogous movsd_load for this. */ + if (lra_in_progress && mode == DDmode + && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0])) + && GET_MODE (SUBREG_REG (operands[0])) == SDmode + && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER + && reg_preferred_class (REGNO (operands[1])) == NO_REGS) + { + enum reg_class cl; + int regno = REGNO (SUBREG_REG (operands[0])); + + if (regno >= FIRST_PSEUDO_REGISTER) + { + cl = reg_preferred_class (regno); + regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0]; + } + if (regno >= 0 && ! 
FP_REGNO_P (regno)) + { + mode = SDmode; + operands[0] = SUBREG_REG (operands[0]); + operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]); + } + } + if (lra_in_progress + && mode == SDmode + && (REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && REG_P (SUBREG_REG (operands[0])))) + && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER + && reg_preferred_class (REGNO (operands[1])) == NO_REGS) + { + int regno = REGNO (GET_CODE (operands[0]) == SUBREG + ? SUBREG_REG (operands[0]) : operands[0]); + enum reg_class cl; + + if (regno >= FIRST_PSEUDO_REGISTER) + { + cl = reg_preferred_class (regno); + gcc_assert (cl != NO_REGS); + regno = ira_class_hard_regs[cl][0]; + } + if (FP_REGNO_P (regno)) + { + if (GET_MODE (operands[1]) != DDmode) + operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0); + emit_insn (gen_movsd_load (operands[0], operands[1])); + } + else if (INT_REGNO_P (regno)) + emit_insn (gen_movsd_hardfloat (operands[0], operands[1])); + else + gcc_unreachable(); + return; + } + + if (reload_in_progress + && mode == SDmode + && cfun->machine->sdmode_stack_slot != NULL_RTX + && MEM_P (operands[0]) + && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot) + && REG_P (operands[1])) + { + if (FP_REGNO_P (REGNO (operands[1]))) + { + rtx mem = adjust_address_nv (operands[0], DDmode, 0); + mem = eliminate_regs (mem, VOIDmode, NULL_RTX); + emit_insn (gen_movsd_store (mem, operands[1])); + } + else if (INT_REGNO_P (REGNO (operands[1]))) + { + rtx mem = operands[0]; + if (BYTES_BIG_ENDIAN) + mem = adjust_address_nv (mem, mode, 4); + mem = eliminate_regs (mem, VOIDmode, NULL_RTX); + emit_insn (gen_movsd_hardfloat (mem, operands[1])); + } + else + gcc_unreachable(); + return; + } + if (reload_in_progress + && mode == SDmode + && REG_P (operands[0]) + && MEM_P (operands[1]) + && cfun->machine->sdmode_stack_slot != NULL_RTX + && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot)) + { + if (FP_REGNO_P (REGNO (operands[0]))) + { + rtx mem = adjust_address_nv (operands[1], DDmode, 0); + mem = eliminate_regs (mem, VOIDmode, NULL_RTX); + emit_insn (gen_movsd_load (operands[0], mem)); + } + else if (INT_REGNO_P (REGNO (operands[0]))) + { + rtx mem = operands[1]; + if (BYTES_BIG_ENDIAN) + mem = adjust_address_nv (mem, mode, 4); + mem = eliminate_regs (mem, VOIDmode, NULL_RTX); + emit_insn (gen_movsd_hardfloat (operands[0], mem)); + } + else + gcc_unreachable(); + return; + } + + /* FIXME: In the long term, this switch statement should go away + and be replaced by a sequence of tests based on things like + mode == Pmode. */ + switch (mode) + { + case HImode: + case QImode: + if (CONSTANT_P (operands[1]) + && GET_CODE (operands[1]) != CONST_INT) + operands[1] = force_const_mem (mode, operands[1]); + break; + + case TFmode: + case TDmode: + case IFmode: + case KFmode: + if (FLOAT128_2REG_P (mode)) + rs6000_eliminate_indexed_memrefs (operands); + /* fall through */ + + case DFmode: + case DDmode: + case SFmode: + case SDmode: + if (CONSTANT_P (operands[1]) + && ! 
easy_fp_constant (operands[1], mode)) + operands[1] = force_const_mem (mode, operands[1]); + break; + + case V16QImode: + case V8HImode: + case V4SFmode: + case V4SImode: + case V4HImode: + case V2SFmode: + case V2SImode: + case V1DImode: + case V2DFmode: + case V2DImode: + case V1TImode: + if (CONSTANT_P (operands[1]) + && !easy_vector_constant (operands[1], mode)) + operands[1] = force_const_mem (mode, operands[1]); + break; + + case SImode: + case DImode: + /* Use default pattern for address of ELF small data */ + if (TARGET_ELF + && mode == Pmode + && DEFAULT_ABI == ABI_V4 + && (GET_CODE (operands[1]) == SYMBOL_REF + || GET_CODE (operands[1]) == CONST) + && small_data_operand (operands[1], mode)) + { + emit_insn (gen_rtx_SET (operands[0], operands[1])); + return; + } + + if (DEFAULT_ABI == ABI_V4 + && mode == Pmode && mode == SImode + && flag_pic == 1 && got_operand (operands[1], mode)) + { + emit_insn (gen_movsi_got (operands[0], operands[1])); + return; + } + + if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN) + && TARGET_NO_TOC + && ! flag_pic + && mode == Pmode + && CONSTANT_P (operands[1]) + && GET_CODE (operands[1]) != HIGH + && GET_CODE (operands[1]) != CONST_INT) + { + rtx target = (!can_create_pseudo_p () + ? operands[0] + : gen_reg_rtx (mode)); + + /* If this is a function address on -mcall-aixdesc, + convert it to the address of the descriptor. */ + if (DEFAULT_ABI == ABI_AIX + && GET_CODE (operands[1]) == SYMBOL_REF + && XSTR (operands[1], 0)[0] == '.') + { + const char *name = XSTR (operands[1], 0); + rtx new_ref; + while (*name == '.') + name++; + new_ref = gen_rtx_SYMBOL_REF (Pmode, name); + CONSTANT_POOL_ADDRESS_P (new_ref) + = CONSTANT_POOL_ADDRESS_P (operands[1]); + SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]); + SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]); + SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]); + operands[1] = new_ref; + } + + if (DEFAULT_ABI == ABI_DARWIN) + { +#if TARGET_MACHO + if (MACHO_DYNAMIC_NO_PIC_P) + { + /* Take care of any required data indirection. */ + operands[1] = rs6000_machopic_legitimize_pic_address ( + operands[1], mode, operands[0]); + if (operands[0] != operands[1]) + emit_insn (gen_rtx_SET (operands[0], operands[1])); + return; + } +#endif + emit_insn (gen_macho_high (target, operands[1])); + emit_insn (gen_macho_low (operands[0], target, operands[1])); + return; + } + + emit_insn (gen_elf_high (target, operands[1])); + emit_insn (gen_elf_low (operands[0], target, operands[1])); + return; + } + + /* If this is a SYMBOL_REF that refers to a constant pool entry, + and we have put it in the TOC, we just need to make a TOC-relative + reference to it. */ + if (TARGET_TOC + && GET_CODE (operands[1]) == SYMBOL_REF + && use_toc_relative_ref (operands[1], mode)) + operands[1] = create_TOC_reference (operands[1], operands[0]); + else if (mode == Pmode + && CONSTANT_P (operands[1]) + && GET_CODE (operands[1]) != HIGH + && ((GET_CODE (operands[1]) != CONST_INT + && ! easy_fp_constant (operands[1], mode)) + || (GET_CODE (operands[1]) == CONST_INT + && (num_insns_constant (operands[1], mode) + > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2))) + || (GET_CODE (operands[0]) == REG + && FP_REGNO_P (REGNO (operands[0])))) + && !toc_relative_expr_p (operands[1], false) + && (TARGET_CMODEL == CMODEL_SMALL + || can_create_pseudo_p () + || (REG_P (operands[0]) + && INT_REG_OK_FOR_BASE_P (operands[0], true)))) + { + +#if TARGET_MACHO + /* Darwin uses a special PIC legitimizer. 
*/ + if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT) + { + operands[1] = + rs6000_machopic_legitimize_pic_address (operands[1], mode, + operands[0]); + if (operands[0] != operands[1]) + emit_insn (gen_rtx_SET (operands[0], operands[1])); + return; + } +#endif + + /* If we are to limit the number of things we put in the TOC and + this is a symbol plus a constant we can add in one insn, + just put the symbol in the TOC and add the constant. Don't do + this if reload is in progress. */ + if (GET_CODE (operands[1]) == CONST + && TARGET_NO_SUM_IN_TOC && ! reload_in_progress + && GET_CODE (XEXP (operands[1], 0)) == PLUS + && add_operand (XEXP (XEXP (operands[1], 0), 1), mode) + && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF + || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF) + && ! side_effects_p (operands[0])) + { + rtx sym = + force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0)); + rtx other = XEXP (XEXP (operands[1], 0), 1); + + sym = force_reg (mode, sym); + emit_insn (gen_add3_insn (operands[0], sym, other)); + return; + } + + operands[1] = force_const_mem (mode, operands[1]); + + if (TARGET_TOC + && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF + && use_toc_relative_ref (XEXP (operands[1], 0), mode)) + { + rtx tocref = create_TOC_reference (XEXP (operands[1], 0), + operands[0]); + operands[1] = gen_const_mem (mode, tocref); + set_mem_alias_set (operands[1], get_TOC_alias_set ()); + } + } + break; + + case TImode: + if (!VECTOR_MEM_VSX_P (TImode)) + rs6000_eliminate_indexed_memrefs (operands); + break; + + case PTImode: + rs6000_eliminate_indexed_memrefs (operands); + break; + + default: + fatal_insn ("bad move", gen_rtx_SET (dest, source)); + } + + /* Above, we may have called force_const_mem which may have returned + an invalid address. If we can, fix this up; otherwise, reload will + have to deal with it. */ + if (GET_CODE (operands[1]) == MEM && ! reload_in_progress) + operands[1] = validize_mem (operands[1]); + + emit_set: + emit_insn (gen_rtx_SET (operands[0], operands[1])); +} + +/* Return true if a structure, union or array containing FIELD should be + accessed using `BLKMODE'. + + For the SPE, simd types are V2SI, and gcc can be tempted to put the + entire thing in a DI and use subregs to access the internals. + store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the + back-end. Because a single GPR can hold a V2SI, but not a DI, the + best thing to do is set structs to BLKmode and avoid Severe Tire + Damage. + + On e500 v2, DF and DI modes suffer from the same anomaly. DF can + fit into 1, whereas DI still needs two. */ + +static bool +rs6000_member_type_forces_blk (const_tree field, machine_mode mode) +{ + return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE) + || (TARGET_E500_DOUBLE && mode == DFmode)); +} + +/* Nonzero if we can use a floating-point register to pass this arg. */ +#define USE_FP_FOR_ARG_P(CUM,MODE) \ + (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \ + && (CUM)->fregno <= FP_ARG_MAX_REG \ + && TARGET_HARD_FLOAT && TARGET_FPRS) + +/* Nonzero if we can use an AltiVec register to pass this arg. */ +#define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \ + (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \ + && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \ + && TARGET_ALTIVEC_ABI \ + && (NAMED)) + +/* Walk down the type tree of TYPE counting consecutive base elements. + If *MODEP is VOIDmode, then set it to the first valid floating point + or vector type. 
If a non-floating point or vector type is found, or + if a floating point or vector type that doesn't match a non-VOIDmode + *MODEP is found, then return -1, otherwise return the count in the + sub-tree. */ + +static int +rs6000_aggregate_candidate (const_tree type, machine_mode *modep) +{ + machine_mode mode; + HOST_WIDE_INT size; + + switch (TREE_CODE (type)) + { + case REAL_TYPE: + mode = TYPE_MODE (type); + if (!SCALAR_FLOAT_MODE_P (mode)) + return -1; + + if (*modep == VOIDmode) + *modep = mode; + + if (*modep == mode) + return 1; + + break; + + case COMPLEX_TYPE: + mode = TYPE_MODE (TREE_TYPE (type)); + if (!SCALAR_FLOAT_MODE_P (mode)) + return -1; + + if (*modep == VOIDmode) + *modep = mode; + + if (*modep == mode) + return 2; + + break; + + case VECTOR_TYPE: + if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC) + return -1; + + /* Use V4SImode as representative of all 128-bit vector types. */ + size = int_size_in_bytes (type); + switch (size) + { + case 16: + mode = V4SImode; + break; + default: + return -1; + } + + if (*modep == VOIDmode) + *modep = mode; + + /* Vector modes are considered to be opaque: two vectors are + equivalent for the purposes of being homogeneous aggregates + if they are the same size. */ + if (*modep == mode) + return 1; + + break; + + case ARRAY_TYPE: + { + int count; + tree index = TYPE_DOMAIN (type); + + /* Can't handle incomplete types nor sizes that are not + fixed. */ + if (!COMPLETE_TYPE_P (type) + || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) + return -1; + + count = rs6000_aggregate_candidate (TREE_TYPE (type), modep); + if (count == -1 + || !index + || !TYPE_MAX_VALUE (index) + || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index)) + || !TYPE_MIN_VALUE (index) + || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index)) + || count < 0) + return -1; + + count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index)) + - tree_to_uhwi (TYPE_MIN_VALUE (index))); + + /* There must be no padding. */ + if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + case RECORD_TYPE: + { + int count = 0; + int sub_count; + tree field; + + /* Can't handle incomplete types nor sizes that are not + fixed. */ + if (!COMPLETE_TYPE_P (type) + || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) + return -1; + + for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep); + if (sub_count < 0) + return -1; + count += sub_count; + } + + /* There must be no padding. */ + if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + case UNION_TYPE: + case QUAL_UNION_TYPE: + { + /* These aren't very interesting except in a degenerate case. */ + int count = 0; + int sub_count; + tree field; + + /* Can't handle incomplete types nor sizes that are not + fixed. */ + if (!COMPLETE_TYPE_P (type) + || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) + return -1; + + for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep); + if (sub_count < 0) + return -1; + count = count > sub_count ? count : sub_count; + } + + /* There must be no padding. 
*/ + if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + default: + break; + } + + return -1; +} + +/* If an argument, whose type is described by TYPE and MODE, is a homogeneous + float or vector aggregate that shall be passed in FP/vector registers + according to the ELFv2 ABI, return the homogeneous element mode in + *ELT_MODE and the number of elements in *N_ELTS, and return TRUE. + + Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */ + +static bool +rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type, + machine_mode *elt_mode, + int *n_elts) +{ + /* Note that we do not accept complex types at the top level as + homogeneous aggregates; these types are handled via the + targetm.calls.split_complex_arg mechanism. Complex types + can be elements of homogeneous aggregates, however. */ + if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type)) + { + machine_mode field_mode = VOIDmode; + int field_count = rs6000_aggregate_candidate (type, &field_mode); + + if (field_count > 0) + { + int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ? + (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1); + + /* The ELFv2 ABI allows homogeneous aggregates to occupy + up to AGGR_ARG_NUM_REG registers. */ + if (field_count * n_regs <= AGGR_ARG_NUM_REG) + { + if (elt_mode) + *elt_mode = field_mode; + if (n_elts) + *n_elts = field_count; + return true; + } + } + } + + if (elt_mode) + *elt_mode = mode; + if (n_elts) + *n_elts = 1; + return false; +} + +/* Return a nonzero value to say to return the function value in + memory, just as large structures are always returned. TYPE will be + the data type of the value, and FNTYPE will be the type of the + function doing the returning, or @code{NULL} for libcalls. + + The AIX ABI for the RS/6000 specifies that all structures are + returned in memory. The Darwin ABI does the same. + + For the Darwin 64 Bit ABI, a function result can be returned in + registers or in memory, depending on the size of the return data + type. If it is returned in registers, the value occupies the same + registers as it would if it were the first and only function + argument. Otherwise, the function places its result in memory at + the location pointed to by GPR3. + + The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4, + but a draft put them in memory, and GCC used to implement the draft + instead of the final standard. Therefore, aix_struct_return + controls this instead of DEFAULT_ABI; V.4 targets needing backward + compatibility can change DRAFT_V4_STRUCT_RET to override the + default, and -m switches get the final word. See + rs6000_option_override_internal for more details. + + The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit + long double support is enabled. These values are returned in memory. + + int_size_in_bytes returns -1 for variable size objects, which go in + memory always. The cast to unsigned makes -1 > 8. */ + +static bool +rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ + /* For the Darwin64 ABI, test if we can fit the return value in regs. 
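+     For example, a small record such as struct { double x; double y; }
+     is expected to come back from the trial below as a non-NULL rtx
+     (it fits in registers) and so is not returned in memory, whereas a
+     type any part of which would be passed in memory makes the trial
+     return NULL and we fall through to the generic rules further down.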
*/ + if (TARGET_MACHO + && rs6000_darwin64_abi + && TREE_CODE (type) == RECORD_TYPE + && int_size_in_bytes (type) > 0) + { + CUMULATIVE_ARGS valcum; + rtx valret; + + valcum.words = 0; + valcum.fregno = FP_ARG_MIN_REG; + valcum.vregno = ALTIVEC_ARG_MIN_REG; + /* Do a trial code generation as if this were going to be passed + as an argument; if any part goes in memory, we return NULL. */ + valret = rs6000_darwin64_record_arg (&valcum, type, true, true); + if (valret) + return false; + /* Otherwise fall through to more conventional ABI rules. */ + } + + /* The ELFv2 ABI returns homogeneous VFP aggregates in registers */ + if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type, + NULL, NULL)) + return false; + + /* The ELFv2 ABI returns aggregates up to 16B in registers */ + if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type) + && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16) + return false; + + if (AGGREGATE_TYPE_P (type) + && (aix_struct_return + || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8)) + return true; + + /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector + modes only exist for GCC vector types if -maltivec. */ + if (TARGET_32BIT && !TARGET_ALTIVEC_ABI + && ALTIVEC_VECTOR_MODE (TYPE_MODE (type))) + return false; + + /* Return synthetic vectors in memory. */ + if (TREE_CODE (type) == VECTOR_TYPE + && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8)) + { + static bool warned_for_return_big_vectors = false; + if (!warned_for_return_big_vectors) + { + warning (OPT_Wpsabi, "GCC vector returned by reference: " + "non-standard ABI extension with no compatibility guarantee"); + warned_for_return_big_vectors = true; + } + return true; + } + + if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD + && FLOAT128_IEEE_P (TYPE_MODE (type))) + return true; + + return false; +} + +/* Specify whether values returned in registers should be at the most + significant end of a register. We want aggregates returned by + value to match the way aggregates are passed to functions. */ + +static bool +rs6000_return_in_msb (const_tree valtype) +{ + return (DEFAULT_ABI == ABI_ELFv2 + && BYTES_BIG_ENDIAN + && AGGREGATE_TYPE_P (valtype) + && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward); +} + +#ifdef HAVE_AS_GNU_ATTRIBUTE +/* Return TRUE if a call to function FNDECL may be one that + potentially affects the function calling ABI of the object file. */ + +static bool +call_ABI_of_interest (tree fndecl) +{ + if (rs6000_gnu_attr && symtab->state == EXPANSION) + { + struct cgraph_node *c_node; + + /* Libcalls are always interesting. */ + if (fndecl == NULL_TREE) + return true; + + /* Any call to an external function is interesting. */ + if (DECL_EXTERNAL (fndecl)) + return true; + + /* Interesting functions that we are emitting in this object file. */ + c_node = cgraph_node::get (fndecl); + c_node = c_node->ultimate_alias_target (); + return !c_node->only_called_directly_p (); + } + return false; +} +#endif + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0 and RETURN_MODE the return value mode. + + For incoming args we set the number of arguments in the prototype large + so we never return a PARALLEL. 
*/ + +void +init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, + rtx libname ATTRIBUTE_UNUSED, int incoming, + int libcall, int n_named_args, + tree fndecl ATTRIBUTE_UNUSED, + machine_mode return_mode ATTRIBUTE_UNUSED) +{ + static CUMULATIVE_ARGS zero_cumulative; + + *cum = zero_cumulative; + cum->words = 0; + cum->fregno = FP_ARG_MIN_REG; + cum->vregno = ALTIVEC_ARG_MIN_REG; + cum->prototype = (fntype && prototype_p (fntype)); + cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall) + ? CALL_LIBCALL : CALL_NORMAL); + cum->sysv_gregno = GP_ARG_MIN_REG; + cum->stdarg = stdarg_p (fntype); + cum->libcall = libcall; + + cum->nargs_prototype = 0; + if (incoming || cum->prototype) + cum->nargs_prototype = n_named_args; + + /* Check for a longcall attribute. */ + if ((!fntype && rs6000_default_long_calls) + || (fntype + && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype)) + && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype)))) + cum->call_cookie |= CALL_LONG; + + if (TARGET_DEBUG_ARG) + { + fprintf (stderr, "\ninit_cumulative_args:"); + if (fntype) + { + tree ret_type = TREE_TYPE (fntype); + fprintf (stderr, " ret code = %s,", + get_tree_code_name (TREE_CODE (ret_type))); + } + + if (cum->call_cookie & CALL_LONG) + fprintf (stderr, " longcall,"); + + fprintf (stderr, " proto = %d, nargs = %d\n", + cum->prototype, cum->nargs_prototype); + } + +#ifdef HAVE_AS_GNU_ATTRIBUTE + if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)) + { + cum->escapes = call_ABI_of_interest (fndecl); + if (cum->escapes) + { + tree return_type; + + if (fntype) + { + return_type = TREE_TYPE (fntype); + return_mode = TYPE_MODE (return_type); + } + else + return_type = lang_hooks.types.type_for_mode (return_mode, 0); + + if (return_type != NULL) + { + if (TREE_CODE (return_type) == RECORD_TYPE + && TYPE_TRANSPARENT_AGGR (return_type)) + { + return_type = TREE_TYPE (first_field (return_type)); + return_mode = TYPE_MODE (return_type); + } + if (AGGREGATE_TYPE_P (return_type) + && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type) + <= 8)) + rs6000_returns_struct = true; + } + if (SCALAR_FLOAT_MODE_P (return_mode)) + { + rs6000_passes_float = true; + if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT) + && (FLOAT128_IBM_P (return_mode) + || FLOAT128_IEEE_P (return_mode) + || (return_type != NULL + && (TYPE_MAIN_VARIANT (return_type) + == long_double_type_node)))) + rs6000_passes_long_double = true; + } + if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode) + || SPE_VECTOR_MODE (return_mode)) + rs6000_passes_vector = true; + } + } +#endif + + if (fntype + && !TARGET_ALTIVEC + && TARGET_ALTIVEC_ABI + && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype)))) + { + error ("cannot return value in vector register because" + " altivec instructions are disabled, use -maltivec" + " to enable them"); + } +} + +/* The mode the ABI uses for a word. This is not the same as word_mode + for -m32 -mpowerpc64. This is used to implement various target hooks. */ + +static machine_mode +rs6000_abi_word_mode (void) +{ + return TARGET_32BIT ? SImode : DImode; +} + +/* Implement the TARGET_OFFLOAD_OPTIONS hook. */ +static char * +rs6000_offload_options (void) +{ + if (TARGET_64BIT) + return xstrdup ("-foffload-abi=lp64"); + else + return xstrdup ("-foffload-abi=ilp32"); +} + +/* On rs6000, function arguments are promoted, as are function return + values. 
*/ + +static machine_mode +rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, + machine_mode mode, + int *punsignedp ATTRIBUTE_UNUSED, + const_tree, int) +{ + PROMOTE_MODE (mode, *punsignedp, type); + + return mode; +} + +/* Return true if TYPE must be passed on the stack and not in registers. */ + +static bool +rs6000_must_pass_in_stack (machine_mode mode, const_tree type) +{ + if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT) + return must_pass_in_stack_var_size (mode, type); + else + return must_pass_in_stack_var_size_or_pad (mode, type); +} + +static inline bool +is_complex_IBM_long_double (machine_mode mode) +{ + return mode == ICmode || (!TARGET_IEEEQUAD && mode == TCmode); +} + +/* Whether ABI_V4 passes MODE args to a function in floating point + registers. */ + +static bool +abi_v4_pass_in_fpr (machine_mode mode) +{ + if (!TARGET_FPRS || !TARGET_HARD_FLOAT) + return false; + if (TARGET_SINGLE_FLOAT && mode == SFmode) + return true; + if (TARGET_DOUBLE_FLOAT && mode == DFmode) + return true; + /* ABI_V4 passes complex IBM long double in 8 gprs. + Stupid, but we can't change the ABI now. */ + if (is_complex_IBM_long_double (mode)) + return false; + if (FLOAT128_2REG_P (mode)) + return true; + if (DECIMAL_FLOAT_MODE_P (mode)) + return true; + return false; +} + +/* If defined, a C expression which determines whether, and in which + direction, to pad out an argument with extra space. The value + should be of type `enum direction': either `upward' to pad above + the argument, `downward' to pad below, or `none' to inhibit + padding. + + For the AIX ABI structs are always stored left shifted in their + argument slot. */ + +enum direction +function_arg_padding (machine_mode mode, const_tree type) +{ +#ifndef AGGREGATE_PADDING_FIXED +#define AGGREGATE_PADDING_FIXED 0 +#endif +#ifndef AGGREGATES_PAD_UPWARD_ALWAYS +#define AGGREGATES_PAD_UPWARD_ALWAYS 0 +#endif + + if (!AGGREGATE_PADDING_FIXED) + { + /* GCC used to pass structures of the same size as integer types as + if they were in fact integers, ignoring FUNCTION_ARG_PADDING. + i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were + passed padded downward, except that -mstrict-align further + muddied the water in that multi-component structures of 2 and 4 + bytes in size were passed padded upward. + + The following arranges for best compatibility with previous + versions of gcc, but removes the -mstrict-align dependency. */ + if (BYTES_BIG_ENDIAN) + { + HOST_WIDE_INT size = 0; + + if (mode == BLKmode) + { + if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST) + size = int_size_in_bytes (type); + } + else + size = GET_MODE_SIZE (mode); + + if (size == 1 || size == 2 || size == 4) + return downward; + } + return upward; + } + + if (AGGREGATES_PAD_UPWARD_ALWAYS) + { + if (type != 0 && AGGREGATE_TYPE_P (type)) + return upward; + } + + /* Fall back to the default. */ + return DEFAULT_FUNCTION_ARG_PADDING (mode, type); +} + +/* If defined, a C expression that gives the alignment boundary, in bits, + of an argument with the specified mode and type. If it is not defined, + PARM_BOUNDARY is used for all arguments. + + V.4 wants long longs and doubles to be double word aligned. Just + testing the mode size is a boneheaded way to do this as it means + that other types such as complex int are also double word aligned. + However, we're stuck with this because changing the ABI might break + existing library interfaces. + + Doubleword align SPE vectors. + Quadword align Altivec/VSX vectors. 
+ Quadword align large synthetic vector types. */ + +static unsigned int +rs6000_function_arg_boundary (machine_mode mode, const_tree type) +{ + machine_mode elt_mode; + int n_elts; + + rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts); + + if (DEFAULT_ABI == ABI_V4 + && (GET_MODE_SIZE (mode) == 8 + || (TARGET_HARD_FLOAT + && TARGET_FPRS + && !is_complex_IBM_long_double (mode) + && FLOAT128_2REG_P (mode)))) + return 64; + else if (FLOAT128_VECTOR_P (mode)) + return 128; + else if (SPE_VECTOR_MODE (mode) + || (type && TREE_CODE (type) == VECTOR_TYPE + && int_size_in_bytes (type) >= 8 + && int_size_in_bytes (type) < 16)) + return 64; + else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode) + || (type && TREE_CODE (type) == VECTOR_TYPE + && int_size_in_bytes (type) >= 16)) + return 128; + + /* Aggregate types that need > 8 byte alignment are quadword-aligned + in the parameter area in the ELFv2 ABI, and in the AIX ABI unless + -mcompat-align-parm is used. */ + if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm) + || DEFAULT_ABI == ABI_ELFv2) + && type && TYPE_ALIGN (type) > 64) + { + /* "Aggregate" means any AGGREGATE_TYPE except for single-element + or homogeneous float/vector aggregates here. We already handled + vector aggregates above, but still need to check for float here. */ + bool aggregate_p = (AGGREGATE_TYPE_P (type) + && !SCALAR_FLOAT_MODE_P (elt_mode)); + + /* We used to check for BLKmode instead of the above aggregate type + check. Warn when this results in any difference to the ABI. */ + if (aggregate_p != (mode == BLKmode)) + { + static bool warned; + if (!warned && warn_psabi) + { + warned = true; + inform (input_location, + "the ABI of passing aggregates with %d-byte alignment" + " has changed in GCC 5", + (int) TYPE_ALIGN (type) / BITS_PER_UNIT); + } + } + + if (aggregate_p) + return 128; + } + + /* Similar for the Darwin64 ABI. Note that for historical reasons we + implement the "aggregate type" check as a BLKmode check here; this + means certain aggregate types are in fact not aligned. */ + if (TARGET_MACHO && rs6000_darwin64_abi + && mode == BLKmode + && type && TYPE_ALIGN (type) > 64) + return 128; + + return PARM_BOUNDARY; +} + +/* The offset in words to the start of the parameter save area. */ + +static unsigned int +rs6000_parm_offset (void) +{ + return (DEFAULT_ABI == ABI_V4 ? 2 + : DEFAULT_ABI == ABI_ELFv2 ? 4 + : 6); +} + +/* For a function parm of MODE and TYPE, return the starting word in + the parameter area. NWORDS of the parameter area are already used. */ + +static unsigned int +rs6000_parm_start (machine_mode mode, const_tree type, + unsigned int nwords) +{ + unsigned int align; + + align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1; + return nwords + (-(rs6000_parm_offset () + nwords) & align); +} + +/* Compute the size (in words) of a function argument. */ + +static unsigned long +rs6000_arg_size (machine_mode mode, const_tree type) +{ + unsigned long size; + + if (mode != BLKmode) + size = GET_MODE_SIZE (mode); + else + size = int_size_in_bytes (type); + + if (TARGET_32BIT) + return (size + 3) >> 2; + else + return (size + 7) >> 3; +} + +/* Use this to flush pending int fields. 
*/ + +static void +rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum, + HOST_WIDE_INT bitpos, int final) +{ + unsigned int startbit, endbit; + int intregs, intoffset; + machine_mode mode; + + /* Handle the situations where a float is taking up the first half + of the GPR, and the other half is empty (typically due to + alignment restrictions). We can detect this by a 8-byte-aligned + int field, or by seeing that this is the final flush for this + argument. Count the word and continue on. */ + if (cum->floats_in_gpr == 1 + && (cum->intoffset % 64 == 0 + || (cum->intoffset == -1 && final))) + { + cum->words++; + cum->floats_in_gpr = 0; + } + + if (cum->intoffset == -1) + return; + + intoffset = cum->intoffset; + cum->intoffset = -1; + cum->floats_in_gpr = 0; + + if (intoffset % BITS_PER_WORD != 0) + { + mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD, + MODE_INT, 0); + if (mode == BLKmode) + { + /* We couldn't find an appropriate mode, which happens, + e.g., in packed structs when there are 3 bytes to load. + Back intoffset back to the beginning of the word in this + case. */ + intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD); + } + } + + startbit = ROUND_DOWN (intoffset, BITS_PER_WORD); + endbit = ROUND_UP (bitpos, BITS_PER_WORD); + intregs = (endbit - startbit) / BITS_PER_WORD; + cum->words += intregs; + /* words should be unsigned. */ + if ((unsigned)cum->words < (endbit/BITS_PER_WORD)) + { + int pad = (endbit/BITS_PER_WORD) - cum->words; + cum->words += pad; + } +} + +/* The darwin64 ABI calls for us to recurse down through structs, + looking for elements passed in registers. Unfortunately, we have + to track int register count here also because of misalignments + in powerpc alignment mode. */ + +static void +rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum, + const_tree type, + HOST_WIDE_INT startbitpos) +{ + tree f; + + for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f)) + if (TREE_CODE (f) == FIELD_DECL) + { + HOST_WIDE_INT bitpos = startbitpos; + tree ftype = TREE_TYPE (f); + machine_mode mode; + if (ftype == error_mark_node) + continue; + mode = TYPE_MODE (ftype); + + if (DECL_SIZE (f) != 0 + && tree_fits_uhwi_p (bit_position (f))) + bitpos += int_bit_position (f); + + /* ??? FIXME: else assume zero offset. */ + + if (TREE_CODE (ftype) == RECORD_TYPE) + rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos); + else if (USE_FP_FOR_ARG_P (cum, mode)) + { + unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3; + rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0); + cum->fregno += n_fpregs; + /* Single-precision floats present a special problem for + us, because they are smaller than an 8-byte GPR, and so + the structure-packing rules combined with the standard + varargs behavior mean that we want to pack float/float + and float/int combinations into a single register's + space. This is complicated by the arg advance flushing, + which works on arbitrarily large groups of int-type + fields. */ + if (mode == SFmode) + { + if (cum->floats_in_gpr == 1) + { + /* Two floats in a word; count the word and reset + the float count. */ + cum->words++; + cum->floats_in_gpr = 0; + } + else if (bitpos % 64 == 0) + { + /* A float at the beginning of an 8-byte word; + count it and put off adjusting cum->words until + we see if a arg advance flush is going to do it + for us. 
*/ + cum->floats_in_gpr++; + } + else + { + /* The float is at the end of a word, preceded + by integer fields, so the arg advance flush + just above has already set cum->words and + everything is taken care of. */ + } + } + else + cum->words += n_fpregs; + } + else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1)) + { + rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0); + cum->vregno++; + cum->words += 2; + } + else if (cum->intoffset == -1) + cum->intoffset = bitpos; + } +} + +/* Check for an item that needs to be considered specially under the darwin 64 + bit ABI. These are record types where the mode is BLK or the structure is + 8 bytes in size. */ +static int +rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type) +{ + return rs6000_darwin64_abi + && ((mode == BLKmode + && TREE_CODE (type) == RECORD_TYPE + && int_size_in_bytes (type) > 0) + || (type && TREE_CODE (type) == RECORD_TYPE + && int_size_in_bytes (type) == 8)) ? 1 : 0; +} + +/* Update the data in CUM to advance over an argument + of mode MODE and data type TYPE. + (TYPE is null for libcalls where that information may not be available.) + + Note that for args passed by reference, function_arg will be called + with MODE and TYPE set to that of the pointer to the arg, not the arg + itself. */ + +static void +rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode, + const_tree type, bool named, int depth) +{ + machine_mode elt_mode; + int n_elts; + + rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts); + + /* Only tick off an argument if we're not recursing. */ + if (depth == 0) + cum->nargs_prototype--; + +#ifdef HAVE_AS_GNU_ATTRIBUTE + if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4) + && cum->escapes) + { + if (SCALAR_FLOAT_MODE_P (mode)) + { + rs6000_passes_float = true; + if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT) + && (FLOAT128_IBM_P (mode) + || FLOAT128_IEEE_P (mode) + || (type != NULL + && TYPE_MAIN_VARIANT (type) == long_double_type_node))) + rs6000_passes_long_double = true; + } + if ((named && ALTIVEC_OR_VSX_VECTOR_MODE (mode)) + || (SPE_VECTOR_MODE (mode) + && !cum->stdarg + && cum->sysv_gregno <= GP_ARG_MAX_REG)) + rs6000_passes_vector = true; + } +#endif + + if (TARGET_ALTIVEC_ABI + && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode) + || (type && TREE_CODE (type) == VECTOR_TYPE + && int_size_in_bytes (type) == 16))) + { + bool stack = false; + + if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named)) + { + cum->vregno += n_elts; + + if (!TARGET_ALTIVEC) + error ("cannot pass argument in vector register because" + " altivec instructions are disabled, use -maltivec" + " to enable them"); + + /* PowerPC64 Linux and AIX allocate GPRs for a vector argument + even if it is going to be passed in a vector register. + Darwin does the same for variable-argument functions. */ + if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + && TARGET_64BIT) + || (cum->stdarg && DEFAULT_ABI != ABI_V4)) + stack = true; + } + else + stack = true; + + if (stack) + { + int align; + + /* Vector parameters must be 16-byte aligned. In 32-bit + mode this means we need to take into account the offset + to the parameter save area. In 64-bit mode, they just + have to start on an even word, since the parameter save + area is 16-byte aligned. 
*/ + if (TARGET_32BIT) + align = -(rs6000_parm_offset () + cum->words) & 3; + else + align = cum->words & 1; + cum->words += align + rs6000_arg_size (mode, type); + + if (TARGET_DEBUG_ARG) + { + fprintf (stderr, "function_adv: words = %2d, align=%d, ", + cum->words, align); + fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n", + cum->nargs_prototype, cum->prototype, + GET_MODE_NAME (mode)); + } + } + } + else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode) + && !cum->stdarg + && cum->sysv_gregno <= GP_ARG_MAX_REG) + cum->sysv_gregno++; + + else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type)) + { + int size = int_size_in_bytes (type); + /* Variable sized types have size == -1 and are + treated as if consisting entirely of ints. + Pad to 16 byte boundary if needed. */ + if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD + && (cum->words % 2) != 0) + cum->words++; + /* For varargs, we can just go up by the size of the struct. */ + if (!named) + cum->words += (size + 7) / 8; + else + { + /* It is tempting to say int register count just goes up by + sizeof(type)/8, but this is wrong in a case such as + { int; double; int; } [powerpc alignment]. We have to + grovel through the fields for these too. */ + cum->intoffset = 0; + cum->floats_in_gpr = 0; + rs6000_darwin64_record_arg_advance_recurse (cum, type, 0); + rs6000_darwin64_record_arg_advance_flush (cum, + size * BITS_PER_UNIT, 1); + } + if (TARGET_DEBUG_ARG) + { + fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d", + cum->words, TYPE_ALIGN (type), size); + fprintf (stderr, + "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n", + cum->nargs_prototype, cum->prototype, + GET_MODE_NAME (mode)); + } + } + else if (DEFAULT_ABI == ABI_V4) + { + if (abi_v4_pass_in_fpr (mode)) + { + /* _Decimal128 must use an even/odd register pair. This assumes + that the register number is odd when fregno is odd. */ + if (mode == TDmode && (cum->fregno % 2) == 1) + cum->fregno++; + + if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0) + <= FP_ARG_V4_MAX_REG) + cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3; + else + { + cum->fregno = FP_ARG_V4_MAX_REG + 1; + if (mode == DFmode || FLOAT128_IBM_P (mode) + || mode == DDmode || mode == TDmode) + cum->words += cum->words & 1; + cum->words += rs6000_arg_size (mode, type); + } + } + else + { + int n_words = rs6000_arg_size (mode, type); + int gregno = cum->sysv_gregno; + + /* Long long and SPE vectors are put in (r3,r4), (r5,r6), + (r7,r8) or (r9,r10). As does any other 2 word item such + as complex int due to a historical mistake. */ + if (n_words == 2) + gregno += (1 - gregno) & 1; + + /* Multi-reg args are not split between registers and stack. */ + if (gregno + n_words - 1 > GP_ARG_MAX_REG) + { + /* Long long and SPE vectors are aligned on the stack. + So are other 2 word items such as complex int due to + a historical mistake. */ + if (n_words == 2) + cum->words += cum->words & 1; + cum->words += n_words; + } + + /* Note: continuing to accumulate gregno past when we've started + spilling to the stack indicates the fact that we've started + spilling to the stack to expand_builtin_saveregs. 
*/ + cum->sysv_gregno = gregno + n_words; + } + + if (TARGET_DEBUG_ARG) + { + fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ", + cum->words, cum->fregno); + fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ", + cum->sysv_gregno, cum->nargs_prototype, cum->prototype); + fprintf (stderr, "mode = %4s, named = %d\n", + GET_MODE_NAME (mode), named); + } + } + else + { + int n_words = rs6000_arg_size (mode, type); + int start_words = cum->words; + int align_words = rs6000_parm_start (mode, type, start_words); + + cum->words = align_words + n_words; + + if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS) + { + /* _Decimal128 must be passed in an even/odd float register pair. + This assumes that the register number is odd when fregno is + odd. */ + if (elt_mode == TDmode && (cum->fregno % 2) == 1) + cum->fregno++; + cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3); + } + + if (TARGET_DEBUG_ARG) + { + fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ", + cum->words, cum->fregno); + fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ", + cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode)); + fprintf (stderr, "named = %d, align = %d, depth = %d\n", + named, align_words - start_words, depth); + } + } +} + +static void +rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode, + const_tree type, bool named) +{ + rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named, + 0); +} + +static rtx +spe_build_register_parallel (machine_mode mode, int gregno) +{ + rtx r1, r3, r5, r7; + + switch (mode) + { + case DFmode: + r1 = gen_rtx_REG (DImode, gregno); + r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx); + return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1)); + + case DCmode: + case TFmode: + r1 = gen_rtx_REG (DImode, gregno); + r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx); + r3 = gen_rtx_REG (DImode, gregno + 2); + r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8)); + return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3)); + + case TCmode: + r1 = gen_rtx_REG (DImode, gregno); + r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx); + r3 = gen_rtx_REG (DImode, gregno + 2); + r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8)); + r5 = gen_rtx_REG (DImode, gregno + 4); + r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16)); + r7 = gen_rtx_REG (DImode, gregno + 6); + r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24)); + return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7)); + + default: + gcc_unreachable (); + } +} + +/* Determine where to put a SIMD argument on the SPE. */ +static rtx +rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode, + const_tree type) +{ + int gregno = cum->sysv_gregno; + + /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but + are passed and returned in a pair of GPRs for ABI compatibility. */ + if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode + || mode == DCmode || mode == TCmode)) + { + int n_words = rs6000_arg_size (mode, type); + + /* Doubles go in an odd/even register pair (r5/r6, etc). */ + if (mode == DFmode) + gregno += (1 - gregno) & 1; + + /* Multi-reg args are not split between registers and stack. */ + if (gregno + n_words - 1 > GP_ARG_MAX_REG) + return NULL_RTX; + + return spe_build_register_parallel (mode, gregno); + } + if (cum->stdarg) + { + int n_words = rs6000_arg_size (mode, type); + + /* SPE vectors are put in odd registers. 
*/ + if (n_words == 2 && (gregno & 1) == 0) + gregno += 1; + + if (gregno + n_words - 1 <= GP_ARG_MAX_REG) + { + rtx r1, r2; + machine_mode m = SImode; + + r1 = gen_rtx_REG (m, gregno); + r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx); + r2 = gen_rtx_REG (m, gregno + 1); + r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4)); + return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2)); + } + else + return NULL_RTX; + } + else + { + if (gregno <= GP_ARG_MAX_REG) + return gen_rtx_REG (mode, gregno); + else + return NULL_RTX; + } +} + +/* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the + structure between cum->intoffset and bitpos to integer registers. */ + +static void +rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum, + HOST_WIDE_INT bitpos, rtx rvec[], int *k) +{ + machine_mode mode; + unsigned int regno; + unsigned int startbit, endbit; + int this_regno, intregs, intoffset; + rtx reg; + + if (cum->intoffset == -1) + return; + + intoffset = cum->intoffset; + cum->intoffset = -1; + + /* If this is the trailing part of a word, try to only load that + much into the register. Otherwise load the whole register. Note + that in the latter case we may pick up unwanted bits. It's not a + problem at the moment but may wish to revisit. */ + + if (intoffset % BITS_PER_WORD != 0) + { + mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD, + MODE_INT, 0); + if (mode == BLKmode) + { + /* We couldn't find an appropriate mode, which happens, + e.g., in packed structs when there are 3 bytes to load. + Back intoffset back to the beginning of the word in this + case. */ + intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD); + mode = word_mode; + } + } + else + mode = word_mode; + + startbit = ROUND_DOWN (intoffset, BITS_PER_WORD); + endbit = ROUND_UP (bitpos, BITS_PER_WORD); + intregs = (endbit - startbit) / BITS_PER_WORD; + this_regno = cum->words + intoffset / BITS_PER_WORD; + + if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno) + cum->use_stack = 1; + + intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno); + if (intregs <= 0) + return; + + intoffset /= BITS_PER_UNIT; + do + { + regno = GP_ARG_MIN_REG + this_regno; + reg = gen_rtx_REG (mode, regno); + rvec[(*k)++] = + gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset)); + + this_regno += 1; + intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1; + mode = word_mode; + intregs -= 1; + } + while (intregs > 0); +} + +/* Recursive workhorse for the following. */ + +static void +rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type, + HOST_WIDE_INT startbitpos, rtx rvec[], + int *k) +{ + tree f; + + for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f)) + if (TREE_CODE (f) == FIELD_DECL) + { + HOST_WIDE_INT bitpos = startbitpos; + tree ftype = TREE_TYPE (f); + machine_mode mode; + if (ftype == error_mark_node) + continue; + mode = TYPE_MODE (ftype); + + if (DECL_SIZE (f) != 0 + && tree_fits_uhwi_p (bit_position (f))) + bitpos += int_bit_position (f); + + /* ??? FIXME: else assume zero offset. 
*/ + + if (TREE_CODE (ftype) == RECORD_TYPE) + rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k); + else if (cum->named && USE_FP_FOR_ARG_P (cum, mode)) + { + unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3; +#if 0 + switch (mode) + { + case SCmode: mode = SFmode; break; + case DCmode: mode = DFmode; break; + case TCmode: mode = TFmode; break; + default: break; + } +#endif + rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k); + if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1) + { + gcc_assert (cum->fregno == FP_ARG_MAX_REG + && (mode == TFmode || mode == TDmode)); + /* Long double or _Decimal128 split over regs and memory. */ + mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode; + cum->use_stack=1; + } + rvec[(*k)++] + = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (mode, cum->fregno++), + GEN_INT (bitpos / BITS_PER_UNIT)); + if (FLOAT128_2REG_P (mode)) + cum->fregno++; + } + else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1)) + { + rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k); + rvec[(*k)++] + = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (mode, cum->vregno++), + GEN_INT (bitpos / BITS_PER_UNIT)); + } + else if (cum->intoffset == -1) + cum->intoffset = bitpos; + } +} + +/* For the darwin64 ABI, we want to construct a PARALLEL consisting of + the register(s) to be used for each field and subfield of a struct + being passed by value, along with the offset of where the + register's value may be found in the block. FP fields go in FP + register, vector fields go in vector registers, and everything + else goes in int registers, packed as in memory. + + This code is also used for function return values. RETVAL indicates + whether this is the case. + + Much of this is taken from the SPARC V9 port, which has a similar + calling convention. */ + +static rtx +rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type, + bool named, bool retval) +{ + rtx rvec[FIRST_PSEUDO_REGISTER]; + int k = 1, kbase = 1; + HOST_WIDE_INT typesize = int_size_in_bytes (type); + /* This is a copy; modifications are not visible to our caller. */ + CUMULATIVE_ARGS copy_cum = *orig_cum; + CUMULATIVE_ARGS *cum = &copy_cum; + + /* Pad to 16 byte boundary if needed. */ + if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD + && (cum->words % 2) != 0) + cum->words++; + + cum->intoffset = 0; + cum->use_stack = 0; + cum->named = named; + + /* Put entries into rvec[] for individual FP and vector fields, and + for the chunks of memory that go in int regs. Note we start at + element 1; 0 is reserved for an indication of using memory, and + may or may not be filled in below. */ + rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k); + rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k); + + /* If any part of the struct went on the stack put all of it there. + This hack is because the generic code for + FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register + parts of the struct are not at the beginning. */ + if (cum->use_stack) + { + if (retval) + return NULL_RTX; /* doesn't go in registers at all */ + kbase = 0; + rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); + } + if (k > 1 || cum->use_stack) + return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase])); + else + return NULL_RTX; +} + +/* Determine where to place an argument in 64-bit mode with 32-bit ABI. 
*/ + +static rtx +rs6000_mixed_function_arg (machine_mode mode, const_tree type, + int align_words) +{ + int n_units; + int i, k; + rtx rvec[GP_ARG_NUM_REG + 1]; + + if (align_words >= GP_ARG_NUM_REG) + return NULL_RTX; + + n_units = rs6000_arg_size (mode, type); + + /* Optimize the simple case where the arg fits in one gpr, except in + the case of BLKmode due to assign_parms assuming that registers are + BITS_PER_WORD wide. */ + if (n_units == 0 + || (n_units == 1 && mode != BLKmode)) + return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words); + + k = 0; + if (align_words + n_units > GP_ARG_NUM_REG) + /* Not all of the arg fits in gprs. Say that it goes in memory too, + using a magic NULL_RTX component. + This is not strictly correct. Only some of the arg belongs in + memory, not all of it. However, the normal scheme using + function_arg_partial_nregs can result in unusual subregs, eg. + (subreg:SI (reg:DF) 4), which are not handled well. The code to + store the whole arg to memory is often more efficient than code + to store pieces, and we know that space is available in the right + place for the whole arg. */ + rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); + + i = 0; + do + { + rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words); + rtx off = GEN_INT (i++ * 4); + rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off); + } + while (++align_words < GP_ARG_NUM_REG && --n_units != 0); + + return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec)); +} + +/* We have an argument of MODE and TYPE that goes into FPRs or VRs, + but must also be copied into the parameter save area starting at + offset ALIGN_WORDS. Fill in RVEC with the elements corresponding + to the GPRs and/or memory. Return the number of elements used. */ + +static int +rs6000_psave_function_arg (machine_mode mode, const_tree type, + int align_words, rtx *rvec) +{ + int k = 0; + + if (align_words < GP_ARG_NUM_REG) + { + int n_words = rs6000_arg_size (mode, type); + + if (align_words + n_words > GP_ARG_NUM_REG + || mode == BLKmode + || (TARGET_32BIT && TARGET_POWERPC64)) + { + /* If this is partially on the stack, then we only + include the portion actually in registers here. */ + machine_mode rmode = TARGET_32BIT ? SImode : DImode; + int i = 0; + + if (align_words + n_words > GP_ARG_NUM_REG) + { + /* Not all of the arg fits in gprs. Say that it goes in memory + too, using a magic NULL_RTX component. Also see comment in + rs6000_mixed_function_arg for why the normal + function_arg_partial_nregs scheme doesn't work in this case. */ + rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); + } + + do + { + rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words); + rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode)); + rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off); + } + while (++align_words < GP_ARG_NUM_REG && --n_words != 0); + } + else + { + /* The whole arg fits in gprs. */ + rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words); + rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx); + } + } + else + { + /* It's entirely in memory. */ + rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); + } + + return k; +} + +/* RVEC is a vector of K components of an argument of mode MODE. + Construct the final function_arg return value from it. */ + +static rtx +rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k) +{ + gcc_assert (k >= 1); + + /* Avoid returning a PARALLEL in the trivial cases. 
*/ + if (k == 1) + { + if (XEXP (rvec[0], 0) == NULL_RTX) + return NULL_RTX; + + if (GET_MODE (XEXP (rvec[0], 0)) == mode) + return XEXP (rvec[0], 0); + } + + return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec)); +} + +/* Determine where to put an argument to a function. + Value is zero to push the argument on the stack, + or a hard register in which to store the argument. + + MODE is the argument's machine mode. + TYPE is the data type of the argument (as a tree). + This is null for libcalls where that information may + not be available. + CUM is a variable of type CUMULATIVE_ARGS which gives info about + the preceding args and about the function being called. It is + not modified in this routine. + NAMED is nonzero if this argument is a named parameter + (otherwise it is an extra parameter matching an ellipsis). + + On RS/6000 the first eight words of non-FP are normally in registers + and the rest are pushed. Under AIX, the first 13 FP args are in registers. + Under V.4, the first 8 FP args are in registers. + + If this is floating-point and no prototype is specified, we use + both an FP and integer register (or possibly FP reg and stack). Library + functions (when CALL_LIBCALL is set) always have the proper types for args, + so we can pass the FP value just in one register. emit_library_function + doesn't support PARALLEL anyway. + + Note that for args passed by reference, function_arg will be called + with MODE and TYPE set to that of the pointer to the arg, not the arg + itself. */ + +static rtx +rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode, + const_tree type, bool named) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + enum rs6000_abi abi = DEFAULT_ABI; + machine_mode elt_mode; + int n_elts; + + /* Return a marker to indicate whether CR1 needs to set or clear the + bit that V.4 uses to say fp args were passed in registers. + Assume that we don't need the marker for software floating point, + or compiler generated library calls. */ + if (mode == VOIDmode) + { + if (abi == ABI_V4 + && (cum->call_cookie & CALL_LIBCALL) == 0 + && (cum->stdarg + || (cum->nargs_prototype < 0 + && (cum->prototype || TARGET_NO_PROTOTYPE)))) + { + /* For the SPE, we need to crxor CR6 always. */ + if (TARGET_SPE_ABI) + return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS); + else if (TARGET_HARD_FLOAT && TARGET_FPRS) + return GEN_INT (cum->call_cookie + | ((cum->fregno == FP_ARG_MIN_REG) + ? CALL_V4_SET_FP_ARGS + : CALL_V4_CLEAR_FP_ARGS)); + } + + return GEN_INT (cum->call_cookie & ~CALL_LIBCALL); + } + + rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts); + + if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type)) + { + rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false); + if (rslt != NULL_RTX) + return rslt; + /* Else fall through to usual handling. */ + } + + if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named)) + { + rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1]; + rtx r, off; + int i, k = 0; + + /* Do we also need to pass this argument in the parameter save area? + Library support functions for IEEE 128-bit are assumed to not need the + value passed both in GPRs and in vector registers. */ + if (TARGET_64BIT && !cum->prototype + && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode))) + { + int align_words = ROUND_UP (cum->words, 2); + k = rs6000_psave_function_arg (mode, type, align_words, rvec); + } + + /* Describe where this argument goes in the vector registers. 
*/ + for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++) + { + r = gen_rtx_REG (elt_mode, cum->vregno + i); + off = GEN_INT (i * GET_MODE_SIZE (elt_mode)); + rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off); + } + + return rs6000_finish_function_arg (mode, rvec, k); + } + else if (TARGET_ALTIVEC_ABI + && (ALTIVEC_OR_VSX_VECTOR_MODE (mode) + || (type && TREE_CODE (type) == VECTOR_TYPE + && int_size_in_bytes (type) == 16))) + { + if (named || abi == ABI_V4) + return NULL_RTX; + else + { + /* Vector parameters to varargs functions under AIX or Darwin + get passed in memory and possibly also in GPRs. */ + int align, align_words, n_words; + machine_mode part_mode; + + /* Vector parameters must be 16-byte aligned. In 32-bit + mode this means we need to take into account the offset + to the parameter save area. In 64-bit mode, they just + have to start on an even word, since the parameter save + area is 16-byte aligned. */ + if (TARGET_32BIT) + align = -(rs6000_parm_offset () + cum->words) & 3; + else + align = cum->words & 1; + align_words = cum->words + align; + + /* Out of registers? Memory, then. */ + if (align_words >= GP_ARG_NUM_REG) + return NULL_RTX; + + if (TARGET_32BIT && TARGET_POWERPC64) + return rs6000_mixed_function_arg (mode, type, align_words); + + /* The vector value goes in GPRs. Only the part of the + value in GPRs is reported here. */ + part_mode = mode; + n_words = rs6000_arg_size (mode, type); + if (align_words + n_words > GP_ARG_NUM_REG) + /* Fortunately, there are only two possibilities, the value + is either wholly in GPRs or half in GPRs and half not. */ + part_mode = DImode; + + return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words); + } + } + else if (TARGET_SPE_ABI && TARGET_SPE + && (SPE_VECTOR_MODE (mode) + || (TARGET_E500_DOUBLE && (mode == DFmode + || mode == DCmode + || mode == TFmode + || mode == TCmode)))) + return rs6000_spe_function_arg (cum, mode, type); + + else if (abi == ABI_V4) + { + if (abi_v4_pass_in_fpr (mode)) + { + /* _Decimal128 must use an even/odd register pair. This assumes + that the register number is odd when fregno is odd. */ + if (mode == TDmode && (cum->fregno % 2) == 1) + cum->fregno++; + + if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0) + <= FP_ARG_V4_MAX_REG) + return gen_rtx_REG (mode, cum->fregno); + else + return NULL_RTX; + } + else + { + int n_words = rs6000_arg_size (mode, type); + int gregno = cum->sysv_gregno; + + /* Long long and SPE vectors are put in (r3,r4), (r5,r6), + (r7,r8) or (r9,r10). As does any other 2 word item such + as complex int due to a historical mistake. */ + if (n_words == 2) + gregno += (1 - gregno) & 1; + + /* Multi-reg args are not split between registers and stack. */ + if (gregno + n_words - 1 > GP_ARG_MAX_REG) + return NULL_RTX; + + if (TARGET_32BIT && TARGET_POWERPC64) + return rs6000_mixed_function_arg (mode, type, + gregno - GP_ARG_MIN_REG); + return gen_rtx_REG (mode, gregno); + } + } + else + { + int align_words = rs6000_parm_start (mode, type, cum->words); + + /* _Decimal128 must be passed in an even/odd float register pair. + This assumes that the register number is odd when fregno is odd. */ + if (elt_mode == TDmode && (cum->fregno % 2) == 1) + cum->fregno++; + + if (USE_FP_FOR_ARG_P (cum, elt_mode)) + { + rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1]; + rtx r, off; + int i, k = 0; + unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3; + int fpr_words; + + /* Do we also need to pass this argument in the parameter + save area? 
*/ + if (type && (cum->nargs_prototype <= 0 + || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + && TARGET_XL_COMPAT + && align_words >= GP_ARG_NUM_REG))) + k = rs6000_psave_function_arg (mode, type, align_words, rvec); + + /* Describe where this argument goes in the fprs. */ + for (i = 0; i < n_elts + && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++) + { + /* Check if the argument is split over registers and memory. + This can only ever happen for long double or _Decimal128; + complex types are handled via split_complex_arg. */ + machine_mode fmode = elt_mode; + if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1) + { + gcc_assert (FLOAT128_2REG_P (fmode)); + fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode; + } + + r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg); + off = GEN_INT (i * GET_MODE_SIZE (elt_mode)); + rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off); + } + + /* If there were not enough FPRs to hold the argument, the rest + usually goes into memory. However, if the current position + is still within the register parameter area, a portion may + actually have to go into GPRs. + + Note that it may happen that the portion of the argument + passed in the first "half" of the first GPR was already + passed in the last FPR as well. + + For unnamed arguments, we already set up GPRs to cover the + whole argument in rs6000_psave_function_arg, so there is + nothing further to do at this point. */ + fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8); + if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG + && cum->nargs_prototype > 0) + { + static bool warned; + + machine_mode rmode = TARGET_32BIT ? SImode : DImode; + int n_words = rs6000_arg_size (mode, type); + + align_words += fpr_words; + n_words -= fpr_words; + + do + { + r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words); + off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode)); + rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off); + } + while (++align_words < GP_ARG_NUM_REG && --n_words != 0); + + if (!warned && warn_psabi) + { + warned = true; + inform (input_location, + "the ABI of passing homogeneous float aggregates" + " has changed in GCC 5"); + } + } + + return rs6000_finish_function_arg (mode, rvec, k); + } + else if (align_words < GP_ARG_NUM_REG) + { + if (TARGET_32BIT && TARGET_POWERPC64) + return rs6000_mixed_function_arg (mode, type, align_words); + + return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words); + } + else + return NULL_RTX; + } +} + +/* For an arg passed partly in registers and partly in memory, this is + the number of bytes passed in registers. For args passed entirely in + registers or entirely in memory, zero. When an arg is described by a + PARALLEL, perhaps using more than one register type, this function + returns the number of bytes used by the first element of the PARALLEL. */ + +static int +rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode, + tree type, bool named) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + bool passed_in_gprs = true; + int ret = 0; + int align_words; + machine_mode elt_mode; + int n_elts; + + rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts); + + if (DEFAULT_ABI == ABI_V4) + return 0; + + if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named)) + { + /* If we are passing this arg in the fixed parameter save area (gprs or + memory) as well as VRs, we do not use the partial bytes mechanism; + instead, rs6000_function_arg will return a PARALLEL including a memory + element as necessary. 
Library support functions for IEEE 128-bit are + assumed to not need the value passed both in GPRs and in vector + registers. */ + if (TARGET_64BIT && !cum->prototype + && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode))) + return 0; + + /* Otherwise, we pass in VRs only. Check for partial copies. */ + passed_in_gprs = false; + if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1) + ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16; + } + + /* In this complicated case we just disable the partial_nregs code. */ + if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type)) + return 0; + + align_words = rs6000_parm_start (mode, type, cum->words); + + if (USE_FP_FOR_ARG_P (cum, elt_mode)) + { + unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3; + + /* If we are passing this arg in the fixed parameter save area + (gprs or memory) as well as FPRs, we do not use the partial + bytes mechanism; instead, rs6000_function_arg will return a + PARALLEL including a memory element as necessary. */ + if (type + && (cum->nargs_prototype <= 0 + || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + && TARGET_XL_COMPAT + && align_words >= GP_ARG_NUM_REG))) + return 0; + + /* Otherwise, we pass in FPRs only. Check for partial copies. */ + passed_in_gprs = false; + if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1) + { + /* Compute number of bytes / words passed in FPRs. If there + is still space available in the register parameter area + *after* that amount, a part of the argument will be passed + in GPRs. In that case, the total amount passed in any + registers is equal to the amount that would have been passed + in GPRs if everything were passed there, so we fall back to + the GPR code below to compute the appropriate value. */ + int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno) + * MIN (8, GET_MODE_SIZE (elt_mode))); + int fpr_words = fpr / (TARGET_32BIT ? 4 : 8); + + if (align_words + fpr_words < GP_ARG_NUM_REG) + passed_in_gprs = true; + else + ret = fpr; + } + } + + if (passed_in_gprs + && align_words < GP_ARG_NUM_REG + && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type)) + ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8); + + if (ret != 0 && TARGET_DEBUG_ARG) + fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret); + + return ret; +} + +/* A C expression that indicates when an argument must be passed by + reference. If nonzero for an argument, a copy of that argument is + made in memory and a pointer to the argument is passed instead of + the argument itself. The pointer is passed in whatever way is + appropriate for passing a pointer to that type. + + Under V.4, aggregates and long double are passed by reference. + + As an extension to all 32-bit ABIs, AltiVec vectors are passed by + reference unless the AltiVec vector extension ABI is in force. + + As an extension to all ABIs, variable sized types are passed by + reference. 
*/ + +static bool +rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED, + machine_mode mode, const_tree type, + bool named ATTRIBUTE_UNUSED) +{ + if (!type) + return 0; + + if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD + && FLOAT128_IEEE_P (TYPE_MODE (type))) + { + if (TARGET_DEBUG_ARG) + fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n"); + return 1; + } + + if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type)) + { + if (TARGET_DEBUG_ARG) + fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n"); + return 1; + } + + if (int_size_in_bytes (type) < 0) + { + if (TARGET_DEBUG_ARG) + fprintf (stderr, "function_arg_pass_by_reference: variable size\n"); + return 1; + } + + /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector + modes only exist for GCC vector types if -maltivec. */ + if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode)) + { + if (TARGET_DEBUG_ARG) + fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n"); + return 1; + } + + /* Pass synthetic vectors in memory. */ + if (TREE_CODE (type) == VECTOR_TYPE + && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8)) + { + static bool warned_for_pass_big_vectors = false; + if (TARGET_DEBUG_ARG) + fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n"); + if (!warned_for_pass_big_vectors) + { + warning (OPT_Wpsabi, "GCC vector passed by reference: " + "non-standard ABI extension with no compatibility guarantee"); + warned_for_pass_big_vectors = true; + } + return 1; + } + + return 0; +} + +/* Process parameter of type TYPE after ARGS_SO_FAR parameters were + already processed. Return true if the parameter must be passed + (fully or partially) on the stack. */ + +static bool +rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type) +{ + machine_mode mode; + int unsignedp; + rtx entry_parm; + + /* Catch errors. */ + if (type == NULL || type == error_mark_node) + return true; + + /* Handle types with no storage requirement. */ + if (TYPE_MODE (type) == VOIDmode) + return false; + + /* Handle complex types. */ + if (TREE_CODE (type) == COMPLEX_TYPE) + return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)) + || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))); + + /* Handle transparent aggregates. */ + if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE) + && TYPE_TRANSPARENT_AGGR (type)) + type = TREE_TYPE (first_field (type)); + + /* See if this arg was passed by invisible reference. */ + if (pass_by_reference (get_cumulative_args (args_so_far), + TYPE_MODE (type), type, true)) + type = build_pointer_type (type); + + /* Find mode as it is passed by the ABI. */ + unsignedp = TYPE_UNSIGNED (type); + mode = promote_mode (type, TYPE_MODE (type), &unsignedp); + + /* If we must pass in stack, we need a stack. */ + if (rs6000_must_pass_in_stack (mode, type)) + return true; + + /* If there is no incoming register, we need a stack. */ + entry_parm = rs6000_function_arg (args_so_far, mode, type, true); + if (entry_parm == NULL) + return true; + + /* Likewise if we need to pass both in registers and on the stack. */ + if (GET_CODE (entry_parm) == PARALLEL + && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX) + return true; + + /* Also true if we're partially in registers and partially not. */ + if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0) + return true; + + /* Update info on where next arg arrives in registers. 
*/ + rs6000_function_arg_advance (args_so_far, mode, type, true); + return false; +} + +/* Return true if FUN has no prototype, has a variable argument + list, or passes any parameter in memory. */ + +static bool +rs6000_function_parms_need_stack (tree fun, bool incoming) +{ + tree fntype, result; + CUMULATIVE_ARGS args_so_far_v; + cumulative_args_t args_so_far; + + if (!fun) + /* Must be a libcall, all of which only use reg parms. */ + return false; + + fntype = fun; + if (!TYPE_P (fun)) + fntype = TREE_TYPE (fun); + + /* Varargs functions need the parameter save area. */ + if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype)) + return true; + + INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX); + args_so_far = pack_cumulative_args (&args_so_far_v); + + /* When incoming, we will have been passed the function decl. + It is necessary to use the decl to handle K&R style functions, + where TYPE_ARG_TYPES may not be available. */ + if (incoming) + { + gcc_assert (DECL_P (fun)); + result = DECL_RESULT (fun); + } + else + result = TREE_TYPE (fntype); + + if (result && aggregate_value_p (result, fntype)) + { + if (!TYPE_P (result)) + result = TREE_TYPE (result); + result = build_pointer_type (result); + rs6000_parm_needs_stack (args_so_far, result); + } + + if (incoming) + { + tree parm; + + for (parm = DECL_ARGUMENTS (fun); + parm && parm != void_list_node; + parm = TREE_CHAIN (parm)) + if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm))) + return true; + } + else + { + function_args_iterator args_iter; + tree arg_type; + + FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter) + if (rs6000_parm_needs_stack (args_so_far, arg_type)) + return true; + } + + return false; +} + +/* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is + usually a constant depending on the ABI. However, in the ELFv2 ABI + the register parameter area is optional when calling a function that + has a prototype in scope, has no variable argument list, and passes + all parameters in registers. */ + +int +rs6000_reg_parm_stack_space (tree fun, bool incoming) +{ + int reg_parm_stack_space; + + switch (DEFAULT_ABI) + { + default: + reg_parm_stack_space = 0; + break; + + case ABI_AIX: + case ABI_DARWIN: + reg_parm_stack_space = TARGET_64BIT ? 64 : 32; + break; + + case ABI_ELFv2: + /* ??? Recomputing this every time is a bit expensive. Is there + a place to cache this information? */ + if (rs6000_function_parms_need_stack (fun, incoming)) + reg_parm_stack_space = TARGET_64BIT ? 64 : 32; + else + reg_parm_stack_space = 0; + break; + } + + return reg_parm_stack_space; +} + +static void +rs6000_move_block_from_reg (int regno, rtx x, int nregs) +{ + int i; + machine_mode reg_mode = TARGET_32BIT ? SImode : DImode; + + if (nregs == 0) + return; + + for (i = 0; i < nregs; i++) + { + rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode)); + if (reload_completed) + { + if (! strict_memory_address_p (reg_mode, XEXP (tem, 0))) + tem = NULL_RTX; + else + tem = simplify_gen_subreg (reg_mode, x, BLKmode, + i * GET_MODE_SIZE (reg_mode)); + } + else + tem = replace_equiv_address (tem, XEXP (tem, 0)); + + gcc_assert (tem); + + emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i)); + } +} + +/* Perform any actions needed for a function that is receiving a + variable number of arguments. + + CUM is as above. + + MODE and TYPE are the mode and type of the current parameter. 
+ + PRETEND_SIZE is a variable that should be set to the amount of stack + that must be pushed by the prolog to pretend that our caller pushed + it. + + Normally, this macro will push all remaining incoming registers on the + stack and set PRETEND_SIZE to the length of the registers pushed. */ + +static void +setup_incoming_varargs (cumulative_args_t cum, machine_mode mode, + tree type, int *pretend_size ATTRIBUTE_UNUSED, + int no_rtl) +{ + CUMULATIVE_ARGS next_cum; + int reg_size = TARGET_32BIT ? 4 : 8; + rtx save_area = NULL_RTX, mem; + int first_reg_offset; + alias_set_type set; + + /* Skip the last named argument. */ + next_cum = *get_cumulative_args (cum); + rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0); + + if (DEFAULT_ABI == ABI_V4) + { + first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG; + + if (! no_rtl) + { + int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0; + HOST_WIDE_INT offset = 0; + + /* Try to optimize the size of the varargs save area. + The ABI requires that ap.reg_save_area is doubleword + aligned, but we don't need to allocate space for all + the bytes, only those to which we actually will save + anything. */ + if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG) + gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset; + if (TARGET_HARD_FLOAT && TARGET_FPRS + && next_cum.fregno <= FP_ARG_V4_MAX_REG + && cfun->va_list_fpr_size) + { + if (gpr_reg_num) + fpr_size = (next_cum.fregno - FP_ARG_MIN_REG) + * UNITS_PER_FP_WORD; + if (cfun->va_list_fpr_size + < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno) + fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD; + else + fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno) + * UNITS_PER_FP_WORD; + } + if (gpr_reg_num) + { + offset = -((first_reg_offset * reg_size) & ~7); + if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size) + { + gpr_reg_num = cfun->va_list_gpr_size; + if (reg_size == 4 && (first_reg_offset & 1)) + gpr_reg_num++; + } + gpr_size = (gpr_reg_num * reg_size + 7) & ~7; + } + else if (fpr_size) + offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG) + * UNITS_PER_FP_WORD + - (int) (GP_ARG_NUM_REG * reg_size); + + if (gpr_size + fpr_size) + { + rtx reg_save_area + = assign_stack_local (BLKmode, gpr_size + fpr_size, 64); + gcc_assert (GET_CODE (reg_save_area) == MEM); + reg_save_area = XEXP (reg_save_area, 0); + if (GET_CODE (reg_save_area) == PLUS) + { + gcc_assert (XEXP (reg_save_area, 0) + == virtual_stack_vars_rtx); + gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT); + offset += INTVAL (XEXP (reg_save_area, 1)); + } + else + gcc_assert (reg_save_area == virtual_stack_vars_rtx); + } + + cfun->machine->varargs_save_offset = offset; + save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset); + } + } + else + { + first_reg_offset = next_cum.words; + save_area = crtl->args.internal_arg_pointer; + + if (targetm.calls.must_pass_in_stack (mode, type)) + first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type); + } + + set = get_varargs_alias_set (); + if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG + && cfun->va_list_gpr_size) + { + int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset; + + if (va_list_gpr_counter_field) + /* V4 va_list_gpr_size counts number of registers needed. */ + n_gpr = cfun->va_list_gpr_size; + else + /* char * va_list instead counts number of bytes needed. 
*/ + n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size; + + if (nregs > n_gpr) + nregs = n_gpr; + + mem = gen_rtx_MEM (BLKmode, + plus_constant (Pmode, save_area, + first_reg_offset * reg_size)); + MEM_NOTRAP_P (mem) = 1; + set_mem_alias_set (mem, set); + set_mem_align (mem, BITS_PER_WORD); + + rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem, + nregs); + } + + /* Save FP registers if needed. */ + if (DEFAULT_ABI == ABI_V4 + && TARGET_HARD_FLOAT && TARGET_FPRS + && ! no_rtl + && next_cum.fregno <= FP_ARG_V4_MAX_REG + && cfun->va_list_fpr_size) + { + int fregno = next_cum.fregno, nregs; + rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO); + rtx lab = gen_label_rtx (); + int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG) + * UNITS_PER_FP_WORD); + + emit_jump_insn + (gen_rtx_SET (pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, + gen_rtx_NE (VOIDmode, cr1, + const0_rtx), + gen_rtx_LABEL_REF (VOIDmode, lab), + pc_rtx))); + + for (nregs = 0; + fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size; + fregno++, off += UNITS_PER_FP_WORD, nregs++) + { + mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) + ? DFmode : SFmode, + plus_constant (Pmode, save_area, off)); + MEM_NOTRAP_P (mem) = 1; + set_mem_alias_set (mem, set); + set_mem_align (mem, GET_MODE_ALIGNMENT ( + (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) + ? DFmode : SFmode)); + emit_move_insn (mem, gen_rtx_REG ( + (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) + ? DFmode : SFmode, fregno)); + } + + emit_label (lab); + } +} + +/* Create the va_list data type. */ + +static tree +rs6000_build_builtin_va_list (void) +{ + tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl; + + /* For AIX, prefer 'char *' because that's what the system + header files like. */ + if (DEFAULT_ABI != ABI_V4) + return build_pointer_type (char_type_node); + + record = (*lang_hooks.types.make_type) (RECORD_TYPE); + type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL, + get_identifier ("__va_list_tag"), record); + + f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"), + unsigned_char_type_node); + f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"), + unsigned_char_type_node); + /* Give the two bytes of padding a name, so that -Wpadded won't warn on + every user file. */ + f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("reserved"), short_unsigned_type_node); + f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("overflow_arg_area"), + ptr_type_node); + f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("reg_save_area"), + ptr_type_node); + + va_list_gpr_counter_field = f_gpr; + va_list_fpr_counter_field = f_fpr; + + DECL_FIELD_CONTEXT (f_gpr) = record; + DECL_FIELD_CONTEXT (f_fpr) = record; + DECL_FIELD_CONTEXT (f_res) = record; + DECL_FIELD_CONTEXT (f_ovf) = record; + DECL_FIELD_CONTEXT (f_sav) = record; + + TYPE_STUB_DECL (record) = type_decl; + TYPE_NAME (record) = type_decl; + TYPE_FIELDS (record) = f_gpr; + DECL_CHAIN (f_gpr) = f_fpr; + DECL_CHAIN (f_fpr) = f_res; + DECL_CHAIN (f_res) = f_ovf; + DECL_CHAIN (f_ovf) = f_sav; + + layout_type (record); + + /* The correct type is an array type of one element. */ + return build_array_type (record, build_index_type (size_zero_node)); +} + +/* Implement va_start. */ + +static void +rs6000_va_start (tree valist, rtx nextarg) +{ + HOST_WIDE_INT words, n_gpr, n_fpr; + tree f_gpr, f_fpr, f_res, f_ovf, f_sav; + tree gpr, fpr, ovf, sav, t; + + /* Only SVR4 needs something special. 
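Viewed as ordinary C source, the record built by rs6000_build_builtin_va_list above corresponds to the SysV PowerPC va_list layout. The sketch below is only an approximation for illustration: the real type is constructed with tree nodes, the typedef name here is invented, and the array-of-one wrapper comes from the final build_array_type call.

/* Approximate C equivalent of the __va_list_tag record built above for
   DEFAULT_ABI == ABI_V4; every other ABI simply uses `char *'.  */
typedef struct __va_list_tag
{
  unsigned char gpr;            /* count of GPRs already used, 0..8      */
  unsigned char fpr;            /* count of FPRs already used, 0..8      */
  unsigned short reserved;      /* padding, named to silence -Wpadded    */
  void *overflow_arg_area;      /* arguments that spilled onto the stack */
  void *reg_save_area;          /* where the prologue dumped the regs    */
} illustrative_va_list[1];      /* array of one element, as returned     */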
*/ + if (DEFAULT_ABI != ABI_V4) + { + std_expand_builtin_va_start (valist, nextarg); + return; + } + + f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node)); + f_fpr = DECL_CHAIN (f_gpr); + f_res = DECL_CHAIN (f_fpr); + f_ovf = DECL_CHAIN (f_res); + f_sav = DECL_CHAIN (f_ovf); + + valist = build_simple_mem_ref (valist); + gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE); + fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist), + f_fpr, NULL_TREE); + ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist), + f_ovf, NULL_TREE); + sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist), + f_sav, NULL_TREE); + + /* Count number of gp and fp argument registers used. */ + words = crtl->args.info.words; + n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG, + GP_ARG_NUM_REG); + n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG, + FP_ARG_NUM_REG); + + if (TARGET_DEBUG_ARG) + fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = " + HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n", + words, n_gpr, n_fpr); + + if (cfun->va_list_gpr_size) + { + t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, + build_int_cst (NULL_TREE, n_gpr)); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + } + + if (cfun->va_list_fpr_size) + { + t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, + build_int_cst (NULL_TREE, n_fpr)); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + +#ifdef HAVE_AS_GNU_ATTRIBUTE + if (call_ABI_of_interest (cfun->decl)) + rs6000_passes_float = true; +#endif + } + + /* Find the overflow area. */ + t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer); + if (words != 0) + t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD); + t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + /* If there were no va_arg invocations, don't set up the register + save area. */ + if (!cfun->va_list_gpr_size + && !cfun->va_list_fpr_size + && n_gpr < GP_ARG_NUM_REG + && n_fpr < FP_ARG_V4_MAX_REG) + return; + + /* Find the register save area. */ + t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx); + if (cfun->machine->varargs_save_offset) + t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset); + t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); +} + +/* Implement va_arg. */ + +static tree +rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p) +{ + tree f_gpr, f_fpr, f_res, f_ovf, f_sav; + tree gpr, fpr, ovf, sav, reg, t, u; + int size, rsize, n_reg, sav_ofs, sav_scale; + tree lab_false, lab_over, addr; + int align; + tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true); + int regalign = 0; + gimple *stmt; + + if (pass_by_reference (NULL, TYPE_MODE (type), type, false)) + { + t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p); + return build_va_arg_indirect_ref (t); + } + + /* We need to deal with the fact that the darwin ppc64 ABI is defined by an + earlier version of gcc, with the property that it always applied alignment + adjustments to the va-args (even for zero-sized types). The cheapest way + to deal with this is to replicate the effect of the part of + std_gimplify_va_arg_expr that carries out the align adjust, for the case + of relevance. 
+ We don't need to check for pass-by-reference because of the test above. + We can return a simplifed answer, since we know there's no offset to add. */ + + if (((TARGET_MACHO + && rs6000_darwin64_abi) + || DEFAULT_ABI == ABI_ELFv2 + || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)) + && integer_zerop (TYPE_SIZE (type))) + { + unsigned HOST_WIDE_INT align, boundary; + tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL); + align = PARM_BOUNDARY / BITS_PER_UNIT; + boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type); + if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT) + boundary = MAX_SUPPORTED_STACK_ALIGNMENT; + boundary /= BITS_PER_UNIT; + if (boundary > align) + { + tree t ; + /* This updates arg ptr by the amount that would be necessary + to align the zero-sized (but not zero-alignment) item. */ + t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp, + fold_build_pointer_plus_hwi (valist_tmp, boundary - 1)); + gimplify_and_add (t, pre_p); + + t = fold_convert (sizetype, valist_tmp); + t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp, + fold_convert (TREE_TYPE (valist), + fold_build2 (BIT_AND_EXPR, sizetype, t, + size_int (-boundary)))); + t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t); + gimplify_and_add (t, pre_p); + } + /* Since it is zero-sized there's no increment for the item itself. */ + valist_tmp = fold_convert (build_pointer_type (type), valist_tmp); + return build_va_arg_indirect_ref (valist_tmp); + } + + if (DEFAULT_ABI != ABI_V4) + { + if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE) + { + tree elem_type = TREE_TYPE (type); + machine_mode elem_mode = TYPE_MODE (elem_type); + int elem_size = GET_MODE_SIZE (elem_mode); + + if (elem_size < UNITS_PER_WORD) + { + tree real_part, imag_part; + gimple_seq post = NULL; + + real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p, + &post); + /* Copy the value into a temporary, lest the formal temporary + be reused out from under us. */ + real_part = get_initialized_tmp_var (real_part, pre_p, &post); + gimple_seq_add_seq (pre_p, post); + + imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p, + post_p); + + return build2 (COMPLEX_EXPR, type, real_part, imag_part); + } + } + + return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); + } + + f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node)); + f_fpr = DECL_CHAIN (f_gpr); + f_res = DECL_CHAIN (f_fpr); + f_ovf = DECL_CHAIN (f_res); + f_sav = DECL_CHAIN (f_ovf); + + gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE); + fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist), + f_fpr, NULL_TREE); + ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist), + f_ovf, NULL_TREE); + sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist), + f_sav, NULL_TREE); + + size = int_size_in_bytes (type); + rsize = (size + 3) / 4; + int pad = 4 * rsize - size; + align = 1; + + machine_mode mode = TYPE_MODE (type); + if (abi_v4_pass_in_fpr (mode)) + { + /* FP args go in FP registers, if present. */ + reg = fpr; + n_reg = (size + 7) / 8; + sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4; + sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4); + if (mode != SFmode && mode != SDmode) + align = 8; + } + else + { + /* Otherwise into GP registers. */ + reg = gpr; + n_reg = rsize; + sav_ofs = 0; + sav_scale = 4; + if (n_reg == 2) + align = 8; + } + + /* Pull the value out of the saved registers.... 
*/ + + lab_over = NULL; + addr = create_tmp_var (ptr_type_node, "addr"); + + /* AltiVec vectors never go in registers when -mabi=altivec. */ + if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode)) + align = 16; + else + { + lab_false = create_artificial_label (input_location); + lab_over = create_artificial_label (input_location); + + /* Long long and SPE vectors are aligned in the registers. + As are any other 2 gpr item such as complex int due to a + historical mistake. */ + u = reg; + if (n_reg == 2 && reg == gpr) + { + regalign = 1; + u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg), + build_int_cst (TREE_TYPE (reg), n_reg - 1)); + u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), + unshare_expr (reg), u); + } + /* _Decimal128 is passed in even/odd fpr pairs; the stored + reg number is 0 for f1, so we want to make it odd. */ + else if (reg == fpr && mode == TDmode) + { + t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg), + build_int_cst (TREE_TYPE (reg), 1)); + u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t); + } + + t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1)); + t = build2 (GE_EXPR, boolean_type_node, u, t); + u = build1 (GOTO_EXPR, void_type_node, lab_false); + t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE); + gimplify_and_add (t, pre_p); + + t = sav; + if (sav_ofs) + t = fold_build_pointer_plus_hwi (sav, sav_ofs); + + u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg), + build_int_cst (TREE_TYPE (reg), n_reg)); + u = fold_convert (sizetype, u); + u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale)); + t = fold_build_pointer_plus (t, u); + + /* _Decimal32 varargs are located in the second word of the 64-bit + FP register for 32-bit binaries. */ + if (TARGET_32BIT + && TARGET_HARD_FLOAT && TARGET_FPRS + && mode == SDmode) + t = fold_build_pointer_plus_hwi (t, size); + + /* Args are passed right-aligned. */ + if (BYTES_BIG_ENDIAN) + t = fold_build_pointer_plus_hwi (t, pad); + + gimplify_assign (addr, t, pre_p); + + gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over)); + + stmt = gimple_build_label (lab_false); + gimple_seq_add_stmt (pre_p, stmt); + + if ((n_reg == 2 && !regalign) || n_reg > 2) + { + /* Ensure that we don't find any more args in regs. + Alignment has taken care of for special cases. */ + gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p); + } + } + + /* ... otherwise out of the overflow area. */ + + /* Care for on-stack alignment if needed. */ + t = ovf; + if (align != 1) + { + t = fold_build_pointer_plus_hwi (t, align - 1); + t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, + build_int_cst (TREE_TYPE (t), -align)); + } + + /* Args are passed right-aligned. */ + if (BYTES_BIG_ENDIAN) + t = fold_build_pointer_plus_hwi (t, pad); + + gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); + + gimplify_assign (unshare_expr (addr), t, pre_p); + + t = fold_build_pointer_plus_hwi (t, size); + gimplify_assign (unshare_expr (ovf), t, pre_p); + + if (lab_over) + { + stmt = gimple_build_label (lab_over); + gimple_seq_add_stmt (pre_p, stmt); + } + + if (STRICT_ALIGNMENT + && (TYPE_ALIGN (type) + > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align))) + { + /* The value (of type complex double, for example) may not be + aligned in memory in the saved registers, so copy via a + temporary. (This is the same code as used for SPARC.) 
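A hedged example of the situation this copy handles, using only standard varargs: a complex double fetched with va_arg may sit at a doubleword-aligned slot even though the type wants stronger alignment on a strict-alignment target, so the value is memcpy'd into an aligned temporary first. The function below is a hypothetical caller-side illustration, not code from this patch.

/* Hypothetical example of a va_arg use that triggers the aligned-copy
   path above on a STRICT_ALIGNMENT target.  */
#include <complex.h>
#include <stdarg.h>

double
first_real (int n, ...)
{
  va_list ap;
  va_start (ap, n);
  double _Complex z = va_arg (ap, double _Complex); /* may need the copy */
  va_end (ap);
  return creal (z);
}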
*/ + tree tmp = create_tmp_var (type, "va_arg_tmp"); + tree dest_addr = build_fold_addr_expr (tmp); + + tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY), + 3, dest_addr, addr, size_int (rsize * 4)); + + gimplify_and_add (copy, pre_p); + addr = dest_addr; + } + + addr = fold_convert (ptrtype, addr); + return build_va_arg_indirect_ref (addr); +} + +/* Builtins. */ + +static void +def_builtin (const char *name, tree type, enum rs6000_builtins code) +{ + tree t; + unsigned classify = rs6000_builtin_info[(int)code].attr; + const char *attr_string = ""; + + gcc_assert (name != NULL); + gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT)); + + if (rs6000_builtin_decls[(int)code]) + fatal_error (input_location, + "internal error: builtin function %s already processed", name); + + rs6000_builtin_decls[(int)code] = t = + add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE); + + /* Set any special attributes. */ + if ((classify & RS6000_BTC_CONST) != 0) + { + /* const function, function only depends on the inputs. */ + TREE_READONLY (t) = 1; + TREE_NOTHROW (t) = 1; + attr_string = ", const"; + } + else if ((classify & RS6000_BTC_PURE) != 0) + { + /* pure function, function can read global memory, but does not set any + external state. */ + DECL_PURE_P (t) = 1; + TREE_NOTHROW (t) = 1; + attr_string = ", pure"; + } + else if ((classify & RS6000_BTC_FP) != 0) + { + /* Function is a math function. If rounding mode is on, then treat the + function as not reading global memory, but it can have arbitrary side + effects. If it is off, then assume the function is a const function. + This mimics the ATTR_MATHFN_FPROUNDING attribute in + builtin-attribute.def that is used for the math functions. */ + TREE_NOTHROW (t) = 1; + if (flag_rounding_math) + { + DECL_PURE_P (t) = 1; + DECL_IS_NOVOPS (t) = 1; + attr_string = ", fp, pure"; + } + else + { + TREE_READONLY (t) = 1; + attr_string = ", fp, const"; + } + } + else if ((classify & RS6000_BTC_ATTR_MASK) != 0) + gcc_unreachable (); + + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n", + (int)code, name, attr_string); +} + +/* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */ + +#undef RS6000_BUILTIN_0 +#undef RS6000_BUILTIN_1 +#undef RS6000_BUILTIN_2 +#undef RS6000_BUILTIN_3 +#undef RS6000_BUILTIN_A +#undef RS6000_BUILTIN_D +#undef RS6000_BUILTIN_E +#undef RS6000_BUILTIN_H +#undef RS6000_BUILTIN_P +#undef RS6000_BUILTIN_Q +#undef RS6000_BUILTIN_S +#undef RS6000_BUILTIN_X + +#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \ + { MASK, ICODE, NAME, ENUM }, + +#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) + +static const struct builtin_description bdesc_3arg[] = +{ +#include "powerpcspe-builtin.def" +}; + +/* DST operations: void foo (void *, const int, const char). 
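All of the bdesc_* tables above and below are generated with the same X-macro idiom: every RS6000_BUILTIN_* macro is first undefined, exactly one of them is then redefined to emit a table entry while the rest expand to nothing, and the builtin definition file is included once per table. The self-contained sketch below shows that idiom in miniature with made-up names; the real port uses the RS6000_BUILTIN_* classes and powerpcspe-builtin.def.

/* Minimal, self-contained sketch of the X-macro pattern used for the
   bdesc_* tables.  Here the "definition file" is a macro list rather
   than a separate .def file; all names are hypothetical.  */
#include <stdio.h>

#define MY_BUILTINS          \
  BUILTIN_UNARY  (1, "neg")  \
  BUILTIN_BINARY (2, "add")  \
  BUILTIN_BINARY (3, "sub")

struct desc { const char *name; int code; };

/* Build the table of binary builtins: only BUILTIN_BINARY expands to
   an initializer; every other class expands to nothing.  */
#define BUILTIN_UNARY(CODE, NAME)
#define BUILTIN_BINARY(CODE, NAME) { NAME, CODE },
static const struct desc bdesc_binary[] = { MY_BUILTINS };
#undef BUILTIN_UNARY
#undef BUILTIN_BINARY

int
main (void)
{
  for (unsigned i = 0; i < sizeof bdesc_binary / sizeof *bdesc_binary; i++)
    printf ("%s = %d\n", bdesc_binary[i].name, bdesc_binary[i].code);
  return 0;
}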
*/ + +#undef RS6000_BUILTIN_0 +#undef RS6000_BUILTIN_1 +#undef RS6000_BUILTIN_2 +#undef RS6000_BUILTIN_3 +#undef RS6000_BUILTIN_A +#undef RS6000_BUILTIN_D +#undef RS6000_BUILTIN_E +#undef RS6000_BUILTIN_H +#undef RS6000_BUILTIN_P +#undef RS6000_BUILTIN_Q +#undef RS6000_BUILTIN_S +#undef RS6000_BUILTIN_X + +#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \ + { MASK, ICODE, NAME, ENUM }, + +#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) + +static const struct builtin_description bdesc_dst[] = +{ +#include "powerpcspe-builtin.def" +}; + +/* Simple binary operations: VECc = foo (VECa, VECb). */ + +#undef RS6000_BUILTIN_0 +#undef RS6000_BUILTIN_1 +#undef RS6000_BUILTIN_2 +#undef RS6000_BUILTIN_3 +#undef RS6000_BUILTIN_A +#undef RS6000_BUILTIN_D +#undef RS6000_BUILTIN_E +#undef RS6000_BUILTIN_H +#undef RS6000_BUILTIN_P +#undef RS6000_BUILTIN_Q +#undef RS6000_BUILTIN_S +#undef RS6000_BUILTIN_X + +#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \ + { MASK, ICODE, NAME, ENUM }, + +#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) + +static const struct builtin_description bdesc_2arg[] = +{ +#include "powerpcspe-builtin.def" +}; + +#undef RS6000_BUILTIN_0 +#undef RS6000_BUILTIN_1 +#undef RS6000_BUILTIN_2 +#undef RS6000_BUILTIN_3 +#undef RS6000_BUILTIN_A +#undef RS6000_BUILTIN_D +#undef RS6000_BUILTIN_E +#undef RS6000_BUILTIN_H +#undef RS6000_BUILTIN_P +#undef RS6000_BUILTIN_Q +#undef RS6000_BUILTIN_S +#undef RS6000_BUILTIN_X + +#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \ + { MASK, ICODE, NAME, ENUM }, + +#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) + +/* AltiVec predicates. */ + +static const struct builtin_description bdesc_altivec_preds[] = +{ +#include "powerpcspe-builtin.def" +}; + +/* SPE predicates. 
*/ +#undef RS6000_BUILTIN_0 +#undef RS6000_BUILTIN_1 +#undef RS6000_BUILTIN_2 +#undef RS6000_BUILTIN_3 +#undef RS6000_BUILTIN_A +#undef RS6000_BUILTIN_D +#undef RS6000_BUILTIN_E +#undef RS6000_BUILTIN_H +#undef RS6000_BUILTIN_P +#undef RS6000_BUILTIN_Q +#undef RS6000_BUILTIN_S +#undef RS6000_BUILTIN_X + +#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \ + { MASK, ICODE, NAME, ENUM }, + +#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) + +static const struct builtin_description bdesc_spe_predicates[] = +{ +#include "powerpcspe-builtin.def" +}; + +/* SPE evsel predicates. */ +#undef RS6000_BUILTIN_0 +#undef RS6000_BUILTIN_1 +#undef RS6000_BUILTIN_2 +#undef RS6000_BUILTIN_3 +#undef RS6000_BUILTIN_A +#undef RS6000_BUILTIN_D +#undef RS6000_BUILTIN_E +#undef RS6000_BUILTIN_H +#undef RS6000_BUILTIN_P +#undef RS6000_BUILTIN_Q +#undef RS6000_BUILTIN_S +#undef RS6000_BUILTIN_X + +#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \ + { MASK, ICODE, NAME, ENUM }, + +#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) + +static const struct builtin_description bdesc_spe_evsel[] = +{ +#include "powerpcspe-builtin.def" +}; + +/* PAIRED predicates. */ +#undef RS6000_BUILTIN_0 +#undef RS6000_BUILTIN_1 +#undef RS6000_BUILTIN_2 +#undef RS6000_BUILTIN_3 +#undef RS6000_BUILTIN_A +#undef RS6000_BUILTIN_D +#undef RS6000_BUILTIN_E +#undef RS6000_BUILTIN_H +#undef RS6000_BUILTIN_P +#undef RS6000_BUILTIN_Q +#undef RS6000_BUILTIN_S +#undef RS6000_BUILTIN_X + +#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \ + { MASK, ICODE, NAME, ENUM }, + +#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) + +static const struct builtin_description bdesc_paired_preds[] = +{ +#include "powerpcspe-builtin.def" +}; + +/* ABS* operations. 
*/ + +#undef RS6000_BUILTIN_0 +#undef RS6000_BUILTIN_1 +#undef RS6000_BUILTIN_2 +#undef RS6000_BUILTIN_3 +#undef RS6000_BUILTIN_A +#undef RS6000_BUILTIN_D +#undef RS6000_BUILTIN_E +#undef RS6000_BUILTIN_H +#undef RS6000_BUILTIN_P +#undef RS6000_BUILTIN_Q +#undef RS6000_BUILTIN_S +#undef RS6000_BUILTIN_X + +#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \ + { MASK, ICODE, NAME, ENUM }, + +#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) + +static const struct builtin_description bdesc_abs[] = +{ +#include "powerpcspe-builtin.def" +}; + +/* Simple unary operations: VECb = foo (unsigned literal) or VECb = + foo (VECa). */ + +#undef RS6000_BUILTIN_0 +#undef RS6000_BUILTIN_1 +#undef RS6000_BUILTIN_2 +#undef RS6000_BUILTIN_3 +#undef RS6000_BUILTIN_A +#undef RS6000_BUILTIN_D +#undef RS6000_BUILTIN_E +#undef RS6000_BUILTIN_H +#undef RS6000_BUILTIN_P +#undef RS6000_BUILTIN_Q +#undef RS6000_BUILTIN_S +#undef RS6000_BUILTIN_X + +#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \ + { MASK, ICODE, NAME, ENUM }, + +#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) + +static const struct builtin_description bdesc_1arg[] = +{ +#include "powerpcspe-builtin.def" +}; + +/* Simple no-argument operations: result = __builtin_darn_32 () */ + +#undef RS6000_BUILTIN_0 +#undef RS6000_BUILTIN_1 +#undef RS6000_BUILTIN_2 +#undef RS6000_BUILTIN_3 +#undef RS6000_BUILTIN_A +#undef RS6000_BUILTIN_D +#undef RS6000_BUILTIN_E +#undef RS6000_BUILTIN_H +#undef RS6000_BUILTIN_P +#undef RS6000_BUILTIN_Q +#undef RS6000_BUILTIN_S +#undef RS6000_BUILTIN_X + +#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \ + { MASK, ICODE, NAME, ENUM }, + +#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) + +static const struct builtin_description bdesc_0arg[] = +{ +#include "powerpcspe-builtin.def" +}; + +/* HTM builtins. 
*/ +#undef RS6000_BUILTIN_0 +#undef RS6000_BUILTIN_1 +#undef RS6000_BUILTIN_2 +#undef RS6000_BUILTIN_3 +#undef RS6000_BUILTIN_A +#undef RS6000_BUILTIN_D +#undef RS6000_BUILTIN_E +#undef RS6000_BUILTIN_H +#undef RS6000_BUILTIN_P +#undef RS6000_BUILTIN_Q +#undef RS6000_BUILTIN_S +#undef RS6000_BUILTIN_X + +#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \ + { MASK, ICODE, NAME, ENUM }, + +#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) +#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) + +static const struct builtin_description bdesc_htm[] = +{ +#include "powerpcspe-builtin.def" +}; + +#undef RS6000_BUILTIN_0 +#undef RS6000_BUILTIN_1 +#undef RS6000_BUILTIN_2 +#undef RS6000_BUILTIN_3 +#undef RS6000_BUILTIN_A +#undef RS6000_BUILTIN_D +#undef RS6000_BUILTIN_E +#undef RS6000_BUILTIN_H +#undef RS6000_BUILTIN_P +#undef RS6000_BUILTIN_Q +#undef RS6000_BUILTIN_S + +/* Return true if a builtin function is overloaded. */ +bool +rs6000_overloaded_builtin_p (enum rs6000_builtins fncode) +{ + return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0; +} + +const char * +rs6000_overloaded_builtin_name (enum rs6000_builtins fncode) +{ + return rs6000_builtin_info[(int)fncode].name; +} + +/* Expand an expression EXP that calls a builtin without arguments. */ +static rtx +rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target) +{ + rtx pat; + machine_mode tmode = insn_data[icode].operand[0].mode; + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. */ + return 0; + + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + pat = GEN_FCN (icode) (target); + if (! pat) + return 0; + emit_insn (pat); + + return target; +} + + +static rtx +rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + machine_mode mode0 = insn_data[icode].operand[0].mode; + machine_mode mode1 = insn_data[icode].operand[1].mode; + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. */ + return 0; + + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg0 == error_mark_node || arg1 == error_mark_node) + return const0_rtx; + + if (GET_CODE (op0) != CONST_INT + || INTVAL (op0) > 255 + || INTVAL (op0) < 0) + { + error ("argument 1 must be an 8-bit field value"); + return const0_rtx; + } + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (icode) (op0, op1); + if (! 
pat) + return const0_rtx; + emit_insn (pat); + + return NULL_RTX; +} + +static rtx +rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_normal (arg0); + machine_mode tmode = insn_data[icode].operand[0].mode; + machine_mode mode0 = insn_data[icode].operand[1].mode; + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. */ + return 0; + + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg0 == error_mark_node) + return const0_rtx; + + if (icode == CODE_FOR_altivec_vspltisb + || icode == CODE_FOR_altivec_vspltish + || icode == CODE_FOR_altivec_vspltisw + || icode == CODE_FOR_spe_evsplatfi + || icode == CODE_FOR_spe_evsplati) + { + /* Only allow 5-bit *signed* literals. */ + if (GET_CODE (op0) != CONST_INT + || INTVAL (op0) > 15 + || INTVAL (op0) < -16) + { + error ("argument 1 must be a 5-bit signed literal"); + return CONST0_RTX (tmode); + } + } + + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (target, op0); + if (! pat) + return 0; + emit_insn (pat); + + return target; +} + +static rtx +altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target) +{ + rtx pat, scratch1, scratch2; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_normal (arg0); + machine_mode tmode = insn_data[icode].operand[0].mode; + machine_mode mode0 = insn_data[icode].operand[1].mode; + + /* If we have invalid arguments, bail out before generating bad rtl. */ + if (arg0 == error_mark_node) + return const0_rtx; + + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + scratch1 = gen_reg_rtx (mode0); + scratch2 = gen_reg_rtx (mode0); + + pat = GEN_FCN (icode) (target, op0, scratch1, scratch2); + if (! pat) + return 0; + emit_insn (pat); + + return target; +} + +static rtx +rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + machine_mode tmode = insn_data[icode].operand[0].mode; + machine_mode mode0 = insn_data[icode].operand[1].mode; + machine_mode mode1 = insn_data[icode].operand[2].mode; + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. */ + return 0; + + /* If we got invalid arguments bail out before generating bad rtl. 
*/ + if (arg0 == error_mark_node || arg1 == error_mark_node) + return const0_rtx; + + if (icode == CODE_FOR_altivec_vcfux + || icode == CODE_FOR_altivec_vcfsx + || icode == CODE_FOR_altivec_vctsxs + || icode == CODE_FOR_altivec_vctuxs + || icode == CODE_FOR_altivec_vspltb + || icode == CODE_FOR_altivec_vsplth + || icode == CODE_FOR_altivec_vspltw + || icode == CODE_FOR_spe_evaddiw + || icode == CODE_FOR_spe_evldd + || icode == CODE_FOR_spe_evldh + || icode == CODE_FOR_spe_evldw + || icode == CODE_FOR_spe_evlhhesplat + || icode == CODE_FOR_spe_evlhhossplat + || icode == CODE_FOR_spe_evlhhousplat + || icode == CODE_FOR_spe_evlwhe + || icode == CODE_FOR_spe_evlwhos + || icode == CODE_FOR_spe_evlwhou + || icode == CODE_FOR_spe_evlwhsplat + || icode == CODE_FOR_spe_evlwwsplat + || icode == CODE_FOR_spe_evrlwi + || icode == CODE_FOR_spe_evslwi + || icode == CODE_FOR_spe_evsrwis + || icode == CODE_FOR_spe_evsubifw + || icode == CODE_FOR_spe_evsrwiu) + { + /* Only allow 5-bit unsigned literals. */ + STRIP_NOPS (arg1); + if (TREE_CODE (arg1) != INTEGER_CST + || TREE_INT_CST_LOW (arg1) & ~0x1f) + { + error ("argument 2 must be a 5-bit unsigned literal"); + return CONST0_RTX (tmode); + } + } + else if (icode == CODE_FOR_dfptstsfi_eq_dd + || icode == CODE_FOR_dfptstsfi_lt_dd + || icode == CODE_FOR_dfptstsfi_gt_dd + || icode == CODE_FOR_dfptstsfi_unordered_dd + || icode == CODE_FOR_dfptstsfi_eq_td + || icode == CODE_FOR_dfptstsfi_lt_td + || icode == CODE_FOR_dfptstsfi_gt_td + || icode == CODE_FOR_dfptstsfi_unordered_td) + { + /* Only allow 6-bit unsigned literals. */ + STRIP_NOPS (arg0); + if (TREE_CODE (arg0) != INTEGER_CST + || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63)) + { + error ("argument 1 must be a 6-bit unsigned literal"); + return CONST0_RTX (tmode); + } + } + else if (icode == CODE_FOR_xststdcdp + || icode == CODE_FOR_xststdcsp + || icode == CODE_FOR_xvtstdcdp + || icode == CODE_FOR_xvtstdcsp) + { + /* Only allow 7-bit unsigned literals. */ + STRIP_NOPS (arg1); + if (TREE_CODE (arg1) != INTEGER_CST + || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127)) + { + error ("argument 2 must be a 7-bit unsigned literal"); + return CONST0_RTX (tmode); + } + } + + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + + return target; +} + +static rtx +altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target) +{ + rtx pat, scratch; + tree cr6_form = CALL_EXPR_ARG (exp, 0); + tree arg0 = CALL_EXPR_ARG (exp, 1); + tree arg1 = CALL_EXPR_ARG (exp, 2); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + machine_mode tmode = SImode; + machine_mode mode0 = insn_data[icode].operand[1].mode; + machine_mode mode1 = insn_data[icode].operand[2].mode; + int cr6_form_int; + + if (TREE_CODE (cr6_form) != INTEGER_CST) + { + error ("argument 1 of __builtin_altivec_predicate must be a constant"); + return const0_rtx; + } + else + cr6_form_int = TREE_INT_CST_LOW (cr6_form); + + gcc_assert (mode0 == mode1); + + /* If we have invalid arguments, bail out before generating bad rtl. 
*/ + if (arg0 == error_mark_node || arg1 == error_mark_node) + return const0_rtx; + + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + /* Note that for many of the relevant operations (e.g. cmpne or + cmpeq) with float or double operands, it makes more sense for the + mode of the allocated scratch register to select a vector of + integer. But the choice to copy the mode of operand 0 was made + long ago and there are no plans to change it. */ + scratch = gen_reg_rtx (mode0); + + pat = GEN_FCN (icode) (scratch, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + + /* The vec_any* and vec_all* predicates use the same opcodes for two + different operations, but the bits in CR6 will be different + depending on what information we want. So we have to play tricks + with CR6 to get the right bits out. + + If you think this is disgusting, look at the specs for the + AltiVec predicates. */ + + switch (cr6_form_int) + { + case 0: + emit_insn (gen_cr6_test_for_zero (target)); + break; + case 1: + emit_insn (gen_cr6_test_for_zero_reverse (target)); + break; + case 2: + emit_insn (gen_cr6_test_for_lt (target)); + break; + case 3: + emit_insn (gen_cr6_test_for_lt_reverse (target)); + break; + default: + error ("argument 1 of __builtin_altivec_predicate is out of range"); + break; + } + + return target; +} + +static rtx +paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target) +{ + rtx pat, addr; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + machine_mode tmode = insn_data[icode].operand[0].mode; + machine_mode mode0 = Pmode; + machine_mode mode1 = Pmode; + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. */ + return 0; + + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg0 == error_mark_node || arg1 == error_mark_node) + return const0_rtx; + + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + op1 = copy_to_mode_reg (mode1, op1); + + if (op0 == const0_rtx) + { + addr = gen_rtx_MEM (tmode, op1); + } + else + { + op0 = copy_to_mode_reg (mode0, op0); + addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1)); + } + + pat = GEN_FCN (icode) (target, addr); + + if (! pat) + return 0; + emit_insn (pat); + + return target; +} + +/* Return a constant vector for use as a little-endian permute control vector + to reverse the order of elements of the given vector mode. */ +static rtx +swap_selector_for_mode (machine_mode mode) +{ + /* These are little endian vectors, so their elements are reversed + from what you would normally expect for a permute control vector. 
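The four selector tables that follow all fit one arithmetic pattern: within each element-sized group the byte indices run backwards. The sketch below merely regenerates those tables for illustration; the port itself keeps the explicit arrays.

/* Illustrative sketch only: reproduce the swap2/swap4/swap8/swap16 byte
   indices below from the element size in bytes (8, 4, 2 or 1).  Within
   each element-sized group the byte indices appear in reverse order.  */
static void
build_swap_selector (unsigned elem_size, unsigned char sel[16])
{
  for (unsigned i = 0; i < 16; i++)
    sel[i] = (i / elem_size) * elem_size + (elem_size - 1 - i % elem_size);
}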
*/ + unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8}; + unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12}; + unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14}; + unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + unsigned int *swaparray, i; + rtx perm[16]; + + switch (mode) + { + case V2DFmode: + case V2DImode: + swaparray = swap2; + break; + case V4SFmode: + case V4SImode: + swaparray = swap4; + break; + case V8HImode: + swaparray = swap8; + break; + case V16QImode: + swaparray = swap16; + break; + default: + gcc_unreachable (); + } + + for (i = 0; i < 16; ++i) + perm[i] = GEN_INT (swaparray[i]); + + return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm))); +} + +/* Generate code for an "lvxl", or "lve*x" built-in for a little endian target + with -maltivec=be specified. Issue the load followed by an element- + reversing permute. */ +void +altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec) +{ + rtx tmp = gen_reg_rtx (mode); + rtx load = gen_rtx_SET (tmp, op1); + rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec); + rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx)); + rtx sel = swap_selector_for_mode (mode); + rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM); + + gcc_assert (REG_P (op0)); + emit_insn (par); + emit_insn (gen_rtx_SET (op0, vperm)); +} + +/* Generate code for a "stvxl" built-in for a little endian target with + -maltivec=be specified. Issue the store preceded by an element-reversing + permute. */ +void +altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec) +{ + rtx tmp = gen_reg_rtx (mode); + rtx store = gen_rtx_SET (op0, tmp); + rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec); + rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx)); + rtx sel = swap_selector_for_mode (mode); + rtx vperm; + + gcc_assert (REG_P (op1)); + vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM); + emit_insn (gen_rtx_SET (tmp, vperm)); + emit_insn (par); +} + +/* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be + specified. Issue the store preceded by an element-reversing permute. */ +void +altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec) +{ + machine_mode inner_mode = GET_MODE_INNER (mode); + rtx tmp = gen_reg_rtx (mode); + rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec); + rtx sel = swap_selector_for_mode (mode); + rtx vperm; + + gcc_assert (REG_P (op1)); + vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM); + emit_insn (gen_rtx_SET (tmp, vperm)); + emit_insn (gen_rtx_SET (op0, stvx)); +} + +static rtx +altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk) +{ + rtx pat, addr; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + machine_mode tmode = insn_data[icode].operand[0].mode; + machine_mode mode0 = Pmode; + machine_mode mode1 = Pmode; + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. */ + return 0; + + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg0 == error_mark_node || arg1 == error_mark_node) + return const0_rtx; + + if (target == 0 + || GET_MODE (target) != tmode + || ! 
(*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + op1 = copy_to_mode_reg (mode1, op1); + + /* For LVX, express the RTL accurately by ANDing the address with -16. + LVXL and LVE*X expand to use UNSPECs to hide their special behavior, + so the raw address is fine. */ + if (icode == CODE_FOR_altivec_lvx_v2df_2op + || icode == CODE_FOR_altivec_lvx_v2di_2op + || icode == CODE_FOR_altivec_lvx_v4sf_2op + || icode == CODE_FOR_altivec_lvx_v4si_2op + || icode == CODE_FOR_altivec_lvx_v8hi_2op + || icode == CODE_FOR_altivec_lvx_v16qi_2op) + { + rtx rawaddr; + if (op0 == const0_rtx) + rawaddr = op1; + else + { + op0 = copy_to_mode_reg (mode0, op0); + rawaddr = gen_rtx_PLUS (Pmode, op1, op0); + } + addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16)); + addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr); + + /* For -maltivec=be, emit the load and follow it up with a + permute to swap the elements. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + rtx temp = gen_reg_rtx (tmode); + emit_insn (gen_rtx_SET (temp, addr)); + + rtx sel = swap_selector_for_mode (tmode); + rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel), + UNSPEC_VPERM); + emit_insn (gen_rtx_SET (target, vperm)); + } + else + emit_insn (gen_rtx_SET (target, addr)); + } + else + { + if (op0 == const0_rtx) + addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1); + else + { + op0 = copy_to_mode_reg (mode0, op0); + addr = gen_rtx_MEM (blk ? BLKmode : tmode, + gen_rtx_PLUS (Pmode, op1, op0)); + } + + pat = GEN_FCN (icode) (target, addr); + if (! pat) + return 0; + emit_insn (pat); + } + + return target; +} + +static rtx +spe_expand_stv_builtin (enum insn_code icode, tree exp) +{ + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + tree arg2 = CALL_EXPR_ARG (exp, 2); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + rtx op2 = expand_normal (arg2); + rtx pat; + machine_mode mode0 = insn_data[icode].operand[0].mode; + machine_mode mode1 = insn_data[icode].operand[1].mode; + machine_mode mode2 = insn_data[icode].operand[2].mode; + + /* Invalid arguments. Bail before doing anything stoopid! */ + if (arg0 == error_mark_node + || arg1 == error_mark_node + || arg2 == error_mark_node) + return const0_rtx; + + if (! (*insn_data[icode].operand[2].predicate) (op0, mode2)) + op0 = copy_to_mode_reg (mode2, op0); + if (! (*insn_data[icode].operand[0].predicate) (op1, mode0)) + op1 = copy_to_mode_reg (mode0, op1); + if (! (*insn_data[icode].operand[1].predicate) (op2, mode1)) + op2 = copy_to_mode_reg (mode1, op2); + + pat = GEN_FCN (icode) (op1, op2, op0); + if (pat) + emit_insn (pat); + return NULL_RTX; +} + +static rtx +paired_expand_stv_builtin (enum insn_code icode, tree exp) +{ + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + tree arg2 = CALL_EXPR_ARG (exp, 2); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + rtx op2 = expand_normal (arg2); + rtx pat, addr; + machine_mode tmode = insn_data[icode].operand[0].mode; + machine_mode mode1 = Pmode; + machine_mode mode2 = Pmode; + + /* Invalid arguments. Bail before doing anything stoopid! */ + if (arg0 == error_mark_node + || arg1 == error_mark_node + || arg2 == error_mark_node) + return const0_rtx; + + if (! 
(*insn_data[icode].operand[1].predicate) (op0, tmode)) + op0 = copy_to_mode_reg (tmode, op0); + + op2 = copy_to_mode_reg (mode2, op2); + + if (op1 == const0_rtx) + { + addr = gen_rtx_MEM (tmode, op2); + } + else + { + op1 = copy_to_mode_reg (mode1, op1); + addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2)); + } + + pat = GEN_FCN (icode) (addr, op0); + if (pat) + emit_insn (pat); + return NULL_RTX; +} + +static rtx +altivec_expand_stxvl_builtin (enum insn_code icode, tree exp) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + tree arg2 = CALL_EXPR_ARG (exp, 2); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + rtx op2 = expand_normal (arg2); + machine_mode mode0 = insn_data[icode].operand[0].mode; + machine_mode mode1 = insn_data[icode].operand[1].mode; + machine_mode mode2 = insn_data[icode].operand[2].mode; + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. */ + return NULL_RTX; + + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg0 == error_mark_node + || arg1 == error_mark_node + || arg2 == error_mark_node) + return NULL_RTX; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) + op2 = copy_to_mode_reg (mode2, op2); + + pat = GEN_FCN (icode) (op0, op1, op2); + if (pat) + emit_insn (pat); + + return NULL_RTX; +} + +static rtx +altivec_expand_stv_builtin (enum insn_code icode, tree exp) +{ + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + tree arg2 = CALL_EXPR_ARG (exp, 2); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + rtx op2 = expand_normal (arg2); + rtx pat, addr, rawaddr; + machine_mode tmode = insn_data[icode].operand[0].mode; + machine_mode smode = insn_data[icode].operand[1].mode; + machine_mode mode1 = Pmode; + machine_mode mode2 = Pmode; + + /* Invalid arguments. Bail before doing anything stoopid! */ + if (arg0 == error_mark_node + || arg1 == error_mark_node + || arg2 == error_mark_node) + return const0_rtx; + + op2 = copy_to_mode_reg (mode2, op2); + + /* For STVX, express the RTL accurately by ANDing the address with -16. + STVXL and STVE*X expand to use UNSPECs to hide their special behavior, + so the raw address is fine. */ + if (icode == CODE_FOR_altivec_stvx_v2df_2op + || icode == CODE_FOR_altivec_stvx_v2di_2op + || icode == CODE_FOR_altivec_stvx_v4sf_2op + || icode == CODE_FOR_altivec_stvx_v4si_2op + || icode == CODE_FOR_altivec_stvx_v8hi_2op + || icode == CODE_FOR_altivec_stvx_v16qi_2op) + { + if (op1 == const0_rtx) + rawaddr = op2; + else + { + op1 = copy_to_mode_reg (mode1, op1); + rawaddr = gen_rtx_PLUS (Pmode, op2, op1); + } + + addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16)); + addr = gen_rtx_MEM (tmode, addr); + + op0 = copy_to_mode_reg (tmode, op0); + + /* For -maltivec=be, emit a permute to swap the elements, followed + by the store. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + rtx temp = gen_reg_rtx (tmode); + rtx sel = swap_selector_for_mode (tmode); + rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel), + UNSPEC_VPERM); + emit_insn (gen_rtx_SET (temp, vperm)); + emit_insn (gen_rtx_SET (addr, temp)); + } + else + emit_insn (gen_rtx_SET (addr, op0)); + } + else + { + if (! 
(*insn_data[icode].operand[1].predicate) (op0, smode)) + op0 = copy_to_mode_reg (smode, op0); + + if (op1 == const0_rtx) + addr = gen_rtx_MEM (tmode, op2); + else + { + op1 = copy_to_mode_reg (mode1, op1); + addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1)); + } + + pat = GEN_FCN (icode) (addr, op0); + if (pat) + emit_insn (pat); + } + + return NULL_RTX; +} + +/* Return the appropriate SPR number associated with the given builtin. */ +static inline HOST_WIDE_INT +htm_spr_num (enum rs6000_builtins code) +{ + if (code == HTM_BUILTIN_GET_TFHAR + || code == HTM_BUILTIN_SET_TFHAR) + return TFHAR_SPR; + else if (code == HTM_BUILTIN_GET_TFIAR + || code == HTM_BUILTIN_SET_TFIAR) + return TFIAR_SPR; + else if (code == HTM_BUILTIN_GET_TEXASR + || code == HTM_BUILTIN_SET_TEXASR) + return TEXASR_SPR; + gcc_assert (code == HTM_BUILTIN_GET_TEXASRU + || code == HTM_BUILTIN_SET_TEXASRU); + return TEXASRU_SPR; +} + +/* Return the appropriate SPR regno associated with the given builtin. */ +static inline HOST_WIDE_INT +htm_spr_regno (enum rs6000_builtins code) +{ + if (code == HTM_BUILTIN_GET_TFHAR + || code == HTM_BUILTIN_SET_TFHAR) + return TFHAR_REGNO; + else if (code == HTM_BUILTIN_GET_TFIAR + || code == HTM_BUILTIN_SET_TFIAR) + return TFIAR_REGNO; + gcc_assert (code == HTM_BUILTIN_GET_TEXASR + || code == HTM_BUILTIN_SET_TEXASR + || code == HTM_BUILTIN_GET_TEXASRU + || code == HTM_BUILTIN_SET_TEXASRU); + return TEXASR_REGNO; +} + +/* Return the correct ICODE value depending on whether we are + setting or reading the HTM SPRs. */ +static inline enum insn_code +rs6000_htm_spr_icode (bool nonvoid) +{ + if (nonvoid) + return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si; + else + return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si; +} + +/* Expand the HTM builtin in EXP and store the result in TARGET. + Store true in *EXPANDEDP if we found a builtin to expand. */ +static rtx +htm_expand_builtin (tree exp, rtx target, bool * expandedp) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; + enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); + const struct builtin_description *d; + size_t i; + + *expandedp = true; + + if (!TARGET_POWERPC64 + && (fcode == HTM_BUILTIN_TABORTDC + || fcode == HTM_BUILTIN_TABORTDCI)) + { + size_t uns_fcode = (size_t)fcode; + const char *name = rs6000_builtin_info[uns_fcode].name; + error ("builtin %s is only valid in 64-bit mode", name); + return const0_rtx; + } + + /* Expand the HTM builtins. */ + d = bdesc_htm; + for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++) + if (d->code == fcode) + { + rtx op[MAX_HTM_OPERANDS], pat; + int nopnds = 0; + tree arg; + call_expr_arg_iterator iter; + unsigned attr = rs6000_builtin_info[fcode].attr; + enum insn_code icode = d->icode; + const struct insn_operand_data *insn_op; + bool uses_spr = (attr & RS6000_BTC_SPR); + rtx cr = NULL_RTX; + + if (uses_spr) + icode = rs6000_htm_spr_icode (nonvoid); + insn_op = &insn_data[icode].operand[0]; + + if (nonvoid) + { + machine_mode tmode = (uses_spr) ? 
insn_op->mode : SImode; + if (!target + || GET_MODE (target) != tmode + || (uses_spr && !(*insn_op->predicate) (target, tmode))) + target = gen_reg_rtx (tmode); + if (uses_spr) + op[nopnds++] = target; + } + + FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) + { + if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS) + return const0_rtx; + + insn_op = &insn_data[icode].operand[nopnds]; + + op[nopnds] = expand_normal (arg); + + if (!(*insn_op->predicate) (op[nopnds], insn_op->mode)) + { + if (!strcmp (insn_op->constraint, "n")) + { + int arg_num = (nonvoid) ? nopnds : nopnds + 1; + if (!CONST_INT_P (op[nopnds])) + error ("argument %d must be an unsigned literal", arg_num); + else + error ("argument %d is an unsigned literal that is " + "out of range", arg_num); + return const0_rtx; + } + op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]); + } + + nopnds++; + } + + /* Handle the builtins for extended mnemonics. These accept + no arguments, but map to builtins that take arguments. */ + switch (fcode) + { + case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */ + case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */ + op[nopnds++] = GEN_INT (1); + if (flag_checking) + attr |= RS6000_BTC_UNARY; + break; + case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */ + op[nopnds++] = GEN_INT (0); + if (flag_checking) + attr |= RS6000_BTC_UNARY; + break; + default: + break; + } + + /* If this builtin accesses SPRs, then pass in the appropriate + SPR number and SPR regno as the last two operands. */ + if (uses_spr) + { + machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode; + op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode)); + op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode)); + } + /* If this builtin accesses a CR, then pass in a scratch + CR as the last operand. */ + else if (attr & RS6000_BTC_CR) + { cr = gen_reg_rtx (CCmode); + op[nopnds++] = cr; + } + + if (flag_checking) + { + int expected_nopnds = 0; + if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY) + expected_nopnds = 1; + else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY) + expected_nopnds = 2; + else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY) + expected_nopnds = 3; + if (!(attr & RS6000_BTC_VOID)) + expected_nopnds += 1; + if (uses_spr) + expected_nopnds += 2; + + gcc_assert (nopnds == expected_nopnds + && nopnds <= MAX_HTM_OPERANDS); + } + + switch (nopnds) + { + case 1: + pat = GEN_FCN (icode) (op[0]); + break; + case 2: + pat = GEN_FCN (icode) (op[0], op[1]); + break; + case 3: + pat = GEN_FCN (icode) (op[0], op[1], op[2]); + break; + case 4: + pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); + break; + default: + gcc_unreachable (); + } + if (!pat) + return NULL_RTX; + emit_insn (pat); + + if (attr & RS6000_BTC_CR) + { + if (fcode == HTM_BUILTIN_TBEGIN) + { + /* Emit code to set TARGET to true or false depending on + whether the tbegin. instruction successfully or failed + to start a transaction. We do this by placing the 1's + complement of CR's EQ bit into TARGET. */ + rtx scratch = gen_reg_rtx (SImode); + emit_insn (gen_rtx_SET (scratch, + gen_rtx_EQ (SImode, cr, + const0_rtx))); + emit_insn (gen_rtx_SET (target, + gen_rtx_XOR (SImode, scratch, + GEN_INT (1)))); + } + else + { + /* Emit code to copy the 4-bit condition register field + CR into the least significant end of register TARGET. 
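From the user's point of view, the CR handling above is what makes the documented return value of __builtin_tbegin usable as a truth value: the complemented EQ bit of the condition-register field set by tbegin. ends up in TARGET. The sketch below is an ordinary usage example of the standard GCC HTM builtins (it assumes a target built with -mhtm), not code from this patch.

/* Illustrative use of the HTM builtins whose expansion is shown above.
   __builtin_tbegin (0) evaluates to nonzero when the transaction was
   successfully started.  */
int
update_counter (volatile int *counter)
{
  if (__builtin_tbegin (0))
    {
      ++*counter;               /* transactional path */
      __builtin_tend (0);
      return 1;
    }
  return 0;                     /* transaction failed to start */
}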
*/ + rtx scratch1 = gen_reg_rtx (SImode); + rtx scratch2 = gen_reg_rtx (SImode); + rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0); + emit_insn (gen_movcc (subreg, cr)); + emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28))); + emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf))); + } + } + + if (nonvoid) + return target; + return const0_rtx; + } + + *expandedp = false; + return NULL_RTX; +} + +/* Expand the CPU builtin in FCODE and store the result in TARGET. */ + +static rtx +cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED, + rtx target) +{ + /* __builtin_cpu_init () is a nop, so expand to nothing. */ + if (fcode == RS6000_BUILTIN_CPU_INIT) + return const0_rtx; + + if (target == 0 || GET_MODE (target) != SImode) + target = gen_reg_rtx (SImode); + +#ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB + tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0); + if (TREE_CODE (arg) != STRING_CST) + { + error ("builtin %s only accepts a string argument", + rs6000_builtin_info[(size_t) fcode].name); + return const0_rtx; + } + + if (fcode == RS6000_BUILTIN_CPU_IS) + { + const char *cpu = TREE_STRING_POINTER (arg); + rtx cpuid = NULL_RTX; + for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++) + if (strcmp (cpu, cpu_is_info[i].cpu) == 0) + { + /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */ + cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM); + break; + } + if (cpuid == NULL_RTX) + { + /* Invalid CPU argument. */ + error ("cpu %s is an invalid argument to builtin %s", + cpu, rs6000_builtin_info[(size_t) fcode].name); + return const0_rtx; + } + + rtx platform = gen_reg_rtx (SImode); + rtx tcbmem = gen_const_mem (SImode, + gen_rtx_PLUS (Pmode, + gen_rtx_REG (Pmode, TLS_REGNUM), + GEN_INT (TCB_PLATFORM_OFFSET))); + emit_move_insn (platform, tcbmem); + emit_insn (gen_eqsi3 (target, platform, cpuid)); + } + else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS) + { + const char *hwcap = TREE_STRING_POINTER (arg); + rtx mask = NULL_RTX; + int hwcap_offset; + for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++) + if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0) + { + mask = GEN_INT (cpu_supports_info[i].mask); + hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id); + break; + } + if (mask == NULL_RTX) + { + /* Invalid HWCAP argument. */ + error ("hwcap %s is an invalid argument to builtin %s", + hwcap, rs6000_builtin_info[(size_t) fcode].name); + return const0_rtx; + } + + rtx tcb_hwcap = gen_reg_rtx (SImode); + rtx tcbmem = gen_const_mem (SImode, + gen_rtx_PLUS (Pmode, + gen_rtx_REG (Pmode, TLS_REGNUM), + GEN_INT (hwcap_offset))); + emit_move_insn (tcb_hwcap, tcbmem); + rtx scratch1 = gen_reg_rtx (SImode); + emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask))); + rtx scratch2 = gen_reg_rtx (SImode); + emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx)); + emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx))); + } + + /* Record that we have expanded a CPU builtin, so that we can later + emit a reference to the special symbol exported by LIBC to ensure we + do not link against an old LIBC that doesn't support this feature. */ + cpu_builtin_p = true; + +#else + /* For old LIBCs, always return FALSE. 
*/ + emit_move_insn (target, GEN_INT (0)); +#endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */ + + return target; +} + +static rtx +rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + tree arg2 = CALL_EXPR_ARG (exp, 2); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + rtx op2 = expand_normal (arg2); + machine_mode tmode = insn_data[icode].operand[0].mode; + machine_mode mode0 = insn_data[icode].operand[1].mode; + machine_mode mode1 = insn_data[icode].operand[2].mode; + machine_mode mode2 = insn_data[icode].operand[3].mode; + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. */ + return 0; + + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg0 == error_mark_node + || arg1 == error_mark_node + || arg2 == error_mark_node) + return const0_rtx; + + /* Check and prepare argument depending on the instruction code. + + Note that a switch statement instead of the sequence of tests + would be incorrect as many of the CODE_FOR values could be + CODE_FOR_nothing and that would yield multiple alternatives + with identical values. We'd never reach here at runtime in + this case. */ + if (icode == CODE_FOR_altivec_vsldoi_v4sf + || icode == CODE_FOR_altivec_vsldoi_v2df + || icode == CODE_FOR_altivec_vsldoi_v4si + || icode == CODE_FOR_altivec_vsldoi_v8hi + || icode == CODE_FOR_altivec_vsldoi_v16qi) + { + /* Only allow 4-bit unsigned literals. */ + STRIP_NOPS (arg2); + if (TREE_CODE (arg2) != INTEGER_CST + || TREE_INT_CST_LOW (arg2) & ~0xf) + { + error ("argument 3 must be a 4-bit unsigned literal"); + return CONST0_RTX (tmode); + } + } + else if (icode == CODE_FOR_vsx_xxpermdi_v2df + || icode == CODE_FOR_vsx_xxpermdi_v2di + || icode == CODE_FOR_vsx_xxpermdi_v2df_be + || icode == CODE_FOR_vsx_xxpermdi_v2di_be + || icode == CODE_FOR_vsx_xxpermdi_v1ti + || icode == CODE_FOR_vsx_xxpermdi_v4sf + || icode == CODE_FOR_vsx_xxpermdi_v4si + || icode == CODE_FOR_vsx_xxpermdi_v8hi + || icode == CODE_FOR_vsx_xxpermdi_v16qi + || icode == CODE_FOR_vsx_xxsldwi_v16qi + || icode == CODE_FOR_vsx_xxsldwi_v8hi + || icode == CODE_FOR_vsx_xxsldwi_v4si + || icode == CODE_FOR_vsx_xxsldwi_v4sf + || icode == CODE_FOR_vsx_xxsldwi_v2di + || icode == CODE_FOR_vsx_xxsldwi_v2df) + { + /* Only allow 2-bit unsigned literals. */ + STRIP_NOPS (arg2); + if (TREE_CODE (arg2) != INTEGER_CST + || TREE_INT_CST_LOW (arg2) & ~0x3) + { + error ("argument 3 must be a 2-bit unsigned literal"); + return CONST0_RTX (tmode); + } + } + else if (icode == CODE_FOR_vsx_set_v2df + || icode == CODE_FOR_vsx_set_v2di + || icode == CODE_FOR_bcdadd + || icode == CODE_FOR_bcdadd_lt + || icode == CODE_FOR_bcdadd_eq + || icode == CODE_FOR_bcdadd_gt + || icode == CODE_FOR_bcdsub + || icode == CODE_FOR_bcdsub_lt + || icode == CODE_FOR_bcdsub_eq + || icode == CODE_FOR_bcdsub_gt) + { + /* Only allow 1-bit unsigned literals. */ + STRIP_NOPS (arg2); + if (TREE_CODE (arg2) != INTEGER_CST + || TREE_INT_CST_LOW (arg2) & ~0x1) + { + error ("argument 3 must be a 1-bit unsigned literal"); + return CONST0_RTX (tmode); + } + } + else if (icode == CODE_FOR_dfp_ddedpd_dd + || icode == CODE_FOR_dfp_ddedpd_td) + { + /* Only allow 2-bit unsigned literals where the value is 0 or 2. 
*/ + STRIP_NOPS (arg0); + if (TREE_CODE (arg0) != INTEGER_CST + || TREE_INT_CST_LOW (arg0) & ~0x3) + { + error ("argument 1 must be 0 or 2"); + return CONST0_RTX (tmode); + } + } + else if (icode == CODE_FOR_dfp_denbcd_dd + || icode == CODE_FOR_dfp_denbcd_td) + { + /* Only allow 1-bit unsigned literals. */ + STRIP_NOPS (arg0); + if (TREE_CODE (arg0) != INTEGER_CST + || TREE_INT_CST_LOW (arg0) & ~0x1) + { + error ("argument 1 must be a 1-bit unsigned literal"); + return CONST0_RTX (tmode); + } + } + else if (icode == CODE_FOR_dfp_dscli_dd + || icode == CODE_FOR_dfp_dscli_td + || icode == CODE_FOR_dfp_dscri_dd + || icode == CODE_FOR_dfp_dscri_td) + { + /* Only allow 6-bit unsigned literals. */ + STRIP_NOPS (arg1); + if (TREE_CODE (arg1) != INTEGER_CST + || TREE_INT_CST_LOW (arg1) & ~0x3f) + { + error ("argument 2 must be a 6-bit unsigned literal"); + return CONST0_RTX (tmode); + } + } + else if (icode == CODE_FOR_crypto_vshasigmaw + || icode == CODE_FOR_crypto_vshasigmad) + { + /* Check whether the 2nd and 3rd arguments are integer constants and in + range and prepare arguments. */ + STRIP_NOPS (arg1); + if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2)) + { + error ("argument 2 must be 0 or 1"); + return CONST0_RTX (tmode); + } + + STRIP_NOPS (arg2); + if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16)) + { + error ("argument 3 must be in the range 0..15"); + return CONST0_RTX (tmode); + } + } + + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) + op2 = copy_to_mode_reg (mode2, op2); + + if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4) + pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode)); + else + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! pat) + return 0; + emit_insn (pat); + + return target; +} + +/* Expand the lvx builtins. */ +static rtx +altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + tree arg0; + machine_mode tmode, mode0; + rtx pat, op0; + enum insn_code icode; + + switch (fcode) + { + case ALTIVEC_BUILTIN_LD_INTERNAL_16qi: + icode = CODE_FOR_vector_altivec_load_v16qi; + break; + case ALTIVEC_BUILTIN_LD_INTERNAL_8hi: + icode = CODE_FOR_vector_altivec_load_v8hi; + break; + case ALTIVEC_BUILTIN_LD_INTERNAL_4si: + icode = CODE_FOR_vector_altivec_load_v4si; + break; + case ALTIVEC_BUILTIN_LD_INTERNAL_4sf: + icode = CODE_FOR_vector_altivec_load_v4sf; + break; + case ALTIVEC_BUILTIN_LD_INTERNAL_2df: + icode = CODE_FOR_vector_altivec_load_v2df; + break; + case ALTIVEC_BUILTIN_LD_INTERNAL_2di: + icode = CODE_FOR_vector_altivec_load_v2di; + break; + case ALTIVEC_BUILTIN_LD_INTERNAL_1ti: + icode = CODE_FOR_vector_altivec_load_v1ti; + break; + default: + *expandedp = false; + return NULL_RTX; + } + + *expandedp = true; + + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (!
(*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); + + pat = GEN_FCN (icode) (target, op0); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +/* Expand the stvx builtins. */ +static rtx +altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, + bool *expandedp) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + tree arg0, arg1; + machine_mode mode0, mode1; + rtx pat, op0, op1; + enum insn_code icode; + + switch (fcode) + { + case ALTIVEC_BUILTIN_ST_INTERNAL_16qi: + icode = CODE_FOR_vector_altivec_store_v16qi; + break; + case ALTIVEC_BUILTIN_ST_INTERNAL_8hi: + icode = CODE_FOR_vector_altivec_store_v8hi; + break; + case ALTIVEC_BUILTIN_ST_INTERNAL_4si: + icode = CODE_FOR_vector_altivec_store_v4si; + break; + case ALTIVEC_BUILTIN_ST_INTERNAL_4sf: + icode = CODE_FOR_vector_altivec_store_v4sf; + break; + case ALTIVEC_BUILTIN_ST_INTERNAL_2df: + icode = CODE_FOR_vector_altivec_store_v2df; + break; + case ALTIVEC_BUILTIN_ST_INTERNAL_2di: + icode = CODE_FOR_vector_altivec_store_v2di; + break; + case ALTIVEC_BUILTIN_ST_INTERNAL_1ti: + icode = CODE_FOR_vector_altivec_store_v1ti; + break; + default: + *expandedp = false; + return NULL_RTX; + } + + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); + if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (icode) (op0, op1); + if (pat) + emit_insn (pat); + + *expandedp = true; + return NULL_RTX; +} + +/* Expand the dst builtins. */ +static rtx +altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, + bool *expandedp) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); + tree arg0, arg1, arg2; + machine_mode mode0, mode1; + rtx pat, op0, op1, op2; + const struct builtin_description *d; + size_t i; + + *expandedp = false; + + /* Handle DST variants. */ + d = bdesc_dst; + for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++) + if (d->code == fcode) + { + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + mode0 = insn_data[d->icode].operand[0].mode; + mode1 = insn_data[d->icode].operand[1].mode; + + /* Invalid arguments, bail out before generating bad rtl. */ + if (arg0 == error_mark_node + || arg1 == error_mark_node + || arg2 == error_mark_node) + return const0_rtx; + + *expandedp = true; + STRIP_NOPS (arg2); + if (TREE_CODE (arg2) != INTEGER_CST + || TREE_INT_CST_LOW (arg2) & ~0x3) + { + error ("argument to %qs must be a 2-bit unsigned literal", d->name); + return const0_rtx; + } + + if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (Pmode, op0); + if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (d->icode) (op0, op1, op2); + if (pat != 0) + emit_insn (pat); + + return NULL_RTX; + } + + return NULL_RTX; +} + +/* Expand vec_init builtin. 
*/ +static rtx +altivec_expand_vec_init_builtin (tree type, tree exp, rtx target) +{ + machine_mode tmode = TYPE_MODE (type); + machine_mode inner_mode = GET_MODE_INNER (tmode); + int i, n_elt = GET_MODE_NUNITS (tmode); + + gcc_assert (VECTOR_MODE_P (tmode)); + gcc_assert (n_elt == call_expr_nargs (exp)); + + if (!target || !register_operand (target, tmode)) + target = gen_reg_rtx (tmode); + + /* If we have a vector comprised of a single element, such as V1TImode, do + the initialization directly. */ + if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode)) + { + rtx x = expand_normal (CALL_EXPR_ARG (exp, 0)); + emit_move_insn (target, gen_lowpart (tmode, x)); + } + else + { + rtvec v = rtvec_alloc (n_elt); + + for (i = 0; i < n_elt; ++i) + { + rtx x = expand_normal (CALL_EXPR_ARG (exp, i)); + RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); + } + + rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v)); + } + + return target; +} + +/* Return the integer constant in ARG. Constrain it to be in the range + of the subparts of VEC_TYPE; issue an error if not. */ + +static int +get_element_number (tree vec_type, tree arg) +{ + unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; + + if (!tree_fits_uhwi_p (arg) + || (elt = tree_to_uhwi (arg), elt > max)) + { + error ("selector must be an integer constant in the range 0..%wi", max); + return 0; + } + + return elt; +} + +/* Expand vec_set builtin. */ +static rtx +altivec_expand_vec_set_builtin (tree exp) +{ + machine_mode tmode, mode1; + tree arg0, arg1, arg2; + int elt; + rtx op0, op1; + + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + + tmode = TYPE_MODE (TREE_TYPE (arg0)); + mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); + gcc_assert (VECTOR_MODE_P (tmode)); + + op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL); + elt = get_element_number (TREE_TYPE (arg0), arg2); + + if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode) + op1 = convert_modes (mode1, GET_MODE (op1), op1, true); + + op0 = force_reg (tmode, op0); + op1 = force_reg (mode1, op1); + + rs6000_expand_vector_set (op0, op1, elt); + + return op0; +} + +/* Expand vec_ext builtin. */ +static rtx +altivec_expand_vec_ext_builtin (tree exp, rtx target) +{ + machine_mode tmode, mode0; + tree arg0, arg1; + rtx op0; + rtx op1; + + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + + /* Call get_element_number to validate arg1 if it is a constant. */ + if (TREE_CODE (arg1) == INTEGER_CST) + (void) get_element_number (TREE_TYPE (arg0), arg1); + + tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); + mode0 = TYPE_MODE (TREE_TYPE (arg0)); + gcc_assert (VECTOR_MODE_P (mode0)); + + op0 = force_reg (mode0, op0); + + if (optimize || !target || !register_operand (target, tmode)) + target = gen_reg_rtx (tmode); + + rs6000_expand_vector_extract (target, op0, op1); + + return target; +} + +/* Expand the builtin in EXP and store the result in TARGET. Store + true in *EXPANDEDP if we found a builtin to expand.
*/ +static rtx +altivec_expand_builtin (tree exp, rtx target, bool *expandedp) +{ + const struct builtin_description *d; + size_t i; + enum insn_code icode; + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + tree arg0, arg1, arg2; + rtx op0, pat; + machine_mode tmode, mode0; + enum rs6000_builtins fcode + = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); + + if (rs6000_overloaded_builtin_p (fcode)) + { + *expandedp = true; + error ("unresolved overload for Altivec builtin %qF", fndecl); + + /* Given it is invalid, just generate a normal call. */ + return expand_call (exp, target, false); + } + + target = altivec_expand_ld_builtin (exp, target, expandedp); + if (*expandedp) + return target; + + target = altivec_expand_st_builtin (exp, target, expandedp); + if (*expandedp) + return target; + + target = altivec_expand_dst_builtin (exp, target, expandedp); + if (*expandedp) + return target; + + *expandedp = true; + + switch (fcode) + { + case ALTIVEC_BUILTIN_STVX_V2DF: + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp); + case ALTIVEC_BUILTIN_STVX_V2DI: + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp); + case ALTIVEC_BUILTIN_STVX_V4SF: + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp); + case ALTIVEC_BUILTIN_STVX: + case ALTIVEC_BUILTIN_STVX_V4SI: + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp); + case ALTIVEC_BUILTIN_STVX_V8HI: + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp); + case ALTIVEC_BUILTIN_STVX_V16QI: + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp); + case ALTIVEC_BUILTIN_STVEBX: + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp); + case ALTIVEC_BUILTIN_STVEHX: + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp); + case ALTIVEC_BUILTIN_STVEWX: + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp); + case ALTIVEC_BUILTIN_STVXL_V2DF: + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp); + case ALTIVEC_BUILTIN_STVXL_V2DI: + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp); + case ALTIVEC_BUILTIN_STVXL_V4SF: + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp); + case ALTIVEC_BUILTIN_STVXL: + case ALTIVEC_BUILTIN_STVXL_V4SI: + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp); + case ALTIVEC_BUILTIN_STVXL_V8HI: + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp); + case ALTIVEC_BUILTIN_STVXL_V16QI: + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp); + + case ALTIVEC_BUILTIN_STVLX: + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp); + case ALTIVEC_BUILTIN_STVLXL: + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp); + case ALTIVEC_BUILTIN_STVRX: + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp); + case ALTIVEC_BUILTIN_STVRXL: + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp); + + case P9V_BUILTIN_STXVL: + return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp); + + case VSX_BUILTIN_STXVD2X_V1TI: + return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp); + case VSX_BUILTIN_STXVD2X_V2DF: + return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp); + case VSX_BUILTIN_STXVD2X_V2DI: + return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp); + case VSX_BUILTIN_STXVW4X_V4SF: + return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp); + case VSX_BUILTIN_STXVW4X_V4SI: + return 
altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp); + case VSX_BUILTIN_STXVW4X_V8HI: + return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp); + case VSX_BUILTIN_STXVW4X_V16QI: + return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp); + + /* For the following on big endian, it's ok to use any appropriate + unaligned-supporting store, so use a generic expander. For + little-endian, the exact element-reversing instruction must + be used. */ + case VSX_BUILTIN_ST_ELEMREV_V2DF: + { + enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df + : CODE_FOR_vsx_st_elemrev_v2df); + return altivec_expand_stv_builtin (code, exp); + } + case VSX_BUILTIN_ST_ELEMREV_V2DI: + { + enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di + : CODE_FOR_vsx_st_elemrev_v2di); + return altivec_expand_stv_builtin (code, exp); + } + case VSX_BUILTIN_ST_ELEMREV_V4SF: + { + enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf + : CODE_FOR_vsx_st_elemrev_v4sf); + return altivec_expand_stv_builtin (code, exp); + } + case VSX_BUILTIN_ST_ELEMREV_V4SI: + { + enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si + : CODE_FOR_vsx_st_elemrev_v4si); + return altivec_expand_stv_builtin (code, exp); + } + case VSX_BUILTIN_ST_ELEMREV_V8HI: + { + enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi + : CODE_FOR_vsx_st_elemrev_v8hi); + return altivec_expand_stv_builtin (code, exp); + } + case VSX_BUILTIN_ST_ELEMREV_V16QI: + { + enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi + : CODE_FOR_vsx_st_elemrev_v16qi); + return altivec_expand_stv_builtin (code, exp); + } + + case ALTIVEC_BUILTIN_MFVSCR: + icode = CODE_FOR_altivec_mfvscr; + tmode = insn_data[icode].operand[0].mode; + + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + pat = GEN_FCN (icode) (target); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case ALTIVEC_BUILTIN_MTVSCR: + icode = CODE_FOR_altivec_mtvscr; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + mode0 = insn_data[icode].operand[0].mode; + + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg0 == error_mark_node) + return const0_rtx; + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (op0); + if (pat) + emit_insn (pat); + return NULL_RTX; + + case ALTIVEC_BUILTIN_DSSALL: + emit_insn (gen_altivec_dssall ()); + return NULL_RTX; + + case ALTIVEC_BUILTIN_DSS: + icode = CODE_FOR_altivec_dss; + arg0 = CALL_EXPR_ARG (exp, 0); + STRIP_NOPS (arg0); + op0 = expand_normal (arg0); + mode0 = insn_data[icode].operand[0].mode; + + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg0 == error_mark_node) + return const0_rtx; + + if (TREE_CODE (arg0) != INTEGER_CST + || TREE_INT_CST_LOW (arg0) & ~0x3) + { + error ("argument to dss must be a 2-bit unsigned literal"); + return const0_rtx; + } + + if (! 
(*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + emit_insn (gen_altivec_dss (op0)); + return NULL_RTX; + + case ALTIVEC_BUILTIN_VEC_INIT_V4SI: + case ALTIVEC_BUILTIN_VEC_INIT_V8HI: + case ALTIVEC_BUILTIN_VEC_INIT_V16QI: + case ALTIVEC_BUILTIN_VEC_INIT_V4SF: + case VSX_BUILTIN_VEC_INIT_V2DF: + case VSX_BUILTIN_VEC_INIT_V2DI: + case VSX_BUILTIN_VEC_INIT_V1TI: + return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target); + + case ALTIVEC_BUILTIN_VEC_SET_V4SI: + case ALTIVEC_BUILTIN_VEC_SET_V8HI: + case ALTIVEC_BUILTIN_VEC_SET_V16QI: + case ALTIVEC_BUILTIN_VEC_SET_V4SF: + case VSX_BUILTIN_VEC_SET_V2DF: + case VSX_BUILTIN_VEC_SET_V2DI: + case VSX_BUILTIN_VEC_SET_V1TI: + return altivec_expand_vec_set_builtin (exp); + + case ALTIVEC_BUILTIN_VEC_EXT_V4SI: + case ALTIVEC_BUILTIN_VEC_EXT_V8HI: + case ALTIVEC_BUILTIN_VEC_EXT_V16QI: + case ALTIVEC_BUILTIN_VEC_EXT_V4SF: + case VSX_BUILTIN_VEC_EXT_V2DF: + case VSX_BUILTIN_VEC_EXT_V2DI: + case VSX_BUILTIN_VEC_EXT_V1TI: + return altivec_expand_vec_ext_builtin (exp, target); + + case P9V_BUILTIN_VEXTRACT4B: + case P9V_BUILTIN_VEC_VEXTRACT4B: + arg1 = CALL_EXPR_ARG (exp, 1); + STRIP_NOPS (arg1); + + /* Generate a normal call if it is invalid. */ + if (arg1 == error_mark_node) + return expand_call (exp, target, false); + + if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12) + { + error ("second argument to vec_vextract4b must be 0..12"); + return expand_call (exp, target, false); + } + break; + + case P9V_BUILTIN_VINSERT4B: + case P9V_BUILTIN_VINSERT4B_DI: + case P9V_BUILTIN_VEC_VINSERT4B: + arg2 = CALL_EXPR_ARG (exp, 2); + STRIP_NOPS (arg2); + + /* Generate a normal call if it is invalid. */ + if (arg2 == error_mark_node) + return expand_call (exp, target, false); + + if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12) + { + error ("third argument to vec_vinsert4b must be 0..12"); + return expand_call (exp, target, false); + } + break; + + default: + break; + /* Fall through. */ + } + + /* Expand abs* operations. */ + d = bdesc_abs; + for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++) + if (d->code == fcode) + return altivec_expand_abs_builtin (d->icode, exp, target); + + /* Expand the AltiVec predicates. */ + d = bdesc_altivec_preds; + for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++) + if (d->code == fcode) + return altivec_expand_predicate_builtin (d->icode, exp, target); + + /* LV* are funky. We initialized them differently. 
*/ + switch (fcode) + { + case ALTIVEC_BUILTIN_LVSL: + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl, + exp, target, false); + case ALTIVEC_BUILTIN_LVSR: + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr, + exp, target, false); + case ALTIVEC_BUILTIN_LVEBX: + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx, + exp, target, false); + case ALTIVEC_BUILTIN_LVEHX: + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx, + exp, target, false); + case ALTIVEC_BUILTIN_LVEWX: + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx, + exp, target, false); + case ALTIVEC_BUILTIN_LVXL_V2DF: + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df, + exp, target, false); + case ALTIVEC_BUILTIN_LVXL_V2DI: + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di, + exp, target, false); + case ALTIVEC_BUILTIN_LVXL_V4SF: + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf, + exp, target, false); + case ALTIVEC_BUILTIN_LVXL: + case ALTIVEC_BUILTIN_LVXL_V4SI: + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si, + exp, target, false); + case ALTIVEC_BUILTIN_LVXL_V8HI: + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi, + exp, target, false); + case ALTIVEC_BUILTIN_LVXL_V16QI: + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi, + exp, target, false); + case ALTIVEC_BUILTIN_LVX_V2DF: + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op, + exp, target, false); + case ALTIVEC_BUILTIN_LVX_V2DI: + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op, + exp, target, false); + case ALTIVEC_BUILTIN_LVX_V4SF: + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op, + exp, target, false); + case ALTIVEC_BUILTIN_LVX: + case ALTIVEC_BUILTIN_LVX_V4SI: + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op, + exp, target, false); + case ALTIVEC_BUILTIN_LVX_V8HI: + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op, + exp, target, false); + case ALTIVEC_BUILTIN_LVX_V16QI: + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op, + exp, target, false); + case ALTIVEC_BUILTIN_LVLX: + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx, + exp, target, true); + case ALTIVEC_BUILTIN_LVLXL: + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl, + exp, target, true); + case ALTIVEC_BUILTIN_LVRX: + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx, + exp, target, true); + case ALTIVEC_BUILTIN_LVRXL: + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl, + exp, target, true); + case VSX_BUILTIN_LXVD2X_V1TI: + return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti, + exp, target, false); + case VSX_BUILTIN_LXVD2X_V2DF: + return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df, + exp, target, false); + case VSX_BUILTIN_LXVD2X_V2DI: + return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di, + exp, target, false); + case VSX_BUILTIN_LXVW4X_V4SF: + return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf, + exp, target, false); + case VSX_BUILTIN_LXVW4X_V4SI: + return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si, + exp, target, false); + case VSX_BUILTIN_LXVW4X_V8HI: + return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi, + exp, target, false); + case VSX_BUILTIN_LXVW4X_V16QI: + return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi, + exp, target, false); + /* For the following on big endian, it's ok to use any appropriate + unaligned-supporting load, so use a generic expander. 
For + little-endian, the exact element-reversing instruction must + be used. */ + case VSX_BUILTIN_LD_ELEMREV_V2DF: + { + enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df + : CODE_FOR_vsx_ld_elemrev_v2df); + return altivec_expand_lv_builtin (code, exp, target, false); + } + case VSX_BUILTIN_LD_ELEMREV_V2DI: + { + enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di + : CODE_FOR_vsx_ld_elemrev_v2di); + return altivec_expand_lv_builtin (code, exp, target, false); + } + case VSX_BUILTIN_LD_ELEMREV_V4SF: + { + enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf + : CODE_FOR_vsx_ld_elemrev_v4sf); + return altivec_expand_lv_builtin (code, exp, target, false); + } + case VSX_BUILTIN_LD_ELEMREV_V4SI: + { + enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si + : CODE_FOR_vsx_ld_elemrev_v4si); + return altivec_expand_lv_builtin (code, exp, target, false); + } + case VSX_BUILTIN_LD_ELEMREV_V8HI: + { + enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi + : CODE_FOR_vsx_ld_elemrev_v8hi); + return altivec_expand_lv_builtin (code, exp, target, false); + } + case VSX_BUILTIN_LD_ELEMREV_V16QI: + { + enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi + : CODE_FOR_vsx_ld_elemrev_v16qi); + return altivec_expand_lv_builtin (code, exp, target, false); + } + break; + default: + break; + /* Fall through. */ + } + + *expandedp = false; + return NULL_RTX; +} + +/* Expand the builtin in EXP and store the result in TARGET. Store + true in *EXPANDEDP if we found a builtin to expand. */ +static rtx +paired_expand_builtin (tree exp, rtx target, bool * expandedp) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); + const struct builtin_description *d; + size_t i; + + *expandedp = true; + + switch (fcode) + { + case PAIRED_BUILTIN_STX: + return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp); + case PAIRED_BUILTIN_LX: + return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target); + default: + break; + /* Fall through. */ + } + + /* Expand the paired predicates. */ + d = bdesc_paired_preds; + for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++) + if (d->code == fcode) + return paired_expand_predicate_builtin (d->icode, exp, target); + + *expandedp = false; + return NULL_RTX; +} + +/* Binops that need to be initialized manually, but can be expanded + automagically by rs6000_expand_binop_builtin. 
*/ +static const struct builtin_description bdesc_2arg_spe[] = +{ + { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX }, + { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX }, + { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX }, + { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX }, + { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX }, + { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX }, + { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX }, + { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX }, + { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX }, + { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX }, + { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX }, + { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD }, + { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW }, + { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH }, + { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE }, + { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU }, + { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS }, + { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT }, + { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT }, + { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT }, + { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT }, + { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT } +}; + +/* Expand the builtin in EXP and store the result in TARGET. Store + true in *EXPANDEDP if we found a builtin to expand. + + This expands the SPE builtins that are not simple unary and binary + operations. */ +static rtx +spe_expand_builtin (tree exp, rtx target, bool *expandedp) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + tree arg1, arg0; + enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); + enum insn_code icode; + machine_mode tmode, mode0; + rtx pat, op0; + const struct builtin_description *d; + size_t i; + + *expandedp = true; + + /* Syntax check for a 5-bit unsigned immediate. */ + switch (fcode) + { + case SPE_BUILTIN_EVSTDD: + case SPE_BUILTIN_EVSTDH: + case SPE_BUILTIN_EVSTDW: + case SPE_BUILTIN_EVSTWHE: + case SPE_BUILTIN_EVSTWHO: + case SPE_BUILTIN_EVSTWWE: + case SPE_BUILTIN_EVSTWWO: + arg1 = CALL_EXPR_ARG (exp, 2); + if (TREE_CODE (arg1) != INTEGER_CST + || TREE_INT_CST_LOW (arg1) & ~0x1f) + { + error ("argument 2 must be a 5-bit unsigned literal"); + return const0_rtx; + } + break; + default: + break; + } + + /* The evsplat*i instructions are not quite generic. 
*/ + switch (fcode) + { + case SPE_BUILTIN_EVSPLATFI: + return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi, + exp, target); + case SPE_BUILTIN_EVSPLATI: + return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati, + exp, target); + default: + break; + } + + d = bdesc_2arg_spe; + for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d) + if (d->code == fcode) + return rs6000_expand_binop_builtin (d->icode, exp, target); + + d = bdesc_spe_predicates; + for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d) + if (d->code == fcode) + return spe_expand_predicate_builtin (d->icode, exp, target); + + d = bdesc_spe_evsel; + for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d) + if (d->code == fcode) + return spe_expand_evsel_builtin (d->icode, exp, target); + + switch (fcode) + { + case SPE_BUILTIN_EVSTDDX: + return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp); + case SPE_BUILTIN_EVSTDHX: + return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp); + case SPE_BUILTIN_EVSTDWX: + return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp); + case SPE_BUILTIN_EVSTWHEX: + return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp); + case SPE_BUILTIN_EVSTWHOX: + return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp); + case SPE_BUILTIN_EVSTWWEX: + return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp); + case SPE_BUILTIN_EVSTWWOX: + return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp); + case SPE_BUILTIN_EVSTDD: + return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp); + case SPE_BUILTIN_EVSTDH: + return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp); + case SPE_BUILTIN_EVSTDW: + return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp); + case SPE_BUILTIN_EVSTWHE: + return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp); + case SPE_BUILTIN_EVSTWHO: + return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp); + case SPE_BUILTIN_EVSTWWE: + return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp); + case SPE_BUILTIN_EVSTWWO: + return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp); + case SPE_BUILTIN_MFSPEFSCR: + icode = CODE_FOR_spe_mfspefscr; + tmode = insn_data[icode].operand[0].mode; + + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + pat = GEN_FCN (icode) (target); + if (! pat) + return 0; + emit_insn (pat); + return target; + case SPE_BUILTIN_MTSPEFSCR: + icode = CODE_FOR_spe_mtspefscr; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + mode0 = insn_data[icode].operand[0].mode; + + if (arg0 == error_mark_node) + return const0_rtx; + + if (! 
(*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (op0); + if (pat) + emit_insn (pat); + return NULL_RTX; + default: + break; + } + + *expandedp = false; + return NULL_RTX; +} + +static rtx +paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target) +{ + rtx pat, scratch, tmp; + tree form = CALL_EXPR_ARG (exp, 0); + tree arg0 = CALL_EXPR_ARG (exp, 1); + tree arg1 = CALL_EXPR_ARG (exp, 2); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + machine_mode mode0 = insn_data[icode].operand[1].mode; + machine_mode mode1 = insn_data[icode].operand[2].mode; + int form_int; + enum rtx_code code; + + if (TREE_CODE (form) != INTEGER_CST) + { + error ("argument 1 of __builtin_paired_predicate must be a constant"); + return const0_rtx; + } + else + form_int = TREE_INT_CST_LOW (form); + + gcc_assert (mode0 == mode1); + + if (arg0 == error_mark_node || arg1 == error_mark_node) + return const0_rtx; + + if (target == 0 + || GET_MODE (target) != SImode + || !(*insn_data[icode].operand[0].predicate) (target, SImode)) + target = gen_reg_rtx (SImode); + if (!(*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + scratch = gen_reg_rtx (CCFPmode); + + pat = GEN_FCN (icode) (scratch, op0, op1); + if (!pat) + return const0_rtx; + + emit_insn (pat); + + switch (form_int) + { + /* LT bit. */ + case 0: + code = LT; + break; + /* GT bit. */ + case 1: + code = GT; + break; + /* EQ bit. */ + case 2: + code = EQ; + break; + /* UN bit. */ + case 3: + emit_insn (gen_move_from_CR_ov_bit (target, scratch)); + return target; + default: + error ("argument 1 of __builtin_paired_predicate is out of range"); + return const0_rtx; + } + + tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx); + emit_move_insn (target, tmp); + return target; +} + +static rtx +spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target) +{ + rtx pat, scratch, tmp; + tree form = CALL_EXPR_ARG (exp, 0); + tree arg0 = CALL_EXPR_ARG (exp, 1); + tree arg1 = CALL_EXPR_ARG (exp, 2); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + machine_mode mode0 = insn_data[icode].operand[1].mode; + machine_mode mode1 = insn_data[icode].operand[2].mode; + int form_int; + enum rtx_code code; + + if (TREE_CODE (form) != INTEGER_CST) + { + error ("argument 1 of __builtin_spe_predicate must be a constant"); + return const0_rtx; + } + else + form_int = TREE_INT_CST_LOW (form); + + gcc_assert (mode0 == mode1); + + if (arg0 == error_mark_node || arg1 == error_mark_node) + return const0_rtx; + + if (target == 0 + || GET_MODE (target) != SImode + || ! (*insn_data[icode].operand[0].predicate) (target, SImode)) + target = gen_reg_rtx (SImode); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + scratch = gen_reg_rtx (CCmode); + + pat = GEN_FCN (icode) (scratch, op0, op1); + if (! pat) + return const0_rtx; + emit_insn (pat); + + /* There are 4 variants for each predicate: _any_, _all_, _upper_, + _lower_. We use one compare, but look in different bits of the + CR for each variant. + + There are 2 elements in each SPE simd type (upper/lower). 
The CR + bits are set as follows: + + BIT0 | BIT 1 | BIT 2 | BIT 3 + U | L | (U | L) | (U & L) + + So, for an "all" relationship, BIT 3 would be set. + For an "any" relationship, BIT 2 would be set. Etc. + + Following traditional nomenclature, these bits map to: + + BIT0 | BIT 1 | BIT 2 | BIT 3 + LT | GT | EQ | OV + + Later, we will generate rtl to look in the LT/EQ/EQ/OV bits. + */ + + switch (form_int) + { + /* All variant. OV bit. */ + case 0: + /* We need to get to the OV bit, which is the ORDERED bit. We + could generate (ordered:SI (reg:CC xx) (const_int 0)), but + that's ugly and will make validate_condition_mode die. + So let's just use another pattern. */ + emit_insn (gen_move_from_CR_ov_bit (target, scratch)); + return target; + /* Any variant. EQ bit. */ + case 1: + code = EQ; + break; + /* Upper variant. LT bit. */ + case 2: + code = LT; + break; + /* Lower variant. GT bit. */ + case 3: + code = GT; + break; + default: + error ("argument 1 of __builtin_spe_predicate is out of range"); + return const0_rtx; + } + + tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx); + emit_move_insn (target, tmp); + + return target; +} + +/* The evsel builtins look like this: + + e = __builtin_spe_evsel_OP (a, b, c, d); + + and work like this: + + e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper]; + e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower]; +*/ + +static rtx +spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target) +{ + rtx pat, scratch; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + tree arg2 = CALL_EXPR_ARG (exp, 2); + tree arg3 = CALL_EXPR_ARG (exp, 3); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + rtx op2 = expand_normal (arg2); + rtx op3 = expand_normal (arg3); + machine_mode mode0 = insn_data[icode].operand[1].mode; + machine_mode mode1 = insn_data[icode].operand[2].mode; + + gcc_assert (mode0 == mode1); + + if (arg0 == error_mark_node || arg1 == error_mark_node + || arg2 == error_mark_node || arg3 == error_mark_node) + return const0_rtx; + + if (target == 0 + || GET_MODE (target) != mode0 + || ! (*insn_data[icode].operand[0].predicate) (target, mode0)) + target = gen_reg_rtx (mode0); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode0, op1); + if (! (*insn_data[icode].operand[1].predicate) (op2, mode1)) + op2 = copy_to_mode_reg (mode0, op2); + if (! (*insn_data[icode].operand[1].predicate) (op3, mode1)) + op3 = copy_to_mode_reg (mode0, op3); + + /* Generate the compare. */ + scratch = gen_reg_rtx (CCmode); + pat = GEN_FCN (icode) (scratch, op0, op1); + if (! pat) + return const0_rtx; + emit_insn (pat); + + if (mode0 == V2SImode) + emit_insn (gen_spe_evsel (target, op2, op3, scratch)); + else + emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch)); + + return target; +} + +/* Raise an error message for a builtin function that is called without the + appropriate target options being set. 
*/ + +static void +rs6000_invalid_builtin (enum rs6000_builtins fncode) +{ + size_t uns_fncode = (size_t)fncode; + const char *name = rs6000_builtin_info[uns_fncode].name; + HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask; + + gcc_assert (name != NULL); + if ((fnmask & RS6000_BTM_CELL) != 0) + error ("Builtin function %s is only valid for the cell processor", name); + else if ((fnmask & RS6000_BTM_VSX) != 0) + error ("Builtin function %s requires the -mvsx option", name); + else if ((fnmask & RS6000_BTM_HTM) != 0) + error ("Builtin function %s requires the -mhtm option", name); + else if ((fnmask & RS6000_BTM_ALTIVEC) != 0) + error ("Builtin function %s requires the -maltivec option", name); + else if ((fnmask & RS6000_BTM_PAIRED) != 0) + error ("Builtin function %s requires the -mpaired option", name); + else if ((fnmask & RS6000_BTM_SPE) != 0) + error ("Builtin function %s requires the -mspe option", name); + else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR)) + == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR)) + error ("Builtin function %s requires the -mhard-dfp and" + " -mpower8-vector options", name); + else if ((fnmask & RS6000_BTM_DFP) != 0) + error ("Builtin function %s requires the -mhard-dfp option", name); + else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0) + error ("Builtin function %s requires the -mpower8-vector option", name); + else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT)) + == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT)) + error ("Builtin function %s requires the -mcpu=power9 and" + " -m64 options", name); + else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0) + error ("Builtin function %s requires the -mcpu=power9 option", name); + else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT)) + == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT)) + error ("Builtin function %s requires the -mcpu=power9 and" + " -m64 options", name); + else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC) + error ("Builtin function %s requires the -mcpu=power9 option", name); + else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128)) + == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128)) + error ("Builtin function %s requires the -mhard-float and" + " -mlong-double-128 options", name); + else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0) + error ("Builtin function %s requires the -mhard-float option", name); + else if ((fnmask & RS6000_BTM_FLOAT128) != 0) + error ("Builtin function %s requires the -mfloat128 option", name); + else + error ("Builtin function %s is not supported with the current options", + name); +} + +/* Target hook for early folding of built-ins, shamelessly stolen + from ia64.c. 
*/ + +static tree +rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, + tree *args, bool ignore ATTRIBUTE_UNUSED) +{ + if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) + { + enum rs6000_builtins fn_code + = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); + switch (fn_code) + { + case RS6000_BUILTIN_NANQ: + case RS6000_BUILTIN_NANSQ: + { + tree type = TREE_TYPE (TREE_TYPE (fndecl)); + const char *str = c_getstr (*args); + int quiet = fn_code == RS6000_BUILTIN_NANQ; + REAL_VALUE_TYPE real; + + if (str && real_nan (&real, str, quiet, TYPE_MODE (type))) + return build_real (type, real); + return NULL_TREE; + } + case RS6000_BUILTIN_INFQ: + case RS6000_BUILTIN_HUGE_VALQ: + { + tree type = TREE_TYPE (TREE_TYPE (fndecl)); + REAL_VALUE_TYPE inf; + real_inf (&inf); + return build_real (type, inf); + } + default: + break; + } + } +#ifdef SUBTARGET_FOLD_BUILTIN + return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore); +#else + return NULL_TREE; +#endif +} + +/* Fold a machine-dependent built-in in GIMPLE. (For folding into + a constant, use rs6000_fold_builtin.) */ + +bool +rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) +{ + gimple *stmt = gsi_stmt (*gsi); + tree fndecl = gimple_call_fndecl (stmt); + gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD); + enum rs6000_builtins fn_code + = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); + tree arg0, arg1, lhs; + + switch (fn_code) + { + /* Flavors of vec_add. We deliberately don't expand + P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to + TImode, resulting in much poorer code generation. */ + case ALTIVEC_BUILTIN_VADDUBM: + case ALTIVEC_BUILTIN_VADDUHM: + case ALTIVEC_BUILTIN_VADDUWM: + case P8V_BUILTIN_VADDUDM: + case ALTIVEC_BUILTIN_VADDFP: + case VSX_BUILTIN_XVADDDP: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + gimple *g = gimple_build_assign (lhs, PLUS_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + /* Flavors of vec_sub. We deliberately don't expand + P8V_BUILTIN_VSUBUQM. */ + case ALTIVEC_BUILTIN_VSUBUBM: + case ALTIVEC_BUILTIN_VSUBUHM: + case ALTIVEC_BUILTIN_VSUBUWM: + case P8V_BUILTIN_VSUBUDM: + case ALTIVEC_BUILTIN_VSUBFP: + case VSX_BUILTIN_XVSUBDP: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + gimple *g = gimple_build_assign (lhs, MINUS_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + case VSX_BUILTIN_XVMULSP: + case VSX_BUILTIN_XVMULDP: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + gimple *g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + /* Even element flavors of vec_mul (signed). */ + case ALTIVEC_BUILTIN_VMULESB: + case ALTIVEC_BUILTIN_VMULESH: + /* Even element flavors of vec_mul (unsigned). */ + case ALTIVEC_BUILTIN_VMULEUB: + case ALTIVEC_BUILTIN_VMULEUH: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + /* Odd element flavors of vec_mul (signed). 
*/ + case ALTIVEC_BUILTIN_VMULOSB: + case ALTIVEC_BUILTIN_VMULOSH: + /* Odd element flavors of vec_mul (unsigned). */ + case ALTIVEC_BUILTIN_VMULOUB: + case ALTIVEC_BUILTIN_VMULOUH: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + /* Flavors of vec_div (Integer). */ + case VSX_BUILTIN_DIV_V2DI: + case VSX_BUILTIN_UDIV_V2DI: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + gimple *g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + /* Flavors of vec_div (Float). */ + case VSX_BUILTIN_XVDIVSP: + case VSX_BUILTIN_XVDIVDP: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + gimple *g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + /* Flavors of vec_and. */ + case ALTIVEC_BUILTIN_VAND: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + gimple *g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + /* Flavors of vec_andc. */ + case ALTIVEC_BUILTIN_VANDC: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); + gimple *g = gimple_build_assign(temp, BIT_NOT_EXPR, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_insert_before(gsi, g, GSI_SAME_STMT); + g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + /* Flavors of vec_nand. */ + case P8V_BUILTIN_VEC_NAND: + case P8V_BUILTIN_NAND_V16QI: + case P8V_BUILTIN_NAND_V8HI: + case P8V_BUILTIN_NAND_V4SI: + case P8V_BUILTIN_NAND_V4SF: + case P8V_BUILTIN_NAND_V2DF: + case P8V_BUILTIN_NAND_V2DI: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); + gimple *g = gimple_build_assign(temp, BIT_AND_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_insert_before(gsi, g, GSI_SAME_STMT); + g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + /* Flavors of vec_or. */ + case ALTIVEC_BUILTIN_VOR: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + gimple *g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + /* flavors of vec_orc. 
*/ + case P8V_BUILTIN_ORC_V16QI: + case P8V_BUILTIN_ORC_V8HI: + case P8V_BUILTIN_ORC_V4SI: + case P8V_BUILTIN_ORC_V4SF: + case P8V_BUILTIN_ORC_V2DF: + case P8V_BUILTIN_ORC_V2DI: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); + gimple *g = gimple_build_assign(temp, BIT_NOT_EXPR, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_insert_before(gsi, g, GSI_SAME_STMT); + g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + /* Flavors of vec_xor. */ + case ALTIVEC_BUILTIN_VXOR: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + gimple *g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + /* Flavors of vec_nor. */ + case ALTIVEC_BUILTIN_VNOR: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); + gimple *g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_insert_before(gsi, g, GSI_SAME_STMT); + g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + default: + break; + } + + return false; +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. */ + +static rtx +rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, + machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + enum rs6000_builtins fcode + = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl); + size_t uns_fcode = (size_t)fcode; + const struct builtin_description *d; + size_t i; + rtx ret; + bool success; + HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask; + bool func_valid_p = ((rs6000_builtin_mask & mask) == mask); + + if (TARGET_DEBUG_BUILTIN) + { + enum insn_code icode = rs6000_builtin_info[uns_fcode].icode; + const char *name1 = rs6000_builtin_info[uns_fcode].name; + const char *name2 = ((icode != CODE_FOR_nothing) + ? get_insn_name ((int)icode) + : "nothing"); + const char *name3; + + switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK) + { + default: name3 = "unknown"; break; + case RS6000_BTC_SPECIAL: name3 = "special"; break; + case RS6000_BTC_UNARY: name3 = "unary"; break; + case RS6000_BTC_BINARY: name3 = "binary"; break; + case RS6000_BTC_TERNARY: name3 = "ternary"; break; + case RS6000_BTC_PREDICATE: name3 = "predicate"; break; + case RS6000_BTC_ABS: name3 = "abs"; break; + case RS6000_BTC_EVSEL: name3 = "evsel"; break; + case RS6000_BTC_DST: name3 = "dst"; break; + } + + + fprintf (stderr, + "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n", + (name1) ? name1 : "---", fcode, + (name2) ? name2 : "---", (int)icode, + name3, + func_valid_p ? "" : ", not valid"); + } + + if (!func_valid_p) + { + rs6000_invalid_builtin (fcode); + + /* Given it is invalid, just generate a normal call. 
*/ + return expand_call (exp, target, ignore); + } + + switch (fcode) + { + case RS6000_BUILTIN_RECIP: + return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target); + + case RS6000_BUILTIN_RECIPF: + return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target); + + case RS6000_BUILTIN_RSQRTF: + return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target); + + case RS6000_BUILTIN_RSQRT: + return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target); + + case POWER7_BUILTIN_BPERMD: + return rs6000_expand_binop_builtin (((TARGET_64BIT) + ? CODE_FOR_bpermd_di + : CODE_FOR_bpermd_si), exp, target); + + case RS6000_BUILTIN_GET_TB: + return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase, + target); + + case RS6000_BUILTIN_MFTB: + return rs6000_expand_zeroop_builtin (((TARGET_64BIT) + ? CODE_FOR_rs6000_mftb_di + : CODE_FOR_rs6000_mftb_si), + target); + + case RS6000_BUILTIN_MFFS: + return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target); + + case RS6000_BUILTIN_MTFSF: + return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp); + + case RS6000_BUILTIN_CPU_INIT: + case RS6000_BUILTIN_CPU_IS: + case RS6000_BUILTIN_CPU_SUPPORTS: + return cpu_expand_builtin (fcode, exp, target); + + case ALTIVEC_BUILTIN_MASK_FOR_LOAD: + case ALTIVEC_BUILTIN_MASK_FOR_STORE: + { + int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct + : (int) CODE_FOR_altivec_lvsl_direct); + machine_mode tmode = insn_data[icode].operand[0].mode; + machine_mode mode = insn_data[icode].operand[1].mode; + tree arg; + rtx op, addr, pat; + + gcc_assert (TARGET_ALTIVEC); + + arg = CALL_EXPR_ARG (exp, 0); + gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg))); + op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL); + addr = memory_address (mode, op); + if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE) + op = addr; + else + { + /* For the load case need to negate the address. */ + op = gen_reg_rtx (GET_MODE (addr)); + emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr))); + } + op = gen_rtx_MEM (mode, op); + + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + pat = GEN_FCN (icode) (target, op); + if (!pat) + return 0; + emit_insn (pat); + + return target; + } + + case ALTIVEC_BUILTIN_VCFUX: + case ALTIVEC_BUILTIN_VCFSX: + case ALTIVEC_BUILTIN_VCTUXS: + case ALTIVEC_BUILTIN_VCTSXS: + /* FIXME: There's got to be a nicer way to handle this case than + constructing a new CALL_EXPR. */ + if (call_expr_nargs (exp) == 1) + { + exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp), + 2, CALL_EXPR_ARG (exp, 0), integer_zero_node); + } + break; + + default: + break; + } + + if (TARGET_ALTIVEC) + { + ret = altivec_expand_builtin (exp, target, &success); + + if (success) + return ret; + } + if (TARGET_SPE) + { + ret = spe_expand_builtin (exp, target, &success); + + if (success) + return ret; + } + if (TARGET_PAIRED_FLOAT) + { + ret = paired_expand_builtin (exp, target, &success); + + if (success) + return ret; + } + if (TARGET_HTM) + { + ret = htm_expand_builtin (exp, target, &success); + + if (success) + return ret; + } + + unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK; + /* RS6000_BTC_SPECIAL represents no-operand operators. */ + gcc_assert (attr == RS6000_BTC_UNARY + || attr == RS6000_BTC_BINARY + || attr == RS6000_BTC_TERNARY + || attr == RS6000_BTC_SPECIAL); + + /* Handle simple unary operations. 
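Several of the special cases above back documented intrinsics; a small usage sketch for the time-base pair, where do_work is a placeholder:

      unsigned long long t0 = __builtin_ppc_get_timebase ();
      do_work ();
      unsigned long long t1 = __builtin_ppc_get_timebase ();
      unsigned long long ticks = t1 - t0;

   __builtin_ppc_get_timebase always yields the full 64-bit time base, while __builtin_ppc_mftb returns a register-sized value (32 bits on 32-bit targets), matching the two expanders selected above.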
*/ + d = bdesc_1arg; + for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++) + if (d->code == fcode) + return rs6000_expand_unop_builtin (d->icode, exp, target); + + /* Handle simple binary operations. */ + d = bdesc_2arg; + for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++) + if (d->code == fcode) + return rs6000_expand_binop_builtin (d->icode, exp, target); + + /* Handle simple ternary operations. */ + d = bdesc_3arg; + for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++) + if (d->code == fcode) + return rs6000_expand_ternop_builtin (d->icode, exp, target); + + /* Handle simple no-argument operations. */ + d = bdesc_0arg; + for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++) + if (d->code == fcode) + return rs6000_expand_zeroop_builtin (d->icode, target); + + gcc_unreachable (); +} + +/* Create a builtin vector type with a name. Taking care not to give + the canonical type a name. */ + +static tree +rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts) +{ + tree result = build_vector_type (elt_type, num_elts); + + /* Copy so we don't give the canonical type a name. */ + result = build_variant_type_copy (result); + + add_builtin_type (name, result); + + return result; +} + +static void +rs6000_init_builtins (void) +{ + tree tdecl; + tree ftype; + machine_mode mode; + + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n", + (TARGET_PAIRED_FLOAT) ? ", paired" : "", + (TARGET_SPE) ? ", spe" : "", + (TARGET_ALTIVEC) ? ", altivec" : "", + (TARGET_VSX) ? ", vsx" : ""); + + V2SI_type_node = build_vector_type (intSI_type_node, 2); + V2SF_type_node = build_vector_type (float_type_node, 2); + V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long" + : "__vector long long", + intDI_type_node, 2); + V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2); + V4HI_type_node = build_vector_type (intHI_type_node, 4); + V4SI_type_node = rs6000_vector_type ("__vector signed int", + intSI_type_node, 4); + V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4); + V8HI_type_node = rs6000_vector_type ("__vector signed short", + intHI_type_node, 8); + V16QI_type_node = rs6000_vector_type ("__vector signed char", + intQI_type_node, 16); + + unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char", + unsigned_intQI_type_node, 16); + unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short", + unsigned_intHI_type_node, 8); + unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int", + unsigned_intSI_type_node, 4); + unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 + ? "__vector unsigned long" + : "__vector unsigned long long", + unsigned_intDI_type_node, 2); + + opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2); + opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2); + opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node); + opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4); + + const_str_type_node + = build_pointer_type (build_qualified_type (char_type_node, + TYPE_QUAL_CONST)); + + /* We use V1TI mode as a special container to hold __int128_t items that + must live in VSX registers. */ + if (intTI_type_node) + { + V1TI_type_node = rs6000_vector_type ("__vector __int128", + intTI_type_node, 1); + unsigned_V1TI_type_node + = rs6000_vector_type ("__vector unsigned __int128", + unsigned_intTI_type_node, 1); + } + + /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...' 
+     types, especially in C++ land.  Similarly, 'vector pixel' is distinct from
+     'vector unsigned short'.  */
+
+  bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
+  bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
+  bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
+  bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
+  pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
+
+  long_integer_type_internal_node = long_integer_type_node;
+  long_unsigned_type_internal_node = long_unsigned_type_node;
+  long_long_integer_type_internal_node = long_long_integer_type_node;
+  long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
+  intQI_type_internal_node = intQI_type_node;
+  uintQI_type_internal_node = unsigned_intQI_type_node;
+  intHI_type_internal_node = intHI_type_node;
+  uintHI_type_internal_node = unsigned_intHI_type_node;
+  intSI_type_internal_node = intSI_type_node;
+  uintSI_type_internal_node = unsigned_intSI_type_node;
+  intDI_type_internal_node = intDI_type_node;
+  uintDI_type_internal_node = unsigned_intDI_type_node;
+  intTI_type_internal_node = intTI_type_node;
+  uintTI_type_internal_node = unsigned_intTI_type_node;
+  float_type_internal_node = float_type_node;
+  double_type_internal_node = double_type_node;
+  long_double_type_internal_node = long_double_type_node;
+  dfloat64_type_internal_node = dfloat64_type_node;
+  dfloat128_type_internal_node = dfloat128_type_node;
+  void_type_internal_node = void_type_node;
+
+  /* 128-bit floating point support.  KFmode is IEEE 128-bit floating point.
+     IFmode is the IBM extended 128-bit format that is a pair of doubles.
+     TFmode will be either IEEE 128-bit floating point or the IBM double-double
+     format that uses a pair of doubles, depending on the switches and
+     defaults.
+
+     We do not enable the actual __float128 keyword unless the user explicitly
+     asks for it, because the library support is not yet complete.
+
+     If we don't support either 128-bit IBM double double or IEEE 128-bit
+     floating point, we need to make sure the type is non-zero or else the
+     self-test fails during bootstrap.
+
+     We don't register a built-in type for __ibm128 if the type is the same as
+     long double.  Instead we add a #define for __ibm128 in
+     rs6000_cpu_cpp_builtins to long double.  */
+  if (TARGET_LONG_DOUBLE_128 && FLOAT128_IEEE_P (TFmode))
+    {
+      ibm128_float_type_node = make_node (REAL_TYPE);
+      TYPE_PRECISION (ibm128_float_type_node) = 128;
+      SET_TYPE_MODE (ibm128_float_type_node, IFmode);
+      layout_type (ibm128_float_type_node);
+
+      lang_hooks.types.register_builtin_type (ibm128_float_type_node,
+                                              "__ibm128");
+    }
+  else
+    ibm128_float_type_node = long_double_type_node;
+
+  if (TARGET_FLOAT128_KEYWORD)
+    {
+      ieee128_float_type_node = float128_type_node;
+      lang_hooks.types.register_builtin_type (ieee128_float_type_node,
+                                              "__float128");
+    }
+
+  else if (TARGET_FLOAT128_TYPE)
+    {
+      ieee128_float_type_node = make_node (REAL_TYPE);
+      TYPE_PRECISION (ieee128_float_type_node) = 128;
+      SET_TYPE_MODE (ieee128_float_type_node, KFmode);
+      layout_type (ieee128_float_type_node);
+
+      /* If we are not exporting the __float128/_Float128 keywords, we need a
+         keyword to get the types created.  Use __ieee128 as the dummy
+         keyword.
*/ + lang_hooks.types.register_builtin_type (ieee128_float_type_node, + "__ieee128"); + } + + else + ieee128_float_type_node = long_double_type_node; + + /* Initialize the modes for builtin_function_type, mapping a machine mode to + tree type node. */ + builtin_mode_to_type[QImode][0] = integer_type_node; + builtin_mode_to_type[HImode][0] = integer_type_node; + builtin_mode_to_type[SImode][0] = intSI_type_node; + builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node; + builtin_mode_to_type[DImode][0] = intDI_type_node; + builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node; + builtin_mode_to_type[TImode][0] = intTI_type_node; + builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node; + builtin_mode_to_type[SFmode][0] = float_type_node; + builtin_mode_to_type[DFmode][0] = double_type_node; + builtin_mode_to_type[IFmode][0] = ibm128_float_type_node; + builtin_mode_to_type[KFmode][0] = ieee128_float_type_node; + builtin_mode_to_type[TFmode][0] = long_double_type_node; + builtin_mode_to_type[DDmode][0] = dfloat64_type_node; + builtin_mode_to_type[TDmode][0] = dfloat128_type_node; + builtin_mode_to_type[V1TImode][0] = V1TI_type_node; + builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node; + builtin_mode_to_type[V2SImode][0] = V2SI_type_node; + builtin_mode_to_type[V2SFmode][0] = V2SF_type_node; + builtin_mode_to_type[V2DImode][0] = V2DI_type_node; + builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node; + builtin_mode_to_type[V2DFmode][0] = V2DF_type_node; + builtin_mode_to_type[V4HImode][0] = V4HI_type_node; + builtin_mode_to_type[V4SImode][0] = V4SI_type_node; + builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node; + builtin_mode_to_type[V4SFmode][0] = V4SF_type_node; + builtin_mode_to_type[V8HImode][0] = V8HI_type_node; + builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node; + builtin_mode_to_type[V16QImode][0] = V16QI_type_node; + builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node; + + tdecl = add_builtin_type ("__bool char", bool_char_type_node); + TYPE_NAME (bool_char_type_node) = tdecl; + + tdecl = add_builtin_type ("__bool short", bool_short_type_node); + TYPE_NAME (bool_short_type_node) = tdecl; + + tdecl = add_builtin_type ("__bool int", bool_int_type_node); + TYPE_NAME (bool_int_type_node) = tdecl; + + tdecl = add_builtin_type ("__pixel", pixel_type_node); + TYPE_NAME (pixel_type_node) = tdecl; + + bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char", + bool_char_type_node, 16); + bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short", + bool_short_type_node, 8); + bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int", + bool_int_type_node, 4); + bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 + ? "__vector __bool long" + : "__vector __bool long long", + bool_long_type_node, 2); + pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel", + pixel_type_node, 8); + + /* Paired and SPE builtins are only available if you build a compiler with + the appropriate options, so only create those builtins with the + appropriate compiler option. Create Altivec and VSX builtins on machines + with at least the general purpose extensions (970 and newer) to allow the + use of the target attribute. 
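A sketch of the use this enables, assuming altivec.h is included and add_vsx is just an illustrative name: a file compiled without -mvsx can still carry a VSX variant guarded by the attribute, because these built-ins exist whenever the compiler itself supports them.

      __attribute__ ((target ("vsx")))
      vector double
      add_vsx (vector double a, vector double b)
      {
        return vec_add (a, b);
      }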
*/ + if (TARGET_PAIRED_FLOAT) + paired_init_builtins (); + if (TARGET_SPE) + spe_init_builtins (); + if (TARGET_EXTRA_BUILTINS) + altivec_init_builtins (); + if (TARGET_HTM) + htm_init_builtins (); + + if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT) + rs6000_common_init_builtins (); + + ftype = build_function_type_list (ieee128_float_type_node, + const_str_type_node, NULL_TREE); + def_builtin ("__builtin_nanq", ftype, RS6000_BUILTIN_NANQ); + def_builtin ("__builtin_nansq", ftype, RS6000_BUILTIN_NANSQ); + + ftype = build_function_type_list (ieee128_float_type_node, NULL_TREE); + def_builtin ("__builtin_infq", ftype, RS6000_BUILTIN_INFQ); + def_builtin ("__builtin_huge_valq", ftype, RS6000_BUILTIN_HUGE_VALQ); + + ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode, + RS6000_BUILTIN_RECIP, "__builtin_recipdiv"); + def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP); + + ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode, + RS6000_BUILTIN_RECIPF, "__builtin_recipdivf"); + def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF); + + ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode, + RS6000_BUILTIN_RSQRT, "__builtin_rsqrt"); + def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT); + + ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode, + RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf"); + def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF); + + mode = (TARGET_64BIT) ? DImode : SImode; + ftype = builtin_function_type (mode, mode, mode, VOIDmode, + POWER7_BUILTIN_BPERMD, "__builtin_bpermd"); + def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD); + + ftype = build_function_type_list (unsigned_intDI_type_node, + NULL_TREE); + def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB); + + if (TARGET_64BIT) + ftype = build_function_type_list (unsigned_intDI_type_node, + NULL_TREE); + else + ftype = build_function_type_list (unsigned_intSI_type_node, + NULL_TREE); + def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB); + + ftype = build_function_type_list (double_type_node, NULL_TREE); + def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS); + + ftype = build_function_type_list (void_type_node, + intSI_type_node, double_type_node, + NULL_TREE); + def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF); + + ftype = build_function_type_list (void_type_node, NULL_TREE); + def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT); + + ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node, + NULL_TREE); + def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS); + def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS); + + /* AIX libm provides clog as __clog. */ + if (TARGET_XCOFF && + (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE) + set_user_assembler_name (tdecl, "__clog"); + +#ifdef SUBTARGET_INIT_BUILTINS + SUBTARGET_INIT_BUILTINS; +#endif +} + +/* Returns the rs6000 builtin decl for CODE. 
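The __builtin_cpu_* entries defined above follow the usual runtime-dispatch pattern; a minimal sketch, where use_vsx_loop and generic_loop are placeholders:

      __builtin_cpu_init ();
      if (__builtin_cpu_supports ("vsx"))
        use_vsx_loop ();
      else
        generic_loop ();

   All three codes are routed through cpu_expand_builtin in the expander above.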
*/ + +static tree +rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT fnmask; + + if (code >= RS6000_BUILTIN_COUNT) + return error_mark_node; + + fnmask = rs6000_builtin_info[code].mask; + if ((fnmask & rs6000_builtin_mask) != fnmask) + { + rs6000_invalid_builtin ((enum rs6000_builtins)code); + return error_mark_node; + } + + return rs6000_builtin_decls[code]; +} + +static void +spe_init_builtins (void) +{ + tree puint_type_node = build_pointer_type (unsigned_type_node); + tree pushort_type_node = build_pointer_type (short_unsigned_type_node); + const struct builtin_description *d; + size_t i; + HOST_WIDE_INT builtin_mask = rs6000_builtin_mask; + + tree v2si_ftype_4_v2si + = build_function_type_list (opaque_V2SI_type_node, + opaque_V2SI_type_node, + opaque_V2SI_type_node, + opaque_V2SI_type_node, + opaque_V2SI_type_node, + NULL_TREE); + + tree v2sf_ftype_4_v2sf + = build_function_type_list (opaque_V2SF_type_node, + opaque_V2SF_type_node, + opaque_V2SF_type_node, + opaque_V2SF_type_node, + opaque_V2SF_type_node, + NULL_TREE); + + tree int_ftype_int_v2si_v2si + = build_function_type_list (integer_type_node, + integer_type_node, + opaque_V2SI_type_node, + opaque_V2SI_type_node, + NULL_TREE); + + tree int_ftype_int_v2sf_v2sf + = build_function_type_list (integer_type_node, + integer_type_node, + opaque_V2SF_type_node, + opaque_V2SF_type_node, + NULL_TREE); + + tree void_ftype_v2si_puint_int + = build_function_type_list (void_type_node, + opaque_V2SI_type_node, + puint_type_node, + integer_type_node, + NULL_TREE); + + tree void_ftype_v2si_puint_char + = build_function_type_list (void_type_node, + opaque_V2SI_type_node, + puint_type_node, + char_type_node, + NULL_TREE); + + tree void_ftype_v2si_pv2si_int + = build_function_type_list (void_type_node, + opaque_V2SI_type_node, + opaque_p_V2SI_type_node, + integer_type_node, + NULL_TREE); + + tree void_ftype_v2si_pv2si_char + = build_function_type_list (void_type_node, + opaque_V2SI_type_node, + opaque_p_V2SI_type_node, + char_type_node, + NULL_TREE); + + tree void_ftype_int + = build_function_type_list (void_type_node, integer_type_node, NULL_TREE); + + tree int_ftype_void + = build_function_type_list (integer_type_node, NULL_TREE); + + tree v2si_ftype_pv2si_int + = build_function_type_list (opaque_V2SI_type_node, + opaque_p_V2SI_type_node, + integer_type_node, + NULL_TREE); + + tree v2si_ftype_puint_int + = build_function_type_list (opaque_V2SI_type_node, + puint_type_node, + integer_type_node, + NULL_TREE); + + tree v2si_ftype_pushort_int + = build_function_type_list (opaque_V2SI_type_node, + pushort_type_node, + integer_type_node, + NULL_TREE); + + tree v2si_ftype_signed_char + = build_function_type_list (opaque_V2SI_type_node, + signed_char_type_node, + NULL_TREE); + + add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node); + + /* Initialize irregular SPE builtins. 
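The hand-built signature names follow a simple convention; v2si_ftype_pv2si_int, for instance, corresponds roughly to the C prototype below, with __ev64_opaque__ being the opaque V2SI type registered just above:

      __ev64_opaque__ f (__ev64_opaque__ *ptr, int offset);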
*/ + + def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR); + def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR); + def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX); + def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX); + def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX); + def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX); + def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX); + def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX); + def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX); + def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD); + def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH); + def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW); + def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE); + def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO); + def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE); + def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO); + def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI); + def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI); + + /* Loads. */ + def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX); + def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX); + def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX); + def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX); + def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX); + def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX); + def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX); + def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX); + def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX); + def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX); + def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX); + def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD); + def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW); + def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH); + def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT); + def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT); + def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT); + def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE); + def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS); + def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU); + def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT); + def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT); + 
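A minimal usage sketch for the load/store entries above, assuming src and dst point to suitably aligned storage and that the offsets are the small constant values the underlying SPE instructions require:

      __ev64_opaque__ *src, *dst;
      __ev64_opaque__ v;

      v = __builtin_spe_evldd (src, 0);
      __builtin_spe_evstdd (v, dst, 0);

In practice these are normally reached through the ev_* wrappers in spe.h rather than called directly.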
+ /* Predicates. */ + d = bdesc_spe_predicates; + for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++) + { + tree type; + HOST_WIDE_INT mask = d->mask; + + if ((mask & builtin_mask) != mask) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "spe_init_builtins, skip predicate %s\n", + d->name); + continue; + } + + /* Cannot define builtin if the instruction is disabled. */ + gcc_assert (d->icode != CODE_FOR_nothing); + switch (insn_data[d->icode].operand[1].mode) + { + case V2SImode: + type = int_ftype_int_v2si_v2si; + break; + case V2SFmode: + type = int_ftype_int_v2sf_v2sf; + break; + default: + gcc_unreachable (); + } + + def_builtin (d->name, type, d->code); + } + + /* Evsel predicates. */ + d = bdesc_spe_evsel; + for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++) + { + tree type; + HOST_WIDE_INT mask = d->mask; + + if ((mask & builtin_mask) != mask) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "spe_init_builtins, skip evsel %s\n", + d->name); + continue; + } + + /* Cannot define builtin if the instruction is disabled. */ + gcc_assert (d->icode != CODE_FOR_nothing); + switch (insn_data[d->icode].operand[1].mode) + { + case V2SImode: + type = v2si_ftype_4_v2si; + break; + case V2SFmode: + type = v2sf_ftype_4_v2sf; + break; + default: + gcc_unreachable (); + } + + def_builtin (d->name, type, d->code); + } +} + +static void +paired_init_builtins (void) +{ + const struct builtin_description *d; + size_t i; + HOST_WIDE_INT builtin_mask = rs6000_builtin_mask; + + tree int_ftype_int_v2sf_v2sf + = build_function_type_list (integer_type_node, + integer_type_node, + V2SF_type_node, + V2SF_type_node, + NULL_TREE); + tree pcfloat_type_node = + build_pointer_type (build_qualified_type + (float_type_node, TYPE_QUAL_CONST)); + + tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node, + long_integer_type_node, + pcfloat_type_node, + NULL_TREE); + tree void_ftype_v2sf_long_pcfloat = + build_function_type_list (void_type_node, + V2SF_type_node, + long_integer_type_node, + pcfloat_type_node, + NULL_TREE); + + + def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat, + PAIRED_BUILTIN_LX); + + + def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat, + PAIRED_BUILTIN_STX); + + /* Predicates. */ + d = bdesc_paired_preds; + for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++) + { + tree type; + HOST_WIDE_INT mask = d->mask; + + if ((mask & builtin_mask) != mask) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "paired_init_builtins, skip predicate %s\n", + d->name); + continue; + } + + /* Cannot define builtin if the instruction is disabled. 
*/ + gcc_assert (d->icode != CODE_FOR_nothing); + + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n", + (int)i, get_insn_name (d->icode), (int)d->icode, + GET_MODE_NAME (insn_data[d->icode].operand[1].mode)); + + switch (insn_data[d->icode].operand[1].mode) + { + case V2SFmode: + type = int_ftype_int_v2sf_v2sf; + break; + default: + gcc_unreachable (); + } + + def_builtin (d->name, type, d->code); + } +} + +static void +altivec_init_builtins (void) +{ + const struct builtin_description *d; + size_t i; + tree ftype; + tree decl; + HOST_WIDE_INT builtin_mask = rs6000_builtin_mask; + + tree pvoid_type_node = build_pointer_type (void_type_node); + + tree pcvoid_type_node + = build_pointer_type (build_qualified_type (void_type_node, + TYPE_QUAL_CONST)); + + tree int_ftype_opaque + = build_function_type_list (integer_type_node, + opaque_V4SI_type_node, NULL_TREE); + tree opaque_ftype_opaque + = build_function_type_list (integer_type_node, NULL_TREE); + tree opaque_ftype_opaque_int + = build_function_type_list (opaque_V4SI_type_node, + opaque_V4SI_type_node, integer_type_node, NULL_TREE); + tree opaque_ftype_opaque_opaque_int + = build_function_type_list (opaque_V4SI_type_node, + opaque_V4SI_type_node, opaque_V4SI_type_node, + integer_type_node, NULL_TREE); + tree opaque_ftype_opaque_opaque_opaque + = build_function_type_list (opaque_V4SI_type_node, + opaque_V4SI_type_node, opaque_V4SI_type_node, + opaque_V4SI_type_node, NULL_TREE); + tree opaque_ftype_opaque_opaque + = build_function_type_list (opaque_V4SI_type_node, + opaque_V4SI_type_node, opaque_V4SI_type_node, + NULL_TREE); + tree int_ftype_int_opaque_opaque + = build_function_type_list (integer_type_node, + integer_type_node, opaque_V4SI_type_node, + opaque_V4SI_type_node, NULL_TREE); + tree int_ftype_int_v4si_v4si + = build_function_type_list (integer_type_node, + integer_type_node, V4SI_type_node, + V4SI_type_node, NULL_TREE); + tree int_ftype_int_v2di_v2di + = build_function_type_list (integer_type_node, + integer_type_node, V2DI_type_node, + V2DI_type_node, NULL_TREE); + tree void_ftype_v4si + = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE); + tree v8hi_ftype_void + = build_function_type_list (V8HI_type_node, NULL_TREE); + tree void_ftype_void + = build_function_type_list (void_type_node, NULL_TREE); + tree void_ftype_int + = build_function_type_list (void_type_node, integer_type_node, NULL_TREE); + + tree opaque_ftype_long_pcvoid + = build_function_type_list (opaque_V4SI_type_node, + long_integer_type_node, pcvoid_type_node, + NULL_TREE); + tree v16qi_ftype_long_pcvoid + = build_function_type_list (V16QI_type_node, + long_integer_type_node, pcvoid_type_node, + NULL_TREE); + tree v8hi_ftype_long_pcvoid + = build_function_type_list (V8HI_type_node, + long_integer_type_node, pcvoid_type_node, + NULL_TREE); + tree v4si_ftype_long_pcvoid + = build_function_type_list (V4SI_type_node, + long_integer_type_node, pcvoid_type_node, + NULL_TREE); + tree v4sf_ftype_long_pcvoid + = build_function_type_list (V4SF_type_node, + long_integer_type_node, pcvoid_type_node, + NULL_TREE); + tree v2df_ftype_long_pcvoid + = build_function_type_list (V2DF_type_node, + long_integer_type_node, pcvoid_type_node, + NULL_TREE); + tree v2di_ftype_long_pcvoid + = build_function_type_list (V2DI_type_node, + long_integer_type_node, pcvoid_type_node, + NULL_TREE); + + tree void_ftype_opaque_long_pvoid + = build_function_type_list (void_type_node, + opaque_V4SI_type_node, long_integer_type_node, + 
pvoid_type_node, NULL_TREE); + tree void_ftype_v4si_long_pvoid + = build_function_type_list (void_type_node, + V4SI_type_node, long_integer_type_node, + pvoid_type_node, NULL_TREE); + tree void_ftype_v16qi_long_pvoid + = build_function_type_list (void_type_node, + V16QI_type_node, long_integer_type_node, + pvoid_type_node, NULL_TREE); + + tree void_ftype_v16qi_pvoid_long + = build_function_type_list (void_type_node, + V16QI_type_node, pvoid_type_node, + long_integer_type_node, NULL_TREE); + + tree void_ftype_v8hi_long_pvoid + = build_function_type_list (void_type_node, + V8HI_type_node, long_integer_type_node, + pvoid_type_node, NULL_TREE); + tree void_ftype_v4sf_long_pvoid + = build_function_type_list (void_type_node, + V4SF_type_node, long_integer_type_node, + pvoid_type_node, NULL_TREE); + tree void_ftype_v2df_long_pvoid + = build_function_type_list (void_type_node, + V2DF_type_node, long_integer_type_node, + pvoid_type_node, NULL_TREE); + tree void_ftype_v2di_long_pvoid + = build_function_type_list (void_type_node, + V2DI_type_node, long_integer_type_node, + pvoid_type_node, NULL_TREE); + tree int_ftype_int_v8hi_v8hi + = build_function_type_list (integer_type_node, + integer_type_node, V8HI_type_node, + V8HI_type_node, NULL_TREE); + tree int_ftype_int_v16qi_v16qi + = build_function_type_list (integer_type_node, + integer_type_node, V16QI_type_node, + V16QI_type_node, NULL_TREE); + tree int_ftype_int_v4sf_v4sf + = build_function_type_list (integer_type_node, + integer_type_node, V4SF_type_node, + V4SF_type_node, NULL_TREE); + tree int_ftype_int_v2df_v2df + = build_function_type_list (integer_type_node, + integer_type_node, V2DF_type_node, + V2DF_type_node, NULL_TREE); + tree v2di_ftype_v2di + = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); + tree v4si_ftype_v4si + = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE); + tree v8hi_ftype_v8hi + = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE); + tree v16qi_ftype_v16qi + = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE); + tree v4sf_ftype_v4sf + = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); + tree v2df_ftype_v2df + = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE); + tree void_ftype_pcvoid_int_int + = build_function_type_list (void_type_node, + pcvoid_type_node, integer_type_node, + integer_type_node, NULL_TREE); + + def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR); + def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR); + def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL); + def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS); + def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL); + def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR); + def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX); + def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX); + def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX); + def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL); + def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid, + ALTIVEC_BUILTIN_LVXL_V2DF); + def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid, + ALTIVEC_BUILTIN_LVXL_V2DI); + def_builtin ("__builtin_altivec_lvxl_v4sf", 
v4sf_ftype_long_pcvoid, + ALTIVEC_BUILTIN_LVXL_V4SF); + def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid, + ALTIVEC_BUILTIN_LVXL_V4SI); + def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid, + ALTIVEC_BUILTIN_LVXL_V8HI); + def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid, + ALTIVEC_BUILTIN_LVXL_V16QI); + def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX); + def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid, + ALTIVEC_BUILTIN_LVX_V2DF); + def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid, + ALTIVEC_BUILTIN_LVX_V2DI); + def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid, + ALTIVEC_BUILTIN_LVX_V4SF); + def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid, + ALTIVEC_BUILTIN_LVX_V4SI); + def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid, + ALTIVEC_BUILTIN_LVX_V8HI); + def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid, + ALTIVEC_BUILTIN_LVX_V16QI); + def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX); + def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid, + ALTIVEC_BUILTIN_STVX_V2DF); + def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid, + ALTIVEC_BUILTIN_STVX_V2DI); + def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid, + ALTIVEC_BUILTIN_STVX_V4SF); + def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid, + ALTIVEC_BUILTIN_STVX_V4SI); + def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid, + ALTIVEC_BUILTIN_STVX_V8HI); + def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid, + ALTIVEC_BUILTIN_STVX_V16QI); + def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX); + def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL); + def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid, + ALTIVEC_BUILTIN_STVXL_V2DF); + def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid, + ALTIVEC_BUILTIN_STVXL_V2DI); + def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid, + ALTIVEC_BUILTIN_STVXL_V4SF); + def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid, + ALTIVEC_BUILTIN_STVXL_V4SI); + def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid, + ALTIVEC_BUILTIN_STVXL_V8HI); + def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid, + ALTIVEC_BUILTIN_STVXL_V16QI); + def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX); + def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX); + def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD); + def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE); + def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL); + def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL); + def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR); + def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX); + def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX); + def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX); + def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, 
ALTIVEC_BUILTIN_VEC_ST); + def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE); + def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL); + def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX); + def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX); + def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX); + + def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid, + VSX_BUILTIN_LXVD2X_V2DF); + def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid, + VSX_BUILTIN_LXVD2X_V2DI); + def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid, + VSX_BUILTIN_LXVW4X_V4SF); + def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid, + VSX_BUILTIN_LXVW4X_V4SI); + def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid, + VSX_BUILTIN_LXVW4X_V8HI); + def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid, + VSX_BUILTIN_LXVW4X_V16QI); + def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid, + VSX_BUILTIN_STXVD2X_V2DF); + def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid, + VSX_BUILTIN_STXVD2X_V2DI); + def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid, + VSX_BUILTIN_STXVW4X_V4SF); + def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid, + VSX_BUILTIN_STXVW4X_V4SI); + def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid, + VSX_BUILTIN_STXVW4X_V8HI); + def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid, + VSX_BUILTIN_STXVW4X_V16QI); + + def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid, + VSX_BUILTIN_LD_ELEMREV_V2DF); + def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid, + VSX_BUILTIN_LD_ELEMREV_V2DI); + def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid, + VSX_BUILTIN_LD_ELEMREV_V4SF); + def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid, + VSX_BUILTIN_LD_ELEMREV_V4SI); + def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid, + VSX_BUILTIN_ST_ELEMREV_V2DF); + def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid, + VSX_BUILTIN_ST_ELEMREV_V2DI); + def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid, + VSX_BUILTIN_ST_ELEMREV_V4SF); + def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid, + VSX_BUILTIN_ST_ELEMREV_V4SI); + + if (TARGET_P9_VECTOR) + { + def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid, + VSX_BUILTIN_LD_ELEMREV_V8HI); + def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid, + VSX_BUILTIN_LD_ELEMREV_V16QI); + def_builtin ("__builtin_vsx_st_elemrev_v8hi", + void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI); + def_builtin ("__builtin_vsx_st_elemrev_v16qi", + void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI); + } + else + { + rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V8HI] + = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V8HI]; + rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V16QI] + = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V16QI]; + rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V8HI] + = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V8HI]; + rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V16QI] + = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V16QI]; + } + + def_builtin ("__builtin_vec_vsx_ld", 
opaque_ftype_long_pcvoid, + VSX_BUILTIN_VEC_LD); + def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid, + VSX_BUILTIN_VEC_ST); + def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid, + VSX_BUILTIN_VEC_XL); + def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid, + VSX_BUILTIN_VEC_XST); + + def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP); + def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS); + def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE); + + def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD); + def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT); + def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT); + def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT); + def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW); + def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH); + def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB); + def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF); + def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX); + def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX); + def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS); + def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU); + + def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque, + ALTIVEC_BUILTIN_VEC_ADDE); + def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque, + ALTIVEC_BUILTIN_VEC_ADDEC); + def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque, + ALTIVEC_BUILTIN_VEC_CMPNE); + def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque, + ALTIVEC_BUILTIN_VEC_MUL); + + /* Cell builtins. 
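The overloaded __builtin_vec_* entries above are what altivec.h maps the generic vec_* interfaces onto; a small sketch, assuming altivec.h and -maltivec:

      vector int v = vec_splats (7);
      int elt = vec_extract (v, 2);

   Both are registered here with opaque signatures; the C front end later resolves each call to a concrete AltiVec or VSX form.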
*/ + def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX); + def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL); + def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX); + def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL); + + def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX); + def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL); + def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX); + def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL); + + def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX); + def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL); + def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX); + def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL); + + def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX); + def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL); + def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX); + def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL); + + if (TARGET_P9_VECTOR) + def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long, + P9V_BUILTIN_STXVL); + + /* Add the DST variants. */ + d = bdesc_dst; + for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++) + { + HOST_WIDE_INT mask = d->mask; + + /* It is expected that these dst built-in functions may have + d->icode equal to CODE_FOR_nothing. */ + if ((mask & builtin_mask) != mask) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "altivec_init_builtins, skip dst %s\n", + d->name); + continue; + } + def_builtin (d->name, void_ftype_pcvoid_int_int, d->code); + } + + /* Initialize the predicates. */ + d = bdesc_altivec_preds; + for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++) + { + machine_mode mode1; + tree type; + HOST_WIDE_INT mask = d->mask; + + if ((mask & builtin_mask) != mask) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "altivec_init_builtins, skip predicate %s\n", + d->name); + continue; + } + + if (rs6000_overloaded_builtin_p (d->code)) + mode1 = VOIDmode; + else + { + /* Cannot define builtin if the instruction is disabled. */ + gcc_assert (d->icode != CODE_FOR_nothing); + mode1 = insn_data[d->icode].operand[1].mode; + } + + switch (mode1) + { + case VOIDmode: + type = int_ftype_int_opaque_opaque; + break; + case V2DImode: + type = int_ftype_int_v2di_v2di; + break; + case V4SImode: + type = int_ftype_int_v4si_v4si; + break; + case V8HImode: + type = int_ftype_int_v8hi_v8hi; + break; + case V16QImode: + type = int_ftype_int_v16qi_v16qi; + break; + case V4SFmode: + type = int_ftype_int_v4sf_v4sf; + break; + case V2DFmode: + type = int_ftype_int_v2df_v2df; + break; + default: + gcc_unreachable (); + } + + def_builtin (d->name, type, d->code); + } + + /* Initialize the abs* operators. 
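User code normally reaches the predicate table just walked through the altivec.h comparison predicates; the leading integer argument in types such as int_ftype_int_v4si_v4si carries a small selector constant for the condition-register test. Roughly, with handle_equal as a placeholder:

      vector int a, b;
      if (vec_all_eq (a, b))
        handle_equal ();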
*/ + d = bdesc_abs; + for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++) + { + machine_mode mode0; + tree type; + HOST_WIDE_INT mask = d->mask; + + if ((mask & builtin_mask) != mask) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "altivec_init_builtins, skip abs %s\n", + d->name); + continue; + } + + /* Cannot define builtin if the instruction is disabled. */ + gcc_assert (d->icode != CODE_FOR_nothing); + mode0 = insn_data[d->icode].operand[0].mode; + + switch (mode0) + { + case V2DImode: + type = v2di_ftype_v2di; + break; + case V4SImode: + type = v4si_ftype_v4si; + break; + case V8HImode: + type = v8hi_ftype_v8hi; + break; + case V16QImode: + type = v16qi_ftype_v16qi; + break; + case V4SFmode: + type = v4sf_ftype_v4sf; + break; + case V2DFmode: + type = v2df_ftype_v2df; + break; + default: + gcc_unreachable (); + } + + def_builtin (d->name, type, d->code); + } + + /* Initialize target builtin that implements + targetm.vectorize.builtin_mask_for_load. */ + + decl = add_builtin_function ("__builtin_altivec_mask_for_load", + v16qi_ftype_long_pcvoid, + ALTIVEC_BUILTIN_MASK_FOR_LOAD, + BUILT_IN_MD, NULL, NULL_TREE); + TREE_READONLY (decl) = 1; + /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */ + altivec_builtin_mask_for_load = decl; + + /* Access to the vec_init patterns. */ + ftype = build_function_type_list (V4SI_type_node, integer_type_node, + integer_type_node, integer_type_node, + integer_type_node, NULL_TREE); + def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI); + + ftype = build_function_type_list (V8HI_type_node, short_integer_type_node, + short_integer_type_node, + short_integer_type_node, + short_integer_type_node, + short_integer_type_node, + short_integer_type_node, + short_integer_type_node, + short_integer_type_node, NULL_TREE); + def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI); + + ftype = build_function_type_list (V16QI_type_node, char_type_node, + char_type_node, char_type_node, + char_type_node, char_type_node, + char_type_node, char_type_node, + char_type_node, char_type_node, + char_type_node, char_type_node, + char_type_node, char_type_node, + char_type_node, char_type_node, + char_type_node, NULL_TREE); + def_builtin ("__builtin_vec_init_v16qi", ftype, + ALTIVEC_BUILTIN_VEC_INIT_V16QI); + + ftype = build_function_type_list (V4SF_type_node, float_type_node, + float_type_node, float_type_node, + float_type_node, NULL_TREE); + def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF); + + /* VSX builtins. */ + ftype = build_function_type_list (V2DF_type_node, double_type_node, + double_type_node, NULL_TREE); + def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF); + + ftype = build_function_type_list (V2DI_type_node, intDI_type_node, + intDI_type_node, NULL_TREE); + def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI); + + /* Access to the vec_set patterns. 
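The vec_init entries give direct access to the vector initialization patterns; the call sketched below, with a, b, c and d being ordinary int values, should behave like the GNU vector initializer written next to it:

      vector int v1 = __builtin_vec_init_v4si (a, b, c, d);
      vector int v2 = { a, b, c, d };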
*/ + ftype = build_function_type_list (V4SI_type_node, V4SI_type_node, + intSI_type_node, + integer_type_node, NULL_TREE); + def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI); + + ftype = build_function_type_list (V8HI_type_node, V8HI_type_node, + intHI_type_node, + integer_type_node, NULL_TREE); + def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI); + + ftype = build_function_type_list (V16QI_type_node, V16QI_type_node, + intQI_type_node, + integer_type_node, NULL_TREE); + def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI); + + ftype = build_function_type_list (V4SF_type_node, V4SF_type_node, + float_type_node, + integer_type_node, NULL_TREE); + def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF); + + ftype = build_function_type_list (V2DF_type_node, V2DF_type_node, + double_type_node, + integer_type_node, NULL_TREE); + def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF); + + ftype = build_function_type_list (V2DI_type_node, V2DI_type_node, + intDI_type_node, + integer_type_node, NULL_TREE); + def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI); + + /* Access to the vec_extract patterns. */ + ftype = build_function_type_list (intSI_type_node, V4SI_type_node, + integer_type_node, NULL_TREE); + def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI); + + ftype = build_function_type_list (intHI_type_node, V8HI_type_node, + integer_type_node, NULL_TREE); + def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI); + + ftype = build_function_type_list (intQI_type_node, V16QI_type_node, + integer_type_node, NULL_TREE); + def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI); + + ftype = build_function_type_list (float_type_node, V4SF_type_node, + integer_type_node, NULL_TREE); + def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF); + + ftype = build_function_type_list (double_type_node, V2DF_type_node, + integer_type_node, NULL_TREE); + def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF); + + ftype = build_function_type_list (intDI_type_node, V2DI_type_node, + integer_type_node, NULL_TREE); + def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI); + + + if (V1TI_type_node) + { + tree v1ti_ftype_long_pcvoid + = build_function_type_list (V1TI_type_node, + long_integer_type_node, pcvoid_type_node, + NULL_TREE); + tree void_ftype_v1ti_long_pvoid + = build_function_type_list (void_type_node, + V1TI_type_node, long_integer_type_node, + pvoid_type_node, NULL_TREE); + def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid, + VSX_BUILTIN_LXVD2X_V1TI); + def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid, + VSX_BUILTIN_STXVD2X_V1TI); + ftype = build_function_type_list (V1TI_type_node, intTI_type_node, + NULL_TREE, NULL_TREE); + def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI); + ftype = build_function_type_list (V1TI_type_node, V1TI_type_node, + intTI_type_node, + integer_type_node, NULL_TREE); + def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI); + ftype = build_function_type_list (intTI_type_node, V1TI_type_node, + integer_type_node, NULL_TREE); + def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI); + } + +} + +static void +htm_init_builtins (void) +{ + HOST_WIDE_INT builtin_mask = rs6000_builtin_mask; + const struct builtin_description *d; + size_t 
i; + + d = bdesc_htm; + for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++) + { + tree op[MAX_HTM_OPERANDS], type; + HOST_WIDE_INT mask = d->mask; + unsigned attr = rs6000_builtin_info[d->code].attr; + bool void_func = (attr & RS6000_BTC_VOID); + int attr_args = (attr & RS6000_BTC_TYPE_MASK); + int nopnds = 0; + tree gpr_type_node; + tree rettype; + tree argtype; + + /* It is expected that these htm built-in functions may have + d->icode equal to CODE_FOR_nothing. */ + + if (TARGET_32BIT && TARGET_POWERPC64) + gpr_type_node = long_long_unsigned_type_node; + else + gpr_type_node = long_unsigned_type_node; + + if (attr & RS6000_BTC_SPR) + { + rettype = gpr_type_node; + argtype = gpr_type_node; + } + else if (d->code == HTM_BUILTIN_TABORTDC + || d->code == HTM_BUILTIN_TABORTDCI) + { + rettype = unsigned_type_node; + argtype = gpr_type_node; + } + else + { + rettype = unsigned_type_node; + argtype = unsigned_type_node; + } + + if ((mask & builtin_mask) != mask) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "htm_builtin, skip binary %s\n", d->name); + continue; + } + + if (d->name == 0) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n", + (long unsigned) i); + continue; + } + + op[nopnds++] = (void_func) ? void_type_node : rettype; + + if (attr_args == RS6000_BTC_UNARY) + op[nopnds++] = argtype; + else if (attr_args == RS6000_BTC_BINARY) + { + op[nopnds++] = argtype; + op[nopnds++] = argtype; + } + else if (attr_args == RS6000_BTC_TERNARY) + { + op[nopnds++] = argtype; + op[nopnds++] = argtype; + op[nopnds++] = argtype; + } + + switch (nopnds) + { + case 1: + type = build_function_type_list (op[0], NULL_TREE); + break; + case 2: + type = build_function_type_list (op[0], op[1], NULL_TREE); + break; + case 3: + type = build_function_type_list (op[0], op[1], op[2], NULL_TREE); + break; + case 4: + type = build_function_type_list (op[0], op[1], op[2], op[3], + NULL_TREE); + break; + default: + gcc_unreachable (); + } + + def_builtin (d->name, type, d->code); + } +} + +/* Hash function for builtin functions with up to 3 arguments and a return + type. */ +hashval_t +builtin_hasher::hash (builtin_hash_struct *bh) +{ + unsigned ret = 0; + int i; + + for (i = 0; i < 4; i++) + { + ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]); + ret = (ret * 2) + bh->uns_p[i]; + } + + return ret; +} + +/* Compare builtin hash entries H1 and H2 for equivalence. */ +bool +builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2) +{ + return ((p1->mode[0] == p2->mode[0]) + && (p1->mode[1] == p2->mode[1]) + && (p1->mode[2] == p2->mode[2]) + && (p1->mode[3] == p2->mode[3]) + && (p1->uns_p[0] == p2->uns_p[0]) + && (p1->uns_p[1] == p2->uns_p[1]) + && (p1->uns_p[2] == p2->uns_p[2]) + && (p1->uns_p[3] == p2->uns_p[3])); +} + +/* Map types for builtin functions with an explicit return type and up to 3 + arguments. Functions with fewer than 3 arguments use VOIDmode as the type + of the argument. */ +static tree +builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, + machine_mode mode_arg1, machine_mode mode_arg2, + enum rs6000_builtins builtin, const char *name) +{ + struct builtin_hash_struct h; + struct builtin_hash_struct *h2; + int num_args = 3; + int i; + tree ret_type = NULL_TREE; + tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE }; + + /* Create builtin_hash_table. 
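The HTM descriptors registered in the loop above back the documented __builtin_tbegin family; the usual guarded-transaction shape is roughly as follows, with update_shared_state and fallback_with_lock as placeholders:

      if (__builtin_tbegin (0))
        {
          update_shared_state ();
          __builtin_tend (0);
        }
      else
        fallback_with_lock ();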
*/ + if (builtin_hash_table == NULL) + builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500); + + h.type = NULL_TREE; + h.mode[0] = mode_ret; + h.mode[1] = mode_arg0; + h.mode[2] = mode_arg1; + h.mode[3] = mode_arg2; + h.uns_p[0] = 0; + h.uns_p[1] = 0; + h.uns_p[2] = 0; + h.uns_p[3] = 0; + + /* If the builtin is a type that produces unsigned results or takes unsigned + arguments, and it is returned as a decl for the vectorizer (such as + widening multiplies, permute), make sure the arguments and return value + are type correct. */ + switch (builtin) + { + /* unsigned 1 argument functions. */ + case CRYPTO_BUILTIN_VSBOX: + case P8V_BUILTIN_VGBBD: + case MISC_BUILTIN_CDTBCD: + case MISC_BUILTIN_CBCDTD: + h.uns_p[0] = 1; + h.uns_p[1] = 1; + break; + + /* unsigned 2 argument functions. */ + case ALTIVEC_BUILTIN_VMULEUB: + case ALTIVEC_BUILTIN_VMULEUH: + case ALTIVEC_BUILTIN_VMULOUB: + case ALTIVEC_BUILTIN_VMULOUH: + case CRYPTO_BUILTIN_VCIPHER: + case CRYPTO_BUILTIN_VCIPHERLAST: + case CRYPTO_BUILTIN_VNCIPHER: + case CRYPTO_BUILTIN_VNCIPHERLAST: + case CRYPTO_BUILTIN_VPMSUMB: + case CRYPTO_BUILTIN_VPMSUMH: + case CRYPTO_BUILTIN_VPMSUMW: + case CRYPTO_BUILTIN_VPMSUMD: + case CRYPTO_BUILTIN_VPMSUM: + case MISC_BUILTIN_ADDG6S: + case MISC_BUILTIN_DIVWEU: + case MISC_BUILTIN_DIVWEUO: + case MISC_BUILTIN_DIVDEU: + case MISC_BUILTIN_DIVDEUO: + case VSX_BUILTIN_UDIV_V2DI: + h.uns_p[0] = 1; + h.uns_p[1] = 1; + h.uns_p[2] = 1; + break; + + /* unsigned 3 argument functions. */ + case ALTIVEC_BUILTIN_VPERM_16QI_UNS: + case ALTIVEC_BUILTIN_VPERM_8HI_UNS: + case ALTIVEC_BUILTIN_VPERM_4SI_UNS: + case ALTIVEC_BUILTIN_VPERM_2DI_UNS: + case ALTIVEC_BUILTIN_VSEL_16QI_UNS: + case ALTIVEC_BUILTIN_VSEL_8HI_UNS: + case ALTIVEC_BUILTIN_VSEL_4SI_UNS: + case ALTIVEC_BUILTIN_VSEL_2DI_UNS: + case VSX_BUILTIN_VPERM_16QI_UNS: + case VSX_BUILTIN_VPERM_8HI_UNS: + case VSX_BUILTIN_VPERM_4SI_UNS: + case VSX_BUILTIN_VPERM_2DI_UNS: + case VSX_BUILTIN_XXSEL_16QI_UNS: + case VSX_BUILTIN_XXSEL_8HI_UNS: + case VSX_BUILTIN_XXSEL_4SI_UNS: + case VSX_BUILTIN_XXSEL_2DI_UNS: + case CRYPTO_BUILTIN_VPERMXOR: + case CRYPTO_BUILTIN_VPERMXOR_V2DI: + case CRYPTO_BUILTIN_VPERMXOR_V4SI: + case CRYPTO_BUILTIN_VPERMXOR_V8HI: + case CRYPTO_BUILTIN_VPERMXOR_V16QI: + case CRYPTO_BUILTIN_VSHASIGMAW: + case CRYPTO_BUILTIN_VSHASIGMAD: + case CRYPTO_BUILTIN_VSHASIGMA: + h.uns_p[0] = 1; + h.uns_p[1] = 1; + h.uns_p[2] = 1; + h.uns_p[3] = 1; + break; + + /* signed permute functions with unsigned char mask. */ + case ALTIVEC_BUILTIN_VPERM_16QI: + case ALTIVEC_BUILTIN_VPERM_8HI: + case ALTIVEC_BUILTIN_VPERM_4SI: + case ALTIVEC_BUILTIN_VPERM_4SF: + case ALTIVEC_BUILTIN_VPERM_2DI: + case ALTIVEC_BUILTIN_VPERM_2DF: + case VSX_BUILTIN_VPERM_16QI: + case VSX_BUILTIN_VPERM_8HI: + case VSX_BUILTIN_VPERM_4SI: + case VSX_BUILTIN_VPERM_4SF: + case VSX_BUILTIN_VPERM_2DI: + case VSX_BUILTIN_VPERM_2DF: + h.uns_p[3] = 1; + break; + + /* unsigned args, signed return. */ + case VSX_BUILTIN_XVCVUXDSP: + case VSX_BUILTIN_XVCVUXDDP_UNS: + case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF: + h.uns_p[1] = 1; + break; + + /* signed args, unsigned return. */ + case VSX_BUILTIN_XVCVDPUXDS_UNS: + case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI: + case MISC_BUILTIN_UNPACK_TD: + case MISC_BUILTIN_UNPACK_V1TI: + h.uns_p[0] = 1; + break; + + /* unsigned arguments for 128-bit pack instructions. */ + case MISC_BUILTIN_PACK_TD: + case MISC_BUILTIN_PACK_V1TI: + h.uns_p[1] = 1; + h.uns_p[2] = 1; + break; + + default: + break; + } + + /* Figure out how many args are present. 
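The switch above only adjusts the signedness flags of the hash key. As a concrete illustration, ALTIVEC_BUILTIN_VMULEUB and ALTIVEC_BUILTIN_VMULOUB end up with identical keys, so they share a single function type, roughly:

      h.mode[0] = V8HImode;   h.uns_p[0] = 1;
      h.mode[1] = V16QImode;  h.uns_p[1] = 1;
      h.mode[2] = V16QImode;  h.uns_p[2] = 1;
      h.mode[3] = VOIDmode;   h.uns_p[3] = 0;

   i.e. vector unsigned short (vector unsigned char, vector unsigned char), built once and reused for both entries.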
*/ + while (num_args > 0 && h.mode[num_args] == VOIDmode) + num_args--; + + ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]]; + if (!ret_type && h.uns_p[0]) + ret_type = builtin_mode_to_type[h.mode[0]][0]; + + if (!ret_type) + fatal_error (input_location, + "internal error: builtin function %s had an unexpected " + "return type %s", name, GET_MODE_NAME (h.mode[0])); + + for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++) + arg_type[i] = NULL_TREE; + + for (i = 0; i < num_args; i++) + { + int m = (int) h.mode[i+1]; + int uns_p = h.uns_p[i+1]; + + arg_type[i] = builtin_mode_to_type[m][uns_p]; + if (!arg_type[i] && uns_p) + arg_type[i] = builtin_mode_to_type[m][0]; + + if (!arg_type[i]) + fatal_error (input_location, + "internal error: builtin function %s, argument %d " + "had unexpected argument type %s", name, i, + GET_MODE_NAME (m)); + } + + builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT); + if (*found == NULL) + { + h2 = ggc_alloc<builtin_hash_struct> (); + *h2 = h; + *found = h2; + + h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1], + arg_type[2], NULL_TREE); + } + + return (*found)->type; +} + +static void +rs6000_common_init_builtins (void) +{ + const struct builtin_description *d; + size_t i; + + tree opaque_ftype_opaque = NULL_TREE; + tree opaque_ftype_opaque_opaque = NULL_TREE; + tree opaque_ftype_opaque_opaque_opaque = NULL_TREE; + tree v2si_ftype = NULL_TREE; + tree v2si_ftype_qi = NULL_TREE; + tree v2si_ftype_v2si_qi = NULL_TREE; + tree v2si_ftype_int_qi = NULL_TREE; + HOST_WIDE_INT builtin_mask = rs6000_builtin_mask; + + if (!TARGET_PAIRED_FLOAT) + { + builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node; + builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node; + } + + /* Paired and SPE builtins are only available if you build a compiler with + the appropriate options, so only create those builtins with the + appropriate compiler option. Create Altivec and VSX builtins on machines + with at least the general purpose extensions (970 and newer) to allow the + use of the target attribute.. */ + + if (TARGET_EXTRA_BUILTINS) + builtin_mask |= RS6000_BTM_COMMON; + + /* Add the ternary operators. */ + d = bdesc_3arg; + for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++) + { + tree type; + HOST_WIDE_INT mask = d->mask; + + if ((mask & builtin_mask) != mask) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name); + continue; + } + + if (rs6000_overloaded_builtin_p (d->code)) + { + if (! (type = opaque_ftype_opaque_opaque_opaque)) + type = opaque_ftype_opaque_opaque_opaque + = build_function_type_list (opaque_V4SI_type_node, + opaque_V4SI_type_node, + opaque_V4SI_type_node, + opaque_V4SI_type_node, + NULL_TREE); + } + else + { + enum insn_code icode = d->icode; + if (d->name == 0) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n", + (long unsigned)i); + + continue; + } + + if (icode == CODE_FOR_nothing) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n", + d->name); + + continue; + } + + type = builtin_function_type (insn_data[icode].operand[0].mode, + insn_data[icode].operand[1].mode, + insn_data[icode].operand[2].mode, + insn_data[icode].operand[3].mode, + d->code, d->name); + } + + def_builtin (d->name, type, d->code); + } + + /* Add the binary operators. 
*/ + d = bdesc_2arg; + for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++) + { + machine_mode mode0, mode1, mode2; + tree type; + HOST_WIDE_INT mask = d->mask; + + if ((mask & builtin_mask) != mask) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name); + continue; + } + + if (rs6000_overloaded_builtin_p (d->code)) + { + if (! (type = opaque_ftype_opaque_opaque)) + type = opaque_ftype_opaque_opaque + = build_function_type_list (opaque_V4SI_type_node, + opaque_V4SI_type_node, + opaque_V4SI_type_node, + NULL_TREE); + } + else + { + enum insn_code icode = d->icode; + if (d->name == 0) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n", + (long unsigned)i); + + continue; + } + + if (icode == CODE_FOR_nothing) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n", + d->name); + + continue; + } + + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + mode2 = insn_data[icode].operand[2].mode; + + if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode) + { + if (! (type = v2si_ftype_v2si_qi)) + type = v2si_ftype_v2si_qi + = build_function_type_list (opaque_V2SI_type_node, + opaque_V2SI_type_node, + char_type_node, + NULL_TREE); + } + + else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT + && mode2 == QImode) + { + if (! (type = v2si_ftype_int_qi)) + type = v2si_ftype_int_qi + = build_function_type_list (opaque_V2SI_type_node, + integer_type_node, + char_type_node, + NULL_TREE); + } + + else + type = builtin_function_type (mode0, mode1, mode2, VOIDmode, + d->code, d->name); + } + + def_builtin (d->name, type, d->code); + } + + /* Add the simple unary operators. */ + d = bdesc_1arg; + for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++) + { + machine_mode mode0, mode1; + tree type; + HOST_WIDE_INT mask = d->mask; + + if ((mask & builtin_mask) != mask) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name); + continue; + } + + if (rs6000_overloaded_builtin_p (d->code)) + { + if (! (type = opaque_ftype_opaque)) + type = opaque_ftype_opaque + = build_function_type_list (opaque_V4SI_type_node, + opaque_V4SI_type_node, + NULL_TREE); + } + else + { + enum insn_code icode = d->icode; + if (d->name == 0) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n", + (long unsigned)i); + + continue; + } + + if (icode == CODE_FOR_nothing) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n", + d->name); + + continue; + } + + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + + if (mode0 == V2SImode && mode1 == QImode) + { + if (! (type = v2si_ftype_qi)) + type = v2si_ftype_qi + = build_function_type_list (opaque_V2SI_type_node, + char_type_node, + NULL_TREE); + } + + else + type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode, + d->code, d->name); + } + + def_builtin (d->name, type, d->code); + } + + /* Add the simple no-argument operators. 
*/ + d = bdesc_0arg; + for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++) + { + machine_mode mode0; + tree type; + HOST_WIDE_INT mask = d->mask; + + if ((mask & builtin_mask) != mask) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name); + continue; + } + if (rs6000_overloaded_builtin_p (d->code)) + { + if (!opaque_ftype_opaque) + opaque_ftype_opaque + = build_function_type_list (opaque_V4SI_type_node, NULL_TREE); + type = opaque_ftype_opaque; + } + else + { + enum insn_code icode = d->icode; + if (d->name == 0) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n", + (long unsigned) i); + continue; + } + if (icode == CODE_FOR_nothing) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, + "rs6000_builtin, skip no-argument %s (no code)\n", + d->name); + continue; + } + mode0 = insn_data[icode].operand[0].mode; + if (mode0 == V2SImode) + { + /* code for SPE */ + if (! (type = v2si_ftype)) + { + v2si_ftype + = build_function_type_list (opaque_V2SI_type_node, + NULL_TREE); + type = v2si_ftype; + } + } + else + type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode, + d->code, d->name); + } + def_builtin (d->name, type, d->code); + } +} + +/* Set up AIX/Darwin/64-bit Linux quad floating point routines. */ +static void +init_float128_ibm (machine_mode mode) +{ + if (!TARGET_XL_COMPAT) + { + set_optab_libfunc (add_optab, mode, "__gcc_qadd"); + set_optab_libfunc (sub_optab, mode, "__gcc_qsub"); + set_optab_libfunc (smul_optab, mode, "__gcc_qmul"); + set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv"); + + if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE))) + { + set_optab_libfunc (neg_optab, mode, "__gcc_qneg"); + set_optab_libfunc (eq_optab, mode, "__gcc_qeq"); + set_optab_libfunc (ne_optab, mode, "__gcc_qne"); + set_optab_libfunc (gt_optab, mode, "__gcc_qgt"); + set_optab_libfunc (ge_optab, mode, "__gcc_qge"); + set_optab_libfunc (lt_optab, mode, "__gcc_qlt"); + set_optab_libfunc (le_optab, mode, "__gcc_qle"); + + set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq"); + set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq"); + set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos"); + set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod"); + set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi"); + set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou"); + set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq"); + set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq"); + } + + if (!(TARGET_HARD_FLOAT && TARGET_FPRS)) + set_optab_libfunc (unord_optab, mode, "__gcc_qunord"); + } + else + { + set_optab_libfunc (add_optab, mode, "_xlqadd"); + set_optab_libfunc (sub_optab, mode, "_xlqsub"); + set_optab_libfunc (smul_optab, mode, "_xlqmul"); + set_optab_libfunc (sdiv_optab, mode, "_xlqdiv"); + } + + /* Add various conversions for IFmode to use the traditional TFmode + names. 
*/ + if (mode == IFmode) + { + set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2"); + set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2"); + set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2"); + set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2"); + set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2"); + set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2"); + + if (TARGET_POWERPC64) + { + set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti"); + set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti"); + set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf"); + set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf"); + } + } +} + +/* Set up IEEE 128-bit floating point routines. Use different names if the + arguments can be passed in a vector register. The historical PowerPC + implementation of IEEE 128-bit floating point used _q_<op> for the names, so + continue to use that if we aren't using vector registers to pass IEEE + 128-bit floating point. */ + +static void +init_float128_ieee (machine_mode mode) +{ + if (FLOAT128_VECTOR_P (mode)) + { + set_optab_libfunc (add_optab, mode, "__addkf3"); + set_optab_libfunc (sub_optab, mode, "__subkf3"); + set_optab_libfunc (neg_optab, mode, "__negkf2"); + set_optab_libfunc (smul_optab, mode, "__mulkf3"); + set_optab_libfunc (sdiv_optab, mode, "__divkf3"); + set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2"); + set_optab_libfunc (abs_optab, mode, "__abstkf2"); + + set_optab_libfunc (eq_optab, mode, "__eqkf2"); + set_optab_libfunc (ne_optab, mode, "__nekf2"); + set_optab_libfunc (gt_optab, mode, "__gtkf2"); + set_optab_libfunc (ge_optab, mode, "__gekf2"); + set_optab_libfunc (lt_optab, mode, "__ltkf2"); + set_optab_libfunc (le_optab, mode, "__lekf2"); + set_optab_libfunc (unord_optab, mode, "__unordkf2"); + + set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2"); + set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2"); + set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2"); + set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2"); + + set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2"); + if (mode != TFmode && FLOAT128_IBM_P (TFmode)) + set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2"); + + set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2"); + if (mode != TFmode && FLOAT128_IBM_P (TFmode)) + set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2"); + + set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2"); + set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2"); + set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2"); + set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2"); + set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2"); + set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2"); + + set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi"); + set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi"); + set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi"); + set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi"); + + set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf"); + set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf"); + set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf"); + set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf"); + + if (TARGET_POWERPC64) + { + set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti"); + set_conv_libfunc (ufix_optab, TImode, mode, 
"__fixunskfti"); + set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf"); + set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf"); + } + } + + else + { + set_optab_libfunc (add_optab, mode, "_q_add"); + set_optab_libfunc (sub_optab, mode, "_q_sub"); + set_optab_libfunc (neg_optab, mode, "_q_neg"); + set_optab_libfunc (smul_optab, mode, "_q_mul"); + set_optab_libfunc (sdiv_optab, mode, "_q_div"); + if (TARGET_PPC_GPOPT) + set_optab_libfunc (sqrt_optab, mode, "_q_sqrt"); + + set_optab_libfunc (eq_optab, mode, "_q_feq"); + set_optab_libfunc (ne_optab, mode, "_q_fne"); + set_optab_libfunc (gt_optab, mode, "_q_fgt"); + set_optab_libfunc (ge_optab, mode, "_q_fge"); + set_optab_libfunc (lt_optab, mode, "_q_flt"); + set_optab_libfunc (le_optab, mode, "_q_fle"); + + set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq"); + set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq"); + set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos"); + set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod"); + set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi"); + set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou"); + set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq"); + set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq"); + } +} + +static void +rs6000_init_libfuncs (void) +{ + /* __float128 support. */ + if (TARGET_FLOAT128_TYPE) + { + init_float128_ibm (IFmode); + init_float128_ieee (KFmode); + } + + /* AIX/Darwin/64-bit Linux quad floating point routines. */ + if (TARGET_LONG_DOUBLE_128) + { + if (!TARGET_IEEEQUAD) + init_float128_ibm (TFmode); + + /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */ + else + init_float128_ieee (TFmode); + } +} + + +/* Expand a block clear operation, and return 1 if successful. Return 0 + if we should let the compiler generate normal code. + + operands[0] is the destination + operands[1] is the length + operands[3] is the alignment */ + +int +expand_block_clear (rtx operands[]) +{ + rtx orig_dest = operands[0]; + rtx bytes_rtx = operands[1]; + rtx align_rtx = operands[3]; + bool constp = (GET_CODE (bytes_rtx) == CONST_INT); + HOST_WIDE_INT align; + HOST_WIDE_INT bytes; + int offset; + int clear_bytes; + int clear_step; + + /* If this is not a fixed size move, just call memcpy */ + if (! constp) + return 0; + + /* This must be a fixed size alignment */ + gcc_assert (GET_CODE (align_rtx) == CONST_INT); + align = INTVAL (align_rtx) * BITS_PER_UNIT; + + /* Anything to clear? */ + bytes = INTVAL (bytes_rtx); + if (bytes <= 0) + return 1; + + /* Use the builtin memset after a point, to avoid huge code bloat. + When optimize_size, avoid any significant code bloat; calling + memset is about 4 instructions, so allow for one instruction to + load zero and three to do clearing. */ + if (TARGET_ALTIVEC && align >= 128) + clear_step = 16; + else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT)) + clear_step = 8; + else if (TARGET_SPE && align >= 64) + clear_step = 8; + else + clear_step = 4; + + if (optimize_size && bytes > 3 * clear_step) + return 0; + if (! 
optimize_size && bytes > 8 * clear_step) + return 0; + + for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes) + { + machine_mode mode = BLKmode; + rtx dest; + + if (bytes >= 16 && TARGET_ALTIVEC && align >= 128) + { + clear_bytes = 16; + mode = V4SImode; + } + else if (bytes >= 8 && TARGET_SPE && align >= 64) + { + clear_bytes = 8; + mode = V2SImode; + } + else if (bytes >= 8 && TARGET_POWERPC64 + && (align >= 64 || !STRICT_ALIGNMENT)) + { + clear_bytes = 8; + mode = DImode; + if (offset == 0 && align < 64) + { + rtx addr; + + /* If the address form is reg+offset with offset not a + multiple of four, reload into reg indirect form here + rather than waiting for reload. This way we get one + reload, not one per store. */ + addr = XEXP (orig_dest, 0); + if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM) + && GET_CODE (XEXP (addr, 1)) == CONST_INT + && (INTVAL (XEXP (addr, 1)) & 3) != 0) + { + addr = copy_addr_to_reg (addr); + orig_dest = replace_equiv_address (orig_dest, addr); + } + } + } + else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT)) + { /* move 4 bytes */ + clear_bytes = 4; + mode = SImode; + } + else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT)) + { /* move 2 bytes */ + clear_bytes = 2; + mode = HImode; + } + else /* move 1 byte at a time */ + { + clear_bytes = 1; + mode = QImode; + } + + dest = adjust_address (orig_dest, mode, offset); + + emit_move_insn (dest, CONST0_RTX (mode)); + } + + return 1; +} + +/* Emit a potentially record-form instruction, setting DST from SRC. + If DOT is 0, that is all; otherwise, set CCREG to the result of the + signed comparison of DST with zero. If DOT is 1, the generated RTL + doesn't care about the DST result; if DOT is 2, it does. If CCREG + is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and + a separate COMPARE. */ + +static void +rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg) +{ + if (dot == 0) + { + emit_move_insn (dst, src); + return; + } + + if (cc_reg_not_cr0_operand (ccreg, CCmode)) + { + emit_move_insn (dst, src); + emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx)); + return; + } + + rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx)); + if (dot == 1) + { + rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber))); + } + else + { + rtx set = gen_rtx_SET (dst, src); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set))); + } +} + +/* Figure out the correct instructions to generate to load data for + block compare. MODE is used for the read from memory, and + data is zero extended if REG is wider than MODE. If LE code + is being generated, bswap loads are used. + + REG is the destination register to move the data into. + MEM is the memory block being read. + MODE is the mode of memory to use for the read. 
*/
+static void
+do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
+{
+  switch (GET_MODE (reg))
+    {
+    case DImode:
+      switch (mode)
+        {
+        case QImode:
+          emit_insn (gen_zero_extendqidi2 (reg, mem));
+          break;
+        case HImode:
+          {
+            rtx src = mem;
+            if (!BYTES_BIG_ENDIAN)
+              {
+                src = gen_reg_rtx (HImode);
+                emit_insn (gen_bswaphi2 (src, mem));
+              }
+            emit_insn (gen_zero_extendhidi2 (reg, src));
+            break;
+          }
+        case SImode:
+          {
+            rtx src = mem;
+            if (!BYTES_BIG_ENDIAN)
+              {
+                src = gen_reg_rtx (SImode);
+                emit_insn (gen_bswapsi2 (src, mem));
+              }
+            emit_insn (gen_zero_extendsidi2 (reg, src));
+          }
+          break;
+        case DImode:
+          if (!BYTES_BIG_ENDIAN)
+            emit_insn (gen_bswapdi2 (reg, mem));
+          else
+            emit_insn (gen_movdi (reg, mem));
+          break;
+        default:
+          gcc_unreachable ();
+        }
+      break;
+
+    case SImode:
+      switch (mode)
+        {
+        case QImode:
+          emit_insn (gen_zero_extendqisi2 (reg, mem));
+          break;
+        case HImode:
+          {
+            rtx src = mem;
+            if (!BYTES_BIG_ENDIAN)
+              {
+                src = gen_reg_rtx (HImode);
+                emit_insn (gen_bswaphi2 (src, mem));
+              }
+            emit_insn (gen_zero_extendhisi2 (reg, src));
+            break;
+          }
+        case SImode:
+          if (!BYTES_BIG_ENDIAN)
+            emit_insn (gen_bswapsi2 (reg, mem));
+          else
+            emit_insn (gen_movsi (reg, mem));
+          break;
+        case DImode:
+          /* DImode is larger than the destination reg so is not expected.  */
+          gcc_unreachable ();
+          break;
+        default:
+          gcc_unreachable ();
+        }
+      break;
+    default:
+      gcc_unreachable ();
+      break;
+    }
+}
+
+/* Select the mode to be used for reading the next chunk of bytes
+   in the compare.
+
+   OFFSET is the current read offset from the beginning of the block.
+   BYTES is the number of bytes remaining to be read.
+   ALIGN is the minimum alignment of the memory blocks being compared in bytes.
+   WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
+   the largest allowable mode.  */
+static machine_mode
+select_block_compare_mode (unsigned HOST_WIDE_INT offset,
+                           unsigned HOST_WIDE_INT bytes,
+                           unsigned HOST_WIDE_INT align, bool word_mode_ok)
+{
+  /* First see if we can do a whole load unit
+     as that will be more efficient than a larger load + shift.  */
+
+  /* If big, use biggest chunk.
+     If exactly chunk size, use that size.
+     If remainder can be done in one piece with shifting, do that.
+     Do largest chunk possible without violating alignment rules.  */
+
+  /* The most we can read without potential page crossing.  */
+  unsigned HOST_WIDE_INT maxread = ROUND_UP (bytes, align);
+
+  if (word_mode_ok && bytes >= UNITS_PER_WORD)
+    return word_mode;
+  else if (bytes == GET_MODE_SIZE (SImode))
+    return SImode;
+  else if (bytes == GET_MODE_SIZE (HImode))
+    return HImode;
+  else if (bytes == GET_MODE_SIZE (QImode))
+    return QImode;
+  else if (bytes < GET_MODE_SIZE (SImode)
+           && offset >= GET_MODE_SIZE (SImode) - bytes)
+    /* This matches the case where we have SImode and 3 bytes
+       and offset >= 1 and permits us to move back one and overlap
+       with the previous read, thus avoiding having to shift
+       unwanted bytes off of the input.  */
+    return SImode;
+  else if (word_mode_ok && bytes < UNITS_PER_WORD
+           && offset >= UNITS_PER_WORD-bytes)
+    /* Similarly, if we can use DImode it will get matched here and
+       can do an overlapping read that ends at the end of the block.  */
+    return word_mode;
+  else if (word_mode_ok && maxread >= UNITS_PER_WORD)
+    /* It is safe to do all remaining in one load of largest size,
+       possibly with a shift to get rid of unwanted bytes.
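+
+       For instance (illustrative, assuming a 64-bit target where word_mode
+       is DImode and UNITS_PER_WORD is 8): a 5-byte compare at offset 0 of
+       8-byte-aligned buffers reaches this branch, since ROUND_UP (5, 8)
+       gives maxread == 8.  The five remaining bytes are fetched with one
+       DImode load; the three extra bytes read stay inside the same aligned
+       8-byte unit, so no page boundary is crossed, and the caller shifts
+       those unwanted low-order bytes off after the load.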
*/ + return word_mode; + else if (maxread >= GET_MODE_SIZE (SImode)) + /* It is safe to do all remaining in one SImode load, + possibly with a shift to get rid of unwanted bytes. */ + return SImode; + else if (bytes > GET_MODE_SIZE (SImode)) + return SImode; + else if (bytes > GET_MODE_SIZE (HImode)) + return HImode; + + /* final fallback is do one byte */ + return QImode; +} + +/* Compute the alignment of pointer+OFFSET where the original alignment + of pointer was BASE_ALIGN. */ +static unsigned HOST_WIDE_INT +compute_current_alignment (unsigned HOST_WIDE_INT base_align, + unsigned HOST_WIDE_INT offset) +{ + if (offset == 0) + return base_align; + return min (base_align, offset & -offset); +} + +/* Expand a block compare operation, and return true if successful. + Return false if we should let the compiler generate normal code, + probably a memcmp call. + + OPERANDS[0] is the target (result). + OPERANDS[1] is the first source. + OPERANDS[2] is the second source. + OPERANDS[3] is the length. + OPERANDS[4] is the alignment. */ +bool +expand_block_compare (rtx operands[]) +{ + rtx target = operands[0]; + rtx orig_src1 = operands[1]; + rtx orig_src2 = operands[2]; + rtx bytes_rtx = operands[3]; + rtx align_rtx = operands[4]; + HOST_WIDE_INT cmp_bytes = 0; + rtx src1 = orig_src1; + rtx src2 = orig_src2; + + /* This case is complicated to handle because the subtract + with carry instructions do not generate the 64-bit + carry and so we must emit code to calculate it ourselves. + We choose not to implement this yet. */ + if (TARGET_32BIT && TARGET_POWERPC64) + return false; + + /* If this is not a fixed size compare, just call memcmp. */ + if (!CONST_INT_P (bytes_rtx)) + return false; + + /* This must be a fixed size alignment. */ + if (!CONST_INT_P (align_rtx)) + return false; + + unsigned int base_align = UINTVAL (align_rtx) / BITS_PER_UNIT; + + /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff. */ + if (SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src1)) + || SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src2))) + return false; + + gcc_assert (GET_MODE (target) == SImode); + + /* Anything to move? */ + unsigned HOST_WIDE_INT bytes = UINTVAL (bytes_rtx); + if (bytes == 0) + return true; + + /* The code generated for p7 and older is not faster than glibc + memcmp if alignment is small and length is not short, so bail + out to avoid those conditions. */ + if (!TARGET_EFFICIENT_OVERLAPPING_UNALIGNED + && ((base_align == 1 && bytes > 16) + || (base_align == 2 && bytes > 32))) + return false; + + rtx tmp_reg_src1 = gen_reg_rtx (word_mode); + rtx tmp_reg_src2 = gen_reg_rtx (word_mode); + /* P7/P8 code uses cond for subfc. but P9 uses + it for cmpld which needs CCUNSmode. */ + rtx cond; + if (TARGET_P9_MISC) + cond = gen_reg_rtx (CCUNSmode); + else + cond = gen_reg_rtx (CCmode); + + /* If we have an LE target without ldbrx and word_mode is DImode, + then we must avoid using word_mode. */ + int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX + && word_mode == DImode); + + /* Strategy phase. How many ops will this take and should we expand it? */ + + unsigned HOST_WIDE_INT offset = 0; + machine_mode load_mode = + select_block_compare_mode (offset, bytes, base_align, word_mode_ok); + unsigned int load_mode_size = GET_MODE_SIZE (load_mode); + + /* We don't want to generate too much code. 
*/ + unsigned HOST_WIDE_INT max_bytes = + load_mode_size * (unsigned HOST_WIDE_INT) rs6000_block_compare_inline_limit; + if (!IN_RANGE (bytes, 1, max_bytes)) + return false; + + bool generate_6432_conversion = false; + rtx convert_label = NULL; + rtx final_label = NULL; + + /* Example of generated code for 18 bytes aligned 1 byte. + Compiled with -fno-reorder-blocks for clarity. + ldbrx 10,31,8 + ldbrx 9,7,8 + subfc. 9,9,10 + bne 0,.L6487 + addi 9,12,8 + addi 5,11,8 + ldbrx 10,0,9 + ldbrx 9,0,5 + subfc. 9,9,10 + bne 0,.L6487 + addi 9,12,16 + lhbrx 10,0,9 + addi 9,11,16 + lhbrx 9,0,9 + subf 9,9,10 + b .L6488 + .p2align 4,,15 + .L6487: #convert_label + popcntd 9,9 + subfe 10,10,10 + or 9,9,10 + .L6488: #final_label + extsw 10,9 + + We start off with DImode for two blocks that jump to the DI->SI conversion + if the difference is found there, then a final block of HImode that skips + the DI->SI conversion. */ + + while (bytes > 0) + { + unsigned int align = compute_current_alignment (base_align, offset); + if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED) + load_mode = select_block_compare_mode (offset, bytes, align, + word_mode_ok); + else + load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok); + load_mode_size = GET_MODE_SIZE (load_mode); + if (bytes >= load_mode_size) + cmp_bytes = load_mode_size; + else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED) + { + /* Move this load back so it doesn't go past the end. + P8/P9 can do this efficiently. */ + unsigned int extra_bytes = load_mode_size - bytes; + cmp_bytes = bytes; + if (extra_bytes < offset) + { + offset -= extra_bytes; + cmp_bytes = load_mode_size; + bytes = cmp_bytes; + } + } + else + /* P7 and earlier can't do the overlapping load trick fast, + so this forces a non-overlapping load and a shift to get + rid of the extra bytes. */ + cmp_bytes = bytes; + + src1 = adjust_address (orig_src1, load_mode, offset); + src2 = adjust_address (orig_src2, load_mode, offset); + + if (!REG_P (XEXP (src1, 0))) + { + rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0)); + src1 = replace_equiv_address (src1, src1_reg); + } + set_mem_size (src1, cmp_bytes); + + if (!REG_P (XEXP (src2, 0))) + { + rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0)); + src2 = replace_equiv_address (src2, src2_reg); + } + set_mem_size (src2, cmp_bytes); + + do_load_for_compare (tmp_reg_src1, src1, load_mode); + do_load_for_compare (tmp_reg_src2, src2, load_mode); + + if (cmp_bytes < load_mode_size) + { + /* Shift unneeded bytes off. */ + rtx sh = GEN_INT (BITS_PER_UNIT * (load_mode_size - cmp_bytes)); + if (word_mode == DImode) + { + emit_insn (gen_lshrdi3 (tmp_reg_src1, tmp_reg_src1, sh)); + emit_insn (gen_lshrdi3 (tmp_reg_src2, tmp_reg_src2, sh)); + } + else + { + emit_insn (gen_lshrsi3 (tmp_reg_src1, tmp_reg_src1, sh)); + emit_insn (gen_lshrsi3 (tmp_reg_src2, tmp_reg_src2, sh)); + } + } + + int remain = bytes - cmp_bytes; + if (GET_MODE_SIZE (GET_MODE (target)) > GET_MODE_SIZE (load_mode)) + { + /* Target is larger than load size so we don't need to + reduce result size. */ + + /* We previously did a block that need 64->32 conversion but + the current block does not, so a label is needed to jump + to the end. */ + if (generate_6432_conversion && !final_label) + final_label = gen_label_rtx (); + + if (remain > 0) + { + /* This is not the last block, branch to the end if the result + of this subtract is not zero. 
*/ + if (!final_label) + final_label = gen_label_rtx (); + rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label); + rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2); + rtx cr = gen_reg_rtx (CCmode); + rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cr); + emit_insn (gen_movsi (target, + gen_lowpart (SImode, tmp_reg_src2))); + rtx ne_rtx = gen_rtx_NE (VOIDmode, cr, const0_rtx); + rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx, + fin_ref, pc_rtx); + rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); + JUMP_LABEL (j) = final_label; + LABEL_NUSES (final_label) += 1; + } + else + { + if (word_mode == DImode) + { + emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1, + tmp_reg_src2)); + emit_insn (gen_movsi (target, + gen_lowpart (SImode, tmp_reg_src2))); + } + else + emit_insn (gen_subsi3 (target, tmp_reg_src1, tmp_reg_src2)); + + if (final_label) + { + rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label); + rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref)); + JUMP_LABEL(j) = final_label; + LABEL_NUSES (final_label) += 1; + emit_barrier (); + } + } + } + else + { + /* Do we need a 64->32 conversion block? We need the 64->32 + conversion even if target size == load_mode size because + the subtract generates one extra bit. */ + generate_6432_conversion = true; + + if (remain > 0) + { + if (!convert_label) + convert_label = gen_label_rtx (); + + /* Compare to zero and branch to convert_label if not zero. */ + rtx cvt_ref = gen_rtx_LABEL_REF (VOIDmode, convert_label); + if (TARGET_P9_MISC) + { + /* Generate a compare, and convert with a setb later. */ + rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1, + tmp_reg_src2); + emit_insn (gen_rtx_SET (cond, cmp)); + } + else + /* Generate a subfc. and use the longer + sequence for conversion. */ + if (TARGET_64BIT) + emit_insn (gen_subfdi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2, + tmp_reg_src1, cond)); + else + emit_insn (gen_subfsi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2, + tmp_reg_src1, cond)); + rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx); + rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx, + cvt_ref, pc_rtx); + rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); + JUMP_LABEL(j) = convert_label; + LABEL_NUSES (convert_label) += 1; + } + else + { + /* Just do the subtract/compare. Since this is the last block + the convert code will be generated immediately following. */ + if (TARGET_P9_MISC) + { + rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1, + tmp_reg_src2); + emit_insn (gen_rtx_SET (cond, cmp)); + } + else + if (TARGET_64BIT) + emit_insn (gen_subfdi3_carry (tmp_reg_src2, tmp_reg_src2, + tmp_reg_src1)); + else + emit_insn (gen_subfsi3_carry (tmp_reg_src2, tmp_reg_src2, + tmp_reg_src1)); + } + } + + offset += cmp_bytes; + bytes -= cmp_bytes; + } + + if (generate_6432_conversion) + { + if (convert_label) + emit_label (convert_label); + + /* We need to produce DI result from sub, then convert to target SI + while maintaining <0 / ==0 / >0 properties. 
This sequence works: + subfc L,A,B + subfe H,H,H + popcntd L,L + rldimi L,H,6,0 + + This is an alternate one Segher cooked up if somebody + wants to expand this for something that doesn't have popcntd: + subfc L,a,b + subfe H,x,x + addic t,L,-1 + subfe v,t,L + or z,v,H + + And finally, p9 can just do this: + cmpld A,B + setb r */ + + if (TARGET_P9_MISC) + { + emit_insn (gen_setb_unsigned (target, cond)); + } + else + { + if (TARGET_64BIT) + { + rtx tmp_reg_ca = gen_reg_rtx (DImode); + emit_insn (gen_subfdi3_carry_in_xx (tmp_reg_ca)); + emit_insn (gen_popcntddi2 (tmp_reg_src2, tmp_reg_src2)); + emit_insn (gen_iordi3 (tmp_reg_src2, tmp_reg_src2, tmp_reg_ca)); + emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2))); + } + else + { + rtx tmp_reg_ca = gen_reg_rtx (SImode); + emit_insn (gen_subfsi3_carry_in_xx (tmp_reg_ca)); + emit_insn (gen_popcntdsi2 (tmp_reg_src2, tmp_reg_src2)); + emit_insn (gen_iorsi3 (target, tmp_reg_src2, tmp_reg_ca)); + } + } + } + + if (final_label) + emit_label (final_label); + + gcc_assert (bytes == 0); + return true; +} + +/* Generate alignment check and branch code to set up for + strncmp when we don't have DI alignment. + STRNCMP_LABEL is the label to branch if there is a page crossing. + SRC is the string pointer to be examined. + BYTES is the max number of bytes to compare. */ +static void +expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes) +{ + rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, strncmp_label); + rtx src_check = copy_addr_to_reg (XEXP (src, 0)); + if (GET_MODE (src_check) == SImode) + emit_insn (gen_andsi3 (src_check, src_check, GEN_INT (0xfff))); + else + emit_insn (gen_anddi3 (src_check, src_check, GEN_INT (0xfff))); + rtx cond = gen_reg_rtx (CCmode); + emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_check, + GEN_INT (4096 - bytes))); + + rtx cmp_rtx = gen_rtx_LT (VOIDmode, cond, const0_rtx); + + rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx, + pc_rtx, lab_ref); + rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); + JUMP_LABEL (j) = strncmp_label; + LABEL_NUSES (strncmp_label) += 1; +} + +/* Expand a string compare operation with length, and return + true if successful. Return false if we should let the + compiler generate normal code, probably a strncmp call. + + OPERANDS[0] is the target (result). + OPERANDS[1] is the first source. + OPERANDS[2] is the second source. + If NO_LENGTH is zero, then: + OPERANDS[3] is the length. + OPERANDS[4] is the alignment in bytes. + If NO_LENGTH is nonzero, then: + OPERANDS[3] is the alignment in bytes. */ +bool +expand_strn_compare (rtx operands[], int no_length) +{ + rtx target = operands[0]; + rtx orig_src1 = operands[1]; + rtx orig_src2 = operands[2]; + rtx bytes_rtx, align_rtx; + if (no_length) + { + bytes_rtx = NULL; + align_rtx = operands[3]; + } + else + { + bytes_rtx = operands[3]; + align_rtx = operands[4]; + } + unsigned HOST_WIDE_INT cmp_bytes = 0; + rtx src1 = orig_src1; + rtx src2 = orig_src2; + + /* If we have a length, it must be constant. This simplifies things + a bit as we don't have to generate code to check if we've exceeded + the length. Later this could be expanded to handle this case. */ + if (!no_length && !CONST_INT_P (bytes_rtx)) + return false; + + /* This must be a fixed size alignment. 
*/ + if (!CONST_INT_P (align_rtx)) + return false; + + unsigned int base_align = UINTVAL (align_rtx); + int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT; + int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT; + + /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff. */ + if (SLOW_UNALIGNED_ACCESS (word_mode, align1) + || SLOW_UNALIGNED_ACCESS (word_mode, align2)) + return false; + + gcc_assert (GET_MODE (target) == SImode); + + /* If we have an LE target without ldbrx and word_mode is DImode, + then we must avoid using word_mode. */ + int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX + && word_mode == DImode); + + unsigned int word_mode_size = GET_MODE_SIZE (word_mode); + + unsigned HOST_WIDE_INT offset = 0; + unsigned HOST_WIDE_INT bytes; /* N from the strncmp args if available. */ + unsigned HOST_WIDE_INT compare_length; /* How much to compare inline. */ + if (no_length) + /* Use this as a standin to determine the mode to use. */ + bytes = rs6000_string_compare_inline_limit * word_mode_size; + else + bytes = UINTVAL (bytes_rtx); + + machine_mode load_mode = + select_block_compare_mode (offset, bytes, base_align, word_mode_ok); + unsigned int load_mode_size = GET_MODE_SIZE (load_mode); + compare_length = rs6000_string_compare_inline_limit * load_mode_size; + + /* If we have equality at the end of the last compare and we have not + found the end of the string, we need to call strcmp/strncmp to + compare the remainder. */ + bool equality_compare_rest = false; + + if (no_length) + { + bytes = compare_length; + equality_compare_rest = true; + } + else + { + if (bytes <= compare_length) + compare_length = bytes; + else + equality_compare_rest = true; + } + + rtx result_reg = gen_reg_rtx (word_mode); + rtx final_move_label = gen_label_rtx (); + rtx final_label = gen_label_rtx (); + rtx begin_compare_label = NULL; + + if (base_align < 8) + { + /* Generate code that checks distance to 4k boundary for this case. */ + begin_compare_label = gen_label_rtx (); + rtx strncmp_label = gen_label_rtx (); + rtx jmp; + + /* Strncmp for power8 in glibc does this: + rldicl r8,r3,0,52 + cmpldi cr7,r8,4096-16 + bgt cr7,L(pagecross) */ + + /* Make sure that the length we use for the alignment test and + the subsequent code generation are in agreement so we do not + go past the length we tested for a 4k boundary crossing. 
*/ + unsigned HOST_WIDE_INT align_test = compare_length; + if (align_test < 8) + { + align_test = HOST_WIDE_INT_1U << ceil_log2 (align_test); + base_align = align_test; + } + else + { + align_test = ROUND_UP (align_test, 8); + base_align = 8; + } + + if (align1 < 8) + expand_strncmp_align_check (strncmp_label, src1, align_test); + if (align2 < 8) + expand_strncmp_align_check (strncmp_label, src2, align_test); + + /* Now generate the following sequence: + - branch to begin_compare + - strncmp_label + - call to strncmp + - branch to final_label + - begin_compare_label */ + + rtx cmp_ref = gen_rtx_LABEL_REF (VOIDmode, begin_compare_label); + jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, cmp_ref)); + JUMP_LABEL (jmp) = begin_compare_label; + LABEL_NUSES (begin_compare_label) += 1; + emit_barrier (); + + emit_label (strncmp_label); + + if (!REG_P (XEXP (src1, 0))) + { + rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0)); + src1 = replace_equiv_address (src1, src1_reg); + } + + if (!REG_P (XEXP (src2, 0))) + { + rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0)); + src2 = replace_equiv_address (src2, src2_reg); + } + + if (no_length) + { + tree fun = builtin_decl_explicit (BUILT_IN_STRCMP); + emit_library_call_value (XEXP (DECL_RTL (fun), 0), + target, LCT_NORMAL, GET_MODE (target), 2, + force_reg (Pmode, XEXP (src1, 0)), Pmode, + force_reg (Pmode, XEXP (src2, 0)), Pmode); + } + else + { + /* -m32 -mpowerpc64 results in word_mode being DImode even + though otherwise it is 32-bit. The length arg to strncmp + is a size_t which will be the same size as pointers. */ + rtx len_rtx; + if (TARGET_64BIT) + len_rtx = gen_reg_rtx (DImode); + else + len_rtx = gen_reg_rtx (SImode); + + emit_move_insn (len_rtx, bytes_rtx); + + tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP); + emit_library_call_value (XEXP (DECL_RTL (fun), 0), + target, LCT_NORMAL, GET_MODE (target), 3, + force_reg (Pmode, XEXP (src1, 0)), Pmode, + force_reg (Pmode, XEXP (src2, 0)), Pmode, + len_rtx, GET_MODE (len_rtx)); + } + + rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label); + jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref)); + JUMP_LABEL (jmp) = final_label; + LABEL_NUSES (final_label) += 1; + emit_barrier (); + emit_label (begin_compare_label); + } + + rtx cleanup_label = NULL; + rtx tmp_reg_src1 = gen_reg_rtx (word_mode); + rtx tmp_reg_src2 = gen_reg_rtx (word_mode); + + /* Generate sequence of ld/ldbrx, cmpb to compare out + to the length specified. */ + unsigned HOST_WIDE_INT bytes_to_compare = compare_length; + while (bytes_to_compare > 0) + { + /* Compare sequence: + check each 8B with: ld/ld cmpd bne + If equal, use rldicr/cmpb to check for zero byte. + cleanup code at end: + cmpb get byte that differs + cmpb look for zero byte + orc combine + cntlzd get bit of first zero/diff byte + subfic convert for rldcl use + rldcl rldcl extract diff/zero byte + subf subtract for final result + + The last compare can branch around the cleanup code if the + result is zero because the strings are exactly equal. */ + unsigned int align = compute_current_alignment (base_align, offset); + if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED) + load_mode = select_block_compare_mode (offset, bytes_to_compare, align, + word_mode_ok); + else + load_mode = select_block_compare_mode (0, bytes_to_compare, align, + word_mode_ok); + load_mode_size = GET_MODE_SIZE (load_mode); + if (bytes_to_compare >= load_mode_size) + cmp_bytes = load_mode_size; + else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED) + { + /* Move this load back so it doesn't go past the end. 
+ P8/P9 can do this efficiently. */ + unsigned int extra_bytes = load_mode_size - bytes_to_compare; + cmp_bytes = bytes_to_compare; + if (extra_bytes < offset) + { + offset -= extra_bytes; + cmp_bytes = load_mode_size; + bytes_to_compare = cmp_bytes; + } + } + else + /* P7 and earlier can't do the overlapping load trick fast, + so this forces a non-overlapping load and a shift to get + rid of the extra bytes. */ + cmp_bytes = bytes_to_compare; + + src1 = adjust_address (orig_src1, load_mode, offset); + src2 = adjust_address (orig_src2, load_mode, offset); + + if (!REG_P (XEXP (src1, 0))) + { + rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0)); + src1 = replace_equiv_address (src1, src1_reg); + } + set_mem_size (src1, cmp_bytes); + + if (!REG_P (XEXP (src2, 0))) + { + rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0)); + src2 = replace_equiv_address (src2, src2_reg); + } + set_mem_size (src2, cmp_bytes); + + do_load_for_compare (tmp_reg_src1, src1, load_mode); + do_load_for_compare (tmp_reg_src2, src2, load_mode); + + /* We must always left-align the data we read, and + clear any bytes to the right that are beyond the string. + Otherwise the cmpb sequence won't produce the correct + results. The beginning of the compare will be done + with word_mode so will not have any extra shifts or + clear rights. */ + + if (load_mode_size < word_mode_size) + { + /* Rotate left first. */ + rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size)); + if (word_mode == DImode) + { + emit_insn (gen_rotldi3 (tmp_reg_src1, tmp_reg_src1, sh)); + emit_insn (gen_rotldi3 (tmp_reg_src2, tmp_reg_src2, sh)); + } + else + { + emit_insn (gen_rotlsi3 (tmp_reg_src1, tmp_reg_src1, sh)); + emit_insn (gen_rotlsi3 (tmp_reg_src2, tmp_reg_src2, sh)); + } + } + + if (cmp_bytes < word_mode_size) + { + /* Now clear right. This plus the rotate can be + turned into a rldicr instruction. */ + HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes); + rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb); + if (word_mode == DImode) + { + emit_insn (gen_anddi3_mask (tmp_reg_src1, tmp_reg_src1, mask)); + emit_insn (gen_anddi3_mask (tmp_reg_src2, tmp_reg_src2, mask)); + } + else + { + emit_insn (gen_andsi3_mask (tmp_reg_src1, tmp_reg_src1, mask)); + emit_insn (gen_andsi3_mask (tmp_reg_src2, tmp_reg_src2, mask)); + } + } + + /* Cases to handle. A and B are chunks of the two strings. + 1: Not end of comparison: + A != B: branch to cleanup code to compute result. + A == B: check for 0 byte, next block if not found. + 2: End of the inline comparison: + A != B: branch to cleanup code to compute result. + A == B: check for 0 byte, call strcmp/strncmp + 3: compared requested N bytes: + A == B: branch to result 0. + A != B: cleanup code to compute result. */ + + unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes; + + rtx dst_label; + if (remain > 0 || equality_compare_rest) + { + /* Branch to cleanup code, otherwise fall through to do + more compares. */ + if (!cleanup_label) + cleanup_label = gen_label_rtx (); + dst_label = cleanup_label; + } + else + /* Branch to end and produce result of 0. */ + dst_label = final_move_label; + + rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label); + rtx cond = gen_reg_rtx (CCmode); + + /* Always produce the 0 result, it is needed if + cmpb finds a 0 byte in this chunk. 
*/ + rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2); + rs6000_emit_dot_insn (result_reg, tmp, 1, cond); + + rtx cmp_rtx; + if (remain == 0 && !equality_compare_rest) + cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx); + else + cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx); + + rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx, + lab_ref, pc_rtx); + rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); + JUMP_LABEL (j) = dst_label; + LABEL_NUSES (dst_label) += 1; + + if (remain > 0 || equality_compare_rest) + { + /* Generate a cmpb to test for a 0 byte and branch + to final result if found. */ + rtx cmpb_zero = gen_reg_rtx (word_mode); + rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label); + rtx condz = gen_reg_rtx (CCmode); + rtx zero_reg = gen_reg_rtx (word_mode); + if (word_mode == SImode) + { + emit_insn (gen_movsi (zero_reg, GEN_INT (0))); + emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg)); + if (cmp_bytes < word_mode_size) + { + /* Don't want to look at zero bytes past end. */ + HOST_WIDE_INT mb = + BITS_PER_UNIT * (word_mode_size - cmp_bytes); + rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb); + emit_insn (gen_andsi3_mask (cmpb_zero, cmpb_zero, mask)); + } + } + else + { + emit_insn (gen_movdi (zero_reg, GEN_INT (0))); + emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg)); + if (cmp_bytes < word_mode_size) + { + /* Don't want to look at zero bytes past end. */ + HOST_WIDE_INT mb = + BITS_PER_UNIT * (word_mode_size - cmp_bytes); + rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb); + emit_insn (gen_anddi3_mask (cmpb_zero, cmpb_zero, mask)); + } + } + + emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg)); + rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx); + rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx, + lab_ref_fin, pc_rtx); + rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); + JUMP_LABEL (j2) = final_move_label; + LABEL_NUSES (final_move_label) += 1; + + } + + offset += cmp_bytes; + bytes_to_compare -= cmp_bytes; + } + + if (equality_compare_rest) + { + /* Update pointers past what has been compared already. */ + src1 = adjust_address (orig_src1, load_mode, offset); + src2 = adjust_address (orig_src2, load_mode, offset); + + if (!REG_P (XEXP (src1, 0))) + { + rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0)); + src1 = replace_equiv_address (src1, src1_reg); + } + set_mem_size (src1, cmp_bytes); + + if (!REG_P (XEXP (src2, 0))) + { + rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0)); + src2 = replace_equiv_address (src2, src2_reg); + } + set_mem_size (src2, cmp_bytes); + + /* Construct call to strcmp/strncmp to compare the rest of the string. 
*/
+      if (no_length)
+        {
+          tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
+          emit_library_call_value (XEXP (DECL_RTL (fun), 0),
+                                   target, LCT_NORMAL, GET_MODE (target), 2,
+                                   force_reg (Pmode, XEXP (src1, 0)), Pmode,
+                                   force_reg (Pmode, XEXP (src2, 0)), Pmode);
+        }
+      else
+        {
+          rtx len_rtx;
+          if (TARGET_64BIT)
+            len_rtx = gen_reg_rtx (DImode);
+          else
+            len_rtx = gen_reg_rtx (SImode);
+
+          emit_move_insn (len_rtx, GEN_INT (bytes - compare_length));
+          tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
+          emit_library_call_value (XEXP (DECL_RTL (fun), 0),
+                                   target, LCT_NORMAL, GET_MODE (target), 3,
+                                   force_reg (Pmode, XEXP (src1, 0)), Pmode,
+                                   force_reg (Pmode, XEXP (src2, 0)), Pmode,
+                                   len_rtx, GET_MODE (len_rtx));
+        }
+
+      rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
+      rtx jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
+      JUMP_LABEL (jmp) = final_label;
+      LABEL_NUSES (final_label) += 1;
+      emit_barrier ();
+    }
+
+  if (cleanup_label)
+    emit_label (cleanup_label);
+
+  /* Generate the final sequence that identifies the differing
+     byte and generates the final result, taking into account
+     zero bytes:
+
+     cmpb cmpb_result1, src1, src2
+     cmpb cmpb_result2, src1, zero
+     orc cmpb_result1, cmpb_result1, cmpb_result2
+     cntlzd get bit of first zero/diff byte
+     addi convert for rldcl use
+     rldcl rldcl extract diff/zero byte
+     subf subtract for final result
+  */
+
+  rtx cmpb_diff = gen_reg_rtx (word_mode);
+  rtx cmpb_zero = gen_reg_rtx (word_mode);
+  rtx rot_amt = gen_reg_rtx (word_mode);
+  rtx zero_reg = gen_reg_rtx (word_mode);
+
+  rtx rot1_1 = gen_reg_rtx (word_mode);
+  rtx rot1_2 = gen_reg_rtx (word_mode);
+  rtx rot2_1 = gen_reg_rtx (word_mode);
+  rtx rot2_2 = gen_reg_rtx (word_mode);
+
+  if (word_mode == SImode)
+    {
+      emit_insn (gen_cmpbsi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
+      emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
+      emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
+      emit_insn (gen_one_cmplsi2 (cmpb_diff, cmpb_diff));
+      emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
+      emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
+      emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
+      emit_insn (gen_rotlsi3 (rot1_1, tmp_reg_src1,
+                              gen_lowpart (SImode, rot_amt)));
+      emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
+      emit_insn (gen_rotlsi3 (rot2_1, tmp_reg_src2,
+                              gen_lowpart (SImode, rot_amt)));
+      emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
+      emit_insn (gen_subsi3 (result_reg, rot1_2, rot2_2));
+    }
+  else
+    {
+      emit_insn (gen_cmpbdi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
+      emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
+      emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
+      emit_insn (gen_one_cmpldi2 (cmpb_diff, cmpb_diff));
+      emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
+      emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
+      emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
+      emit_insn (gen_rotldi3 (rot1_1, tmp_reg_src1,
+                              gen_lowpart (SImode, rot_amt)));
+      emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
+      emit_insn (gen_rotldi3 (rot2_1, tmp_reg_src2,
+                              gen_lowpart (SImode, rot_amt)));
+      emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
+      emit_insn (gen_subdi3 (result_reg, rot1_2, rot2_2));
+    }
+
+  emit_label (final_move_label);
+  emit_insn (gen_movsi (target,
+                        gen_lowpart (SImode, result_reg)));
+  emit_label (final_label);
+  return true;
+}
+
+/* Expand a block move operation, and return 1 if successful.  Return 0
+   if we should let the compiler generate normal code.
+ + operands[0] is the destination + operands[1] is the source + operands[2] is the length + operands[3] is the alignment */ + +#define MAX_MOVE_REG 4 + +int +expand_block_move (rtx operands[]) +{ + rtx orig_dest = operands[0]; + rtx orig_src = operands[1]; + rtx bytes_rtx = operands[2]; + rtx align_rtx = operands[3]; + int constp = (GET_CODE (bytes_rtx) == CONST_INT); + int align; + int bytes; + int offset; + int move_bytes; + rtx stores[MAX_MOVE_REG]; + int num_reg = 0; + + /* If this is not a fixed size move, just call memcpy */ + if (! constp) + return 0; + + /* This must be a fixed size alignment */ + gcc_assert (GET_CODE (align_rtx) == CONST_INT); + align = INTVAL (align_rtx) * BITS_PER_UNIT; + + /* Anything to move? */ + bytes = INTVAL (bytes_rtx); + if (bytes <= 0) + return 1; + + if (bytes > rs6000_block_move_inline_limit) + return 0; + + for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes) + { + union { + rtx (*movmemsi) (rtx, rtx, rtx, rtx); + rtx (*mov) (rtx, rtx); + } gen_func; + machine_mode mode = BLKmode; + rtx src, dest; + + /* Altivec first, since it will be faster than a string move + when it applies, and usually not significantly larger. */ + if (TARGET_ALTIVEC && bytes >= 16 && align >= 128) + { + move_bytes = 16; + mode = V4SImode; + gen_func.mov = gen_movv4si; + } + else if (TARGET_SPE && bytes >= 8 && align >= 64) + { + move_bytes = 8; + mode = V2SImode; + gen_func.mov = gen_movv2si; + } + else if (TARGET_STRING + && bytes > 24 /* move up to 32 bytes at a time */ + && ! fixed_regs[5] + && ! fixed_regs[6] + && ! fixed_regs[7] + && ! fixed_regs[8] + && ! fixed_regs[9] + && ! fixed_regs[10] + && ! fixed_regs[11] + && ! fixed_regs[12]) + { + move_bytes = (bytes > 32) ? 32 : bytes; + gen_func.movmemsi = gen_movmemsi_8reg; + } + else if (TARGET_STRING + && bytes > 16 /* move up to 24 bytes at a time */ + && ! fixed_regs[5] + && ! fixed_regs[6] + && ! fixed_regs[7] + && ! fixed_regs[8] + && ! fixed_regs[9] + && ! fixed_regs[10]) + { + move_bytes = (bytes > 24) ? 24 : bytes; + gen_func.movmemsi = gen_movmemsi_6reg; + } + else if (TARGET_STRING + && bytes > 8 /* move up to 16 bytes at a time */ + && ! fixed_regs[5] + && ! fixed_regs[6] + && ! fixed_regs[7] + && ! fixed_regs[8]) + { + move_bytes = (bytes > 16) ? 16 : bytes; + gen_func.movmemsi = gen_movmemsi_4reg; + } + else if (bytes >= 8 && TARGET_POWERPC64 + && (align >= 64 || !STRICT_ALIGNMENT)) + { + move_bytes = 8; + mode = DImode; + gen_func.mov = gen_movdi; + if (offset == 0 && align < 64) + { + rtx addr; + + /* If the address form is reg+offset with offset not a + multiple of four, reload into reg indirect form here + rather than waiting for reload. This way we get one + reload, not one per load and/or store. */ + addr = XEXP (orig_dest, 0); + if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM) + && GET_CODE (XEXP (addr, 1)) == CONST_INT + && (INTVAL (XEXP (addr, 1)) & 3) != 0) + { + addr = copy_addr_to_reg (addr); + orig_dest = replace_equiv_address (orig_dest, addr); + } + addr = XEXP (orig_src, 0); + if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM) + && GET_CODE (XEXP (addr, 1)) == CONST_INT + && (INTVAL (XEXP (addr, 1)) & 3) != 0) + { + addr = copy_addr_to_reg (addr); + orig_src = replace_equiv_address (orig_src, addr); + } + } + } + else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64) + { /* move up to 8 bytes at a time */ + move_bytes = (bytes > 8) ? 
8 : bytes; + gen_func.movmemsi = gen_movmemsi_2reg; + } + else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT)) + { /* move 4 bytes */ + move_bytes = 4; + mode = SImode; + gen_func.mov = gen_movsi; + } + else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT)) + { /* move 2 bytes */ + move_bytes = 2; + mode = HImode; + gen_func.mov = gen_movhi; + } + else if (TARGET_STRING && bytes > 1) + { /* move up to 4 bytes at a time */ + move_bytes = (bytes > 4) ? 4 : bytes; + gen_func.movmemsi = gen_movmemsi_1reg; + } + else /* move 1 byte at a time */ + { + move_bytes = 1; + mode = QImode; + gen_func.mov = gen_movqi; + } + + src = adjust_address (orig_src, mode, offset); + dest = adjust_address (orig_dest, mode, offset); + + if (mode != BLKmode) + { + rtx tmp_reg = gen_reg_rtx (mode); + + emit_insn ((*gen_func.mov) (tmp_reg, src)); + stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg); + } + + if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes) + { + int i; + for (i = 0; i < num_reg; i++) + emit_insn (stores[i]); + num_reg = 0; + } + + if (mode == BLKmode) + { + /* Move the address into scratch registers. The movmemsi + patterns require zero offset. */ + if (!REG_P (XEXP (src, 0))) + { + rtx src_reg = copy_addr_to_reg (XEXP (src, 0)); + src = replace_equiv_address (src, src_reg); + } + set_mem_size (src, move_bytes); + + if (!REG_P (XEXP (dest, 0))) + { + rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0)); + dest = replace_equiv_address (dest, dest_reg); + } + set_mem_size (dest, move_bytes); + + emit_insn ((*gen_func.movmemsi) (dest, src, + GEN_INT (move_bytes & 31), + align_rtx)); + } + } + + return 1; +} + + +/* Return a string to perform a load_multiple operation. + operands[0] is the vector. + operands[1] is the source address. + operands[2] is the first destination register. */ + +const char * +rs6000_output_load_multiple (rtx operands[3]) +{ + /* We have to handle the case where the pseudo used to contain the address + is assigned to one of the output registers. */ + int i, j; + int words = XVECLEN (operands[0], 0); + rtx xop[10]; + + if (XVECLEN (operands[0], 0) == 1) + return "lwz %2,0(%1)"; + + for (i = 0; i < words; i++) + if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1])) + { + if (i == words-1) + { + xop[0] = GEN_INT (4 * (words-1)); + xop[1] = operands[1]; + xop[2] = operands[2]; + output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop); + return ""; + } + else if (i == 0) + { + xop[0] = GEN_INT (4 * (words-1)); + xop[1] = operands[1]; + xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); + output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop); + return ""; + } + else + { + for (j = 0; j < words; j++) + if (j != i) + { + xop[0] = GEN_INT (j * 4); + xop[1] = operands[1]; + xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j); + output_asm_insn ("lwz %2,%0(%1)", xop); + } + xop[0] = GEN_INT (i * 4); + xop[1] = operands[1]; + output_asm_insn ("lwz %1,%0(%1)", xop); + return ""; + } + } + + return "lswi %2,%1,%N0"; +} + + +/* A validation routine: say whether CODE, a condition code, and MODE + match. The other alternatives either don't make sense or should + never be generated. */ + +void +validate_condition_mode (enum rtx_code code, machine_mode mode) +{ + gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE + || GET_RTX_CLASS (code) == RTX_COMM_COMPARE) + && GET_MODE_CLASS (mode) == MODE_CC); + + /* These don't make sense. 
*/ + gcc_assert ((code != GT && code != LT && code != GE && code != LE) + || mode != CCUNSmode); + + gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU) + || mode == CCUNSmode); + + gcc_assert (mode == CCFPmode + || (code != ORDERED && code != UNORDERED + && code != UNEQ && code != LTGT + && code != UNGT && code != UNLT + && code != UNGE && code != UNLE)); + + /* These should never be generated except for + flag_finite_math_only. */ + gcc_assert (mode != CCFPmode + || flag_finite_math_only + || (code != LE && code != GE + && code != UNEQ && code != LTGT + && code != UNGT && code != UNLT)); + + /* These are invalid; the information is not there. */ + gcc_assert (mode != CCEQmode || code == EQ || code == NE); +} + + +/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, + rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is + not zero, store there the bit offset (counted from the right) where + the single stretch of 1 bits begins; and similarly for B, the bit + offset where it ends. */ + +bool +rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode) +{ + unsigned HOST_WIDE_INT val = INTVAL (mask); + unsigned HOST_WIDE_INT bit; + int nb, ne; + int n = GET_MODE_PRECISION (mode); + + if (mode != DImode && mode != SImode) + return false; + + if (INTVAL (mask) >= 0) + { + bit = val & -val; + ne = exact_log2 (bit); + nb = exact_log2 (val + bit); + } + else if (val + 1 == 0) + { + nb = n; + ne = 0; + } + else if (val & 1) + { + val = ~val; + bit = val & -val; + nb = exact_log2 (bit); + ne = exact_log2 (val + bit); + } + else + { + bit = val & -val; + ne = exact_log2 (bit); + if (val + bit == 0) + nb = n; + else + nb = 0; + } + + nb--; + + if (nb < 0 || ne < 0 || nb >= n || ne >= n) + return false; + + if (b) + *b = nb; + if (e) + *e = ne; + + return true; +} + +/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl, + or rldicr instruction, to implement an AND with it in mode MODE. */ + +bool +rs6000_is_valid_and_mask (rtx mask, machine_mode mode) +{ + int nb, ne; + + if (!rs6000_is_valid_mask (mask, &nb, &ne, mode)) + return false; + + /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that + does not wrap. */ + if (mode == DImode) + return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb)); + + /* For SImode, rlwinm can do everything. */ + if (mode == SImode) + return (nb < 32 && ne < 32); + + return false; +} + +/* Return the instruction template for an AND with mask in mode MODE, with + operands OPERANDS. If DOT is true, make it a record-form instruction. */ + +const char * +rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot) +{ + int nb, ne; + + if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode)) + gcc_unreachable (); + + if (mode == DImode && ne == 0) + { + operands[3] = GEN_INT (63 - nb); + if (dot) + return "rldicl. %0,%1,0,%3"; + return "rldicl %0,%1,0,%3"; + } + + if (mode == DImode && nb == 63) + { + operands[3] = GEN_INT (63 - ne); + if (dot) + return "rldicr. %0,%1,0,%3"; + return "rldicr %0,%1,0,%3"; + } + + if (nb < 32 && ne < 32) + { + operands[3] = GEN_INT (31 - nb); + operands[4] = GEN_INT (31 - ne); + if (dot) + return "rlwinm. %0,%1,0,%3,%4"; + return "rlwinm %0,%1,0,%3,%4"; + } + + gcc_unreachable (); +} + +/* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm, + rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with + shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. 
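+   For example, in SImode the mask 0x0000ff00 together with an ASHIFT by 8
+   is accepted: (x << 8) & 0x0000ff00 is a single rlwinm d,s,8,16,23.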
*/ + +bool +rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode) +{ + int nb, ne; + + if (!rs6000_is_valid_mask (mask, &nb, &ne, mode)) + return false; + + int n = GET_MODE_PRECISION (mode); + int sh = -1; + + if (CONST_INT_P (XEXP (shift, 1))) + { + sh = INTVAL (XEXP (shift, 1)); + if (sh < 0 || sh >= n) + return false; + } + + rtx_code code = GET_CODE (shift); + + /* Convert any shift by 0 to a rotate, to simplify below code. */ + if (sh == 0) + code = ROTATE; + + /* Convert rotate to simple shift if we can, to make analysis simpler. */ + if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh) + code = ASHIFT; + if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh) + { + code = LSHIFTRT; + sh = n - sh; + } + + /* DImode rotates need rld*. */ + if (mode == DImode && code == ROTATE) + return (nb == 63 || ne == 0 || ne == sh); + + /* SImode rotates need rlw*. */ + if (mode == SImode && code == ROTATE) + return (nb < 32 && ne < 32 && sh < 32); + + /* Wrap-around masks are only okay for rotates. */ + if (ne > nb) + return false; + + /* Variable shifts are only okay for rotates. */ + if (sh < 0) + return false; + + /* Don't allow ASHIFT if the mask is wrong for that. */ + if (code == ASHIFT && ne < sh) + return false; + + /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT + if the mask is wrong for that. */ + if (nb < 32 && ne < 32 && sh < 32 + && !(code == LSHIFTRT && nb >= 32 - sh)) + return true; + + /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT + if the mask is wrong for that. */ + if (code == LSHIFTRT) + sh = 64 - sh; + if (nb == 63 || ne == 0 || ne == sh) + return !(code == LSHIFTRT && nb >= sh); + + return false; +} + +/* Return the instruction template for a shift with mask in mode MODE, with + operands OPERANDS. If DOT is true, make it a record-form instruction. */ + +const char * +rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot) +{ + int nb, ne; + + if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode)) + gcc_unreachable (); + + if (mode == DImode && ne == 0) + { + if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2])) + operands[2] = GEN_INT (64 - INTVAL (operands[2])); + operands[3] = GEN_INT (63 - nb); + if (dot) + return "rld%I2cl. %0,%1,%2,%3"; + return "rld%I2cl %0,%1,%2,%3"; + } + + if (mode == DImode && nb == 63) + { + operands[3] = GEN_INT (63 - ne); + if (dot) + return "rld%I2cr. %0,%1,%2,%3"; + return "rld%I2cr %0,%1,%2,%3"; + } + + if (mode == DImode + && GET_CODE (operands[4]) != LSHIFTRT + && CONST_INT_P (operands[2]) + && ne == INTVAL (operands[2])) + { + operands[3] = GEN_INT (63 - nb); + if (dot) + return "rld%I2c. %0,%1,%2,%3"; + return "rld%I2c %0,%1,%2,%3"; + } + + if (nb < 32 && ne < 32) + { + if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2])) + operands[2] = GEN_INT (32 - INTVAL (operands[2])); + operands[3] = GEN_INT (31 - nb); + operands[4] = GEN_INT (31 - ne); + /* This insn can also be a 64-bit rotate with mask that really makes + it just a shift right (with mask); the %h below are to adjust for + that situation (shift count is >= 32 in that case). */ + if (dot) + return "rlw%I2nm. %0,%1,%h2,%3,%4"; + return "rlw%I2nm %0,%1,%h2,%3,%4"; + } + + gcc_unreachable (); +} + +/* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or + rldimi instruction, to implement an insert with shift SHIFT (a ROTATE, + ASHIFT, or LSHIFTRT) in mode MODE. 
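+   For example, in SImode the mask 0x0000ff00 with an ASHIFT by 8 describes
+   inserting the low byte of the source into bits 8..15 of the destination,
+   which is a single rlwimi (rldimi is preferred on 64-bit targets because
+   rlwimi is cracked).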
*/ + +bool +rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode) +{ + int nb, ne; + + if (!rs6000_is_valid_mask (mask, &nb, &ne, mode)) + return false; + + int n = GET_MODE_PRECISION (mode); + + int sh = INTVAL (XEXP (shift, 1)); + if (sh < 0 || sh >= n) + return false; + + rtx_code code = GET_CODE (shift); + + /* Convert any shift by 0 to a rotate, to simplify below code. */ + if (sh == 0) + code = ROTATE; + + /* Convert rotate to simple shift if we can, to make analysis simpler. */ + if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh) + code = ASHIFT; + if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh) + { + code = LSHIFTRT; + sh = n - sh; + } + + /* DImode rotates need rldimi. */ + if (mode == DImode && code == ROTATE) + return (ne == sh); + + /* SImode rotates need rlwimi. */ + if (mode == SImode && code == ROTATE) + return (nb < 32 && ne < 32 && sh < 32); + + /* Wrap-around masks are only okay for rotates. */ + if (ne > nb) + return false; + + /* Don't allow ASHIFT if the mask is wrong for that. */ + if (code == ASHIFT && ne < sh) + return false; + + /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT + if the mask is wrong for that. */ + if (nb < 32 && ne < 32 && sh < 32 + && !(code == LSHIFTRT && nb >= 32 - sh)) + return true; + + /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT + if the mask is wrong for that. */ + if (code == LSHIFTRT) + sh = 64 - sh; + if (ne == sh) + return !(code == LSHIFTRT && nb >= sh); + + return false; +} + +/* Return the instruction template for an insert with mask in mode MODE, with + operands OPERANDS. If DOT is true, make it a record-form instruction. */ + +const char * +rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot) +{ + int nb, ne; + + if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode)) + gcc_unreachable (); + + /* Prefer rldimi because rlwimi is cracked. */ + if (TARGET_POWERPC64 + && (!dot || mode == DImode) + && GET_CODE (operands[4]) != LSHIFTRT + && ne == INTVAL (operands[2])) + { + operands[3] = GEN_INT (63 - nb); + if (dot) + return "rldimi. %0,%1,%2,%3"; + return "rldimi %0,%1,%2,%3"; + } + + if (nb < 32 && ne < 32) + { + if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2])) + operands[2] = GEN_INT (32 - INTVAL (operands[2])); + operands[3] = GEN_INT (31 - nb); + operands[4] = GEN_INT (31 - ne); + if (dot) + return "rlwimi. %0,%1,%2,%3,%4"; + return "rlwimi %0,%1,%2,%3,%4"; + } + + gcc_unreachable (); +} + +/* Return whether an AND with C (a CONST_INT) in mode MODE can be done + using two machine instructions. */ + +bool +rs6000_is_valid_2insn_and (rtx c, machine_mode mode) +{ + /* There are two kinds of AND we can handle with two insns: + 1) those we can do with two rl* insn; + 2) ori[s];xori[s]. + + We do not handle that last case yet. */ + + /* If there is just one stretch of ones, we can do it. */ + if (rs6000_is_valid_mask (c, NULL, NULL, mode)) + return true; + + /* Otherwise, fill in the lowest "hole"; if we can do the result with + one insn, we can do the whole thing with two. */ + unsigned HOST_WIDE_INT val = INTVAL (c); + unsigned HOST_WIDE_INT bit1 = val & -val; + unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val; + unsigned HOST_WIDE_INT val1 = (val + bit1) & val; + unsigned HOST_WIDE_INT bit3 = val1 & -val1; + return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode); +} + +/* Emit the two insns to do an AND in mode MODE, with operands OPERANDS. 
+ If EXPAND is true, split rotate-and-mask instructions we generate to + their constituent parts as well (this is used during expand); if DOT + is 1, make the last insn a record-form instruction clobbering the + destination GPR and setting the CC reg (from operands[3]); if 2, set + that GPR as well as the CC reg. */ + +void +rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot) +{ + gcc_assert (!(expand && dot)); + + unsigned HOST_WIDE_INT val = INTVAL (operands[2]); + + /* If it is one stretch of ones, it is DImode; shift left, mask, then + shift right. This generates better code than doing the masks without + shifts, or shifting first right and then left. */ + int nb, ne; + if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne) + { + gcc_assert (mode == DImode); + + int shift = 63 - nb; + if (expand) + { + rtx tmp1 = gen_reg_rtx (DImode); + rtx tmp2 = gen_reg_rtx (DImode); + emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift))); + emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift))); + emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift))); + } + else + { + rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift)); + tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift)); + emit_move_insn (operands[0], tmp); + tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift)); + rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0); + } + return; + } + + /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1 + that does the rest. */ + unsigned HOST_WIDE_INT bit1 = val & -val; + unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val; + unsigned HOST_WIDE_INT val1 = (val + bit1) & val; + unsigned HOST_WIDE_INT bit3 = val1 & -val1; + + unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1; + unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2; + + gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode)); + + /* Two "no-rotate"-and-mask instructions, for SImode. */ + if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode)) + { + gcc_assert (mode == SImode); + + rtx reg = expand ? gen_reg_rtx (mode) : operands[0]; + rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1)); + emit_move_insn (reg, tmp); + tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2)); + rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0); + return; + } + + gcc_assert (mode == DImode); + + /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm + insns; we have to do the first in SImode, because it wraps. */ + if (mask2 <= 0xffffffff + && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode)) + { + rtx reg = expand ? gen_reg_rtx (mode) : operands[0]; + rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]), + GEN_INT (mask1)); + rtx reg_low = gen_lowpart (SImode, reg); + emit_move_insn (reg_low, tmp); + tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2)); + rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0); + return; + } + + /* Two rld* insns: rotate, clear the hole in the middle (which now is + at the top end), rotate back and clear the other hole. */ + int right = exact_log2 (bit3); + int left = 64 - right; + + /* Rotate the mask too. 
*/ + mask1 = (mask1 >> right) | ((bit2 - 1) << left); + + if (expand) + { + rtx tmp1 = gen_reg_rtx (DImode); + rtx tmp2 = gen_reg_rtx (DImode); + rtx tmp3 = gen_reg_rtx (DImode); + emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left))); + emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1))); + emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right))); + emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2))); + } + else + { + rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left)); + tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1)); + emit_move_insn (operands[0], tmp); + tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right)); + tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2)); + rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0); + } +} + +/* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates + for lfq and stfq insns iff the registers are hard registers. */ + +int +registers_ok_for_quad_peep (rtx reg1, rtx reg2) +{ + /* We might have been passed a SUBREG. */ + if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG) + return 0; + + /* We might have been passed non floating point registers. */ + if (!FP_REGNO_P (REGNO (reg1)) + || !FP_REGNO_P (REGNO (reg2))) + return 0; + + return (REGNO (reg1) == REGNO (reg2) - 1); +} + +/* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn. + addr1 and addr2 must be in consecutive memory locations + (addr2 == addr1 + 8). */ + +int +mems_ok_for_quad_peep (rtx mem1, rtx mem2) +{ + rtx addr1, addr2; + unsigned int reg1, reg2; + int offset1, offset2; + + /* The mems cannot be volatile. */ + if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) + return 0; + + addr1 = XEXP (mem1, 0); + addr2 = XEXP (mem2, 0); + + /* Extract an offset (if used) from the first addr. */ + if (GET_CODE (addr1) == PLUS) + { + /* If not a REG, return zero. */ + if (GET_CODE (XEXP (addr1, 0)) != REG) + return 0; + else + { + reg1 = REGNO (XEXP (addr1, 0)); + /* The offset must be constant! */ + if (GET_CODE (XEXP (addr1, 1)) != CONST_INT) + return 0; + offset1 = INTVAL (XEXP (addr1, 1)); + } + } + else if (GET_CODE (addr1) != REG) + return 0; + else + { + reg1 = REGNO (addr1); + /* This was a simple (mem (reg)) expression. Offset is 0. */ + offset1 = 0; + } + + /* And now for the second addr. */ + if (GET_CODE (addr2) == PLUS) + { + /* If not a REG, return zero. */ + if (GET_CODE (XEXP (addr2, 0)) != REG) + return 0; + else + { + reg2 = REGNO (XEXP (addr2, 0)); + /* The offset must be constant. */ + if (GET_CODE (XEXP (addr2, 1)) != CONST_INT) + return 0; + offset2 = INTVAL (XEXP (addr2, 1)); + } + } + else if (GET_CODE (addr2) != REG) + return 0; + else + { + reg2 = REGNO (addr2); + /* This was a simple (mem (reg)) expression. Offset is 0. */ + offset2 = 0; + } + + /* Both of these must have the same base register. */ + if (reg1 != reg2) + return 0; + + /* The offset for the second addr must be 8 more than the first addr. */ + if (offset2 != offset1 + 8) + return 0; + + /* All the tests passed. addr1 and addr2 are valid for lfq or stfq + instructions. 
*/ + return 1; +} + + +rtx +rs6000_secondary_memory_needed_rtx (machine_mode mode) +{ + static bool eliminated = false; + rtx ret; + + if (mode != SDmode || TARGET_NO_SDMODE_STACK) + ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0); + else + { + rtx mem = cfun->machine->sdmode_stack_slot; + gcc_assert (mem != NULL_RTX); + + if (!eliminated) + { + mem = eliminate_regs (mem, VOIDmode, NULL_RTX); + cfun->machine->sdmode_stack_slot = mem; + eliminated = true; + } + ret = mem; + } + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n", + GET_MODE_NAME (mode)); + if (!ret) + fprintf (stderr, "\tNULL_RTX\n"); + else + debug_rtx (ret); + } + + return ret; +} + +/* Return the mode to be used for memory when a secondary memory + location is needed. For SDmode values we need to use DDmode, in + all other cases we can use the same mode. */ +machine_mode +rs6000_secondary_memory_needed_mode (machine_mode mode) +{ + if (lra_in_progress && mode == SDmode) + return DDmode; + return mode; +} + +static tree +rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED) +{ + /* Don't walk into types. */ + if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp)) + { + *walk_subtrees = 0; + return NULL_TREE; + } + + switch (TREE_CODE (*tp)) + { + case VAR_DECL: + case PARM_DECL: + case FIELD_DECL: + case RESULT_DECL: + case SSA_NAME: + case REAL_CST: + case MEM_REF: + case VIEW_CONVERT_EXPR: + if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode) + return *tp; + break; + default: + break; + } + + return NULL_TREE; +} + +/* Classify a register type. Because the FMRGOW/FMRGEW instructions only work + on traditional floating point registers, and the VMRGOW/VMRGEW instructions + only work on the traditional altivec registers, note if an altivec register + was chosen. */ + +static enum rs6000_reg_type +register_to_reg_type (rtx reg, bool *is_altivec) +{ + HOST_WIDE_INT regno; + enum reg_class rclass; + + if (GET_CODE (reg) == SUBREG) + reg = SUBREG_REG (reg); + + if (!REG_P (reg)) + return NO_REG_TYPE; + + regno = REGNO (reg); + if (regno >= FIRST_PSEUDO_REGISTER) + { + if (!lra_in_progress && !reload_in_progress && !reload_completed) + return PSEUDO_REG_TYPE; + + regno = true_regnum (reg); + if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER) + return PSEUDO_REG_TYPE; + } + + gcc_assert (regno >= 0); + + if (is_altivec && ALTIVEC_REGNO_P (regno)) + *is_altivec = true; + + rclass = rs6000_regno_regclass[regno]; + return reg_class_to_reg_type[(int)rclass]; +} + +/* Helper function to return the cost of adding a TOC entry address. */ + +static inline int +rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask) +{ + int ret; + + if (TARGET_CMODEL != CMODEL_SMALL) + ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2; + + else + ret = (TARGET_MINIMAL_TOC) ? 6 : 3; + + return ret; +} + +/* Helper function for rs6000_secondary_reload to determine whether the memory + address (ADDR) with a given register class (RCLASS) and machine mode (MODE) + needs reloading. Return negative if the memory is not handled by the memory + helper functions and to try a different reload method, 0 if no additional + instructions are need, and positive to give the extra cost for the + memory. 
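+   For example, an auto-increment (PRE_INC) address used with a register
+   class that has no update form costs one extra instruction, while an
+   address form these helpers cannot handle at all returns -1.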
*/ + +static int +rs6000_secondary_reload_memory (rtx addr, + enum reg_class rclass, + machine_mode mode) +{ + int extra_cost = 0; + rtx reg, and_arg, plus_arg0, plus_arg1; + addr_mask_type addr_mask; + const char *type = NULL; + const char *fail_msg = NULL; + + if (GPR_REG_CLASS_P (rclass)) + addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR]; + + else if (rclass == FLOAT_REGS) + addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR]; + + else if (rclass == ALTIVEC_REGS) + addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX]; + + /* For the combined VSX_REGS, turn off Altivec AND -16. */ + else if (rclass == VSX_REGS) + addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX] + & ~RELOAD_REG_AND_M16); + + /* If the register allocator hasn't made up its mind yet on the register + class to use, settle on defaults to use. */ + else if (rclass == NO_REGS) + { + addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY] + & ~RELOAD_REG_AND_M16); + + if ((addr_mask & RELOAD_REG_MULTIPLE) != 0) + addr_mask &= ~(RELOAD_REG_INDEXED + | RELOAD_REG_PRE_INCDEC + | RELOAD_REG_PRE_MODIFY); + } + + else + addr_mask = 0; + + /* If the register isn't valid in this register class, just return now. */ + if ((addr_mask & RELOAD_REG_VALID) == 0) + { + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, + "rs6000_secondary_reload_memory: mode = %s, class = %s, " + "not valid in class\n", + GET_MODE_NAME (mode), reg_class_names[rclass]); + debug_rtx (addr); + } + + return -1; + } + + switch (GET_CODE (addr)) + { + /* Does the register class supports auto update forms for this mode? We + don't need a scratch register, since the powerpc only supports + PRE_INC, PRE_DEC, and PRE_MODIFY. */ + case PRE_INC: + case PRE_DEC: + reg = XEXP (addr, 0); + if (!base_reg_operand (addr, GET_MODE (reg))) + { + fail_msg = "no base register #1"; + extra_cost = -1; + } + + else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0) + { + extra_cost = 1; + type = "update"; + } + break; + + case PRE_MODIFY: + reg = XEXP (addr, 0); + plus_arg1 = XEXP (addr, 1); + if (!base_reg_operand (reg, GET_MODE (reg)) + || GET_CODE (plus_arg1) != PLUS + || !rtx_equal_p (reg, XEXP (plus_arg1, 0))) + { + fail_msg = "bad PRE_MODIFY"; + extra_cost = -1; + } + + else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0) + { + extra_cost = 1; + type = "update"; + } + break; + + /* Do we need to simulate AND -16 to clear the bottom address bits used + in VMX load/stores? Only allow the AND for vector sizes. */ + case AND: + and_arg = XEXP (addr, 0); + if (GET_MODE_SIZE (mode) != 16 + || GET_CODE (XEXP (addr, 1)) != CONST_INT + || INTVAL (XEXP (addr, 1)) != -16) + { + fail_msg = "bad Altivec AND #1"; + extra_cost = -1; + } + + if (rclass != ALTIVEC_REGS) + { + if (legitimate_indirect_address_p (and_arg, false)) + extra_cost = 1; + + else if (legitimate_indexed_address_p (and_arg, false)) + extra_cost = 2; + + else + { + fail_msg = "bad Altivec AND #2"; + extra_cost = -1; + } + + type = "and"; + } + break; + + /* If this is an indirect address, make sure it is a base register. */ + case REG: + case SUBREG: + if (!legitimate_indirect_address_p (addr, false)) + { + extra_cost = 1; + type = "move"; + } + break; + + /* If this is an indexed address, make sure the register class can handle + indexed addresses for this mode. */ + case PLUS: + plus_arg0 = XEXP (addr, 0); + plus_arg1 = XEXP (addr, 1); + + /* (plus (plus (reg) (constant)) (constant)) is generated during + push_reload processing, so handle it now. 
*/ + if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1)) + { + if ((addr_mask & RELOAD_REG_OFFSET) == 0) + { + extra_cost = 1; + type = "offset"; + } + } + + /* (plus (plus (reg) (constant)) (reg)) is also generated during + push_reload processing, so handle it now. */ + else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1)) + { + if ((addr_mask & RELOAD_REG_INDEXED) == 0) + { + extra_cost = 1; + type = "indexed #2"; + } + } + + else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0))) + { + fail_msg = "no base register #2"; + extra_cost = -1; + } + + else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1))) + { + if ((addr_mask & RELOAD_REG_INDEXED) == 0 + || !legitimate_indexed_address_p (addr, false)) + { + extra_cost = 1; + type = "indexed"; + } + } + + else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0 + && CONST_INT_P (plus_arg1)) + { + if (!quad_address_offset_p (INTVAL (plus_arg1))) + { + extra_cost = 1; + type = "vector d-form offset"; + } + } + + /* Make sure the register class can handle offset addresses. */ + else if (rs6000_legitimate_offset_address_p (mode, addr, false, true)) + { + if ((addr_mask & RELOAD_REG_OFFSET) == 0) + { + extra_cost = 1; + type = "offset #2"; + } + } + + else + { + fail_msg = "bad PLUS"; + extra_cost = -1; + } + + break; + + case LO_SUM: + /* Quad offsets are restricted and can't handle normal addresses. */ + if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0) + { + extra_cost = -1; + type = "vector d-form lo_sum"; + } + + else if (!legitimate_lo_sum_address_p (mode, addr, false)) + { + fail_msg = "bad LO_SUM"; + extra_cost = -1; + } + + if ((addr_mask & RELOAD_REG_OFFSET) == 0) + { + extra_cost = 1; + type = "lo_sum"; + } + break; + + /* Static addresses need to create a TOC entry. */ + case CONST: + case SYMBOL_REF: + case LABEL_REF: + if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0) + { + extra_cost = -1; + type = "vector d-form lo_sum #2"; + } + + else + { + type = "address"; + extra_cost = rs6000_secondary_reload_toc_costs (addr_mask); + } + break; + + /* TOC references look like offsetable memory. */ + case UNSPEC: + if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL) + { + fail_msg = "bad UNSPEC"; + extra_cost = -1; + } + + else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0) + { + extra_cost = -1; + type = "vector d-form lo_sum #3"; + } + + else if ((addr_mask & RELOAD_REG_OFFSET) == 0) + { + extra_cost = 1; + type = "toc reference"; + } + break; + + default: + { + fail_msg = "bad address"; + extra_cost = -1; + } + } + + if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */) + { + if (extra_cost < 0) + fprintf (stderr, + "rs6000_secondary_reload_memory error: mode = %s, " + "class = %s, addr_mask = '%s', %s\n", + GET_MODE_NAME (mode), + reg_class_names[rclass], + rs6000_debug_addr_mask (addr_mask, false), + (fail_msg != NULL) ? fail_msg : "<bad address>"); + + else + fprintf (stderr, + "rs6000_secondary_reload_memory: mode = %s, class = %s, " + "addr_mask = '%s', extra cost = %d, %s\n", + GET_MODE_NAME (mode), + reg_class_names[rclass], + rs6000_debug_addr_mask (addr_mask, false), + extra_cost, + (type) ? type : "<none>"); + + debug_rtx (addr); + } + + return extra_cost; +} + +/* Helper function for rs6000_secondary_reload to return true if a move to a + different register classe is really a simple move. 
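+   For example, a 64-bit (DImode) value moved between a GPR and a VSX
+   register on a 64-bit ISA 2.07 target is a single mtvsrd/mfvsrd, so no
+   extra registers are needed.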
*/ + +static bool +rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, + enum rs6000_reg_type from_type, + machine_mode mode) +{ + int size = GET_MODE_SIZE (mode); + + /* Add support for various direct moves available. In this function, we only + look at cases where we don't need any extra registers, and one or more + simple move insns are issued. Originally small integers are not allowed + in FPR/VSX registers. Single precision binary floating is not a simple + move because we need to convert to the single precision memory layout. + The 4-byte SDmode can be moved. TDmode values are disallowed since they + need special direct move handling, which we do not support yet. */ + if (TARGET_DIRECT_MOVE + && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) + || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE))) + { + if (TARGET_POWERPC64) + { + /* ISA 2.07: MTVSRD or MVFVSRD. */ + if (size == 8) + return true; + + /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */ + if (size == 16 && TARGET_P9_VECTOR && mode != TDmode) + return true; + } + + /* ISA 2.07: MTVSRWZ or MFVSRWZ. */ + if (TARGET_VSX_SMALL_INTEGER) + { + if (mode == SImode) + return true; + + if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode)) + return true; + } + + /* ISA 2.07: MTVSRWZ or MFVSRWZ. */ + if (mode == SDmode) + return true; + } + + /* Power6+: MFTGPR or MFFGPR. */ + else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8 + && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE) + || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE))) + return true; + + /* Move to/from SPR. */ + else if ((size == 4 || (TARGET_POWERPC64 && size == 8)) + && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE) + || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE))) + return true; + + return false; +} + +/* Direct move helper function for rs6000_secondary_reload, handle all of the + special direct moves that involve allocating an extra register, return the + insn code of the helper function if there is such a function or + CODE_FOR_nothing if not. */ + +static bool +rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type, + enum rs6000_reg_type from_type, + machine_mode mode, + secondary_reload_info *sri, + bool altivec_p) +{ + bool ret = false; + enum insn_code icode = CODE_FOR_nothing; + int cost = 0; + int size = GET_MODE_SIZE (mode); + + if (TARGET_POWERPC64 && size == 16) + { + /* Handle moving 128-bit values from GPRs to VSX point registers on + ISA 2.07 (power8, power9) when running in 64-bit mode using + XXPERMDI to glue the two 64-bit values back together. */ + if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) + { + cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */ + icode = reg_addr[mode].reload_vsx_gpr; + } + + /* Handle moving 128-bit values from VSX point registers to GPRs on + ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the + bottom 64-bit value. */ + else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) + { + cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */ + icode = reg_addr[mode].reload_gpr_vsx; + } + } + + else if (TARGET_POWERPC64 && mode == SFmode) + { + if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) + { + cost = 3; /* xscvdpspn, mfvsrd, and. */ + icode = reg_addr[mode].reload_gpr_vsx; + } + + else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) + { + cost = 2; /* mtvsrz, xscvspdpn. 
*/ + icode = reg_addr[mode].reload_vsx_gpr; + } + } + + else if (!TARGET_POWERPC64 && size == 8) + { + /* Handle moving 64-bit values from GPRs to floating point registers on + ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two + 32-bit values back together. Altivec register classes must be handled + specially since a different instruction is used, and the secondary + reload support requires a single instruction class in the scratch + register constraint. However, right now TFmode is not allowed in + Altivec registers, so the pattern will never match. */ + if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p) + { + cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */ + icode = reg_addr[mode].reload_fpr_gpr; + } + } + + if (icode != CODE_FOR_nothing) + { + ret = true; + if (sri) + { + sri->icode = icode; + sri->extra_cost = cost; + } + } + + return ret; +} + +/* Return whether a move between two register classes can be done either + directly (simple move) or via a pattern that uses a single extra temporary + (using ISA 2.07's direct move in this case. */ + +static bool +rs6000_secondary_reload_move (enum rs6000_reg_type to_type, + enum rs6000_reg_type from_type, + machine_mode mode, + secondary_reload_info *sri, + bool altivec_p) +{ + /* Fall back to load/store reloads if either type is not a register. */ + if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE) + return false; + + /* If we haven't allocated registers yet, assume the move can be done for the + standard register types. */ + if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE) + || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type)) + || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type))) + return true; + + /* Moves to the same set of registers is a simple move for non-specialized + registers. */ + if (to_type == from_type && IS_STD_REG_TYPE (to_type)) + return true; + + /* Check whether a simple move can be done directly. */ + if (rs6000_secondary_reload_simple_move (to_type, from_type, mode)) + { + if (sri) + { + sri->icode = CODE_FOR_nothing; + sri->extra_cost = 0; + } + return true; + } + + /* Now check if we can do it in a few steps. */ + return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri, + altivec_p); +} + +/* Inform reload about cases where moving X with a mode MODE to a register in + RCLASS requires an extra scratch or immediate register. Return the class + needed for the immediate register. + + For VSX and Altivec, we may need a register to convert sp+offset into + reg+sp. + + For misaligned 64-bit gpr loads and stores we need a register to + convert an offset address to indirect. */ + +static reg_class_t +rs6000_secondary_reload (bool in_p, + rtx x, + reg_class_t rclass_i, + machine_mode mode, + secondary_reload_info *sri) +{ + enum reg_class rclass = (enum reg_class) rclass_i; + reg_class_t ret = ALL_REGS; + enum insn_code icode; + bool default_p = false; + bool done_p = false; + + /* Allow subreg of memory before/during reload. */ + bool memory_p = (MEM_P (x) + || (!reload_completed && GET_CODE (x) == SUBREG + && MEM_P (SUBREG_REG (x)))); + + sri->icode = CODE_FOR_nothing; + sri->t_icode = CODE_FOR_nothing; + sri->extra_cost = 0; + icode = ((in_p) + ? 
reg_addr[mode].reload_load + : reg_addr[mode].reload_store); + + if (REG_P (x) || register_operand (x, mode)) + { + enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass]; + bool altivec_p = (rclass == ALTIVEC_REGS); + enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p); + + if (!in_p) + std::swap (to_type, from_type); + + /* Can we do a direct move of some sort? */ + if (rs6000_secondary_reload_move (to_type, from_type, mode, sri, + altivec_p)) + { + icode = (enum insn_code)sri->icode; + default_p = false; + done_p = true; + ret = NO_REGS; + } + } + + /* Make sure 0.0 is not reloaded or forced into memory. */ + if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass)) + { + ret = NO_REGS; + default_p = false; + done_p = true; + } + + /* If this is a scalar floating point value and we want to load it into the + traditional Altivec registers, do it via a move via a traditional floating + point register, unless we have D-form addressing. Also make sure that + non-zero constants use a FPR. */ + if (!done_p && reg_addr[mode].scalar_in_vmx_p + && !mode_supports_vmx_dform (mode) + && (rclass == VSX_REGS || rclass == ALTIVEC_REGS) + && (memory_p || (GET_CODE (x) == CONST_DOUBLE))) + { + ret = FLOAT_REGS; + default_p = false; + done_p = true; + } + + /* Handle reload of load/stores if we have reload helper functions. */ + if (!done_p && icode != CODE_FOR_nothing && memory_p) + { + int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass, + mode); + + if (extra_cost >= 0) + { + done_p = true; + ret = NO_REGS; + if (extra_cost > 0) + { + sri->extra_cost = extra_cost; + sri->icode = icode; + } + } + } + + /* Handle unaligned loads and stores of integer registers. */ + if (!done_p && TARGET_POWERPC64 + && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE + && memory_p + && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD) + { + rtx addr = XEXP (x, 0); + rtx off = address_offset (addr); + + if (off != NULL_RTX) + { + unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD; + unsigned HOST_WIDE_INT offset = INTVAL (off); + + /* We need a secondary reload when our legitimate_address_p + says the address is good (as otherwise the entire address + will be reloaded), and the offset is not a multiple of + four or we have an address wrap. Address wrap will only + occur for LO_SUMs since legitimate_offset_address_p + rejects addresses for 16-byte mems that will wrap. */ + if (GET_CODE (addr) == LO_SUM + ? (1 /* legitimate_address_p allows any offset for lo_sum */ + && ((offset & 3) != 0 + || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra)) + : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */ + && (offset & 3) != 0)) + { + /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */ + if (in_p) + sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load + : CODE_FOR_reload_di_load); + else + sri->icode = ((TARGET_32BIT) ? 
CODE_FOR_reload_si_store + : CODE_FOR_reload_di_store); + sri->extra_cost = 2; + ret = NO_REGS; + done_p = true; + } + else + default_p = true; + } + else + default_p = true; + } + + if (!done_p && !TARGET_POWERPC64 + && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE + && memory_p + && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) + { + rtx addr = XEXP (x, 0); + rtx off = address_offset (addr); + + if (off != NULL_RTX) + { + unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD; + unsigned HOST_WIDE_INT offset = INTVAL (off); + + /* We need a secondary reload when our legitimate_address_p + says the address is good (as otherwise the entire address + will be reloaded), and we have a wrap. + + legitimate_lo_sum_address_p allows LO_SUM addresses to + have any offset so test for wrap in the low 16 bits. + + legitimate_offset_address_p checks for the range + [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7] + for mode size of 16. We wrap at [0x7ffc,0x7fff] and + [0x7ff4,0x7fff] respectively, so test for the + intersection of these ranges, [0x7ffc,0x7fff] and + [0x7ff4,0x7ff7] respectively. + + Note that the address we see here may have been + manipulated by legitimize_reload_address. */ + if (GET_CODE (addr) == LO_SUM + ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra + : offset - (0x8000 - extra) < UNITS_PER_WORD) + { + if (in_p) + sri->icode = CODE_FOR_reload_si_load; + else + sri->icode = CODE_FOR_reload_si_store; + sri->extra_cost = 2; + ret = NO_REGS; + done_p = true; + } + else + default_p = true; + } + else + default_p = true; + } + + if (!done_p) + default_p = true; + + if (default_p) + ret = default_secondary_reload (in_p, x, rclass, mode, sri); + + gcc_assert (ret != ALL_REGS); + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, + "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, " + "mode = %s", + reg_class_names[ret], + in_p ? "true" : "false", + reg_class_names[rclass], + GET_MODE_NAME (mode)); + + if (reload_completed) + fputs (", after reload", stderr); + + if (!done_p) + fputs (", done_p not set", stderr); + + if (default_p) + fputs (", default secondary reload", stderr); + + if (sri->icode != CODE_FOR_nothing) + fprintf (stderr, ", reload func = %s, extra cost = %d", + insn_data[sri->icode].name, sri->extra_cost); + + else if (sri->extra_cost > 0) + fprintf (stderr, ", extra cost = %d", sri->extra_cost); + + fputs ("\n", stderr); + debug_rtx (x); + } + + return ret; +} + +/* Better tracing for rs6000_secondary_reload_inner. */ + +static void +rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch, + bool store_p) +{ + rtx set, clobber; + + gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX); + + fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line, + store_p ? "store" : "load"); + + if (store_p) + set = gen_rtx_SET (mem, reg); + else + set = gen_rtx_SET (reg, mem); + + clobber = gen_rtx_CLOBBER (VOIDmode, scratch); + debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber))); +} + +static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool) + ATTRIBUTE_NORETURN; + +static void +rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch, + bool store_p) +{ + rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p); + gcc_unreachable (); +} + +/* Fixup reload addresses for values in GPR, FPR, and VMX registers that have + reload helper functions. 
These were identified in + rs6000_secondary_reload_memory, and if reload decided to use the secondary + reload, it calls the insns: + reload_<RELOAD:mode>_<P:mptrsize>_store + reload_<RELOAD:mode>_<P:mptrsize>_load + + which in turn calls this function, to do whatever is necessary to create + valid addresses. */ + +void +rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p) +{ + int regno = true_regnum (reg); + machine_mode mode = GET_MODE (reg); + addr_mask_type addr_mask; + rtx addr; + rtx new_addr; + rtx op_reg, op0, op1; + rtx and_op; + rtx cc_clobber; + rtvec rv; + + if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem) + || !base_reg_operand (scratch, GET_MODE (scratch))) + rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); + + if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)) + addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR]; + + else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO)) + addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR]; + + else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO)) + addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX]; + + else + rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); + + /* Make sure the mode is valid in this register class. */ + if ((addr_mask & RELOAD_REG_VALID) == 0) + rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); + + if (TARGET_DEBUG_ADDR) + rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p); + + new_addr = addr = XEXP (mem, 0); + switch (GET_CODE (addr)) + { + /* Does the register class support auto update forms for this mode? If + not, do the update now. We don't need a scratch register, since the + powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */ + case PRE_INC: + case PRE_DEC: + op_reg = XEXP (addr, 0); + if (!base_reg_operand (op_reg, Pmode)) + rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); + + if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0) + { + emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode)))); + new_addr = op_reg; + } + break; + + case PRE_MODIFY: + op0 = XEXP (addr, 0); + op1 = XEXP (addr, 1); + if (!base_reg_operand (op0, Pmode) + || GET_CODE (op1) != PLUS + || !rtx_equal_p (op0, XEXP (op1, 0))) + rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); + + if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0) + { + emit_insn (gen_rtx_SET (op0, op1)); + new_addr = reg; + } + break; + + /* Do we need to simulate AND -16 to clear the bottom address bits used + in VMX load/stores? */ + case AND: + op0 = XEXP (addr, 0); + op1 = XEXP (addr, 1); + if ((addr_mask & RELOAD_REG_AND_M16) == 0) + { + if (REG_P (op0) || GET_CODE (op0) == SUBREG) + op_reg = op0; + + else if (GET_CODE (op1) == PLUS) + { + emit_insn (gen_rtx_SET (scratch, op1)); + op_reg = scratch; + } + + else + rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); + + and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1); + cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode)); + rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber); + emit_insn (gen_rtx_PARALLEL (VOIDmode, rv)); + new_addr = scratch; + } + break; + + /* If this is an indirect address, make sure it is a base register. 
*/ + case REG: + case SUBREG: + if (!base_reg_operand (addr, GET_MODE (addr))) + { + emit_insn (gen_rtx_SET (scratch, addr)); + new_addr = scratch; + } + break; + + /* If this is an indexed address, make sure the register class can handle + indexed addresses for this mode. */ + case PLUS: + op0 = XEXP (addr, 0); + op1 = XEXP (addr, 1); + if (!base_reg_operand (op0, Pmode)) + rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); + + else if (int_reg_operand (op1, Pmode)) + { + if ((addr_mask & RELOAD_REG_INDEXED) == 0) + { + emit_insn (gen_rtx_SET (scratch, addr)); + new_addr = scratch; + } + } + + else if (mode_supports_vsx_dform_quad (mode) && CONST_INT_P (op1)) + { + if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0) + || !quad_address_p (addr, mode, false)) + { + emit_insn (gen_rtx_SET (scratch, addr)); + new_addr = scratch; + } + } + + /* Make sure the register class can handle offset addresses. */ + else if (rs6000_legitimate_offset_address_p (mode, addr, false, true)) + { + if ((addr_mask & RELOAD_REG_OFFSET) == 0) + { + emit_insn (gen_rtx_SET (scratch, addr)); + new_addr = scratch; + } + } + + else + rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); + + break; + + case LO_SUM: + op0 = XEXP (addr, 0); + op1 = XEXP (addr, 1); + if (!base_reg_operand (op0, Pmode)) + rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); + + else if (int_reg_operand (op1, Pmode)) + { + if ((addr_mask & RELOAD_REG_INDEXED) == 0) + { + emit_insn (gen_rtx_SET (scratch, addr)); + new_addr = scratch; + } + } + + /* Quad offsets are restricted and can't handle normal addresses. */ + else if (mode_supports_vsx_dform_quad (mode)) + { + emit_insn (gen_rtx_SET (scratch, addr)); + new_addr = scratch; + } + + /* Make sure the register class can handle offset addresses. */ + else if (legitimate_lo_sum_address_p (mode, addr, false)) + { + if ((addr_mask & RELOAD_REG_OFFSET) == 0) + { + emit_insn (gen_rtx_SET (scratch, addr)); + new_addr = scratch; + } + } + + else + rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); + + break; + + case SYMBOL_REF: + case CONST: + case LABEL_REF: + rs6000_emit_move (scratch, addr, Pmode); + new_addr = scratch; + break; + + default: + rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); + } + + /* Adjust the address if it changed. */ + if (addr != new_addr) + { + mem = replace_equiv_address_nv (mem, new_addr); + if (TARGET_DEBUG_ADDR) + fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n"); + } + + /* Now create the move. */ + if (store_p) + emit_insn (gen_rtx_SET (mem, reg)); + else + emit_insn (gen_rtx_SET (reg, mem)); + + return; +} + +/* Convert reloads involving 64-bit gprs and misaligned offset + addressing, or multiple 32-bit gprs and offsets that are too large, + to use indirect addressing. */ + +void +rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p) +{ + int regno = true_regnum (reg); + enum reg_class rclass; + rtx addr; + rtx scratch_or_premodify = scratch; + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n", + store_p ? 
"store" : "load"); + fprintf (stderr, "reg:\n"); + debug_rtx (reg); + fprintf (stderr, "mem:\n"); + debug_rtx (mem); + fprintf (stderr, "scratch:\n"); + debug_rtx (scratch); + } + + gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER); + gcc_assert (GET_CODE (mem) == MEM); + rclass = REGNO_REG_CLASS (regno); + gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS); + addr = XEXP (mem, 0); + + if (GET_CODE (addr) == PRE_MODIFY) + { + gcc_assert (REG_P (XEXP (addr, 0)) + && GET_CODE (XEXP (addr, 1)) == PLUS + && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0)); + scratch_or_premodify = XEXP (addr, 0); + if (!HARD_REGISTER_P (scratch_or_premodify)) + /* If we have a pseudo here then reload will have arranged + to have it replaced, but only in the original insn. + Use the replacement here too. */ + scratch_or_premodify = find_replacement (&XEXP (addr, 0)); + + /* RTL emitted by rs6000_secondary_reload_gpr uses RTL + expressions from the original insn, without unsharing them. + Any RTL that points into the original insn will of course + have register replacements applied. That is why we don't + need to look for replacements under the PLUS. */ + addr = XEXP (addr, 1); + } + gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM); + + rs6000_emit_move (scratch_or_premodify, addr, Pmode); + + mem = replace_equiv_address_nv (mem, scratch_or_premodify); + + /* Now create the move. */ + if (store_p) + emit_insn (gen_rtx_SET (mem, reg)); + else + emit_insn (gen_rtx_SET (reg, mem)); + + return; +} + +/* Allocate a 64-bit stack slot to be used for copying SDmode values through if + this function has any SDmode references. If we are on a power7 or later, we + don't need the 64-bit stack slot since the LFIWZX and STIFWX instructions + can load/store the value. */ + +static void +rs6000_alloc_sdmode_stack_slot (void) +{ + tree t; + basic_block bb; + gimple_stmt_iterator gsi; + + gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX); + /* We use a different approach for dealing with the secondary + memory in LRA. */ + if (ira_use_lra_p) + return; + + if (TARGET_NO_SDMODE_STACK) + return; + + FOR_EACH_BB_FN (bb, cfun) + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL); + if (ret) + { + rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0); + cfun->machine->sdmode_stack_slot = adjust_address_nv (stack, + SDmode, 0); + return; + } + } + + /* Check for any SDmode parameters of the function. */ + for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t)) + { + if (TREE_TYPE (t) == error_mark_node) + continue; + + if (TYPE_MODE (TREE_TYPE (t)) == SDmode + || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode) + { + rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0); + cfun->machine->sdmode_stack_slot = adjust_address_nv (stack, + SDmode, 0); + return; + } + } +} + +static void +rs6000_instantiate_decls (void) +{ + if (cfun->machine->sdmode_stack_slot != NULL_RTX) + instantiate_decl_rtl (cfun->machine->sdmode_stack_slot); +} + +/* Given an rtx X being reloaded into a reg required to be + in class CLASS, return the class of reg to actually use. + In general this is just CLASS; but on some machines + in some cases it is preferable to use a more restrictive class. + + On the RS/6000, we have to return NO_REGS when we want to reload a + floating-point CONST_DOUBLE to force it to be copied to memory. 
+ + We also don't want to reload integer values into floating-point + registers if we can at all help it. In fact, this can + cause reload to die, if it tries to generate a reload of CTR + into a FP register and discovers it doesn't have the memory location + required. + + ??? Would it be a good idea to have reload do the converse, that is + try to reload floating modes into FP registers if possible? + */ + +static enum reg_class +rs6000_preferred_reload_class (rtx x, enum reg_class rclass) +{ + machine_mode mode = GET_MODE (x); + bool is_constant = CONSTANT_P (x); + + /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred + reload class for it. */ + if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS) + && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0) + return NO_REGS; + + if ((rclass == FLOAT_REGS || rclass == VSX_REGS) + && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0) + return NO_REGS; + + /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow + the reloading of address expressions using PLUS into floating point + registers. */ + if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS) + { + if (is_constant) + { + /* Zero is always allowed in all VSX registers. */ + if (x == CONST0_RTX (mode)) + return rclass; + + /* If this is a vector constant that can be formed with a few Altivec + instructions, we want altivec registers. */ + if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode)) + return ALTIVEC_REGS; + + /* If this is an integer constant that can easily be loaded into + vector registers, allow it. */ + if (CONST_INT_P (x)) + { + HOST_WIDE_INT value = INTVAL (x); + + /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA + 2.06 can generate it in the Altivec registers with + VSPLTI<x>. */ + if (value == -1) + { + if (TARGET_P8_VECTOR) + return rclass; + else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS) + return ALTIVEC_REGS; + else + return NO_REGS; + } + + /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and + a sign extend in the Altivec registers. */ + if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR + && TARGET_VSX_SMALL_INTEGER + && (rclass == ALTIVEC_REGS || rclass == VSX_REGS)) + return ALTIVEC_REGS; + } + + /* Force constant to memory. */ + return NO_REGS; + } + + /* D-form addressing can easily reload the value. */ + if (mode_supports_vmx_dform (mode) + || mode_supports_vsx_dform_quad (mode)) + return rclass; + + /* If this is a scalar floating point value and we don't have D-form + addressing, prefer the traditional floating point registers so that we + can use D-form (register+offset) addressing. */ + if (rclass == VSX_REGS + && (mode == SFmode || GET_MODE_SIZE (mode) == 8)) + return FLOAT_REGS; + + /* Prefer the Altivec registers if Altivec is handling the vector + operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec + loads. */ + if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode) + || mode == V1TImode) + return ALTIVEC_REGS; + + return rclass; + } + + if (is_constant || GET_CODE (x) == PLUS) + { + if (reg_class_subset_p (GENERAL_REGS, rclass)) + return GENERAL_REGS; + if (reg_class_subset_p (BASE_REGS, rclass)) + return BASE_REGS; + return NO_REGS; + } + + if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS) + return GENERAL_REGS; + + return rclass; +} + +/* Debug version of rs6000_preferred_reload_class. 
*/ +static enum reg_class +rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass) +{ + enum reg_class ret = rs6000_preferred_reload_class (x, rclass); + + fprintf (stderr, + "\nrs6000_preferred_reload_class, return %s, rclass = %s, " + "mode = %s, x:\n", + reg_class_names[ret], reg_class_names[rclass], + GET_MODE_NAME (GET_MODE (x))); + debug_rtx (x); + + return ret; +} + +/* If we are copying between FP or AltiVec registers and anything else, we need + a memory location. The exception is when we are targeting ppc64 and the + move to/from fpr to gpr instructions are available. Also, under VSX, you + can copy vector registers from the FP register set to the Altivec register + set and vice versa. */ + +static bool +rs6000_secondary_memory_needed (enum reg_class from_class, + enum reg_class to_class, + machine_mode mode) +{ + enum rs6000_reg_type from_type, to_type; + bool altivec_p = ((from_class == ALTIVEC_REGS) + || (to_class == ALTIVEC_REGS)); + + /* If a simple/direct move is available, we don't need secondary memory */ + from_type = reg_class_to_reg_type[(int)from_class]; + to_type = reg_class_to_reg_type[(int)to_class]; + + if (rs6000_secondary_reload_move (to_type, from_type, mode, + (secondary_reload_info *)0, altivec_p)) + return false; + + /* If we have a floating point or vector register class, we need to use + memory to transfer the data. */ + if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type)) + return true; + + return false; +} + +/* Debug version of rs6000_secondary_memory_needed. */ +static bool +rs6000_debug_secondary_memory_needed (enum reg_class from_class, + enum reg_class to_class, + machine_mode mode) +{ + bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode); + + fprintf (stderr, + "rs6000_secondary_memory_needed, return: %s, from_class = %s, " + "to_class = %s, mode = %s\n", + ret ? "true" : "false", + reg_class_names[from_class], + reg_class_names[to_class], + GET_MODE_NAME (mode)); + + return ret; +} + +/* Return the register class of a scratch register needed to copy IN into + or out of a register in RCLASS in MODE. If it can be done directly, + NO_REGS is returned. */ + +static enum reg_class +rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode, + rtx in) +{ + int regno; + + if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN +#if TARGET_MACHO + && MACHOPIC_INDIRECT +#endif + )) + { + /* We cannot copy a symbolic operand directly into anything + other than BASE_REGS for TARGET_ELF. So indicate that a + register from BASE_REGS is needed as an intermediate + register. + + On Darwin, pic addresses require a load from memory, which + needs a base register. */ + if (rclass != BASE_REGS + && (GET_CODE (in) == SYMBOL_REF + || GET_CODE (in) == HIGH + || GET_CODE (in) == LABEL_REF + || GET_CODE (in) == CONST)) + return BASE_REGS; + } + + if (GET_CODE (in) == REG) + { + regno = REGNO (in); + if (regno >= FIRST_PSEUDO_REGISTER) + { + regno = true_regnum (in); + if (regno >= FIRST_PSEUDO_REGISTER) + regno = -1; + } + } + else if (GET_CODE (in) == SUBREG) + { + regno = true_regnum (in); + if (regno >= FIRST_PSEUDO_REGISTER) + regno = -1; + } + else + regno = -1; + + /* If we have VSX register moves, prefer moving scalar values between + Altivec registers and GPR by going via an FPR (and then via memory) + instead of reloading the secondary memory address for Altivec moves. 
*/ + if (TARGET_VSX + && GET_MODE_SIZE (mode) < 16 + && !mode_supports_vmx_dform (mode) + && (((rclass == GENERAL_REGS || rclass == BASE_REGS) + && (regno >= 0 && ALTIVEC_REGNO_P (regno))) + || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS) + && (regno >= 0 && INT_REGNO_P (regno))))) + return FLOAT_REGS; + + /* We can place anything into GENERAL_REGS and can put GENERAL_REGS + into anything. */ + if (rclass == GENERAL_REGS || rclass == BASE_REGS + || (regno >= 0 && INT_REGNO_P (regno))) + return NO_REGS; + + /* Constants, memory, and VSX registers can go into VSX registers (both the + traditional floating point and the altivec registers). */ + if (rclass == VSX_REGS + && (regno == -1 || VSX_REGNO_P (regno))) + return NO_REGS; + + /* Constants, memory, and FP registers can go into FP registers. */ + if ((regno == -1 || FP_REGNO_P (regno)) + && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS)) + return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS; + + /* Memory, and AltiVec registers can go into AltiVec registers. */ + if ((regno == -1 || ALTIVEC_REGNO_P (regno)) + && rclass == ALTIVEC_REGS) + return NO_REGS; + + /* We can copy among the CR registers. */ + if ((rclass == CR_REGS || rclass == CR0_REGS) + && regno >= 0 && CR_REGNO_P (regno)) + return NO_REGS; + + /* Otherwise, we need GENERAL_REGS. */ + return GENERAL_REGS; +} + +/* Debug version of rs6000_secondary_reload_class. */ +static enum reg_class +rs6000_debug_secondary_reload_class (enum reg_class rclass, + machine_mode mode, rtx in) +{ + enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in); + fprintf (stderr, + "\nrs6000_secondary_reload_class, return %s, rclass = %s, " + "mode = %s, input rtx:\n", + reg_class_names[ret], reg_class_names[rclass], + GET_MODE_NAME (mode)); + debug_rtx (in); + + return ret; +} + +/* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */ + +static bool +rs6000_cannot_change_mode_class (machine_mode from, + machine_mode to, + enum reg_class rclass) +{ + unsigned from_size = GET_MODE_SIZE (from); + unsigned to_size = GET_MODE_SIZE (to); + + if (from_size != to_size) + { + enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS; + + if (reg_classes_intersect_p (xclass, rclass)) + { + unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to]; + unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from]; + bool to_float128_vector_p = FLOAT128_VECTOR_P (to); + bool from_float128_vector_p = FLOAT128_VECTOR_P (from); + + /* Don't allow 64-bit types to overlap with 128-bit types that take a + single register under VSX because the scalar part of the register + is in the upper 64-bits, and not the lower 64-bits. Types like + TFmode/TDmode that take 2 scalar register can overlap. 128-bit + IEEE floating point can't overlap, and neither can small + values. */ + + if (to_float128_vector_p && from_float128_vector_p) + return false; + + else if (to_float128_vector_p || from_float128_vector_p) + return true; + + /* TDmode in floating-mode registers must always go into a register + pair with the most significant word in the even-numbered register + to match ISA requirements. In little-endian mode, this does not + match subreg numbering, so we cannot allow subregs. 
*/ + if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode)) + return true; + + if (from_size < 8 || to_size < 8) + return true; + + if (from_size == 8 && (8 * to_nregs) != to_size) + return true; + + if (to_size == 8 && (8 * from_nregs) != from_size) + return true; + + return false; + } + else + return false; + } + + if (TARGET_E500_DOUBLE + && ((((to) == DFmode) + ((from) == DFmode)) == 1 + || (((to) == TFmode) + ((from) == TFmode)) == 1 + || (((to) == IFmode) + ((from) == IFmode)) == 1 + || (((to) == KFmode) + ((from) == KFmode)) == 1 + || (((to) == DDmode) + ((from) == DDmode)) == 1 + || (((to) == TDmode) + ((from) == TDmode)) == 1 + || (((to) == DImode) + ((from) == DImode)) == 1)) + return true; + + /* Since the VSX register set includes traditional floating point registers + and altivec registers, just check for the size being different instead of + trying to check whether the modes are vector modes. Otherwise it won't + allow say DF and DI to change classes. For types like TFmode and TDmode + that take 2 64-bit registers, rather than a single 128-bit register, don't + allow subregs of those types to other 128 bit types. */ + if (TARGET_VSX && VSX_REG_CLASS_P (rclass)) + { + unsigned num_regs = (from_size + 15) / 16; + if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs + || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs) + return true; + + return (from_size != 8 && from_size != 16); + } + + if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS + && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1) + return true; + + if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1 + && reg_classes_intersect_p (GENERAL_REGS, rclass)) + return true; + + return false; +} + +/* Debug version of rs6000_cannot_change_mode_class. */ +static bool +rs6000_debug_cannot_change_mode_class (machine_mode from, + machine_mode to, + enum reg_class rclass) +{ + bool ret = rs6000_cannot_change_mode_class (from, to, rclass); + + fprintf (stderr, + "rs6000_cannot_change_mode_class, return %s, from = %s, " + "to = %s, rclass = %s\n", + ret ? "true" : "false", + GET_MODE_NAME (from), GET_MODE_NAME (to), + reg_class_names[rclass]); + + return ret; +} + +/* Return a string to do a move operation of 128 bits of data. */ + +const char * +rs6000_output_move_128bit (rtx operands[]) +{ + rtx dest = operands[0]; + rtx src = operands[1]; + machine_mode mode = GET_MODE (dest); + int dest_regno; + int src_regno; + bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p; + bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p; + + if (REG_P (dest)) + { + dest_regno = REGNO (dest); + dest_gpr_p = INT_REGNO_P (dest_regno); + dest_fp_p = FP_REGNO_P (dest_regno); + dest_vmx_p = ALTIVEC_REGNO_P (dest_regno); + dest_vsx_p = dest_fp_p | dest_vmx_p; + } + else + { + dest_regno = -1; + dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false; + } + + if (REG_P (src)) + { + src_regno = REGNO (src); + src_gpr_p = INT_REGNO_P (src_regno); + src_fp_p = FP_REGNO_P (src_regno); + src_vmx_p = ALTIVEC_REGNO_P (src_regno); + src_vsx_p = src_fp_p | src_vmx_p; + } + else + { + src_regno = -1; + src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false; + } + + /* Register moves. */ + if (dest_regno >= 0 && src_regno >= 0) + { + if (dest_gpr_p) + { + if (src_gpr_p) + return "#"; + + if (TARGET_DIRECT_MOVE_128 && src_vsx_p) + return (WORDS_BIG_ENDIAN + ? 
"mfvsrd %0,%x1\n\tmfvsrld %L0,%x1" + : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1"); + + else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p) + return "#"; + } + + else if (TARGET_VSX && dest_vsx_p) + { + if (src_vsx_p) + return "xxlor %x0,%x1,%x1"; + + else if (TARGET_DIRECT_MOVE_128 && src_gpr_p) + return (WORDS_BIG_ENDIAN + ? "mtvsrdd %x0,%1,%L1" + : "mtvsrdd %x0,%L1,%1"); + + else if (TARGET_DIRECT_MOVE && src_gpr_p) + return "#"; + } + + else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p) + return "vor %0,%1,%1"; + + else if (dest_fp_p && src_fp_p) + return "#"; + } + + /* Loads. */ + else if (dest_regno >= 0 && MEM_P (src)) + { + if (dest_gpr_p) + { + if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src)) + return "lq %0,%1"; + else + return "#"; + } + + else if (TARGET_ALTIVEC && dest_vmx_p + && altivec_indexed_or_indirect_operand (src, mode)) + return "lvx %0,%y1"; + + else if (TARGET_VSX && dest_vsx_p) + { + if (mode_supports_vsx_dform_quad (mode) + && quad_address_p (XEXP (src, 0), mode, true)) + return "lxv %x0,%1"; + + else if (TARGET_P9_VECTOR) + return "lxvx %x0,%y1"; + + else if (mode == V16QImode || mode == V8HImode || mode == V4SImode) + return "lxvw4x %x0,%y1"; + + else + return "lxvd2x %x0,%y1"; + } + + else if (TARGET_ALTIVEC && dest_vmx_p) + return "lvx %0,%y1"; + + else if (dest_fp_p) + return "#"; + } + + /* Stores. */ + else if (src_regno >= 0 && MEM_P (dest)) + { + if (src_gpr_p) + { + if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src)) + return "stq %1,%0"; + else + return "#"; + } + + else if (TARGET_ALTIVEC && src_vmx_p + && altivec_indexed_or_indirect_operand (src, mode)) + return "stvx %1,%y0"; + + else if (TARGET_VSX && src_vsx_p) + { + if (mode_supports_vsx_dform_quad (mode) + && quad_address_p (XEXP (dest, 0), mode, true)) + return "stxv %x1,%0"; + + else if (TARGET_P9_VECTOR) + return "stxvx %x1,%y0"; + + else if (mode == V16QImode || mode == V8HImode || mode == V4SImode) + return "stxvw4x %x1,%y0"; + + else + return "stxvd2x %x1,%y0"; + } + + else if (TARGET_ALTIVEC && src_vmx_p) + return "stvx %1,%y0"; + + else if (src_fp_p) + return "#"; + } + + /* Constants. */ + else if (dest_regno >= 0 + && (GET_CODE (src) == CONST_INT + || GET_CODE (src) == CONST_WIDE_INT + || GET_CODE (src) == CONST_DOUBLE + || GET_CODE (src) == CONST_VECTOR)) + { + if (dest_gpr_p) + return "#"; + + else if ((dest_vmx_p && TARGET_ALTIVEC) + || (dest_vsx_p && TARGET_VSX)) + return output_vec_const_move (operands); + } + + fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src)); +} + +/* Validate a 128-bit move. */ +bool +rs6000_move_128bit_ok_p (rtx operands[]) +{ + machine_mode mode = GET_MODE (operands[0]); + return (gpc_reg_operand (operands[0], mode) + || gpc_reg_operand (operands[1], mode)); +} + +/* Return true if a 128-bit move needs to be split. */ +bool +rs6000_split_128bit_ok_p (rtx operands[]) +{ + if (!reload_completed) + return false; + + if (!gpr_or_gpr_p (operands[0], operands[1])) + return false; + + if (quad_load_store_p (operands[0], operands[1])) + return false; + + return true; +} + + +/* Given a comparison operation, return the bit number in CCR to test. We + know this is a valid comparison. + + SCC_P is 1 if this is for an scc. That means that %D will have been + used instead of %C, so the bits will be in different places. + + Return -1 if OP isn't a valid comparison for some reason. 
*/ + +int +ccr_bit (rtx op, int scc_p) +{ + enum rtx_code code = GET_CODE (op); + machine_mode cc_mode; + int cc_regnum; + int base_bit; + rtx reg; + + if (!COMPARISON_P (op)) + return -1; + + reg = XEXP (op, 0); + + gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg))); + + cc_mode = GET_MODE (reg); + cc_regnum = REGNO (reg); + base_bit = 4 * (cc_regnum - CR0_REGNO); + + validate_condition_mode (code, cc_mode); + + /* When generating a sCOND operation, only positive conditions are + allowed. */ + gcc_assert (!scc_p + || code == EQ || code == GT || code == LT || code == UNORDERED + || code == GTU || code == LTU); + + switch (code) + { + case NE: + return scc_p ? base_bit + 3 : base_bit + 2; + case EQ: + return base_bit + 2; + case GT: case GTU: case UNLE: + return base_bit + 1; + case LT: case LTU: case UNGE: + return base_bit; + case ORDERED: case UNORDERED: + return base_bit + 3; + + case GE: case GEU: + /* If scc, we will have done a cror to put the bit in the + unordered position. So test that bit. For integer, this is ! LT + unless this is an scc insn. */ + return scc_p ? base_bit + 3 : base_bit; + + case LE: case LEU: + return scc_p ? base_bit + 3 : base_bit + 1; + + default: + gcc_unreachable (); + } +} + +/* Return the GOT register. */ + +rtx +rs6000_got_register (rtx value ATTRIBUTE_UNUSED) +{ + /* The second flow pass currently (June 1999) can't update + regs_ever_live without disturbing other parts of the compiler, so + update it here to make the prolog/epilogue code happy. */ + if (!can_create_pseudo_p () + && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM)) + df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true); + + crtl->uses_pic_offset_table = 1; + + return pic_offset_table_rtx; +} + +static rs6000_stack_t stack_info; + +/* Function to init struct machine_function. + This will be called, via a pointer variable, + from push_function_context. */ + +static struct machine_function * +rs6000_init_machine_status (void) +{ + stack_info.reload_completed = 0; + return ggc_cleared_alloc<machine_function> (); +} + +#define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode) + +/* Write out a function code label. */ + +void +rs6000_output_function_entry (FILE *file, const char *fname) +{ + if (fname[0] != '.') + { + switch (DEFAULT_ABI) + { + default: + gcc_unreachable (); + + case ABI_AIX: + if (DOT_SYMBOLS) + putc ('.', file); + else + ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L."); + break; + + case ABI_ELFv2: + case ABI_V4: + case ABI_DARWIN: + break; + } + } + + RS6000_OUTPUT_BASENAME (file, fname); +} + +/* Print an operand. Recognize special options, documented below. */ + +#if TARGET_ELF +#define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel") +#define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13) +#else +#define SMALL_DATA_RELOC "sda21" +#define SMALL_DATA_REG 0 +#endif + +void +print_operand (FILE *file, rtx x, int code) +{ + int i; + unsigned HOST_WIDE_INT uval; + + switch (code) + { + /* %a is output_address. */ + + /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise + output_operand. */ + + case 'D': + /* Like 'J' but get to the GT bit only. */ + gcc_assert (REG_P (x)); + + /* Bit 1 is GT bit. */ + i = 4 * (REGNO (x) - CR0_REGNO) + 1; + + /* Add one for shift count in rlinm for scc. */ + fprintf (file, "%d", i + 1); + return; + + case 'e': + /* If the low 16 bits are 0, but some other bit is set, write 's'. */ + if (! 
INT_P (x)) + { + output_operand_lossage ("invalid %%e value"); + return; + } + + uval = INTVAL (x); + if ((uval & 0xffff) == 0 && uval != 0) + putc ('s', file); + return; + + case 'E': + /* X is a CR register. Print the number of the EQ bit of the CR */ + if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x))) + output_operand_lossage ("invalid %%E value"); + else + fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2); + return; + + case 'f': + /* X is a CR register. Print the shift count needed to move it + to the high-order four bits. */ + if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x))) + output_operand_lossage ("invalid %%f value"); + else + fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO)); + return; + + case 'F': + /* Similar, but print the count for the rotate in the opposite + direction. */ + if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x))) + output_operand_lossage ("invalid %%F value"); + else + fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO)); + return; + + case 'G': + /* X is a constant integer. If it is negative, print "m", + otherwise print "z". This is to make an aze or ame insn. */ + if (GET_CODE (x) != CONST_INT) + output_operand_lossage ("invalid %%G value"); + else if (INTVAL (x) >= 0) + putc ('z', file); + else + putc ('m', file); + return; + + case 'h': + /* If constant, output low-order five bits. Otherwise, write + normally. */ + if (INT_P (x)) + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31); + else + print_operand (file, x, 0); + return; + + case 'H': + /* If constant, output low-order six bits. Otherwise, write + normally. */ + if (INT_P (x)) + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63); + else + print_operand (file, x, 0); + return; + + case 'I': + /* Print `i' if this is a constant, else nothing. */ + if (INT_P (x)) + putc ('i', file); + return; + + case 'j': + /* Write the bit number in CCR for jump. */ + i = ccr_bit (x, 0); + if (i == -1) + output_operand_lossage ("invalid %%j code"); + else + fprintf (file, "%d", i); + return; + + case 'J': + /* Similar, but add one for shift count in rlinm for scc and pass + scc flag to `ccr_bit'. */ + i = ccr_bit (x, 1); + if (i == -1) + output_operand_lossage ("invalid %%J code"); + else + /* If we want bit 31, write a shift count of zero, not 32. */ + fprintf (file, "%d", i == 31 ? 0 : i + 1); + return; + + case 'k': + /* X must be a constant. Write the 1's complement of the + constant. */ + if (! INT_P (x)) + output_operand_lossage ("invalid %%k value"); + else + fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x)); + return; + + case 'K': + /* X must be a symbolic constant on ELF. Write an + expression suitable for an 'addi' that adds in the low 16 + bits of the MEM. */ + if (GET_CODE (x) == CONST) + { + if (GET_CODE (XEXP (x, 0)) != PLUS + || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF + && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF) + || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT) + output_operand_lossage ("invalid %%K value"); + } + print_operand_address (file, x); + fputs ("@l", file); + return; + + /* %l is output_asm_label. */ + + case 'L': + /* Write second word of DImode or DFmode reference. Works on register + or non-indexed memory only. */ + if (REG_P (x)) + fputs (reg_names[REGNO (x) + 1], file); + else if (MEM_P (x)) + { + machine_mode mode = GET_MODE (x); + /* Handle possible auto-increment. Since it is pre-increment and + we have already done it, we can just use an offset of word. 
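That is, the second word is found UNITS_PER_WORD bytes past the address already left in the base register.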
*/ + if (GET_CODE (XEXP (x, 0)) == PRE_INC + || GET_CODE (XEXP (x, 0)) == PRE_DEC) + output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0), + UNITS_PER_WORD)); + else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY) + output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0), + UNITS_PER_WORD)); + else + output_address (mode, XEXP (adjust_address_nv (x, SImode, + UNITS_PER_WORD), + 0)); + + if (small_data_operand (x, GET_MODE (x))) + fprintf (file, "@%s(%s)", SMALL_DATA_RELOC, + reg_names[SMALL_DATA_REG]); + } + return; + + case 'N': + /* Write the number of elements in the vector times 4. */ + if (GET_CODE (x) != PARALLEL) + output_operand_lossage ("invalid %%N value"); + else + fprintf (file, "%d", XVECLEN (x, 0) * 4); + return; + + case 'O': + /* Similar, but subtract 1 first. */ + if (GET_CODE (x) != PARALLEL) + output_operand_lossage ("invalid %%O value"); + else + fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4); + return; + + case 'p': + /* X is a CONST_INT that is a power of two. Output the logarithm. */ + if (! INT_P (x) + || INTVAL (x) < 0 + || (i = exact_log2 (INTVAL (x))) < 0) + output_operand_lossage ("invalid %%p value"); + else + fprintf (file, "%d", i); + return; + + case 'P': + /* The operand must be an indirect memory reference. The result + is the register name. */ + if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG + || REGNO (XEXP (x, 0)) >= 32) + output_operand_lossage ("invalid %%P value"); + else + fputs (reg_names[REGNO (XEXP (x, 0))], file); + return; + + case 'q': + /* This outputs the logical code corresponding to a boolean + expression. The expression may have one or both operands + negated (if one, only the first one). For condition register + logical operations, it will also treat the negated + CR codes as NOTs, but not handle NOTs of them. */ + { + const char *const *t = 0; + const char *s; + enum rtx_code code = GET_CODE (x); + static const char * const tbl[3][3] = { + { "and", "andc", "nor" }, + { "or", "orc", "nand" }, + { "xor", "eqv", "xor" } }; + + if (code == AND) + t = tbl[0]; + else if (code == IOR) + t = tbl[1]; + else if (code == XOR) + t = tbl[2]; + else + output_operand_lossage ("invalid %%q value"); + + if (GET_CODE (XEXP (x, 0)) != NOT) + s = t[0]; + else + { + if (GET_CODE (XEXP (x, 1)) == NOT) + s = t[2]; + else + s = t[1]; + } + + fputs (s, file); + } + return; + + case 'Q': + if (! TARGET_MFCRF) + return; + fputc (',', file); + /* FALLTHRU */ + + case 'R': + /* X is a CR register. Print the mask for `mtcrf'. */ + if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x))) + output_operand_lossage ("invalid %%R value"); + else + fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO)); + return; + + case 's': + /* Low 5 bits of 32 - value */ + if (! INT_P (x)) + output_operand_lossage ("invalid %%s value"); + else + fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31); + return; + + case 't': + /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */ + gcc_assert (REG_P (x) && GET_MODE (x) == CCmode); + + /* Bit 3 is OV bit. */ + i = 4 * (REGNO (x) - CR0_REGNO) + 3; + + /* If we want bit 31, write a shift count of zero, not 32. */ + fprintf (file, "%d", i == 31 ? 0 : i + 1); + return; + + case 'T': + /* Print the symbolic name of a branch target register. 
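This is either the link register (printed as "lr") or the count register (printed as "ctr").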
*/ + if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO + && REGNO (x) != CTR_REGNO)) + output_operand_lossage ("invalid %%T value"); + else if (REGNO (x) == LR_REGNO) + fputs ("lr", file); + else + fputs ("ctr", file); + return; + + case 'u': + /* High-order or low-order 16 bits of constant, whichever is non-zero, + for use in unsigned operand. */ + if (! INT_P (x)) + { + output_operand_lossage ("invalid %%u value"); + return; + } + + uval = INTVAL (x); + if ((uval & 0xffff) == 0) + uval >>= 16; + + fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff); + return; + + case 'v': + /* High-order 16 bits of constant for use in signed operand. */ + if (! INT_P (x)) + output_operand_lossage ("invalid %%v value"); + else + fprintf (file, HOST_WIDE_INT_PRINT_HEX, + (INTVAL (x) >> 16) & 0xffff); + return; + + case 'U': + /* Print `u' if this has an auto-increment or auto-decrement. */ + if (MEM_P (x) + && (GET_CODE (XEXP (x, 0)) == PRE_INC + || GET_CODE (XEXP (x, 0)) == PRE_DEC + || GET_CODE (XEXP (x, 0)) == PRE_MODIFY)) + putc ('u', file); + return; + + case 'V': + /* Print the trap code for this operand. */ + switch (GET_CODE (x)) + { + case EQ: + fputs ("eq", file); /* 4 */ + break; + case NE: + fputs ("ne", file); /* 24 */ + break; + case LT: + fputs ("lt", file); /* 16 */ + break; + case LE: + fputs ("le", file); /* 20 */ + break; + case GT: + fputs ("gt", file); /* 8 */ + break; + case GE: + fputs ("ge", file); /* 12 */ + break; + case LTU: + fputs ("llt", file); /* 2 */ + break; + case LEU: + fputs ("lle", file); /* 6 */ + break; + case GTU: + fputs ("lgt", file); /* 1 */ + break; + case GEU: + fputs ("lge", file); /* 5 */ + break; + default: + gcc_unreachable (); + } + break; + + case 'w': + /* If constant, low-order 16 bits of constant, signed. Otherwise, write + normally. */ + if (INT_P (x)) + fprintf (file, HOST_WIDE_INT_PRINT_DEC, + ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000); + else + print_operand (file, x, 0); + return; + + case 'x': + /* X is a FPR or Altivec register used in a VSX context. */ + if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x))) + output_operand_lossage ("invalid %%x value"); + else + { + int reg = REGNO (x); + int vsx_reg = (FP_REGNO_P (reg) + ? reg - 32 + : reg - FIRST_ALTIVEC_REGNO + 32); + +#ifdef TARGET_REGNAMES + if (TARGET_REGNAMES) + fprintf (file, "%%vs%d", vsx_reg); + else +#endif + fprintf (file, "%d", vsx_reg); + } + return; + + case 'X': + if (MEM_P (x) + && (legitimate_indexed_address_p (XEXP (x, 0), 0) + || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY + && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0)))) + putc ('x', file); + return; + + case 'Y': + /* Like 'L', for third word of TImode/PTImode */ + if (REG_P (x)) + fputs (reg_names[REGNO (x) + 2], file); + else if (MEM_P (x)) + { + machine_mode mode = GET_MODE (x); + if (GET_CODE (XEXP (x, 0)) == PRE_INC + || GET_CODE (XEXP (x, 0)) == PRE_DEC) + output_address (mode, plus_constant (Pmode, + XEXP (XEXP (x, 0), 0), 8)); + else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY) + output_address (mode, plus_constant (Pmode, + XEXP (XEXP (x, 0), 0), 8)); + else + output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0)); + if (small_data_operand (x, GET_MODE (x))) + fprintf (file, "@%s(%s)", SMALL_DATA_RELOC, + reg_names[SMALL_DATA_REG]); + } + return; + + case 'z': + /* X is a SYMBOL_REF. Write out the name preceded by a + period and without any trailing data in brackets. Used for function + names. 
If we are configured for System V (or the embedded ABI) on + the PowerPC, do not emit the period, since those systems do not use + TOCs and the like. */ + gcc_assert (GET_CODE (x) == SYMBOL_REF); + + /* For macho, check to see if we need a stub. */ + if (TARGET_MACHO) + { + const char *name = XSTR (x, 0); +#if TARGET_MACHO + if (darwin_emit_branch_islands + && MACHOPIC_INDIRECT + && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION) + name = machopic_indirection_name (x, /*stub_p=*/true); +#endif + assemble_name (file, name); + } + else if (!DOT_SYMBOLS) + assemble_name (file, XSTR (x, 0)); + else + rs6000_output_function_entry (file, XSTR (x, 0)); + return; + + case 'Z': + /* Like 'L', for last word of TImode/PTImode. */ + if (REG_P (x)) + fputs (reg_names[REGNO (x) + 3], file); + else if (MEM_P (x)) + { + machine_mode mode = GET_MODE (x); + if (GET_CODE (XEXP (x, 0)) == PRE_INC + || GET_CODE (XEXP (x, 0)) == PRE_DEC) + output_address (mode, plus_constant (Pmode, + XEXP (XEXP (x, 0), 0), 12)); + else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY) + output_address (mode, plus_constant (Pmode, + XEXP (XEXP (x, 0), 0), 12)); + else + output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0)); + if (small_data_operand (x, GET_MODE (x))) + fprintf (file, "@%s(%s)", SMALL_DATA_RELOC, + reg_names[SMALL_DATA_REG]); + } + return; + + /* Print AltiVec or SPE memory operand. */ + case 'y': + { + rtx tmp; + + gcc_assert (MEM_P (x)); + + tmp = XEXP (x, 0); + + /* Ugly hack because %y is overloaded. */ + if ((TARGET_SPE || TARGET_E500_DOUBLE) + && (GET_MODE_SIZE (GET_MODE (x)) == 8 + || FLOAT128_2REG_P (GET_MODE (x)) + || GET_MODE (x) == TImode + || GET_MODE (x) == PTImode)) + { + /* Handle [reg]. */ + if (REG_P (tmp)) + { + fprintf (file, "0(%s)", reg_names[REGNO (tmp)]); + break; + } + /* Handle [reg+UIMM]. */ + else if (GET_CODE (tmp) == PLUS && + GET_CODE (XEXP (tmp, 1)) == CONST_INT) + { + int x; + + gcc_assert (REG_P (XEXP (tmp, 0))); + + x = INTVAL (XEXP (tmp, 1)); + fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]); + break; + } + + /* Fall through. Must be [reg+reg]. */ + } + if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x)) + && GET_CODE (tmp) == AND + && GET_CODE (XEXP (tmp, 1)) == CONST_INT + && INTVAL (XEXP (tmp, 1)) == -16) + tmp = XEXP (tmp, 0); + else if (VECTOR_MEM_VSX_P (GET_MODE (x)) + && GET_CODE (tmp) == PRE_MODIFY) + tmp = XEXP (tmp, 1); + if (REG_P (tmp)) + fprintf (file, "0,%s", reg_names[REGNO (tmp)]); + else + { + if (GET_CODE (tmp) != PLUS + || !REG_P (XEXP (tmp, 0)) + || !REG_P (XEXP (tmp, 1))) + { + output_operand_lossage ("invalid %%y value, try using the 'Z' constraint"); + break; + } + + if (REGNO (XEXP (tmp, 0)) == 0) + fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ], + reg_names[ REGNO (XEXP (tmp, 0)) ]); + else + fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ], + reg_names[ REGNO (XEXP (tmp, 1)) ]); + } + break; + } + + case 0: + if (REG_P (x)) + fprintf (file, "%s", reg_names[REGNO (x)]); + else if (MEM_P (x)) + { + /* We need to handle PRE_INC and PRE_DEC here, since we need to + know the width from the mode. 
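The displacement printed is plus or minus the GET_MODE_SIZE of the operand, applied to the base register.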
*/ + if (GET_CODE (XEXP (x, 0)) == PRE_INC) + fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)), + reg_names[REGNO (XEXP (XEXP (x, 0), 0))]); + else if (GET_CODE (XEXP (x, 0)) == PRE_DEC) + fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)), + reg_names[REGNO (XEXP (XEXP (x, 0), 0))]); + else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY) + output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1)); + else + output_address (GET_MODE (x), XEXP (x, 0)); + } + else + { + if (toc_relative_expr_p (x, false)) + /* This hack along with a corresponding hack in + rs6000_output_addr_const_extra arranges to output addends + where the assembler expects to find them. eg. + (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4) + without this hack would be output as "x@toc+4". We + want "x+4@toc". */ + output_addr_const (file, CONST_CAST_RTX (tocrel_base)); + else + output_addr_const (file, x); + } + return; + + case '&': + if (const char *name = get_some_local_dynamic_name ()) + assemble_name (file, name); + else + output_operand_lossage ("'%%&' used without any " + "local dynamic TLS references"); + return; + + default: + output_operand_lossage ("invalid %%xn code"); + } +} + +/* Print the address of an operand. */ + +void +print_operand_address (FILE *file, rtx x) +{ + if (REG_P (x)) + fprintf (file, "0(%s)", reg_names[ REGNO (x) ]); + else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST + || GET_CODE (x) == LABEL_REF) + { + output_addr_const (file, x); + if (small_data_operand (x, GET_MODE (x))) + fprintf (file, "@%s(%s)", SMALL_DATA_RELOC, + reg_names[SMALL_DATA_REG]); + else + gcc_assert (!TARGET_TOC); + } + else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) + && REG_P (XEXP (x, 1))) + { + if (REGNO (XEXP (x, 0)) == 0) + fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ], + reg_names[ REGNO (XEXP (x, 0)) ]); + else + fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ], + reg_names[ REGNO (XEXP (x, 1)) ]); + } + else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) + && GET_CODE (XEXP (x, 1)) == CONST_INT) + fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)", + INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]); +#if TARGET_MACHO + else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0)) + && CONSTANT_P (XEXP (x, 1))) + { + fprintf (file, "lo16("); + output_addr_const (file, XEXP (x, 1)); + fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]); + } +#endif +#if TARGET_ELF + else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0)) + && CONSTANT_P (XEXP (x, 1))) + { + output_addr_const (file, XEXP (x, 1)); + fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]); + } +#endif + else if (toc_relative_expr_p (x, false)) + { + /* This hack along with a corresponding hack in + rs6000_output_addr_const_extra arranges to output addends + where the assembler expects to find them. eg. + (lo_sum (reg 9) + . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8)) + without this hack would be output as "x@toc+8@l(9)". We + want "x+8@toc@l(9)". */ + output_addr_const (file, CONST_CAST_RTX (tocrel_base)); + if (GET_CODE (x) == LO_SUM) + fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]); + else + fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]); + } + else + gcc_unreachable (); +} + +/* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. 
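This handles the UNSPEC wrappers the backend uses for TOC-relative and Mach-O PIC addresses, which output_addr_const cannot print by itself.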
*/ + +static bool +rs6000_output_addr_const_extra (FILE *file, rtx x) +{ + if (GET_CODE (x) == UNSPEC) + switch (XINT (x, 1)) + { + case UNSPEC_TOCREL: + gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF + && REG_P (XVECEXP (x, 0, 1)) + && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER); + output_addr_const (file, XVECEXP (x, 0, 0)); + if (x == tocrel_base && tocrel_offset != const0_rtx) + { + if (INTVAL (tocrel_offset) >= 0) + fprintf (file, "+"); + output_addr_const (file, CONST_CAST_RTX (tocrel_offset)); + } + if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC)) + { + putc ('-', file); + assemble_name (file, toc_label_name); + need_toc_init = 1; + } + else if (TARGET_ELF) + fputs ("@toc", file); + return true; + +#if TARGET_MACHO + case UNSPEC_MACHOPIC_OFFSET: + output_addr_const (file, XVECEXP (x, 0, 0)); + putc ('-', file); + machopic_output_function_base_name (file); + return true; +#endif + } + return false; +} + +/* Target hook for assembling integer objects. The PowerPC version has + to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP + is defined. It also needs to handle DI-mode objects on 64-bit + targets. */ + +static bool +rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p) +{ +#ifdef RELOCATABLE_NEEDS_FIXUP + /* Special handling for SI values. */ + if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p) + { + static int recurse = 0; + + /* For -mrelocatable, we mark all addresses that need to be fixed up in + the .fixup section. Since the TOC section is already relocated, we + don't need to mark it here. We used to skip the text section, but it + should never be valid for relocated addresses to be placed in the text + section. */ + if (DEFAULT_ABI == ABI_V4 + && (TARGET_RELOCATABLE || flag_pic > 1) + && in_section != toc_section + && !recurse + && !CONST_SCALAR_INT_P (x) + && CONSTANT_P (x)) + { + char buf[256]; + + recurse = 1; + ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno); + fixuplabelno++; + ASM_OUTPUT_LABEL (asm_out_file, buf); + fprintf (asm_out_file, "\t.long\t("); + output_addr_const (asm_out_file, x); + fprintf (asm_out_file, ")@fixup\n"); + fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n"); + ASM_OUTPUT_ALIGN (asm_out_file, 2); + fprintf (asm_out_file, "\t.long\t"); + assemble_name (asm_out_file, buf); + fprintf (asm_out_file, "\n\t.previous\n"); + recurse = 0; + return true; + } + /* Remove initial .'s to turn a -mcall-aixdesc function + address into the address of the descriptor, not the function + itself. */ + else if (GET_CODE (x) == SYMBOL_REF + && XSTR (x, 0)[0] == '.' + && DEFAULT_ABI == ABI_AIX) + { + const char *name = XSTR (x, 0); + while (*name == '.') + name++; + + fprintf (asm_out_file, "\t.long\t%s\n", name); + return true; + } + } +#endif /* RELOCATABLE_NEEDS_FIXUP */ + return default_assemble_integer (x, size, aligned_p); +} + +#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO +/* Emit an assembler directive to set symbol visibility for DECL to + VISIBILITY_TYPE. */ + +static void +rs6000_assemble_visibility (tree decl, int vis) +{ + if (TARGET_XCOFF) + return; + + /* Functions need to have their entry point symbol visibility set as + well as their descriptor symbol visibility. 
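When the AIX ABI uses dot symbols, the directive is therefore emitted twice, once for the descriptor symbol and once for the '.'-prefixed entry point.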
*/ + if (DEFAULT_ABI == ABI_AIX + && DOT_SYMBOLS + && TREE_CODE (decl) == FUNCTION_DECL) + { + static const char * const visibility_types[] = { + NULL, "protected", "hidden", "internal" + }; + + const char *name, *type; + + name = ((* targetm.strip_name_encoding) + (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)))); + type = visibility_types[vis]; + + fprintf (asm_out_file, "\t.%s\t%s\n", type, name); + fprintf (asm_out_file, "\t.%s\t.%s\n", type, name); + } + else + default_assemble_visibility (decl, vis); +} +#endif + +enum rtx_code +rs6000_reverse_condition (machine_mode mode, enum rtx_code code) +{ + /* Reversal of FP compares takes care -- an ordered compare + becomes an unordered compare and vice versa. */ + if (mode == CCFPmode + && (!flag_finite_math_only + || code == UNLT || code == UNLE || code == UNGT || code == UNGE + || code == UNEQ || code == LTGT)) + return reverse_condition_maybe_unordered (code); + else + return reverse_condition (code); +} + +/* Generate a compare for CODE. Return a brand-new rtx that + represents the result of the compare. */ + +static rtx +rs6000_generate_compare (rtx cmp, machine_mode mode) +{ + machine_mode comp_mode; + rtx compare_result; + enum rtx_code code = GET_CODE (cmp); + rtx op0 = XEXP (cmp, 0); + rtx op1 = XEXP (cmp, 1); + + if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode)) + comp_mode = CCmode; + else if (FLOAT_MODE_P (mode)) + comp_mode = CCFPmode; + else if (code == GTU || code == LTU + || code == GEU || code == LEU) + comp_mode = CCUNSmode; + else if ((code == EQ || code == NE) + && unsigned_reg_p (op0) + && (unsigned_reg_p (op1) + || (CONST_INT_P (op1) && INTVAL (op1) != 0))) + /* These are unsigned values, perhaps there will be a later + ordering compare that can be shared with this one. */ + comp_mode = CCUNSmode; + else + comp_mode = CCmode; + + /* If we have an unsigned compare, make sure we don't have a signed value as + an immediate. */ + if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT + && INTVAL (op1) < 0) + { + op0 = copy_rtx_if_shared (op0); + op1 = force_reg (GET_MODE (op0), op1); + cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1); + } + + /* First, the compare. */ + compare_result = gen_reg_rtx (comp_mode); + + /* E500 FP compare instructions on the GPRs. Yuck! */ + if ((!TARGET_FPRS && TARGET_HARD_FLOAT) + && FLOAT_MODE_P (mode)) + { + rtx cmp, or_result, compare_result2; + machine_mode op_mode = GET_MODE (op0); + bool reverse_p; + + if (op_mode == VOIDmode) + op_mode = GET_MODE (op1); + + /* First reverse the condition codes that aren't directly supported. */ + switch (code) + { + case NE: + case UNLT: + case UNLE: + case UNGT: + case UNGE: + code = reverse_condition_maybe_unordered (code); + reverse_p = true; + break; + + case EQ: + case LT: + case LE: + case GT: + case GE: + reverse_p = false; + break; + + default: + gcc_unreachable (); + } + + /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only. + This explains the following mess. */ + + switch (code) + { + case EQ: + switch (op_mode) + { + case SFmode: + cmp = (flag_finite_math_only && !flag_trapping_math) + ? gen_tstsfeq_gpr (compare_result, op0, op1) + : gen_cmpsfeq_gpr (compare_result, op0, op1); + break; + + case DFmode: + cmp = (flag_finite_math_only && !flag_trapping_math) + ? gen_tstdfeq_gpr (compare_result, op0, op1) + : gen_cmpdfeq_gpr (compare_result, op0, op1); + break; + + case TFmode: + case IFmode: + case KFmode: + cmp = (flag_finite_math_only && !flag_trapping_math) + ? 
gen_tsttfeq_gpr (compare_result, op0, op1) + : gen_cmptfeq_gpr (compare_result, op0, op1); + break; + + default: + gcc_unreachable (); + } + break; + + case GT: + case GE: + switch (op_mode) + { + case SFmode: + cmp = (flag_finite_math_only && !flag_trapping_math) + ? gen_tstsfgt_gpr (compare_result, op0, op1) + : gen_cmpsfgt_gpr (compare_result, op0, op1); + break; + + case DFmode: + cmp = (flag_finite_math_only && !flag_trapping_math) + ? gen_tstdfgt_gpr (compare_result, op0, op1) + : gen_cmpdfgt_gpr (compare_result, op0, op1); + break; + + case TFmode: + case IFmode: + case KFmode: + cmp = (flag_finite_math_only && !flag_trapping_math) + ? gen_tsttfgt_gpr (compare_result, op0, op1) + : gen_cmptfgt_gpr (compare_result, op0, op1); + break; + + default: + gcc_unreachable (); + } + break; + + case LT: + case LE: + switch (op_mode) + { + case SFmode: + cmp = (flag_finite_math_only && !flag_trapping_math) + ? gen_tstsflt_gpr (compare_result, op0, op1) + : gen_cmpsflt_gpr (compare_result, op0, op1); + break; + + case DFmode: + cmp = (flag_finite_math_only && !flag_trapping_math) + ? gen_tstdflt_gpr (compare_result, op0, op1) + : gen_cmpdflt_gpr (compare_result, op0, op1); + break; + + case TFmode: + case IFmode: + case KFmode: + cmp = (flag_finite_math_only && !flag_trapping_math) + ? gen_tsttflt_gpr (compare_result, op0, op1) + : gen_cmptflt_gpr (compare_result, op0, op1); + break; + + default: + gcc_unreachable (); + } + break; + + default: + gcc_unreachable (); + } + + /* Synthesize LE and GE from LT/GT || EQ. */ + if (code == LE || code == GE) + { + emit_insn (cmp); + + compare_result2 = gen_reg_rtx (CCFPmode); + + /* Do the EQ. */ + switch (op_mode) + { + case SFmode: + cmp = (flag_finite_math_only && !flag_trapping_math) + ? gen_tstsfeq_gpr (compare_result2, op0, op1) + : gen_cmpsfeq_gpr (compare_result2, op0, op1); + break; + + case DFmode: + cmp = (flag_finite_math_only && !flag_trapping_math) + ? gen_tstdfeq_gpr (compare_result2, op0, op1) + : gen_cmpdfeq_gpr (compare_result2, op0, op1); + break; + + case TFmode: + case IFmode: + case KFmode: + cmp = (flag_finite_math_only && !flag_trapping_math) + ? gen_tsttfeq_gpr (compare_result2, op0, op1) + : gen_cmptfeq_gpr (compare_result2, op0, op1); + break; + + default: + gcc_unreachable (); + } + + emit_insn (cmp); + + /* OR them together. */ + or_result = gen_reg_rtx (CCFPmode); + cmp = gen_e500_cr_ior_compare (or_result, compare_result, + compare_result2); + compare_result = or_result; + } + + code = reverse_p ? NE : EQ; + + emit_insn (cmp); + } + + /* IEEE 128-bit support in VSX registers when we do not have hardware + support. */ + else if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode)) + { + rtx libfunc = NULL_RTX; + bool check_nan = false; + rtx dest; + + switch (code) + { + case EQ: + case NE: + libfunc = optab_libfunc (eq_optab, mode); + break; + + case GT: + case GE: + libfunc = optab_libfunc (ge_optab, mode); + break; + + case LT: + case LE: + libfunc = optab_libfunc (le_optab, mode); + break; + + case UNORDERED: + case ORDERED: + libfunc = optab_libfunc (unord_optab, mode); + code = (code == UNORDERED) ? NE : EQ; + break; + + case UNGE: + case UNGT: + check_nan = true; + libfunc = optab_libfunc (ge_optab, mode); + code = (code == UNGE) ? GE : GT; + break; + + case UNLE: + case UNLT: + check_nan = true; + libfunc = optab_libfunc (le_optab, mode); + code = (code == UNLE) ? LE : LT; + break; + + case UNEQ: + case LTGT: + check_nan = true; + libfunc = optab_libfunc (eq_optab, mode); + code = (code = UNEQ) ? 
EQ : NE; + break; + + default: + gcc_unreachable (); + } + + gcc_assert (libfunc); + + if (!check_nan) + dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST, + SImode, 2, op0, mode, op1, mode); + + /* The library signals an exception for signalling NaNs, so we need to + handle isgreater, etc. by first checking isordered. */ + else + { + rtx ne_rtx, normal_dest, unord_dest; + rtx unord_func = optab_libfunc (unord_optab, mode); + rtx join_label = gen_label_rtx (); + rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label); + rtx unord_cmp = gen_reg_rtx (comp_mode); + + + /* Test for either value being a NaN. */ + gcc_assert (unord_func); + unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST, + SImode, 2, op0, mode, op1, + mode); + + /* Set value (0) if either value is a NaN, and jump to the join + label. */ + dest = gen_reg_rtx (SImode); + emit_move_insn (dest, const1_rtx); + emit_insn (gen_rtx_SET (unord_cmp, + gen_rtx_COMPARE (comp_mode, unord_dest, + const0_rtx))); + + ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx, + join_ref, + pc_rtx))); + + /* Do the normal comparison, knowing that the values are not + NaNs. */ + normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST, + SImode, 2, op0, mode, op1, + mode); + + emit_insn (gen_cstoresi4 (dest, + gen_rtx_fmt_ee (code, SImode, normal_dest, + const0_rtx), + normal_dest, const0_rtx)); + + /* Join NaN and non-Nan paths. Compare dest against 0. */ + emit_label (join_label); + code = NE; + } + + emit_insn (gen_rtx_SET (compare_result, + gen_rtx_COMPARE (comp_mode, dest, const0_rtx))); + } + + else + { + /* Generate XLC-compatible TFmode compare as PARALLEL with extra + CLOBBERs to match cmptf_internal2 pattern. */ + if (comp_mode == CCFPmode && TARGET_XL_COMPAT + && FLOAT128_IBM_P (GET_MODE (op0)) + && TARGET_HARD_FLOAT && TARGET_FPRS) + emit_insn (gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (10, + gen_rtx_SET (compare_result, + gen_rtx_COMPARE (comp_mode, op0, op1)), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode))))); + else if (GET_CODE (op1) == UNSPEC + && XINT (op1, 1) == UNSPEC_SP_TEST) + { + rtx op1b = XVECEXP (op1, 0, 0); + comp_mode = CCEQmode; + compare_result = gen_reg_rtx (CCEQmode); + if (TARGET_64BIT) + emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b)); + else + emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b)); + } + else + emit_insn (gen_rtx_SET (compare_result, + gen_rtx_COMPARE (comp_mode, op0, op1))); + } + + /* Some kinds of FP comparisons need an OR operation; + under flag_finite_math_only we don't bother. 
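For example, GE is tested as GT-or-EQ: the two condition bits are IORed and compared against true in CCEQmode, so the final test collapses to a single EQ on that register.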
*/ + if (FLOAT_MODE_P (mode) + && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW) + && !flag_finite_math_only + && !(TARGET_HARD_FLOAT && !TARGET_FPRS) + && (code == LE || code == GE + || code == UNEQ || code == LTGT + || code == UNGT || code == UNLT)) + { + enum rtx_code or1, or2; + rtx or1_rtx, or2_rtx, compare2_rtx; + rtx or_result = gen_reg_rtx (CCEQmode); + + switch (code) + { + case LE: or1 = LT; or2 = EQ; break; + case GE: or1 = GT; or2 = EQ; break; + case UNEQ: or1 = UNORDERED; or2 = EQ; break; + case LTGT: or1 = LT; or2 = GT; break; + case UNGT: or1 = UNORDERED; or2 = GT; break; + case UNLT: or1 = UNORDERED; or2 = LT; break; + default: gcc_unreachable (); + } + validate_condition_mode (or1, comp_mode); + validate_condition_mode (or2, comp_mode); + or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx); + or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx); + compare2_rtx = gen_rtx_COMPARE (CCEQmode, + gen_rtx_IOR (SImode, or1_rtx, or2_rtx), + const_true_rtx); + emit_insn (gen_rtx_SET (or_result, compare2_rtx)); + + compare_result = or_result; + code = EQ; + } + + validate_condition_mode (code, GET_MODE (compare_result)); + + return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx); +} + + +/* Return the diagnostic message string if the binary operation OP is + not permitted on TYPE1 and TYPE2, NULL otherwise. */ + +static const char* +rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED, + const_tree type1, + const_tree type2) +{ + enum machine_mode mode1 = TYPE_MODE (type1); + enum machine_mode mode2 = TYPE_MODE (type2); + + /* For complex modes, use the inner type. */ + if (COMPLEX_MODE_P (mode1)) + mode1 = GET_MODE_INNER (mode1); + + if (COMPLEX_MODE_P (mode2)) + mode2 = GET_MODE_INNER (mode2); + + /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended + double to intermix unless -mfloat128-convert. */ + if (mode1 == mode2) + return NULL; + + if (!TARGET_FLOAT128_CVT) + { + if ((mode1 == KFmode && mode2 == IFmode) + || (mode1 == IFmode && mode2 == KFmode)) + return N_("__float128 and __ibm128 cannot be used in the same " + "expression"); + + if (TARGET_IEEEQUAD + && ((mode1 == IFmode && mode2 == TFmode) + || (mode1 == TFmode && mode2 == IFmode))) + return N_("__ibm128 and long double cannot be used in the same " + "expression"); + + if (!TARGET_IEEEQUAD + && ((mode1 == KFmode && mode2 == TFmode) + || (mode1 == TFmode && mode2 == KFmode))) + return N_("__float128 and long double cannot be used in the same " + "expression"); + } + + return NULL; +} + + +/* Expand floating point conversion to/from __float128 and __ibm128. */ + +void +rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p) +{ + machine_mode dest_mode = GET_MODE (dest); + machine_mode src_mode = GET_MODE (src); + convert_optab cvt = unknown_optab; + bool do_move = false; + rtx libfunc = NULL_RTX; + rtx dest2; + typedef rtx (*rtx_2func_t) (rtx, rtx); + rtx_2func_t hw_convert = (rtx_2func_t)0; + size_t kf_or_tf; + + struct hw_conv_t { + rtx_2func_t from_df; + rtx_2func_t from_sf; + rtx_2func_t from_si_sign; + rtx_2func_t from_si_uns; + rtx_2func_t from_di_sign; + rtx_2func_t from_di_uns; + rtx_2func_t to_df; + rtx_2func_t to_sf; + rtx_2func_t to_si_sign; + rtx_2func_t to_si_uns; + rtx_2func_t to_di_sign; + rtx_2func_t to_di_uns; + } hw_conversions[2] = { + /* convertions to/from KFmode */ + { + gen_extenddfkf2_hw, /* KFmode <- DFmode. */ + gen_extendsfkf2_hw, /* KFmode <- SFmode. */ + gen_float_kfsi2_hw, /* KFmode <- SImode (signed). 
*/ + gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */ + gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */ + gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */ + gen_trunckfdf2_hw, /* DFmode <- KFmode. */ + gen_trunckfsf2_hw, /* SFmode <- KFmode. */ + gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */ + gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */ + gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */ + gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */ + }, + + /* convertions to/from TFmode */ + { + gen_extenddftf2_hw, /* TFmode <- DFmode. */ + gen_extendsftf2_hw, /* TFmode <- SFmode. */ + gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */ + gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */ + gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */ + gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */ + gen_trunctfdf2_hw, /* DFmode <- TFmode. */ + gen_trunctfsf2_hw, /* SFmode <- TFmode. */ + gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */ + gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */ + gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */ + gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */ + }, + }; + + if (dest_mode == src_mode) + gcc_unreachable (); + + /* Eliminate memory operations. */ + if (MEM_P (src)) + src = force_reg (src_mode, src); + + if (MEM_P (dest)) + { + rtx tmp = gen_reg_rtx (dest_mode); + rs6000_expand_float128_convert (tmp, src, unsigned_p); + rs6000_emit_move (dest, tmp, dest_mode); + return; + } + + /* Convert to IEEE 128-bit floating point. */ + if (FLOAT128_IEEE_P (dest_mode)) + { + if (dest_mode == KFmode) + kf_or_tf = 0; + else if (dest_mode == TFmode) + kf_or_tf = 1; + else + gcc_unreachable (); + + switch (src_mode) + { + case DFmode: + cvt = sext_optab; + hw_convert = hw_conversions[kf_or_tf].from_df; + break; + + case SFmode: + cvt = sext_optab; + hw_convert = hw_conversions[kf_or_tf].from_sf; + break; + + case KFmode: + case IFmode: + case TFmode: + if (FLOAT128_IBM_P (src_mode)) + cvt = sext_optab; + else + do_move = true; + break; + + case SImode: + if (unsigned_p) + { + cvt = ufloat_optab; + hw_convert = hw_conversions[kf_or_tf].from_si_uns; + } + else + { + cvt = sfloat_optab; + hw_convert = hw_conversions[kf_or_tf].from_si_sign; + } + break; + + case DImode: + if (unsigned_p) + { + cvt = ufloat_optab; + hw_convert = hw_conversions[kf_or_tf].from_di_uns; + } + else + { + cvt = sfloat_optab; + hw_convert = hw_conversions[kf_or_tf].from_di_sign; + } + break; + + default: + gcc_unreachable (); + } + } + + /* Convert from IEEE 128-bit floating point. 
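As with the conversions to IEEE 128-bit above, select either a hardware insn or the optab used to look up a library routine, keyed on the destination mode.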
*/ + else if (FLOAT128_IEEE_P (src_mode)) + { + if (src_mode == KFmode) + kf_or_tf = 0; + else if (src_mode == TFmode) + kf_or_tf = 1; + else + gcc_unreachable (); + + switch (dest_mode) + { + case DFmode: + cvt = trunc_optab; + hw_convert = hw_conversions[kf_or_tf].to_df; + break; + + case SFmode: + cvt = trunc_optab; + hw_convert = hw_conversions[kf_or_tf].to_sf; + break; + + case KFmode: + case IFmode: + case TFmode: + if (FLOAT128_IBM_P (dest_mode)) + cvt = trunc_optab; + else + do_move = true; + break; + + case SImode: + if (unsigned_p) + { + cvt = ufix_optab; + hw_convert = hw_conversions[kf_or_tf].to_si_uns; + } + else + { + cvt = sfix_optab; + hw_convert = hw_conversions[kf_or_tf].to_si_sign; + } + break; + + case DImode: + if (unsigned_p) + { + cvt = ufix_optab; + hw_convert = hw_conversions[kf_or_tf].to_di_uns; + } + else + { + cvt = sfix_optab; + hw_convert = hw_conversions[kf_or_tf].to_di_sign; + } + break; + + default: + gcc_unreachable (); + } + } + + /* Both IBM format. */ + else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode)) + do_move = true; + + else + gcc_unreachable (); + + /* Handle conversion between TFmode/KFmode. */ + if (do_move) + emit_move_insn (dest, gen_lowpart (dest_mode, src)); + + /* Handle conversion if we have hardware support. */ + else if (TARGET_FLOAT128_HW && hw_convert) + emit_insn ((hw_convert) (dest, src)); + + /* Call an external function to do the conversion. */ + else if (cvt != unknown_optab) + { + libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode); + gcc_assert (libfunc != NULL_RTX); + + dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src, + src_mode); + + gcc_assert (dest2 != NULL_RTX); + if (!rtx_equal_p (dest, dest2)) + emit_move_insn (dest, dest2); + } + + else + gcc_unreachable (); + + return; +} + + +/* Emit the RTL for an sISEL pattern. */ + +void +rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[]) +{ + rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx); +} + +/* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH + can be used as that dest register. Return the dest register. 
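If OP2 is zero, OP1 itself already has that property and is returned; otherwise the value is computed as OP1 ^ OP2 when OP2 is a logical operand, and as OP1 - OP2 otherwise.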
*/ + +rtx +rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch) +{ + if (op2 == const0_rtx) + return op1; + + if (GET_CODE (scratch) == SCRATCH) + scratch = gen_reg_rtx (mode); + + if (logical_operand (op2, mode)) + emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2))); + else + emit_insn (gen_rtx_SET (scratch, + gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2)))); + + return scratch; +} + +void +rs6000_emit_sCOND (machine_mode mode, rtx operands[]) +{ + rtx condition_rtx; + machine_mode op_mode; + enum rtx_code cond_code; + rtx result = operands[0]; + + condition_rtx = rs6000_generate_compare (operands[1], mode); + cond_code = GET_CODE (condition_rtx); + + if (FLOAT_MODE_P (mode) + && !TARGET_FPRS && TARGET_HARD_FLOAT) + { + rtx t; + + PUT_MODE (condition_rtx, SImode); + t = XEXP (condition_rtx, 0); + + gcc_assert (cond_code == NE || cond_code == EQ); + + if (cond_code == NE) + emit_insn (gen_e500_flip_gt_bit (t, t)); + + emit_insn (gen_move_from_CR_gt_bit (result, t)); + return; + } + + if (cond_code == NE + || cond_code == GE || cond_code == LE + || cond_code == GEU || cond_code == LEU + || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE) + { + rtx not_result = gen_reg_rtx (CCEQmode); + rtx not_op, rev_cond_rtx; + machine_mode cc_mode; + + cc_mode = GET_MODE (XEXP (condition_rtx, 0)); + + rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code), + SImode, XEXP (condition_rtx, 0), const0_rtx); + not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx); + emit_insn (gen_rtx_SET (not_result, not_op)); + condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx); + } + + op_mode = GET_MODE (XEXP (operands[1], 0)); + if (op_mode == VOIDmode) + op_mode = GET_MODE (XEXP (operands[1], 1)); + + if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode))) + { + PUT_MODE (condition_rtx, DImode); + convert_move (result, condition_rtx, 0); + } + else + { + PUT_MODE (condition_rtx, SImode); + emit_insn (gen_rtx_SET (result, condition_rtx)); + } +} + +/* Emit a branch of kind CODE to location LOC. */ + +void +rs6000_emit_cbranch (machine_mode mode, rtx operands[]) +{ + rtx condition_rtx, loc_ref; + + condition_rtx = rs6000_generate_compare (operands[0], mode); + loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]); + emit_jump_insn (gen_rtx_SET (pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, + loc_ref, pc_rtx))); +} + +/* Return the string to output a conditional branch to LABEL, which is + the operand template of the label, or NULL if the branch is really a + conditional return. + + OP is the conditional expression. XEXP (OP, 0) is assumed to be a + condition code register and its mode specifies what kind of + comparison we made. + + REVERSED is nonzero if we should reverse the sense of the comparison. + + INSN is the insn. */ + +char * +output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn) +{ + static char string[64]; + enum rtx_code code = GET_CODE (op); + rtx cc_reg = XEXP (op, 0); + machine_mode mode = GET_MODE (cc_reg); + int cc_regno = REGNO (cc_reg) - CR0_REGNO; + int need_longbranch = label != NULL && get_attr_length (insn) == 8; + int really_reversed = reversed ^ need_longbranch; + char *s = string; + const char *ccode; + const char *pred; + rtx note; + + validate_condition_mode (code, mode); + + /* Work out which way this really branches. We could use + reverse_condition_maybe_unordered here always but this + makes the resulting assembler clearer. 
*/ + if (really_reversed) + { + /* Reversal of FP compares takes care -- an ordered compare + becomes an unordered compare and vice versa. */ + if (mode == CCFPmode) + code = reverse_condition_maybe_unordered (code); + else + code = reverse_condition (code); + } + + if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode) + { + /* The efscmp/tst* instructions twiddle bit 2, which maps nicely + to the GT bit. */ + switch (code) + { + case EQ: + /* Opposite of GT. */ + code = GT; + break; + + case NE: + code = UNLE; + break; + + default: + gcc_unreachable (); + } + } + + switch (code) + { + /* Not all of these are actually distinct opcodes, but + we distinguish them for clarity of the resulting assembler. */ + case NE: case LTGT: + ccode = "ne"; break; + case EQ: case UNEQ: + ccode = "eq"; break; + case GE: case GEU: + ccode = "ge"; break; + case GT: case GTU: case UNGT: + ccode = "gt"; break; + case LE: case LEU: + ccode = "le"; break; + case LT: case LTU: case UNLT: + ccode = "lt"; break; + case UNORDERED: ccode = "un"; break; + case ORDERED: ccode = "nu"; break; + case UNGE: ccode = "nl"; break; + case UNLE: ccode = "ng"; break; + default: + gcc_unreachable (); + } + + /* Maybe we have a guess as to how likely the branch is. */ + pred = ""; + note = find_reg_note (insn, REG_BR_PROB, NULL_RTX); + if (note != NULL_RTX) + { + /* PROB is the difference from 50%. */ + int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2; + + /* Only hint for highly probable/improbable branches on newer cpus when + we have real profile data, as static prediction overrides processor + dynamic prediction. For older cpus we may as well always hint, but + assume not taken for branches that are very close to 50% as a + mispredicted taken branch is more expensive than a + mispredicted not-taken branch. */ + if (rs6000_always_hint + || (abs (prob) > REG_BR_PROB_BASE / 100 * 48 + && (profile_status_for_fn (cfun) != PROFILE_GUESSED) + && br_prob_note_reliable_p (note))) + { + if (abs (prob) > REG_BR_PROB_BASE / 20 + && ((prob > 0) ^ need_longbranch)) + pred = "+"; + else + pred = "-"; + } + } + + if (label == NULL) + s += sprintf (s, "b%slr%s ", ccode, pred); + else + s += sprintf (s, "b%s%s ", ccode, pred); + + /* We need to escape any '%' characters in the reg_names string. + Assume they'd only be the first character.... */ + if (reg_names[cc_regno + CR0_REGNO][0] == '%') + *s++ = '%'; + s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]); + + if (label != NULL) + { + /* If the branch distance was too far, we may have to use an + unconditional branch to go the distance. */ + if (need_longbranch) + s += sprintf (s, ",$+8\n\tb %s", label); + else + s += sprintf (s, ",%s", label); + } + + return string; +} + +/* Return the string to flip the GT bit on a CR. */ +char * +output_e500_flip_gt_bit (rtx dst, rtx src) +{ + static char string[64]; + int a, b; + + gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst)) + && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src))); + + /* GT bit. */ + a = 4 * (REGNO (dst) - CR0_REGNO) + 1; + b = 4 * (REGNO (src) - CR0_REGNO) + 1; + + sprintf (string, "crnot %d,%d", a, b); + return string; +} + +/* Return insn for VSX or Altivec comparisons. 
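Only comparisons that map onto a single vector compare pattern are handled; for anything else NULL_RTX is returned so the caller can synthesize the operation.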
*/ + +static rtx +rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1) +{ + rtx mask; + machine_mode mode = GET_MODE (op0); + + switch (code) + { + default: + break; + + case GE: + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + return NULL_RTX; + /* FALLTHRU */ + + case EQ: + case GT: + case GTU: + case ORDERED: + case UNORDERED: + case UNEQ: + case LTGT: + mask = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1))); + return mask; + } + + return NULL_RTX; +} + +/* Emit vector compare for operands OP0 and OP1 using code RCODE. + DMODE is expected destination mode. This is a recursive function. */ + +static rtx +rs6000_emit_vector_compare (enum rtx_code rcode, + rtx op0, rtx op1, + machine_mode dmode) +{ + rtx mask; + bool swap_operands = false; + bool try_again = false; + + gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode)); + gcc_assert (GET_MODE (op0) == GET_MODE (op1)); + + /* See if the comparison works as is. */ + mask = rs6000_emit_vector_compare_inner (rcode, op0, op1); + if (mask) + return mask; + + switch (rcode) + { + case LT: + rcode = GT; + swap_operands = true; + try_again = true; + break; + case LTU: + rcode = GTU; + swap_operands = true; + try_again = true; + break; + case NE: + case UNLE: + case UNLT: + case UNGE: + case UNGT: + /* Invert condition and try again. + e.g., A != B becomes ~(A==B). */ + { + enum rtx_code rev_code; + enum insn_code nor_code; + rtx mask2; + + rev_code = reverse_condition_maybe_unordered (rcode); + if (rev_code == UNKNOWN) + return NULL_RTX; + + nor_code = optab_handler (one_cmpl_optab, dmode); + if (nor_code == CODE_FOR_nothing) + return NULL_RTX; + + mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode); + if (!mask2) + return NULL_RTX; + + mask = gen_reg_rtx (dmode); + emit_insn (GEN_FCN (nor_code) (mask, mask2)); + return mask; + } + break; + case GE: + case GEU: + case LE: + case LEU: + /* Try GT/GTU/LT/LTU OR EQ */ + { + rtx c_rtx, eq_rtx; + enum insn_code ior_code; + enum rtx_code new_code; + + switch (rcode) + { + case GE: + new_code = GT; + break; + + case GEU: + new_code = GTU; + break; + + case LE: + new_code = LT; + break; + + case LEU: + new_code = LTU; + break; + + default: + gcc_unreachable (); + } + + ior_code = optab_handler (ior_optab, dmode); + if (ior_code == CODE_FOR_nothing) + return NULL_RTX; + + c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode); + if (!c_rtx) + return NULL_RTX; + + eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode); + if (!eq_rtx) + return NULL_RTX; + + mask = gen_reg_rtx (dmode); + emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx)); + return mask; + } + break; + default: + return NULL_RTX; + } + + if (try_again) + { + if (swap_operands) + std::swap (op0, op1); + + mask = rs6000_emit_vector_compare_inner (rcode, op0, op1); + if (mask) + return mask; + } + + /* You only get two chances. */ + return NULL_RTX; +} + +/* Emit vector conditional expression. DEST is destination. OP_TRUE and + OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two + operands for the relation operation COND. 
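Return 1 if the expression was emitted, or 0 if the target has no vector support for this case.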
*/ + +int +rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false, + rtx cond, rtx cc_op0, rtx cc_op1) +{ + machine_mode dest_mode = GET_MODE (dest); + machine_mode mask_mode = GET_MODE (cc_op0); + enum rtx_code rcode = GET_CODE (cond); + machine_mode cc_mode = CCmode; + rtx mask; + rtx cond2; + bool invert_move = false; + + if (VECTOR_UNIT_NONE_P (dest_mode)) + return 0; + + gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode) + && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode)); + + switch (rcode) + { + /* Swap operands if we can, and fall back to doing the operation as + specified, and doing a NOR to invert the test. */ + case NE: + case UNLE: + case UNLT: + case UNGE: + case UNGT: + /* Invert condition and try again. + e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */ + invert_move = true; + rcode = reverse_condition_maybe_unordered (rcode); + if (rcode == UNKNOWN) + return 0; + break; + + case GE: + case LE: + if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT) + { + /* Invert condition to avoid compound test. */ + invert_move = true; + rcode = reverse_condition (rcode); + } + break; + + case GTU: + case GEU: + case LTU: + case LEU: + /* Mark unsigned tests with CCUNSmode. */ + cc_mode = CCUNSmode; + + /* Invert condition to avoid compound test if necessary. */ + if (rcode == GEU || rcode == LEU) + { + invert_move = true; + rcode = reverse_condition (rcode); + } + break; + + default: + break; + } + + /* Get the vector mask for the given relational operations. */ + mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode); + + if (!mask) + return 0; + + if (invert_move) + std::swap (op_true, op_false); + + /* Optimize vec1 == vec2, to know the mask generates -1/0. */ + if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT + && (GET_CODE (op_true) == CONST_VECTOR + || GET_CODE (op_false) == CONST_VECTOR)) + { + rtx constant_0 = CONST0_RTX (dest_mode); + rtx constant_m1 = CONSTM1_RTX (dest_mode); + + if (op_true == constant_m1 && op_false == constant_0) + { + emit_move_insn (dest, mask); + return 1; + } + + else if (op_true == constant_0 && op_false == constant_m1) + { + emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask))); + return 1; + } + + /* If we can't use the vector comparison directly, perhaps we can use + the mask for the true or false fields, instead of loading up a + constant. */ + if (op_true == constant_m1) + op_true = mask; + + if (op_false == constant_0) + op_false = mask; + } + + if (!REG_P (op_true) && !SUBREG_P (op_true)) + op_true = force_reg (dest_mode, op_true); + + if (!REG_P (op_false) && !SUBREG_P (op_false)) + op_false = force_reg (dest_mode, op_false); + + cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask), + CONST0_RTX (dest_mode)); + emit_insn (gen_rtx_SET (dest, + gen_rtx_IF_THEN_ELSE (dest_mode, + cond2, + op_true, + op_false))); + return 1; +} + +/* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction + for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last + comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the + hardware has no such operation. 
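+   For example, with DFmode operands (a >= b ? a : b) becomes a single
+   xsmaxcdp, while the swapped form (a >= b ? b : a) flips MAX_P and
+   becomes xsmincdp.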
*/ + +static int +rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond) +{ + enum rtx_code code = GET_CODE (op); + rtx op0 = XEXP (op, 0); + rtx op1 = XEXP (op, 1); + machine_mode compare_mode = GET_MODE (op0); + machine_mode result_mode = GET_MODE (dest); + bool max_p = false; + + if (result_mode != compare_mode) + return 0; + + if (code == GE || code == GT) + max_p = true; + else if (code == LE || code == LT) + max_p = false; + else + return 0; + + if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond)) + ; + + else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)) + max_p = !max_p; + + else + return 0; + + rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1); + return 1; +} + +/* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and + XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the + operands of the last comparison is nonzero/true, FALSE_COND if it is + zero/false. Return 0 if the hardware has no such operation. */ + +static int +rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond) +{ + enum rtx_code code = GET_CODE (op); + rtx op0 = XEXP (op, 0); + rtx op1 = XEXP (op, 1); + machine_mode result_mode = GET_MODE (dest); + rtx compare_rtx; + rtx cmove_rtx; + rtx clobber_rtx; + + if (!can_create_pseudo_p ()) + return 0; + + switch (code) + { + case EQ: + case GE: + case GT: + break; + + case NE: + case LT: + case LE: + code = swap_condition (code); + std::swap (op0, op1); + break; + + default: + return 0; + } + + /* Generate: [(parallel [(set (dest) + (if_then_else (op (cmp1) (cmp2)) + (true) + (false))) + (clobber (scratch))])]. */ + + compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1); + cmove_rtx = gen_rtx_SET (dest, + gen_rtx_IF_THEN_ELSE (result_mode, + compare_rtx, + true_cond, + false_cond)); + + clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (2, cmove_rtx, clobber_rtx))); + + return 1; +} + +/* Emit a conditional move: move TRUE_COND to DEST if OP of the + operands of the last comparison is nonzero/true, FALSE_COND if it + is zero/false. Return 0 if the hardware has no such operation. */ + +int +rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond) +{ + enum rtx_code code = GET_CODE (op); + rtx op0 = XEXP (op, 0); + rtx op1 = XEXP (op, 1); + machine_mode compare_mode = GET_MODE (op0); + machine_mode result_mode = GET_MODE (dest); + rtx temp; + bool is_against_zero; + + /* These modes should always match. */ + if (GET_MODE (op1) != compare_mode + /* In the isel case however, we can use a compare immediate, so + op1 may be a small constant. */ + && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode))) + return 0; + if (GET_MODE (true_cond) != result_mode) + return 0; + if (GET_MODE (false_cond) != result_mode) + return 0; + + /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */ + if (TARGET_P9_MINMAX + && (compare_mode == SFmode || compare_mode == DFmode) + && (result_mode == SFmode || result_mode == DFmode)) + { + if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond)) + return 1; + + if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond)) + return 1; + } + + /* Don't allow using floating point comparisons for integer results for + now. */ + if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode)) + return 0; + + /* First, work out if the hardware can do this at all, or + if it's too slow.... 
*/ + if (!FLOAT_MODE_P (compare_mode)) + { + if (TARGET_ISEL) + return rs6000_emit_int_cmove (dest, op, true_cond, false_cond); + return 0; + } + else if (TARGET_HARD_FLOAT && !TARGET_FPRS + && SCALAR_FLOAT_MODE_P (compare_mode)) + return 0; + + is_against_zero = op1 == CONST0_RTX (compare_mode); + + /* A floating-point subtract might overflow, underflow, or produce + an inexact result, thus changing the floating-point flags, so it + can't be generated if we care about that. It's safe if one side + of the construct is zero, since then no subtract will be + generated. */ + if (SCALAR_FLOAT_MODE_P (compare_mode) + && flag_trapping_math && ! is_against_zero) + return 0; + + /* Eliminate half of the comparisons by switching operands, this + makes the remaining code simpler. */ + if (code == UNLT || code == UNGT || code == UNORDERED || code == NE + || code == LTGT || code == LT || code == UNLE) + { + code = reverse_condition_maybe_unordered (code); + temp = true_cond; + true_cond = false_cond; + false_cond = temp; + } + + /* UNEQ and LTGT take four instructions for a comparison with zero, + it'll probably be faster to use a branch here too. */ + if (code == UNEQ && HONOR_NANS (compare_mode)) + return 0; + + /* We're going to try to implement comparisons by performing + a subtract, then comparing against zero. Unfortunately, + Inf - Inf is NaN which is not zero, and so if we don't + know that the operand is finite and the comparison + would treat EQ different to UNORDERED, we can't do it. */ + if (HONOR_INFINITIES (compare_mode) + && code != GT && code != UNGE + && (GET_CODE (op1) != CONST_DOUBLE + || real_isinf (CONST_DOUBLE_REAL_VALUE (op1))) + /* Constructs of the form (a OP b ? a : b) are safe. */ + && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond)) + || (! rtx_equal_p (op0, true_cond) + && ! rtx_equal_p (op1, true_cond)))) + return 0; + + /* At this point we know we can use fsel. */ + + /* Reduce the comparison to a comparison against zero. */ + if (! is_against_zero) + { + temp = gen_reg_rtx (compare_mode); + emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1))); + op0 = temp; + op1 = CONST0_RTX (compare_mode); + } + + /* If we don't care about NaNs we can reduce some of the comparisons + down to faster ones. */ + if (! HONOR_NANS (compare_mode)) + switch (code) + { + case GT: + code = LE; + temp = true_cond; + true_cond = false_cond; + false_cond = temp; + break; + case UNGE: + code = GE; + break; + case UNEQ: + code = EQ; + break; + default: + break; + } + + /* Now, reduce everything down to a GE. 
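+     For example, a LE 0 is handled as -a GE 0, a EQ 0 as -abs(a) GE 0 and
+     ORDERED as abs(a) GE 0; UNGE and GT additionally need a second fsel on
+     the negated operand, as the cases below show.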
*/ + switch (code) + { + case GE: + break; + + case LE: + temp = gen_reg_rtx (compare_mode); + emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0))); + op0 = temp; + break; + + case ORDERED: + temp = gen_reg_rtx (compare_mode); + emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0))); + op0 = temp; + break; + + case EQ: + temp = gen_reg_rtx (compare_mode); + emit_insn (gen_rtx_SET (temp, + gen_rtx_NEG (compare_mode, + gen_rtx_ABS (compare_mode, op0)))); + op0 = temp; + break; + + case UNGE: + /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */ + temp = gen_reg_rtx (result_mode); + emit_insn (gen_rtx_SET (temp, + gen_rtx_IF_THEN_ELSE (result_mode, + gen_rtx_GE (VOIDmode, + op0, op1), + true_cond, false_cond))); + false_cond = true_cond; + true_cond = temp; + + temp = gen_reg_rtx (compare_mode); + emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0))); + op0 = temp; + break; + + case GT: + /* a GT 0 <-> (a GE 0 && -a UNLT 0) */ + temp = gen_reg_rtx (result_mode); + emit_insn (gen_rtx_SET (temp, + gen_rtx_IF_THEN_ELSE (result_mode, + gen_rtx_GE (VOIDmode, + op0, op1), + true_cond, false_cond))); + true_cond = false_cond; + false_cond = temp; + + temp = gen_reg_rtx (compare_mode); + emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0))); + op0 = temp; + break; + + default: + gcc_unreachable (); + } + + emit_insn (gen_rtx_SET (dest, + gen_rtx_IF_THEN_ELSE (result_mode, + gen_rtx_GE (VOIDmode, + op0, op1), + true_cond, false_cond))); + return 1; +} + +/* Same as above, but for ints (isel). */ + +static int +rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond) +{ + rtx condition_rtx, cr; + machine_mode mode = GET_MODE (dest); + enum rtx_code cond_code; + rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx); + bool signedp; + + if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode)) + return 0; + + /* We still have to do the compare, because isel doesn't do a + compare, it just looks at the CRx bits set by a previous compare + instruction. */ + condition_rtx = rs6000_generate_compare (op, mode); + cond_code = GET_CODE (condition_rtx); + cr = XEXP (condition_rtx, 0); + signedp = GET_MODE (cr) == CCmode; + + isel_func = (mode == SImode + ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si) + : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di)); + + switch (cond_code) + { + case LT: case GT: case LTU: case GTU: case EQ: + /* isel handles these directly. */ + break; + + default: + /* We need to swap the sense of the comparison. */ + { + std::swap (false_cond, true_cond); + PUT_CODE (condition_rtx, reverse_condition (cond_code)); + } + break; + } + + false_cond = force_reg (mode, false_cond); + if (true_cond != const0_rtx) + true_cond = force_reg (mode, true_cond); + + emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr)); + + return 1; +} + +const char * +output_isel (rtx *operands) +{ + enum rtx_code code; + + code = GET_CODE (operands[1]); + + if (code == GE || code == GEU || code == LE || code == LEU || code == NE) + { + gcc_assert (GET_CODE (operands[2]) == REG + && GET_CODE (operands[3]) == REG); + PUT_CODE (operands[1], reverse_condition (code)); + return "isel %0,%3,%2,%j1"; + } + + return "isel %0,%2,%3,%j1"; +} + +void +rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1) +{ + machine_mode mode = GET_MODE (op0); + enum rtx_code c; + rtx target; + + /* VSX/altivec have direct min/max insns. 
*/ + if ((code == SMAX || code == SMIN) + && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode) + || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode)))) + { + emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1))); + return; + } + + if (code == SMAX || code == SMIN) + c = GE; + else + c = GEU; + + if (code == SMAX || code == UMAX) + target = emit_conditional_move (dest, c, op0, op1, mode, + op0, op1, mode, 0); + else + target = emit_conditional_move (dest, c, op0, op1, mode, + op1, op0, mode, 0); + gcc_assert (target); + if (target != dest) + emit_move_insn (dest, target); +} + +/* Split a signbit operation on 64-bit machines with direct move. Also allow + for the value to come from memory or if it is already loaded into a GPR. */ + +void +rs6000_split_signbit (rtx dest, rtx src) +{ + machine_mode d_mode = GET_MODE (dest); + machine_mode s_mode = GET_MODE (src); + rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest); + rtx shift_reg = dest_di; + + gcc_assert (FLOAT128_IEEE_P (s_mode) && TARGET_POWERPC64); + + if (MEM_P (src)) + { + rtx mem = (WORDS_BIG_ENDIAN + ? adjust_address (src, DImode, 0) + : adjust_address (src, DImode, 8)); + emit_insn (gen_rtx_SET (dest_di, mem)); + } + + else + { + unsigned int r = reg_or_subregno (src); + + if (INT_REGNO_P (r)) + shift_reg = gen_rtx_REG (DImode, r + (BYTES_BIG_ENDIAN == 0)); + + else + { + /* Generate the special mfvsrd instruction to get it in a GPR. */ + gcc_assert (VSX_REGNO_P (r)); + if (s_mode == KFmode) + emit_insn (gen_signbitkf2_dm2 (dest_di, src)); + else + emit_insn (gen_signbittf2_dm2 (dest_di, src)); + } + } + + emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63))); + return; +} + +/* A subroutine of the atomic operation splitters. Jump to LABEL if + COND is true. Mark the jump as unlikely to be taken. */ + +static void +emit_unlikely_jump (rtx cond, rtx label) +{ + int very_unlikely = REG_BR_PROB_BASE / 100 - 1; + rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx); + rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x)); + add_int_reg_note (insn, REG_BR_PROB, very_unlikely); +} + +/* A subroutine of the atomic operation splitters. Emit a load-locked + instruction in MODE. For QI/HImode, possibly use a pattern than includes + the zero_extend operation. */ + +static void +emit_load_locked (machine_mode mode, rtx reg, rtx mem) +{ + rtx (*fn) (rtx, rtx) = NULL; + + switch (mode) + { + case QImode: + fn = gen_load_lockedqi; + break; + case HImode: + fn = gen_load_lockedhi; + break; + case SImode: + if (GET_MODE (mem) == QImode) + fn = gen_load_lockedqi_si; + else if (GET_MODE (mem) == HImode) + fn = gen_load_lockedhi_si; + else + fn = gen_load_lockedsi; + break; + case DImode: + fn = gen_load_lockeddi; + break; + case TImode: + fn = gen_load_lockedti; + break; + default: + gcc_unreachable (); + } + emit_insn (fn (reg, mem)); +} + +/* A subroutine of the atomic operation splitters. Emit a store-conditional + instruction in MODE. */ + +static void +emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val) +{ + rtx (*fn) (rtx, rtx, rtx) = NULL; + + switch (mode) + { + case QImode: + fn = gen_store_conditionalqi; + break; + case HImode: + fn = gen_store_conditionalhi; + break; + case SImode: + fn = gen_store_conditionalsi; + break; + case DImode: + fn = gen_store_conditionaldi; + break; + case TImode: + fn = gen_store_conditionalti; + break; + default: + gcc_unreachable (); + } + + /* Emit sync before stwcx. to address PPC405 Erratum. 
*/ + if (PPC405_ERRATUM77) + emit_insn (gen_hwsync ()); + + emit_insn (fn (res, mem, val)); +} + +/* Expand barriers before and after a load_locked/store_cond sequence. */ + +static rtx +rs6000_pre_atomic_barrier (rtx mem, enum memmodel model) +{ + rtx addr = XEXP (mem, 0); + int strict_p = (reload_in_progress || reload_completed); + + if (!legitimate_indirect_address_p (addr, strict_p) + && !legitimate_indexed_address_p (addr, strict_p)) + { + addr = force_reg (Pmode, addr); + mem = replace_equiv_address_nv (mem, addr); + } + + switch (model) + { + case MEMMODEL_RELAXED: + case MEMMODEL_CONSUME: + case MEMMODEL_ACQUIRE: + break; + case MEMMODEL_RELEASE: + case MEMMODEL_ACQ_REL: + emit_insn (gen_lwsync ()); + break; + case MEMMODEL_SEQ_CST: + emit_insn (gen_hwsync ()); + break; + default: + gcc_unreachable (); + } + return mem; +} + +static void +rs6000_post_atomic_barrier (enum memmodel model) +{ + switch (model) + { + case MEMMODEL_RELAXED: + case MEMMODEL_CONSUME: + case MEMMODEL_RELEASE: + break; + case MEMMODEL_ACQUIRE: + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: + emit_insn (gen_isync ()); + break; + default: + gcc_unreachable (); + } +} + +/* A subroutine of the various atomic expanders. For sub-word operations, + we must adjust things to operate on SImode. Given the original MEM, + return a new aligned memory. Also build and return the quantities by + which to shift and mask. */ + +static rtx +rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask) +{ + rtx addr, align, shift, mask, mem; + HOST_WIDE_INT shift_mask; + machine_mode mode = GET_MODE (orig_mem); + + /* For smaller modes, we have to implement this via SImode. */ + shift_mask = (mode == QImode ? 0x18 : 0x10); + + addr = XEXP (orig_mem, 0); + addr = force_reg (GET_MODE (addr), addr); + + /* Aligned memory containing subword. Generate a new memory. We + do not want any of the existing MEM_ATTR data, as we're now + accessing memory outside the original object. */ + align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4), + NULL_RTX, 1, OPTAB_LIB_WIDEN); + mem = gen_rtx_MEM (SImode, align); + MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem); + if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER) + set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER); + + /* Shift amount for subword relative to aligned word. */ + shift = gen_reg_rtx (SImode); + addr = gen_lowpart (SImode, addr); + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3))); + emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask))); + if (BYTES_BIG_ENDIAN) + shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask), + shift, 1, OPTAB_LIB_WIDEN); + *pshift = shift; + + /* Mask for insertion. */ + mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)), + shift, NULL_RTX, 1, OPTAB_LIB_WIDEN); + *pmask = mask; + + return mem; +} + +/* A subroutine of the various atomic expanders. For sub-word operands, + combine OLDVAL and NEWVAL via MASK. Returns a new pseduo. */ + +static rtx +rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask) +{ + rtx x; + + x = gen_reg_rtx (SImode); + emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode, + gen_rtx_NOT (SImode, mask), + oldval))); + + x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN); + + return x; +} + +/* A subroutine of the various atomic expanders. For sub-word operands, + extract WIDE to NARROW via SHIFT. 
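+   That is, shift the SImode result of the atomic sequence right by SHIFT
+   bits and copy the low byte or halfword into NARROW.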
*/ + +static void +rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift) +{ + wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift, + wide, 1, OPTAB_LIB_WIDEN); + emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide)); +} + +/* Expand an atomic compare and swap operation. */ + +void +rs6000_expand_atomic_compare_and_swap (rtx operands[]) +{ + rtx boolval, retval, mem, oldval, newval, cond; + rtx label1, label2, x, mask, shift; + machine_mode mode, orig_mode; + enum memmodel mod_s, mod_f; + bool is_weak; + + boolval = operands[0]; + retval = operands[1]; + mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + is_weak = (INTVAL (operands[5]) != 0); + mod_s = memmodel_base (INTVAL (operands[6])); + mod_f = memmodel_base (INTVAL (operands[7])); + orig_mode = mode = GET_MODE (mem); + + mask = shift = NULL_RTX; + if (mode == QImode || mode == HImode) + { + /* Before power8, we didn't have access to lbarx/lharx, so generate a + lwarx and shift/mask operations. With power8, we need to do the + comparison in SImode, but the store is still done in QI/HImode. */ + oldval = convert_modes (SImode, mode, oldval, 1); + + if (!TARGET_SYNC_HI_QI) + { + mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); + + /* Shift and mask OLDVAL into position with the word. */ + oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift, + NULL_RTX, 1, OPTAB_LIB_WIDEN); + + /* Shift and mask NEWVAL into position within the word. */ + newval = convert_modes (SImode, mode, newval, 1); + newval = expand_simple_binop (SImode, ASHIFT, newval, shift, + NULL_RTX, 1, OPTAB_LIB_WIDEN); + } + + /* Prepare to adjust the return value. */ + retval = gen_reg_rtx (SImode); + mode = SImode; + } + else if (reg_overlap_mentioned_p (retval, oldval)) + oldval = copy_to_reg (oldval); + + if (mode != TImode && !reg_or_short_operand (oldval, mode)) + oldval = copy_to_mode_reg (mode, oldval); + + if (reg_overlap_mentioned_p (retval, newval)) + newval = copy_to_reg (newval); + + mem = rs6000_pre_atomic_barrier (mem, mod_s); + + label1 = NULL_RTX; + if (!is_weak) + { + label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); + emit_label (XEXP (label1, 0)); + } + label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); + + emit_load_locked (mode, retval, mem); + + x = retval; + if (mask) + x = expand_simple_binop (SImode, AND, retval, mask, + NULL_RTX, 1, OPTAB_LIB_WIDEN); + + cond = gen_reg_rtx (CCmode); + /* If we have TImode, synthesize a comparison. 
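+     There is no 128-bit compare, so XOR the high and low doublewords of
+     the old and new values separately, OR the two results together, and
+     compare that against zero.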
*/ + if (mode != TImode) + x = gen_rtx_COMPARE (CCmode, x, oldval); + else + { + rtx xor1_result = gen_reg_rtx (DImode); + rtx xor2_result = gen_reg_rtx (DImode); + rtx or_result = gen_reg_rtx (DImode); + rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0); + rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8); + rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0); + rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8); + + emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0)); + emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1)); + emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result)); + x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx); + } + + emit_insn (gen_rtx_SET (cond, x)); + + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + emit_unlikely_jump (x, label2); + + x = newval; + if (mask) + x = rs6000_mask_atomic_subword (retval, newval, mask); + + emit_store_conditional (orig_mode, cond, mem, x); + + if (!is_weak) + { + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + emit_unlikely_jump (x, label1); + } + + if (!is_mm_relaxed (mod_f)) + emit_label (XEXP (label2, 0)); + + rs6000_post_atomic_barrier (mod_s); + + if (is_mm_relaxed (mod_f)) + emit_label (XEXP (label2, 0)); + + if (shift) + rs6000_finish_atomic_subword (operands[1], retval, shift); + else if (mode != GET_MODE (operands[1])) + convert_move (operands[1], retval, 1); + + /* In all cases, CR0 contains EQ on success, and NE on failure. */ + x = gen_rtx_EQ (SImode, cond, const0_rtx); + emit_insn (gen_rtx_SET (boolval, x)); +} + +/* Expand an atomic exchange operation. */ + +void +rs6000_expand_atomic_exchange (rtx operands[]) +{ + rtx retval, mem, val, cond; + machine_mode mode; + enum memmodel model; + rtx label, x, mask, shift; + + retval = operands[0]; + mem = operands[1]; + val = operands[2]; + model = memmodel_base (INTVAL (operands[3])); + mode = GET_MODE (mem); + + mask = shift = NULL_RTX; + if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode)) + { + mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); + + /* Shift and mask VAL into position with the word. */ + val = convert_modes (SImode, mode, val, 1); + val = expand_simple_binop (SImode, ASHIFT, val, shift, + NULL_RTX, 1, OPTAB_LIB_WIDEN); + + /* Prepare to adjust the return value. */ + retval = gen_reg_rtx (SImode); + mode = SImode; + } + + mem = rs6000_pre_atomic_barrier (mem, model); + + label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); + emit_label (XEXP (label, 0)); + + emit_load_locked (mode, retval, mem); + + x = val; + if (mask) + x = rs6000_mask_atomic_subword (retval, val, mask); + + cond = gen_reg_rtx (CCmode); + emit_store_conditional (mode, cond, mem, x); + + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + emit_unlikely_jump (x, label); + + rs6000_post_atomic_barrier (model); + + if (shift) + rs6000_finish_atomic_subword (operands[0], retval, shift); +} + +/* Expand an atomic fetch-and-operate pattern. CODE is the binary operation + to perform. MEM is the memory on which to operate. VAL is the second + operand of the binary operator. BEFORE and AFTER are optional locations to + return the value of MEM either before of after the operation. MODEL_RTX + is a CONST_INT containing the memory model to use. 
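+   For example, __atomic_fetch_add wants the value BEFORE the operation
+   while __atomic_add_fetch wants the value AFTER it; either (or both) may
+   be NULL_RTX when the result is not used.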
*/ + +void +rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, + rtx orig_before, rtx orig_after, rtx model_rtx) +{ + enum memmodel model = memmodel_base (INTVAL (model_rtx)); + machine_mode mode = GET_MODE (mem); + machine_mode store_mode = mode; + rtx label, x, cond, mask, shift; + rtx before = orig_before, after = orig_after; + + mask = shift = NULL_RTX; + /* On power8, we want to use SImode for the operation. On previous systems, + use the operation in a subword and shift/mask to get the proper byte or + halfword. */ + if (mode == QImode || mode == HImode) + { + if (TARGET_SYNC_HI_QI) + { + val = convert_modes (SImode, mode, val, 1); + + /* Prepare to adjust the return value. */ + before = gen_reg_rtx (SImode); + if (after) + after = gen_reg_rtx (SImode); + mode = SImode; + } + else + { + mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); + + /* Shift and mask VAL into position with the word. */ + val = convert_modes (SImode, mode, val, 1); + val = expand_simple_binop (SImode, ASHIFT, val, shift, + NULL_RTX, 1, OPTAB_LIB_WIDEN); + + switch (code) + { + case IOR: + case XOR: + /* We've already zero-extended VAL. That is sufficient to + make certain that it does not affect other bits. */ + mask = NULL; + break; + + case AND: + /* If we make certain that all of the other bits in VAL are + set, that will be sufficient to not affect other bits. */ + x = gen_rtx_NOT (SImode, mask); + x = gen_rtx_IOR (SImode, x, val); + emit_insn (gen_rtx_SET (val, x)); + mask = NULL; + break; + + case NOT: + case PLUS: + case MINUS: + /* These will all affect bits outside the field and need + adjustment via MASK within the loop. */ + break; + + default: + gcc_unreachable (); + } + + /* Prepare to adjust the return value. */ + before = gen_reg_rtx (SImode); + if (after) + after = gen_reg_rtx (SImode); + store_mode = mode = SImode; + } + } + + mem = rs6000_pre_atomic_barrier (mem, model); + + label = gen_label_rtx (); + emit_label (label); + label = gen_rtx_LABEL_REF (VOIDmode, label); + + if (before == NULL_RTX) + before = gen_reg_rtx (mode); + + emit_load_locked (mode, before, mem); + + if (code == NOT) + { + x = expand_simple_binop (mode, AND, before, val, + NULL_RTX, 1, OPTAB_LIB_WIDEN); + after = expand_simple_unop (mode, NOT, x, after, 1); + } + else + { + after = expand_simple_binop (mode, code, before, val, + after, 1, OPTAB_LIB_WIDEN); + } + + x = after; + if (mask) + { + x = expand_simple_binop (SImode, AND, after, mask, + NULL_RTX, 1, OPTAB_LIB_WIDEN); + x = rs6000_mask_atomic_subword (before, x, mask); + } + else if (store_mode != mode) + x = convert_modes (store_mode, mode, x, 1); + + cond = gen_reg_rtx (CCmode); + emit_store_conditional (store_mode, cond, mem, x); + + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + emit_unlikely_jump (x, label); + + rs6000_post_atomic_barrier (model); + + if (shift) + { + /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and + then do the calcuations in a SImode register. */ + if (orig_before) + rs6000_finish_atomic_subword (orig_before, before, shift); + if (orig_after) + rs6000_finish_atomic_subword (orig_after, after, shift); + } + else if (store_mode != mode) + { + /* QImode/HImode on machines with lbarx/lharx where we do the native + operation and then do the calcuations in a SImode register. 
*/ + if (orig_before) + convert_move (orig_before, before, 1); + if (orig_after) + convert_move (orig_after, after, 1); + } + else if (orig_after && after != orig_after) + emit_move_insn (orig_after, after); +} + +/* Emit instructions to move SRC to DST. Called by splitters for + multi-register moves. It will emit at most one instruction for + each register that is accessed; that is, it won't emit li/lis pairs + (or equivalent for 64-bit code). One of SRC or DST must be a hard + register. */ + +void +rs6000_split_multireg_move (rtx dst, rtx src) +{ + /* The register number of the first register being moved. */ + int reg; + /* The mode that is to be moved. */ + machine_mode mode; + /* The mode that the move is being done in, and its size. */ + machine_mode reg_mode; + int reg_mode_size; + /* The number of registers that will be moved. */ + int nregs; + + reg = REG_P (dst) ? REGNO (dst) : REGNO (src); + mode = GET_MODE (dst); + nregs = hard_regno_nregs[reg][mode]; + if (FP_REGNO_P (reg)) + reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : + ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode); + else if (ALTIVEC_REGNO_P (reg)) + reg_mode = V16QImode; + else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode)) + reg_mode = DFmode; + else + reg_mode = word_mode; + reg_mode_size = GET_MODE_SIZE (reg_mode); + + gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode)); + + /* TDmode residing in FP registers is special, since the ISA requires that + the lower-numbered word of a register pair is always the most significant + word, even in little-endian mode. This does not match the usual subreg + semantics, so we cannnot use simplify_gen_subreg in those cases. Access + the appropriate constituent registers "by hand" in little-endian mode. + + Note we do not need to check for destructive overlap here since TDmode + can only reside in even/odd register pairs. */ + if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN) + { + rtx p_src, p_dst; + int i; + + for (i = 0; i < nregs; i++) + { + if (REG_P (src) && FP_REGNO_P (REGNO (src))) + p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i); + else + p_src = simplify_gen_subreg (reg_mode, src, mode, + i * reg_mode_size); + + if (REG_P (dst) && FP_REGNO_P (REGNO (dst))) + p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i); + else + p_dst = simplify_gen_subreg (reg_mode, dst, mode, + i * reg_mode_size); + + emit_insn (gen_rtx_SET (p_dst, p_src)); + } + + return; + } + + if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst))) + { + /* Move register range backwards, if we might have destructive + overlap. */ + int i; + for (i = nregs - 1; i >= 0; i--) + emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode, + i * reg_mode_size), + simplify_gen_subreg (reg_mode, src, mode, + i * reg_mode_size))); + } + else + { + int i; + int j = -1; + bool used_update = false; + rtx restore_basereg = NULL_RTX; + + if (MEM_P (src) && INT_REGNO_P (reg)) + { + rtx breg; + + if (GET_CODE (XEXP (src, 0)) == PRE_INC + || GET_CODE (XEXP (src, 0)) == PRE_DEC) + { + rtx delta_rtx; + breg = XEXP (XEXP (src, 0), 0); + delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC + ? GEN_INT (GET_MODE_SIZE (GET_MODE (src))) + : GEN_INT (-GET_MODE_SIZE (GET_MODE (src)))); + emit_insn (gen_add3_insn (breg, breg, delta_rtx)); + src = replace_equiv_address (src, breg); + } + else if (! 
rs6000_offsettable_memref_p (src, reg_mode)) + { + if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY) + { + rtx basereg = XEXP (XEXP (src, 0), 0); + if (TARGET_UPDATE) + { + rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0); + emit_insn (gen_rtx_SET (ndst, + gen_rtx_MEM (reg_mode, + XEXP (src, 0)))); + used_update = true; + } + else + emit_insn (gen_rtx_SET (basereg, + XEXP (XEXP (src, 0), 1))); + src = replace_equiv_address (src, basereg); + } + else + { + rtx basereg = gen_rtx_REG (Pmode, reg); + emit_insn (gen_rtx_SET (basereg, XEXP (src, 0))); + src = replace_equiv_address (src, basereg); + } + } + + breg = XEXP (src, 0); + if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM) + breg = XEXP (breg, 0); + + /* If the base register we are using to address memory is + also a destination reg, then change that register last. */ + if (REG_P (breg) + && REGNO (breg) >= REGNO (dst) + && REGNO (breg) < REGNO (dst) + nregs) + j = REGNO (breg) - REGNO (dst); + } + else if (MEM_P (dst) && INT_REGNO_P (reg)) + { + rtx breg; + + if (GET_CODE (XEXP (dst, 0)) == PRE_INC + || GET_CODE (XEXP (dst, 0)) == PRE_DEC) + { + rtx delta_rtx; + breg = XEXP (XEXP (dst, 0), 0); + delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC + ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst))) + : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst)))); + + /* We have to update the breg before doing the store. + Use store with update, if available. */ + + if (TARGET_UPDATE) + { + rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0); + emit_insn (TARGET_32BIT + ? (TARGET_POWERPC64 + ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc) + : gen_movsi_update (breg, breg, delta_rtx, nsrc)) + : gen_movdi_di_update (breg, breg, delta_rtx, nsrc)); + used_update = true; + } + else + emit_insn (gen_add3_insn (breg, breg, delta_rtx)); + dst = replace_equiv_address (dst, breg); + } + else if (!rs6000_offsettable_memref_p (dst, reg_mode) + && GET_CODE (XEXP (dst, 0)) != LO_SUM) + { + if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY) + { + rtx basereg = XEXP (XEXP (dst, 0), 0); + if (TARGET_UPDATE) + { + rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0); + emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode, + XEXP (dst, 0)), + nsrc)); + used_update = true; + } + else + emit_insn (gen_rtx_SET (basereg, + XEXP (XEXP (dst, 0), 1))); + dst = replace_equiv_address (dst, basereg); + } + else + { + rtx basereg = XEXP (XEXP (dst, 0), 0); + rtx offsetreg = XEXP (XEXP (dst, 0), 1); + gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS + && REG_P (basereg) + && REG_P (offsetreg) + && REGNO (basereg) != REGNO (offsetreg)); + if (REGNO (basereg) == 0) + { + rtx tmp = offsetreg; + offsetreg = basereg; + basereg = tmp; + } + emit_insn (gen_add3_insn (basereg, basereg, offsetreg)); + restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg); + dst = replace_equiv_address (dst, basereg); + } + } + else if (GET_CODE (XEXP (dst, 0)) != LO_SUM) + gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode)); + } + + for (i = 0; i < nregs; i++) + { + /* Calculate index to next subword. */ + ++j; + if (j == nregs) + j = 0; + + /* If compiler already emitted move of first word by + store with update, no need to do anything. 
*/ + if (j == 0 && used_update) + continue; + + emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode, + j * reg_mode_size), + simplify_gen_subreg (reg_mode, src, mode, + j * reg_mode_size))); + } + if (restore_basereg != NULL_RTX) + emit_insn (restore_basereg); + } +} + + +/* This page contains routines that are used to determine what the + function prologue and epilogue code will do and write them out. */ + +static inline bool +save_reg_p (int r) +{ + return !call_used_regs[r] && df_regs_ever_live_p (r); +} + +/* Determine whether the gp REG is really used. */ + +static bool +rs6000_reg_live_or_pic_offset_p (int reg) +{ + /* We need to mark the PIC offset register live for the same conditions + as it is set up, or otherwise it won't be saved before we clobber it. */ + + if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE) + { + if (TARGET_TOC && TARGET_MINIMAL_TOC + && (crtl->calls_eh_return + || df_regs_ever_live_p (reg) + || !constant_pool_empty_p ())) + return true; + + if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) + && flag_pic) + return true; + } + + /* If the function calls eh_return, claim used all the registers that would + be checked for liveness otherwise. */ + + return ((crtl->calls_eh_return || df_regs_ever_live_p (reg)) + && !call_used_regs[reg]); +} + +/* Return the first fixed-point register that is required to be + saved. 32 if none. */ + +int +first_reg_to_save (void) +{ + int first_reg; + + /* Find lowest numbered live register. */ + for (first_reg = 13; first_reg <= 31; first_reg++) + if (save_reg_p (first_reg)) + break; + + if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM + && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0) + || (DEFAULT_ABI == ABI_DARWIN && flag_pic) + || (TARGET_TOC && TARGET_MINIMAL_TOC)) + && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM)) + first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM; + +#if TARGET_MACHO + if (flag_pic + && crtl->uses_pic_offset_table + && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM) + return RS6000_PIC_OFFSET_TABLE_REGNUM; +#endif + + return first_reg; +} + +/* Similar, for FP regs. */ + +int +first_fp_reg_to_save (void) +{ + int first_reg; + + /* Find lowest numbered live register. */ + for (first_reg = 14 + 32; first_reg <= 63; first_reg++) + if (save_reg_p (first_reg)) + break; + + return first_reg; +} + +/* Similar, for AltiVec regs. */ + +static int +first_altivec_reg_to_save (void) +{ + int i; + + /* Stack frame remains as is unless we are in AltiVec ABI. */ + if (! TARGET_ALTIVEC_ABI) + return LAST_ALTIVEC_REGNO + 1; + + /* On Darwin, the unwind routines are compiled without + TARGET_ALTIVEC, and use save_world to save/restore the + altivec registers when necessary. */ + if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return + && ! TARGET_ALTIVEC) + return FIRST_ALTIVEC_REGNO + 20; + + /* Find lowest numbered live register. */ + for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i) + if (save_reg_p (i)) + break; + + return i; +} + +/* Return a 32-bit mask of the AltiVec registers we need to set in + VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in + the 32-bit word is 0. */ + +static unsigned int +compute_vrsave_mask (void) +{ + unsigned int i, mask = 0; + + /* On Darwin, the unwind routines are compiled without + TARGET_ALTIVEC, and use save_world to save/restore the + call-saved altivec registers when necessary. */ + if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return + && ! 
TARGET_ALTIVEC) + mask |= 0xFFF; + + /* First, find out if we use _any_ altivec registers. */ + for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i) + if (df_regs_ever_live_p (i)) + mask |= ALTIVEC_REG_BIT (i); + + if (mask == 0) + return mask; + + /* Next, remove the argument registers from the set. These must + be in the VRSAVE mask set by the caller, so we don't need to add + them in again. More importantly, the mask we compute here is + used to generate CLOBBERs in the set_vrsave insn, and we do not + wish the argument registers to die. */ + for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++) + mask &= ~ALTIVEC_REG_BIT (i); + + /* Similarly, remove the return value from the set. */ + { + bool yes = false; + diddle_return_value (is_altivec_return_reg, &yes); + if (yes) + mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN); + } + + return mask; +} + +/* For a very restricted set of circumstances, we can cut down the + size of prologues/epilogues by calling our own save/restore-the-world + routines. */ + +static void +compute_save_world_info (rs6000_stack_t *info) +{ + info->world_save_p = 1; + info->world_save_p + = (WORLD_SAVE_P (info) + && DEFAULT_ABI == ABI_DARWIN + && !cfun->has_nonlocal_label + && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO + && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO + && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO + && info->cr_save_p); + + /* This will not work in conjunction with sibcalls. Make sure there + are none. (This check is expensive, but seldom executed.) */ + if (WORLD_SAVE_P (info)) + { + rtx_insn *insn; + for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn)) + if (CALL_P (insn) && SIBLING_CALL_P (insn)) + { + info->world_save_p = 0; + break; + } + } + + if (WORLD_SAVE_P (info)) + { + /* Even if we're not touching VRsave, make sure there's room on the + stack for it, if it looks like we're calling SAVE_WORLD, which + will attempt to save it. */ + info->vrsave_size = 4; + + /* If we are going to save the world, we need to save the link register too. */ + info->lr_save_p = 1; + + /* "Save" the VRsave register too if we're saving the world. */ + if (info->vrsave_mask == 0) + info->vrsave_mask = compute_vrsave_mask (); + + /* Because the Darwin register save/restore routines only handle + F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency + check. */ + gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO + && (info->first_altivec_reg_save + >= FIRST_SAVED_ALTIVEC_REGNO)); + } + + return; +} + + +static void +is_altivec_return_reg (rtx reg, void *xyes) +{ + bool *yes = (bool *) xyes; + if (REGNO (reg) == ALTIVEC_ARG_RETURN) + *yes = true; +} + + +/* Return whether REG is a global user reg or has been specifed by + -ffixed-REG. We should not restore these, and so cannot use + lmw or out-of-line restore functions if there are any. We also + can't save them (well, emit frame notes for them), because frame + unwinding during exception handling will restore saved registers. */ + +static bool +fixed_reg_p (int reg) +{ + /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the + backend sets it, overriding anything the user might have given. */ + if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM + && ((DEFAULT_ABI == ABI_V4 && flag_pic) + || (DEFAULT_ABI == ABI_DARWIN && flag_pic) + || (TARGET_TOC && TARGET_MINIMAL_TOC))) + return false; + + return fixed_regs[reg]; +} + +/* Determine the strategy for savings/restoring registers. 
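+   The result is a mask of the SAVE_* and REST_* flags defined below; for
+   instance SAVE_INLINE_GPRS | SAVE_MULTIPLE means the GPRs are saved
+   inline with a single store-multiple instruction instead of by calling an
+   out-of-line save routine.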
*/ + +enum { + SAVE_MULTIPLE = 0x1, + SAVE_INLINE_GPRS = 0x2, + SAVE_INLINE_FPRS = 0x4, + SAVE_NOINLINE_GPRS_SAVES_LR = 0x8, + SAVE_NOINLINE_FPRS_SAVES_LR = 0x10, + SAVE_INLINE_VRS = 0x20, + REST_MULTIPLE = 0x100, + REST_INLINE_GPRS = 0x200, + REST_INLINE_FPRS = 0x400, + REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800, + REST_INLINE_VRS = 0x1000 +}; + +static int +rs6000_savres_strategy (rs6000_stack_t *info, + bool using_static_chain_p) +{ + int strategy = 0; + + /* Select between in-line and out-of-line save and restore of regs. + First, all the obvious cases where we don't use out-of-line. */ + if (crtl->calls_eh_return + || cfun->machine->ra_need_lr) + strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS + | SAVE_INLINE_GPRS | REST_INLINE_GPRS + | SAVE_INLINE_VRS | REST_INLINE_VRS); + + if (info->first_gp_reg_save == 32) + strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS; + + if (info->first_fp_reg_save == 64 + /* The out-of-line FP routines use double-precision stores; + we can't use those routines if we don't have such stores. */ + || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT)) + strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS; + + if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1) + strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS; + + /* Define cutoff for using out-of-line functions to save registers. */ + if (DEFAULT_ABI == ABI_V4 || TARGET_ELF) + { + if (!optimize_size) + { + strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS; + strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS; + strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS; + } + else + { + /* Prefer out-of-line restore if it will exit. */ + if (info->first_fp_reg_save > 61) + strategy |= SAVE_INLINE_FPRS; + if (info->first_gp_reg_save > 29) + { + if (info->first_fp_reg_save == 64) + strategy |= SAVE_INLINE_GPRS; + else + strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS; + } + if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO) + strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS; + } + } + else if (DEFAULT_ABI == ABI_DARWIN) + { + if (info->first_fp_reg_save > 60) + strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS; + if (info->first_gp_reg_save > 29) + strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS; + strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS; + } + else + { + gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2); + if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun)) + || info->first_fp_reg_save > 61) + strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS; + strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS; + strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS; + } + + /* Don't bother to try to save things out-of-line if r11 is occupied + by the static chain. It would require too much fiddling and the + static chain is rarely used anyway. FPRs are saved w.r.t the stack + pointer on Darwin, and AIX uses r1 or r12. */ + if (using_static_chain_p + && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)) + strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS) + | SAVE_INLINE_GPRS + | SAVE_INLINE_VRS); + + /* Saving CR interferes with the exit routines used on the SPE, so + just punt here. */ + if (TARGET_SPE_ABI + && info->spe_64bit_regs_used + && info->cr_save_p) + strategy |= REST_INLINE_GPRS; + + /* We can only use the out-of-line routines to restore fprs if we've + saved all the registers from first_fp_reg_save in the prologue. + Otherwise, we risk loading garbage. Of course, if we have saved + out-of-line then we know we haven't skipped any fprs. 
*/ + if ((strategy & SAVE_INLINE_FPRS) + && !(strategy & REST_INLINE_FPRS)) + { + int i; + + for (i = info->first_fp_reg_save; i < 64; i++) + if (fixed_regs[i] || !save_reg_p (i)) + { + strategy |= REST_INLINE_FPRS; + break; + } + } + + /* Similarly, for altivec regs. */ + if ((strategy & SAVE_INLINE_VRS) + && !(strategy & REST_INLINE_VRS)) + { + int i; + + for (i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++) + if (fixed_regs[i] || !save_reg_p (i)) + { + strategy |= REST_INLINE_VRS; + break; + } + } + + /* info->lr_save_p isn't yet set if the only reason lr needs to be + saved is an out-of-line save or restore. Set up the value for + the next test (excluding out-of-line gprs). */ + bool lr_save_p = (info->lr_save_p + || !(strategy & SAVE_INLINE_FPRS) + || !(strategy & SAVE_INLINE_VRS) + || !(strategy & REST_INLINE_FPRS) + || !(strategy & REST_INLINE_VRS)); + + if (TARGET_MULTIPLE + && !TARGET_POWERPC64 + && !(TARGET_SPE_ABI && info->spe_64bit_regs_used) + && info->first_gp_reg_save < 31 + && !(flag_shrink_wrap + && flag_shrink_wrap_separate + && optimize_function_for_speed_p (cfun))) + { + /* Prefer store multiple for saves over out-of-line routines, + since the store-multiple instruction will always be smaller. */ + strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE; + + /* The situation is more complicated with load multiple. We'd + prefer to use the out-of-line routines for restores, since the + "exit" out-of-line routines can handle the restore of LR and the + frame teardown. However if doesn't make sense to use the + out-of-line routine if that is the only reason we'd need to save + LR, and we can't use the "exit" out-of-line gpr restore if we + have saved some fprs; In those cases it is advantageous to use + load multiple when available. */ + if (info->first_fp_reg_save != 64 || !lr_save_p) + strategy |= REST_INLINE_GPRS | REST_MULTIPLE; + } + + /* Using the "exit" out-of-line routine does not improve code size + if using it would require lr to be saved and if only saving one + or two gprs. */ + else if (!lr_save_p && info->first_gp_reg_save > 29) + strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS; + + /* We can only use load multiple or the out-of-line routines to + restore gprs if we've saved all the registers from + first_gp_reg_save. Otherwise, we risk loading garbage. + Of course, if we have saved out-of-line or used stmw then we know + we haven't skipped any gprs. */ + if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS + && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS) + { + int i; + + for (i = info->first_gp_reg_save; i < 32; i++) + if (fixed_reg_p (i) || !save_reg_p (i)) + { + strategy |= REST_INLINE_GPRS; + strategy &= ~REST_MULTIPLE; + break; + } + } + + if (TARGET_ELF && TARGET_64BIT) + { + if (!(strategy & SAVE_INLINE_FPRS)) + strategy |= SAVE_NOINLINE_FPRS_SAVES_LR; + else if (!(strategy & SAVE_INLINE_GPRS) + && info->first_fp_reg_save == 64) + strategy |= SAVE_NOINLINE_GPRS_SAVES_LR; + } + else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS)) + strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR; + + if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS)) + strategy |= SAVE_NOINLINE_FPRS_SAVES_LR; + + return strategy; +} + +/* Calculate the stack information for the current function. This is + complicated by having two separate calling sequences, the AIX calling + sequence and the V.4 calling sequence. 
+ + AIX (and Darwin/Mac OS X) stack frames look like: + 32-bit 64-bit + SP----> +---------------------------------------+ + | back chain to caller | 0 0 + +---------------------------------------+ + | saved CR | 4 8 (8-11) + +---------------------------------------+ + | saved LR | 8 16 + +---------------------------------------+ + | reserved for compilers | 12 24 + +---------------------------------------+ + | reserved for binders | 16 32 + +---------------------------------------+ + | saved TOC pointer | 20 40 + +---------------------------------------+ + | Parameter save area (+padding*) (P) | 24 48 + +---------------------------------------+ + | Alloca space (A) | 24+P etc. + +---------------------------------------+ + | Local variable space (L) | 24+P+A + +---------------------------------------+ + | Float/int conversion temporary (X) | 24+P+A+L + +---------------------------------------+ + | Save area for AltiVec registers (W) | 24+P+A+L+X + +---------------------------------------+ + | AltiVec alignment padding (Y) | 24+P+A+L+X+W + +---------------------------------------+ + | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y + +---------------------------------------+ + | Save area for GP registers (G) | 24+P+A+X+L+X+W+Y+Z + +---------------------------------------+ + | Save area for FP registers (F) | 24+P+A+X+L+X+W+Y+Z+G + +---------------------------------------+ + old SP->| back chain to caller's caller | + +---------------------------------------+ + + * If the alloca area is present, the parameter save area is + padded so that the former starts 16-byte aligned. + + The required alignment for AIX configurations is two words (i.e., 8 + or 16 bytes). + + The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like: + + SP----> +---------------------------------------+ + | Back chain to caller | 0 + +---------------------------------------+ + | Save area for CR | 8 + +---------------------------------------+ + | Saved LR | 16 + +---------------------------------------+ + | Saved TOC pointer | 24 + +---------------------------------------+ + | Parameter save area (+padding*) (P) | 32 + +---------------------------------------+ + | Alloca space (A) | 32+P + +---------------------------------------+ + | Local variable space (L) | 32+P+A + +---------------------------------------+ + | Save area for AltiVec registers (W) | 32+P+A+L + +---------------------------------------+ + | AltiVec alignment padding (Y) | 32+P+A+L+W + +---------------------------------------+ + | Save area for GP registers (G) | 32+P+A+L+W+Y + +---------------------------------------+ + | Save area for FP registers (F) | 32+P+A+L+W+Y+G + +---------------------------------------+ + old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F + +---------------------------------------+ + + * If the alloca area is present, the parameter save area is + padded so that the former starts 16-byte aligned. 
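+
+   (Note that the fixed ELFv2 header is only 32 bytes: the words the 64-bit
+   AIX frame reserves for compilers and binders are dropped, so the
+   parameter save area starts at offset 32 rather than 48.)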
+ + V.4 stack frames look like: + + SP----> +---------------------------------------+ + | back chain to caller | 0 + +---------------------------------------+ + | caller's saved LR | 4 + +---------------------------------------+ + | Parameter save area (+padding*) (P) | 8 + +---------------------------------------+ + | Alloca space (A) | 8+P + +---------------------------------------+ + | Varargs save area (V) | 8+P+A + +---------------------------------------+ + | Local variable space (L) | 8+P+A+V + +---------------------------------------+ + | Float/int conversion temporary (X) | 8+P+A+V+L + +---------------------------------------+ + | Save area for AltiVec registers (W) | 8+P+A+V+L+X + +---------------------------------------+ + | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W + +---------------------------------------+ + | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y + +---------------------------------------+ + | SPE: area for 64-bit GP registers | + +---------------------------------------+ + | SPE alignment padding | + +---------------------------------------+ + | saved CR (C) | 8+P+A+V+L+X+W+Y+Z + +---------------------------------------+ + | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C + +---------------------------------------+ + | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G + +---------------------------------------+ + old SP->| back chain to caller's caller | + +---------------------------------------+ + + * If the alloca area is present and the required alignment is + 16 bytes, the parameter save area is padded so that the + alloca area starts 16-byte aligned. + + The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is + given. (But note below and in sysv4.h that we require only 8 and + may round up the size of our stack frame anyways. The historical + reason is early versions of powerpc-linux which didn't properly + align the stack at program startup. A happy side-effect is that + -mno-eabi libraries can be used with -meabi programs.) + + The EABI configuration defaults to the V.4 layout. However, + the stack alignment requirements may differ. If -mno-eabi is not + given, the required stack alignment is 8 bytes; if -mno-eabi is + given, the required alignment is 16 bytes. (But see V.4 comment + above.) */ + +#ifndef ABI_STACK_BOUNDARY +#define ABI_STACK_BOUNDARY STACK_BOUNDARY +#endif + +static rs6000_stack_t * +rs6000_stack_info (void) +{ + /* We should never be called for thunks, we are not set up for that. */ + gcc_assert (!cfun->is_thunk); + + rs6000_stack_t *info = &stack_info; + int reg_size = TARGET_32BIT ? 4 : 8; + int ehrd_size; + int ehcr_size; + int save_align; + int first_gp; + HOST_WIDE_INT non_fixed_size; + bool using_static_chain_p; + + if (reload_completed && info->reload_completed) + return info; + + memset (info, 0, sizeof (*info)); + info->reload_completed = reload_completed; + + if (TARGET_SPE) + { + /* Cache value so we don't rescan instruction chain over and over. */ + if (cfun->machine->spe_insn_chain_scanned_p == 0) + cfun->machine->spe_insn_chain_scanned_p + = spe_func_has_64bit_regs_p () + 1; + info->spe_64bit_regs_used = cfun->machine->spe_insn_chain_scanned_p - 1; + } + + /* Select which calling sequence. */ + info->abi = DEFAULT_ABI; + + /* Calculate which registers need to be saved & save area size. */ + info->first_gp_reg_save = first_reg_to_save (); + /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM, + even if it currently looks like we won't. 
Reload may need it to + get at a constant; if so, it will have already created a constant + pool entry for it. */ + if (((TARGET_TOC && TARGET_MINIMAL_TOC) + || (flag_pic == 1 && DEFAULT_ABI == ABI_V4) + || (flag_pic && DEFAULT_ABI == ABI_DARWIN)) + && crtl->uses_const_pool + && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM) + first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM; + else + first_gp = info->first_gp_reg_save; + + info->gp_size = reg_size * (32 - first_gp); + + /* For the SPE, we have an additional upper 32-bits on each GPR. + Ideally we should save the entire 64-bits only when the upper + half is used in SIMD instructions. Since we only record + registers live (not the size they are used in), this proves + difficult because we'd have to traverse the instruction chain at + the right time, taking reload into account. This is a real pain, + so we opt to save the GPRs in 64-bits always if but one register + gets used in 64-bits. Otherwise, all the registers in the frame + get saved in 32-bits. + + So... since when we save all GPRs (except the SP) in 64-bits, the + traditional GP save area will be empty. */ + if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0) + info->gp_size = 0; + + info->first_fp_reg_save = first_fp_reg_to_save (); + info->fp_size = 8 * (64 - info->first_fp_reg_save); + + info->first_altivec_reg_save = first_altivec_reg_to_save (); + info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1 + - info->first_altivec_reg_save); + + /* Does this function call anything? */ + info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame); + + /* Determine if we need to save the condition code registers. */ + if (save_reg_p (CR2_REGNO) + || save_reg_p (CR3_REGNO) + || save_reg_p (CR4_REGNO)) + { + info->cr_save_p = 1; + if (DEFAULT_ABI == ABI_V4) + info->cr_size = reg_size; + } + + /* If the current function calls __builtin_eh_return, then we need + to allocate stack space for registers that will hold data for + the exception handler. */ + if (crtl->calls_eh_return) + { + unsigned int i; + for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i) + continue; + + /* SPE saves EH registers in 64-bits. */ + ehrd_size = i * (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0 + ? UNITS_PER_SPE_WORD : UNITS_PER_WORD); + } + else + ehrd_size = 0; + + /* In the ELFv2 ABI, we also need to allocate space for separate + CR field save areas if the function calls __builtin_eh_return. */ + if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return) + { + /* This hard-codes that we have three call-saved CR fields. */ + ehcr_size = 3 * reg_size; + /* We do *not* use the regular CR save mechanism. */ + info->cr_save_p = 0; + } + else + ehcr_size = 0; + + /* Determine various sizes. */ + info->reg_size = reg_size; + info->fixed_size = RS6000_SAVE_AREA; + info->vars_size = RS6000_ALIGN (get_frame_size (), 8); + if (cfun->calls_alloca) + info->parm_size = + RS6000_ALIGN (crtl->outgoing_args_size + info->fixed_size, + STACK_BOUNDARY / BITS_PER_UNIT) - info->fixed_size; + else + info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size, + TARGET_ALTIVEC ? 
16 : 8); + if (FRAME_GROWS_DOWNWARD) + info->vars_size + += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size, + ABI_STACK_BOUNDARY / BITS_PER_UNIT) + - (info->fixed_size + info->vars_size + info->parm_size); + + if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0) + info->spe_gp_size = 8 * (32 - first_gp); + + if (TARGET_ALTIVEC_ABI) + info->vrsave_mask = compute_vrsave_mask (); + + if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask) + info->vrsave_size = 4; + + compute_save_world_info (info); + + /* Calculate the offsets. */ + switch (DEFAULT_ABI) + { + case ABI_NONE: + default: + gcc_unreachable (); + + case ABI_AIX: + case ABI_ELFv2: + case ABI_DARWIN: + info->fp_save_offset = -info->fp_size; + info->gp_save_offset = info->fp_save_offset - info->gp_size; + + if (TARGET_ALTIVEC_ABI) + { + info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size; + + /* Align stack so vector save area is on a quadword boundary. + The padding goes above the vectors. */ + if (info->altivec_size != 0) + info->altivec_padding_size = info->vrsave_save_offset & 0xF; + + info->altivec_save_offset = info->vrsave_save_offset + - info->altivec_padding_size + - info->altivec_size; + gcc_assert (info->altivec_size == 0 + || info->altivec_save_offset % 16 == 0); + + /* Adjust for AltiVec case. */ + info->ehrd_offset = info->altivec_save_offset - ehrd_size; + } + else + info->ehrd_offset = info->gp_save_offset - ehrd_size; + + info->ehcr_offset = info->ehrd_offset - ehcr_size; + info->cr_save_offset = reg_size; /* first word when 64-bit. */ + info->lr_save_offset = 2*reg_size; + break; + + case ABI_V4: + info->fp_save_offset = -info->fp_size; + info->gp_save_offset = info->fp_save_offset - info->gp_size; + info->cr_save_offset = info->gp_save_offset - info->cr_size; + + if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0) + { + /* Align stack so SPE GPR save area is aligned on a + double-word boundary. */ + if (info->spe_gp_size != 0 && info->cr_save_offset != 0) + info->spe_padding_size = 8 - (-info->cr_save_offset % 8); + else + info->spe_padding_size = 0; + + info->spe_gp_save_offset = info->cr_save_offset + - info->spe_padding_size + - info->spe_gp_size; + + /* Adjust for SPE case. */ + info->ehrd_offset = info->spe_gp_save_offset; + } + else if (TARGET_ALTIVEC_ABI) + { + info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size; + + /* Align stack so vector save area is on a quadword boundary. */ + if (info->altivec_size != 0) + info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16); + + info->altivec_save_offset = info->vrsave_save_offset + - info->altivec_padding_size + - info->altivec_size; + + /* Adjust for AltiVec case. */ + info->ehrd_offset = info->altivec_save_offset; + } + else + info->ehrd_offset = info->cr_save_offset; + + info->ehrd_offset -= ehrd_size; + info->lr_save_offset = reg_size; + } + + save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8; + info->save_size = RS6000_ALIGN (info->fp_size + + info->gp_size + + info->altivec_size + + info->altivec_padding_size + + info->spe_gp_size + + info->spe_padding_size + + ehrd_size + + ehcr_size + + info->cr_size + + info->vrsave_size, + save_align); + + non_fixed_size = info->vars_size + info->parm_size + info->save_size; + + info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size, + ABI_STACK_BOUNDARY / BITS_PER_UNIT); + + /* Determine if we need to save the link register. 
*/ + if (info->calls_p + || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + && crtl->profile + && !TARGET_PROFILE_KERNEL) + || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca) +#ifdef TARGET_RELOCATABLE + || (DEFAULT_ABI == ABI_V4 + && (TARGET_RELOCATABLE || flag_pic > 1) + && !constant_pool_empty_p ()) +#endif + || rs6000_ra_ever_killed ()) + info->lr_save_p = 1; + + using_static_chain_p = (cfun->static_chain_decl != NULL_TREE + && df_regs_ever_live_p (STATIC_CHAIN_REGNUM) + && call_used_regs[STATIC_CHAIN_REGNUM]); + info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p); + + if (!(info->savres_strategy & SAVE_INLINE_GPRS) + || !(info->savres_strategy & SAVE_INLINE_FPRS) + || !(info->savres_strategy & SAVE_INLINE_VRS) + || !(info->savres_strategy & REST_INLINE_GPRS) + || !(info->savres_strategy & REST_INLINE_FPRS) + || !(info->savres_strategy & REST_INLINE_VRS)) + info->lr_save_p = 1; + + if (info->lr_save_p) + df_set_regs_ever_live (LR_REGNO, true); + + /* Determine if we need to allocate any stack frame: + + For AIX we need to push the stack if a frame pointer is needed + (because the stack might be dynamically adjusted), if we are + debugging, if we make calls, or if the sum of fp_save, gp_save, + and local variables are more than the space needed to save all + non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8 + + 18*8 = 288 (GPR13 reserved). + + For V.4 we don't have the stack cushion that AIX uses, but assume + that the debugger can handle stackless frames. */ + + if (info->calls_p) + info->push_p = 1; + + else if (DEFAULT_ABI == ABI_V4) + info->push_p = non_fixed_size != 0; + + else if (frame_pointer_needed) + info->push_p = 1; + + else if (TARGET_XCOFF && write_symbols != NO_DEBUG) + info->push_p = 1; + + else + info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288); + + return info; +} + +/* Return true if the current function uses any GPRs in 64-bit SIMD + mode. */ + +static bool +spe_func_has_64bit_regs_p (void) +{ + rtx_insn *insns, *insn; + + /* Functions that save and restore all the call-saved registers will + need to save/restore the registers in 64-bits. */ + if (crtl->calls_eh_return + || cfun->calls_setjmp + || crtl->has_nonlocal_goto) + return true; + + insns = get_insns (); + + for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn)) + { + if (INSN_P (insn)) + { + rtx i; + + /* FIXME: This should be implemented with attributes... + + (set_attr "spe64" "true")....then, + if (get_spe64(insn)) return true; + + It's the only reliable way to do the stuff below. */ + + i = PATTERN (insn); + if (GET_CODE (i) == SET) + { + machine_mode mode = GET_MODE (SET_SRC (i)); + + if (SPE_VECTOR_MODE (mode)) + return true; + if (TARGET_E500_DOUBLE + && (mode == DFmode || FLOAT128_2REG_P (mode))) + return true; + } + } + } + + return false; +} + +static void +debug_stack_info (rs6000_stack_t *info) +{ + const char *abi_string; + + if (! info) + info = rs6000_stack_info (); + + fprintf (stderr, "\nStack information for function %s:\n", + ((current_function_decl && DECL_NAME (current_function_decl)) + ? 
IDENTIFIER_POINTER (DECL_NAME (current_function_decl)) + : "<unknown>")); + + switch (info->abi) + { + default: abi_string = "Unknown"; break; + case ABI_NONE: abi_string = "NONE"; break; + case ABI_AIX: abi_string = "AIX"; break; + case ABI_ELFv2: abi_string = "ELFv2"; break; + case ABI_DARWIN: abi_string = "Darwin"; break; + case ABI_V4: abi_string = "V.4"; break; + } + + fprintf (stderr, "\tABI = %5s\n", abi_string); + + if (TARGET_ALTIVEC_ABI) + fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n"); + + if (TARGET_SPE_ABI) + fprintf (stderr, "\tSPE ABI extensions enabled.\n"); + + if (info->first_gp_reg_save != 32) + fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save); + + if (info->first_fp_reg_save != 64) + fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save); + + if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO) + fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n", + info->first_altivec_reg_save); + + if (info->lr_save_p) + fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p); + + if (info->cr_save_p) + fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p); + + if (info->vrsave_mask) + fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask); + + if (info->push_p) + fprintf (stderr, "\tpush_p = %5d\n", info->push_p); + + if (info->calls_p) + fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p); + + if (info->gp_size) + fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset); + + if (info->fp_size) + fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset); + + if (info->altivec_size) + fprintf (stderr, "\taltivec_save_offset = %5d\n", + info->altivec_save_offset); + + if (info->spe_gp_size) + fprintf (stderr, "\tspe_gp_save_offset = %5d\n", + info->spe_gp_save_offset); + + if (info->vrsave_size) + fprintf (stderr, "\tvrsave_save_offset = %5d\n", + info->vrsave_save_offset); + + if (info->lr_save_p) + fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset); + + if (info->cr_save_p) + fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset); + + if (info->varargs_save_offset) + fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset); + + if (info->total_size) + fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n", + info->total_size); + + if (info->vars_size) + fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n", + info->vars_size); + + if (info->parm_size) + fprintf (stderr, "\tparm_size = %5d\n", info->parm_size); + + if (info->fixed_size) + fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size); + + if (info->gp_size) + fprintf (stderr, "\tgp_size = %5d\n", info->gp_size); + + if (info->spe_gp_size) + fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size); + + if (info->fp_size) + fprintf (stderr, "\tfp_size = %5d\n", info->fp_size); + + if (info->altivec_size) + fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size); + + if (info->vrsave_size) + fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size); + + if (info->altivec_padding_size) + fprintf (stderr, "\taltivec_padding_size= %5d\n", + info->altivec_padding_size); + + if (info->spe_padding_size) + fprintf (stderr, "\tspe_padding_size = %5d\n", + info->spe_padding_size); + + if (info->cr_size) + fprintf (stderr, "\tcr_size = %5d\n", info->cr_size); + + if (info->save_size) + fprintf (stderr, "\tsave_size = %5d\n", info->save_size); + + if (info->reg_size != 4) + fprintf (stderr, "\treg_size = %5d\n", info->reg_size); + + fprintf (stderr, "\tsave-strategy = 
%04x\n", info->savres_strategy); + + fprintf (stderr, "\n"); +} + +rtx +rs6000_return_addr (int count, rtx frame) +{ + /* Currently we don't optimize very well between prolog and body + code and for PIC code the code can be actually quite bad, so + don't try to be too clever here. */ + if (count != 0 + || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic)) + { + cfun->machine->ra_needs_full_frame = 1; + + return + gen_rtx_MEM + (Pmode, + memory_address + (Pmode, + plus_constant (Pmode, + copy_to_reg + (gen_rtx_MEM (Pmode, + memory_address (Pmode, frame))), + RETURN_ADDRESS_OFFSET))); + } + + cfun->machine->ra_need_lr = 1; + return get_hard_reg_initial_val (Pmode, LR_REGNO); +} + +/* Say whether a function is a candidate for sibcall handling or not. */ + +static bool +rs6000_function_ok_for_sibcall (tree decl, tree exp) +{ + tree fntype; + + if (decl) + fntype = TREE_TYPE (decl); + else + fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp))); + + /* We can't do it if the called function has more vector parameters + than the current function; there's nowhere to put the VRsave code. */ + if (TARGET_ALTIVEC_ABI + && TARGET_ALTIVEC_VRSAVE + && !(decl && decl == current_function_decl)) + { + function_args_iterator args_iter; + tree type; + int nvreg = 0; + + /* Functions with vector parameters are required to have a + prototype, so the argument type info must be available + here. */ + FOREACH_FUNCTION_ARGS(fntype, type, args_iter) + if (TREE_CODE (type) == VECTOR_TYPE + && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type))) + nvreg++; + + FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter) + if (TREE_CODE (type) == VECTOR_TYPE + && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type))) + nvreg--; + + if (nvreg > 0) + return false; + } + + /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local + functions, because the callee may have a different TOC pointer to + the caller and there's no way to ensure we restore the TOC when + we return. With the secure-plt SYSV ABI we can't make non-local + calls when -fpic/PIC because the plt call stubs use r30. */ + if (DEFAULT_ABI == ABI_DARWIN + || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + && decl + && !DECL_EXTERNAL (decl) + && !DECL_WEAK (decl) + && (*targetm.binds_local_p) (decl)) + || (DEFAULT_ABI == ABI_V4 + && (!TARGET_SECURE_PLT + || !flag_pic + || (decl + && (*targetm.binds_local_p) (decl))))) + { + tree attr_list = TYPE_ATTRIBUTES (fntype); + + if (!lookup_attribute ("longcall", attr_list) + || lookup_attribute ("shortcall", attr_list)) + return true; + } + + return false; +} + +static int +rs6000_ra_ever_killed (void) +{ + rtx_insn *top; + rtx reg; + rtx_insn *insn; + + if (cfun->is_thunk) + return 0; + + if (cfun->machine->lr_save_state) + return cfun->machine->lr_save_state - 1; + + /* regs_ever_live has LR marked as used if any sibcalls are present, + but this should not force saving and restoring in the + pro/epilogue. Likewise, reg_set_between_p thinks a sibcall + clobbers LR, so that is inappropriate. */ + + /* Also, the prologue can generate a store into LR that + doesn't really count, like this: + + move LR->R0 + bcl to set PIC register + move LR->R31 + move R0->LR + + When we're called from the epilogue, we need to avoid counting + this as a store. 
*/ + + push_topmost_sequence (); + top = get_insns (); + pop_topmost_sequence (); + reg = gen_rtx_REG (Pmode, LR_REGNO); + + for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn)) + { + if (INSN_P (insn)) + { + if (CALL_P (insn)) + { + if (!SIBLING_CALL_P (insn)) + return 1; + } + else if (find_regno_note (insn, REG_INC, LR_REGNO)) + return 1; + else if (set_of (reg, insn) != NULL_RTX + && !prologue_epilogue_contains (insn)) + return 1; + } + } + return 0; +} + +/* Emit instructions needed to load the TOC register. + This is only needed when TARGET_TOC, TARGET_MINIMAL_TOC, and there is + a constant pool; or for SVR4 -fpic. */ + +void +rs6000_emit_load_toc_table (int fromprolog) +{ + rtx dest; + dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM); + + if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic) + { + char buf[30]; + rtx lab, tmp1, tmp2, got; + + lab = gen_label_rtx (); + ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab)); + lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf)); + if (flag_pic == 2) + { + got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name)); + need_toc_init = 1; + } + else + got = rs6000_got_sym (); + tmp1 = tmp2 = dest; + if (!fromprolog) + { + tmp1 = gen_reg_rtx (Pmode); + tmp2 = gen_reg_rtx (Pmode); + } + emit_insn (gen_load_toc_v4_PIC_1 (lab)); + emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO)); + emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab)); + emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab)); + } + else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1) + { + emit_insn (gen_load_toc_v4_pic_si ()); + emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO)); + } + else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2) + { + char buf[30]; + rtx temp0 = (fromprolog + ? gen_rtx_REG (Pmode, 0) + : gen_reg_rtx (Pmode)); + + if (fromprolog) + { + rtx symF, symL; + + ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno); + symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf)); + + ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno); + symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf)); + + emit_insn (gen_load_toc_v4_PIC_1 (symF)); + emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO)); + emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF)); + } + else + { + rtx tocsym, lab; + + tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name)); + need_toc_init = 1; + lab = gen_label_rtx (); + emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab)); + emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO)); + if (TARGET_LINK_STACK) + emit_insn (gen_addsi3 (dest, dest, GEN_INT (4))); + emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest)); + } + emit_insn (gen_addsi3 (dest, temp0, dest)); + } + else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC) + { + /* This is for AIX code running in non-PIC ELF32. */ + rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name)); + + need_toc_init = 1; + emit_insn (gen_elf_high (dest, realsym)); + emit_insn (gen_elf_low (dest, dest, realsym)); + } + else + { + gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2); + + if (TARGET_32BIT) + emit_insn (gen_load_toc_aix_si (dest)); + else + emit_insn (gen_load_toc_aix_di (dest)); + } +} + +/* Emit instructions to restore the link register after determining where + its value has been stored. 
*/ + +void +rs6000_emit_eh_reg_restore (rtx source, rtx scratch) +{ + rs6000_stack_t *info = rs6000_stack_info (); + rtx operands[2]; + + operands[0] = source; + operands[1] = scratch; + + if (info->lr_save_p) + { + rtx frame_rtx = stack_pointer_rtx; + HOST_WIDE_INT sp_offset = 0; + rtx tmp; + + if (frame_pointer_needed + || cfun->calls_alloca + || info->total_size > 32767) + { + tmp = gen_frame_mem (Pmode, frame_rtx); + emit_move_insn (operands[1], tmp); + frame_rtx = operands[1]; + } + else if (info->push_p) + sp_offset = info->total_size; + + tmp = plus_constant (Pmode, frame_rtx, + info->lr_save_offset + sp_offset); + tmp = gen_frame_mem (Pmode, tmp); + emit_move_insn (tmp, operands[0]); + } + else + emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]); + + /* Freeze lr_save_p. We've just emitted rtl that depends on the + state of lr_save_p so any change from here on would be a bug. In + particular, stop rs6000_ra_ever_killed from considering the SET + of lr we may have added just above. */ + cfun->machine->lr_save_state = info->lr_save_p + 1; +} + +static GTY(()) alias_set_type set = -1; + +alias_set_type +get_TOC_alias_set (void) +{ + if (set == -1) + set = new_alias_set (); + return set; +} + +/* This returns nonzero if the current function uses the TOC. This is + determined by the presence of (use (unspec ... UNSPEC_TOC)), which + is generated by the ABI_V4 load_toc_* patterns. */ +#if TARGET_ELF +static int +uses_TOC (void) +{ + rtx_insn *insn; + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + if (INSN_P (insn)) + { + rtx pat = PATTERN (insn); + int i; + + if (GET_CODE (pat) == PARALLEL) + for (i = 0; i < XVECLEN (pat, 0); i++) + { + rtx sub = XVECEXP (pat, 0, i); + if (GET_CODE (sub) == USE) + { + sub = XEXP (sub, 0); + if (GET_CODE (sub) == UNSPEC + && XINT (sub, 1) == UNSPEC_TOC) + return 1; + } + } + } + return 0; +} +#endif + +rtx +create_TOC_reference (rtx symbol, rtx largetoc_reg) +{ + rtx tocrel, tocreg, hi; + + if (TARGET_DEBUG_ADDR) + { + if (GET_CODE (symbol) == SYMBOL_REF) + fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n", + XSTR (symbol, 0)); + else + { + fprintf (stderr, "\ncreate_TOC_reference, code %s:\n", + GET_RTX_NAME (GET_CODE (symbol))); + debug_rtx (symbol); + } + } + + if (!can_create_pseudo_p ()) + df_set_regs_ever_live (TOC_REGISTER, true); + + tocreg = gen_rtx_REG (Pmode, TOC_REGISTER); + tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL); + if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ()) + return tocrel; + + hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel)); + if (largetoc_reg != NULL) + { + emit_move_insn (largetoc_reg, hi); + hi = largetoc_reg; + } + return gen_rtx_LO_SUM (Pmode, hi, tocrel); +} + +/* Issue assembly directives that create a reference to the given DWARF + FRAME_TABLE_LABEL from the current function section. */ +void +rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label) +{ + fprintf (asm_out_file, "\t.ref %s\n", + (* targetm.strip_name_encoding) (frame_table_label)); +} + +/* This ties together stack memory (MEM with an alias set of frame_alias_set) + and the change to the stack pointer. 
*/ + +static void +rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed) +{ + rtvec p; + int i; + rtx regs[3]; + + i = 0; + regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); + if (hard_frame_needed) + regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM); + if (!(REGNO (fp) == STACK_POINTER_REGNUM + || (hard_frame_needed + && REGNO (fp) == HARD_FRAME_POINTER_REGNUM))) + regs[i++] = fp; + + p = rtvec_alloc (i); + while (--i >= 0) + { + rtx mem = gen_frame_mem (BLKmode, regs[i]); + RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx); + } + + emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p))); +} + +/* Emit the correct code for allocating stack space, as insns. + If COPY_REG, make sure a copy of the old frame is left there. + The generated code may use hard register 0 as a temporary. */ + +static rtx_insn * +rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off) +{ + rtx_insn *insn; + rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); + rtx tmp_reg = gen_rtx_REG (Pmode, 0); + rtx todec = gen_int_mode (-size, Pmode); + rtx par, set, mem; + + if (INTVAL (todec) != -size) + { + warning (0, "stack frame too large"); + emit_insn (gen_trap ()); + return 0; + } + + if (crtl->limit_stack) + { + if (REG_P (stack_limit_rtx) + && REGNO (stack_limit_rtx) > 1 + && REGNO (stack_limit_rtx) <= 31) + { + emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size))); + emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg, + const0_rtx)); + } + else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF + && TARGET_32BIT + && DEFAULT_ABI == ABI_V4 + && !flag_pic) + { + rtx toload = gen_rtx_CONST (VOIDmode, + gen_rtx_PLUS (Pmode, + stack_limit_rtx, + GEN_INT (size))); + + emit_insn (gen_elf_high (tmp_reg, toload)); + emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload)); + emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg, + const0_rtx)); + } + else + warning (0, "stack limit expression is not supported"); + } + + if (copy_reg) + { + if (copy_off != 0) + emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off))); + else + emit_move_insn (copy_reg, stack_reg); + } + + if (size > 32767) + { + /* Need a note here so that try_split doesn't get confused. */ + if (get_last_insn () == NULL_RTX) + emit_note (NOTE_INSN_DELETED); + insn = emit_move_insn (tmp_reg, todec); + try_split (PATTERN (insn), insn, 0); + todec = tmp_reg; + } + + insn = emit_insn (TARGET_32BIT + ? gen_movsi_update_stack (stack_reg, stack_reg, + todec, stack_reg) + : gen_movdi_di_update_stack (stack_reg, stack_reg, + todec, stack_reg)); + /* Since we didn't use gen_frame_mem to generate the MEM, grab + it now and set the alias set/attributes. The above gen_*_update + calls will generate a PARALLEL with the MEM set being the first + operation. */ + par = PATTERN (insn); + gcc_assert (GET_CODE (par) == PARALLEL); + set = XVECEXP (par, 0, 0); + gcc_assert (GET_CODE (set) == SET); + mem = SET_DEST (set); + gcc_assert (MEM_P (mem)); + MEM_NOTRAP_P (mem) = 1; + set_mem_alias_set (mem, get_frame_alias_set ()); + + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg, + GEN_INT (-size)))); + return insn; +} + +#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) + +#if PROBE_INTERVAL > 32768 +#error Cannot use indexed addressing mode for stack probing +#endif + +/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, + inclusive. These are offsets from the current stack pointer. 
*/ + +static void +rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) +{ + /* See if we have a constant small number of probes to generate. If so, + that's the easy case. */ + if (first + size <= 32768) + { + HOST_WIDE_INT i; + + /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until + it exceeds SIZE. If only one probe is needed, this will not + generate any code. Then probe at FIRST + SIZE. */ + for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL) + emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, + -(first + i))); + + emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, + -(first + size))); + } + + /* Otherwise, do the same as above, but in a loop. Note that we must be + extra careful with variables wrapping around because we might be at + the very top (or the very bottom) of the address space and we have + to be able to handle this case properly; in particular, we use an + equality test for the loop condition. */ + else + { + HOST_WIDE_INT rounded_size; + rtx r12 = gen_rtx_REG (Pmode, 12); + rtx r0 = gen_rtx_REG (Pmode, 0); + + /* Sanity check for the addressing mode we're going to use. */ + gcc_assert (first <= 32768); + + /* Step 1: round SIZE to the previous multiple of the interval. */ + + rounded_size = ROUND_DOWN (size, PROBE_INTERVAL); + + + /* Step 2: compute initial and final value of the loop counter. */ + + /* TEST_ADDR = SP + FIRST. */ + emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx, + -first))); + + /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */ + if (rounded_size > 32768) + { + emit_move_insn (r0, GEN_INT (-rounded_size)); + emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0))); + } + else + emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12, + -rounded_size))); + + + /* Step 3: the loop + + do + { + TEST_ADDR = TEST_ADDR + PROBE_INTERVAL + probe at TEST_ADDR + } + while (TEST_ADDR != LAST_ADDR) + + probes at FIRST + N * PROBE_INTERVAL for values of N from 1 + until it is equal to ROUNDED_SIZE. */ + + if (TARGET_64BIT) + emit_insn (gen_probe_stack_rangedi (r12, r12, r0)); + else + emit_insn (gen_probe_stack_rangesi (r12, r12, r0)); + + + /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time + that SIZE is equal to ROUNDED_SIZE. */ + + if (size != rounded_size) + emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size)); + } +} + +/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are + absolute addresses. */ + +const char * +output_probe_stack_range (rtx reg1, rtx reg2) +{ + static int labelno = 0; + char loop_lab[32]; + rtx xops[2]; + + ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); + + /* Loop. */ + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); + + /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ + xops[0] = reg1; + xops[1] = GEN_INT (-PROBE_INTERVAL); + output_asm_insn ("addi %0,%0,%1", xops); + + /* Probe at TEST_ADDR. */ + xops[1] = gen_rtx_REG (Pmode, 0); + output_asm_insn ("stw %1,0(%0)", xops); + + /* Test if TEST_ADDR == LAST_ADDR. */ + xops[1] = reg2; + if (TARGET_64BIT) + output_asm_insn ("cmpd 0,%0,%1", xops); + else + output_asm_insn ("cmpw 0,%0,%1", xops); + + /* Branch. */ + fputs ("\tbne 0,", asm_out_file); + assemble_name_raw (asm_out_file, loop_lab); + fputc ('\n', asm_out_file); + + return ""; +} + +/* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced + with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2 + is not NULL. 
It would be nice if dwarf2out_frame_debug_expr could + deduce these equivalences by itself so it wasn't necessary to hold + its hand so much. Don't be tempted to always supply d2_f_d_e with + the actual cfa register, ie. r31 when we are using a hard frame + pointer. That fails when saving regs off r1, and sched moves the + r31 setup past the reg saves. */ + +static rtx_insn * +rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val, + rtx reg2, rtx repl2) +{ + rtx repl; + + if (REGNO (reg) == STACK_POINTER_REGNUM) + { + gcc_checking_assert (val == 0); + repl = NULL_RTX; + } + else + repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM), + GEN_INT (val)); + + rtx pat = PATTERN (insn); + if (!repl && !reg2) + { + /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */ + if (GET_CODE (pat) == PARALLEL) + for (int i = 0; i < XVECLEN (pat, 0); i++) + if (GET_CODE (XVECEXP (pat, 0, i)) == SET) + { + rtx set = XVECEXP (pat, 0, i); + + /* If this PARALLEL has been emitted for out-of-line + register save functions, or store multiple, then omit + eh_frame info for any user-defined global regs. If + eh_frame info is supplied, frame unwinding will + restore a user reg. */ + if (!REG_P (SET_SRC (set)) + || !fixed_reg_p (REGNO (SET_SRC (set)))) + RTX_FRAME_RELATED_P (set) = 1; + } + RTX_FRAME_RELATED_P (insn) = 1; + return insn; + } + + /* We expect that 'pat' is either a SET or a PARALLEL containing + SETs (and possibly other stuff). In a PARALLEL, all the SETs + are important so they all have to be marked RTX_FRAME_RELATED_P. + Call simplify_replace_rtx on the SETs rather than the whole insn + so as to leave the other stuff alone (for example USE of r12). */ + + set_used_flags (pat); + if (GET_CODE (pat) == SET) + { + if (repl) + pat = simplify_replace_rtx (pat, reg, repl); + if (reg2) + pat = simplify_replace_rtx (pat, reg2, repl2); + } + else if (GET_CODE (pat) == PARALLEL) + { + pat = shallow_copy_rtx (pat); + XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0)); + + for (int i = 0; i < XVECLEN (pat, 0); i++) + if (GET_CODE (XVECEXP (pat, 0, i)) == SET) + { + rtx set = XVECEXP (pat, 0, i); + + if (repl) + set = simplify_replace_rtx (set, reg, repl); + if (reg2) + set = simplify_replace_rtx (set, reg2, repl2); + XVECEXP (pat, 0, i) = set; + + /* Omit eh_frame info for any user-defined global regs. */ + if (!REG_P (SET_SRC (set)) + || !fixed_reg_p (REGNO (SET_SRC (set)))) + RTX_FRAME_RELATED_P (set) = 1; + } + } + else + gcc_unreachable (); + + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, copy_rtx_if_shared (pat)); + + return insn; +} + +/* Returns an insn that has a vrsave set operation with the + appropriate CLOBBERs. */ + +static rtx +generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep) +{ + int nclobs, i; + rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1]; + rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO); + + clobs[0] + = gen_rtx_SET (vrsave, + gen_rtx_UNSPEC_VOLATILE (SImode, + gen_rtvec (2, reg, vrsave), + UNSPECV_SET_VRSAVE)); + + nclobs = 1; + + /* We need to clobber the registers in the mask so the scheduler + does not move sets to VRSAVE before sets of AltiVec registers. + + However, if the function receives nonlocal gotos, reload will set + all call saved registers live. We will end up with: + + (set (reg 999) (mem)) + (parallel [ (set (reg vrsave) (unspec blah)) + (clobber (reg 999))]) + + The clobber will cause the store into reg 999 to be dead, and + flow will attempt to delete an epilogue insn. 
In this case, we + need an unspec use/set of the register. */ + + for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i) + if (info->vrsave_mask & ALTIVEC_REG_BIT (i)) + { + if (!epiloguep || call_used_regs [i]) + clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode, + gen_rtx_REG (V4SImode, i)); + else + { + rtx reg = gen_rtx_REG (V4SImode, i); + + clobs[nclobs++] + = gen_rtx_SET (reg, + gen_rtx_UNSPEC (V4SImode, + gen_rtvec (1, reg), 27)); + } + } + + insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs)); + + for (i = 0; i < nclobs; ++i) + XVECEXP (insn, 0, i) = clobs[i]; + + return insn; +} + +static rtx +gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store) +{ + rtx addr, mem; + + addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset)); + mem = gen_frame_mem (GET_MODE (reg), addr); + return gen_rtx_SET (store ? mem : reg, store ? reg : mem); +} + +static rtx +gen_frame_load (rtx reg, rtx frame_reg, int offset) +{ + return gen_frame_set (reg, frame_reg, offset, false); +} + +static rtx +gen_frame_store (rtx reg, rtx frame_reg, int offset) +{ + return gen_frame_set (reg, frame_reg, offset, true); +} + +/* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes. + Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */ + +static rtx_insn * +emit_frame_save (rtx frame_reg, machine_mode mode, + unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp) +{ + rtx reg; + + /* Some cases that need register indexed addressing. */ + gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode)) + || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode)) + || (TARGET_E500_DOUBLE && mode == DFmode) + || (TARGET_SPE_ABI + && SPE_VECTOR_MODE (mode) + && !SPE_CONST_OFFSET_OK (offset)))); + + reg = gen_rtx_REG (mode, regno); + rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset)); + return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp, + NULL_RTX, NULL_RTX); +} + +/* Emit an offset memory reference suitable for a frame store, while + converting to a valid addressing mode. */ + +static rtx +gen_frame_mem_offset (machine_mode mode, rtx reg, int offset) +{ + rtx int_rtx, offset_rtx; + + int_rtx = GEN_INT (offset); + + if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset)) + || (TARGET_E500_DOUBLE && mode == DFmode)) + { + offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH); + emit_move_insn (offset_rtx, int_rtx); + } + else + offset_rtx = int_rtx; + + return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx)); +} + +#ifndef TARGET_FIX_AND_CONTINUE +#define TARGET_FIX_AND_CONTINUE 0 +#endif + +/* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */ +#define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO +#define LAST_SAVRES_REGISTER 31 +#define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1) + +enum { + SAVRES_LR = 0x1, + SAVRES_SAVE = 0x2, + SAVRES_REG = 0x0c, + SAVRES_GPR = 0, + SAVRES_FPR = 4, + SAVRES_VR = 8 +}; + +static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12]; + +/* Temporary holding space for an out-of-line register save/restore + routine name. */ +static char savres_routine_name[30]; + +/* Return the name for an out-of-line register save/restore routine. + We are saving/restoring GPRs if GPR is true. 
*/ + +static char * +rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel) +{ + const char *prefix = ""; + const char *suffix = ""; + + /* Different targets are supposed to define + {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed + routine name could be defined with: + + sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX) + + This is a nice idea in practice, but in reality, things are + complicated in several ways: + + - ELF targets have save/restore routines for GPRs. + + - SPE targets use different prefixes for 32/64-bit registers, and + neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen. + + - PPC64 ELF targets have routines for save/restore of GPRs that + differ in what they do with the link register, so having a set + prefix doesn't work. (We only use one of the save routines at + the moment, though.) + + - PPC32 elf targets have "exit" versions of the restore routines + that restore the link register and can save some extra space. + These require an extra suffix. (There are also "tail" versions + of the restore routines and "GOT" versions of the save routines, + but we don't generate those at present. Same problems apply, + though.) + + We deal with all this by synthesizing our own prefix/suffix and + using that for the simple sprintf call shown above. */ + if (TARGET_SPE) + { + /* No floating point saves on the SPE. */ + gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR); + + if ((sel & SAVRES_SAVE)) + prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_"; + else + prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_"; + + if ((sel & SAVRES_LR)) + suffix = "_x"; + } + else if (DEFAULT_ABI == ABI_V4) + { + if (TARGET_64BIT) + goto aix_names; + + if ((sel & SAVRES_REG) == SAVRES_GPR) + prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_"; + else if ((sel & SAVRES_REG) == SAVRES_FPR) + prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_"; + else if ((sel & SAVRES_REG) == SAVRES_VR) + prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_"; + else + abort (); + + if ((sel & SAVRES_LR)) + suffix = "_x"; + } + else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + { +#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD) + /* No out-of-line save/restore routines for GPRs on AIX. */ + gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR); +#endif + + aix_names: + if ((sel & SAVRES_REG) == SAVRES_GPR) + prefix = ((sel & SAVRES_SAVE) + ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_") + : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_")); + else if ((sel & SAVRES_REG) == SAVRES_FPR) + { +#if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD) + if ((sel & SAVRES_LR)) + prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_"); + else +#endif + { + prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX; + suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX; + } + } + else if ((sel & SAVRES_REG) == SAVRES_VR) + prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_"; + else + abort (); + } + + if (DEFAULT_ABI == ABI_DARWIN) + { + /* The Darwin approach is (slightly) different, in order to be + compatible with code generated by the system toolchain. There is a + single symbol for the start of save sequence, and the code here + embeds an offset into that code on the basis of the first register + to be saved. */ + prefix = (sel & SAVRES_SAVE) ? 
"save" : "rest" ; + if ((sel & SAVRES_REG) == SAVRES_GPR) + sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix, + ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"), + (regno - 13) * 4, prefix, regno); + else if ((sel & SAVRES_REG) == SAVRES_FPR) + sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix, + (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno); + else if ((sel & SAVRES_REG) == SAVRES_VR) + sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix, + (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno); + else + abort (); + } + else + sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix); + + return savres_routine_name; +} + +/* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine. + We are saving/restoring GPRs if GPR is true. */ + +static rtx +rs6000_savres_routine_sym (rs6000_stack_t *info, int sel) +{ + int regno = ((sel & SAVRES_REG) == SAVRES_GPR + ? info->first_gp_reg_save + : (sel & SAVRES_REG) == SAVRES_FPR + ? info->first_fp_reg_save - 32 + : (sel & SAVRES_REG) == SAVRES_VR + ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO + : -1); + rtx sym; + int select = sel; + + /* On the SPE, we never have any FPRs, but we do have 32/64-bit + versions of the gpr routines. */ + if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR + && info->spe_64bit_regs_used) + select ^= SAVRES_FPR ^ SAVRES_GPR; + + /* Don't generate bogus routine names. */ + gcc_assert (FIRST_SAVRES_REGISTER <= regno + && regno <= LAST_SAVRES_REGISTER + && select >= 0 && select <= 12); + + sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]; + + if (sym == NULL) + { + char *name; + + name = rs6000_savres_routine_name (info, regno, sel); + + sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select] + = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); + SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION; + } + + return sym; +} + +/* Emit a sequence of insns, including a stack tie if needed, for + resetting the stack pointer. If UPDT_REGNO is not 1, then don't + reset the stack pointer, but move the base of the frame into + reg UPDT_REGNO for use by out-of-line register restore routines. */ + +static rtx +rs6000_emit_stack_reset (rs6000_stack_t *info, + rtx frame_reg_rtx, HOST_WIDE_INT frame_off, + unsigned updt_regno) +{ + /* If there is nothing to do, don't do anything. */ + if (frame_off == 0 && REGNO (frame_reg_rtx) == updt_regno) + return NULL_RTX; + + rtx updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno); + + /* This blockage is needed so that sched doesn't decide to move + the sp change before the register restores. */ + if (DEFAULT_ABI == ABI_V4 + || (TARGET_SPE_ABI + && info->spe_64bit_regs_used != 0 + && info->first_gp_reg_save != 32)) + return emit_insn (gen_stack_restore_tie (updt_reg_rtx, frame_reg_rtx, + GEN_INT (frame_off))); + + /* If we are restoring registers out-of-line, we will be using the + "exit" variants of the restore routines, which will reset the + stack for us. But we do need to point updt_reg into the + right place for those routines. */ + if (frame_off != 0) + return emit_insn (gen_add3_insn (updt_reg_rtx, + frame_reg_rtx, GEN_INT (frame_off))); + else + return emit_move_insn (updt_reg_rtx, frame_reg_rtx); + + return NULL_RTX; +} + +/* Return the register number used as a pointer by out-of-line + save/restore functions. 
*/ + +static inline unsigned +ptr_regno_for_savres (int sel) +{ + if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12; + return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11; +} + +/* Construct a parallel rtx describing the effect of a call to an + out-of-line register save/restore routine, and emit the insn + or jump_insn as appropriate. */ + +static rtx_insn * +rs6000_emit_savres_rtx (rs6000_stack_t *info, + rtx frame_reg_rtx, int save_area_offset, int lr_offset, + machine_mode reg_mode, int sel) +{ + int i; + int offset, start_reg, end_reg, n_regs, use_reg; + int reg_size = GET_MODE_SIZE (reg_mode); + rtx sym; + rtvec p; + rtx par; + rtx_insn *insn; + + offset = 0; + start_reg = ((sel & SAVRES_REG) == SAVRES_GPR + ? info->first_gp_reg_save + : (sel & SAVRES_REG) == SAVRES_FPR + ? info->first_fp_reg_save + : (sel & SAVRES_REG) == SAVRES_VR + ? info->first_altivec_reg_save + : -1); + end_reg = ((sel & SAVRES_REG) == SAVRES_GPR + ? 32 + : (sel & SAVRES_REG) == SAVRES_FPR + ? 64 + : (sel & SAVRES_REG) == SAVRES_VR + ? LAST_ALTIVEC_REGNO + 1 + : -1); + n_regs = end_reg - start_reg; + p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0) + + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0) + + n_regs); + + if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR)) + RTVEC_ELT (p, offset++) = ret_rtx; + + RTVEC_ELT (p, offset++) + = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO)); + + sym = rs6000_savres_routine_sym (info, sel); + RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym); + + use_reg = ptr_regno_for_savres (sel); + if ((sel & SAVRES_REG) == SAVRES_VR) + { + /* Vector regs are saved/restored using [reg+reg] addressing. */ + RTVEC_ELT (p, offset++) + = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg)); + RTVEC_ELT (p, offset++) + = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0)); + } + else + RTVEC_ELT (p, offset++) + = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg)); + + for (i = 0; i < end_reg - start_reg; i++) + RTVEC_ELT (p, i + offset) + = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i), + frame_reg_rtx, save_area_offset + reg_size * i, + (sel & SAVRES_SAVE) != 0); + + if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR)) + RTVEC_ELT (p, i + offset) + = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset); + + par = gen_rtx_PARALLEL (VOIDmode, p); + + if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR)) + { + insn = emit_jump_insn (par); + JUMP_LABEL (insn) = ret_rtx; + } + else + insn = emit_insn (par); + return insn; +} + +/* Emit code to store CR fields that need to be saved into REG. */ + +static void +rs6000_emit_move_from_cr (rtx reg) +{ + /* Only the ELFv2 ABI allows storing only selected fields. */ + if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF) + { + int i, cr_reg[8], count = 0; + + /* Collect CR fields that must be saved. */ + for (i = 0; i < 8; i++) + if (save_reg_p (CR0_REGNO + i)) + cr_reg[count++] = i; + + /* If it's just a single one, use mfcrf. */ + if (count == 1) + { + rtvec p = rtvec_alloc (1); + rtvec r = rtvec_alloc (2); + RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]); + RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0])); + RTVEC_ELT (p, 0) + = gen_rtx_SET (reg, + gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR)); + + emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); + return; + } + + /* ??? It might be better to handle count == 2 / 3 cases here + as well, using logical operations to combine the values. 
*/ + } + + emit_insn (gen_movesi_from_cr (reg)); +} + +/* Return whether the split-stack arg pointer (r12) is used. */ + +static bool +split_stack_arg_pointer_used_p (void) +{ + /* If the pseudo holding the arg pointer is no longer a pseudo, + then the arg pointer is used. */ + if (cfun->machine->split_stack_arg_pointer != NULL_RTX + && (!REG_P (cfun->machine->split_stack_arg_pointer) + || (REGNO (cfun->machine->split_stack_arg_pointer) + < FIRST_PSEUDO_REGISTER))) + return true; + + /* Unfortunately we also need to do some code scanning, since + r12 may have been substituted for the pseudo. */ + rtx_insn *insn; + basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; + FOR_BB_INSNS (bb, insn) + if (NONDEBUG_INSN_P (insn)) + { + /* A call destroys r12. */ + if (CALL_P (insn)) + return false; + + df_ref use; + FOR_EACH_INSN_USE (use, insn) + { + rtx x = DF_REF_REG (use); + if (REG_P (x) && REGNO (x) == 12) + return true; + } + df_ref def; + FOR_EACH_INSN_DEF (def, insn) + { + rtx x = DF_REF_REG (def); + if (REG_P (x) && REGNO (x) == 12) + return false; + } + } + return bitmap_bit_p (DF_LR_OUT (bb), 12); +} + +/* Return whether we need to emit an ELFv2 global entry point prologue. */ + +static bool +rs6000_global_entry_point_needed_p (void) +{ + /* Only needed for the ELFv2 ABI. */ + if (DEFAULT_ABI != ABI_ELFv2) + return false; + + /* With -msingle-pic-base, we assume the whole program shares the same + TOC, so no global entry point prologues are needed anywhere. */ + if (TARGET_SINGLE_PIC_BASE) + return false; + + /* Ensure we have a global entry point for thunks. ??? We could + avoid that if the target routine doesn't need a global entry point, + but we do not know whether this is the case at this point. */ + if (cfun->is_thunk) + return true; + + /* For regular functions, rs6000_emit_prologue sets this flag if the + routine ever uses the TOC pointer. */ + return cfun->machine->r2_setup_needed; +} + +/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */ +static sbitmap +rs6000_get_separate_components (void) +{ + rs6000_stack_t *info = rs6000_stack_info (); + + if (WORLD_SAVE_P (info)) + return NULL; + + if (TARGET_SPE_ABI) + return NULL; + + gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE) + && !(info->savres_strategy & REST_MULTIPLE)); + + /* Component 0 is the save/restore of LR (done via GPR0). + Components 13..31 are the save/restore of GPR13..GPR31. + Components 46..63 are the save/restore of FPR14..FPR31. */ + + cfun->machine->n_components = 64; + + sbitmap components = sbitmap_alloc (cfun->machine->n_components); + bitmap_clear (components); + + int reg_size = TARGET_32BIT ? 4 : 8; + int fp_reg_size = 8; + + /* The GPRs we need saved to the frame. */ + if ((info->savres_strategy & SAVE_INLINE_GPRS) + && (info->savres_strategy & REST_INLINE_GPRS)) + { + int offset = info->gp_save_offset; + if (info->push_p) + offset += info->total_size; + + for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++) + { + if (IN_RANGE (offset, -0x8000, 0x7fff) + && rs6000_reg_live_or_pic_offset_p (regno)) + bitmap_set_bit (components, regno); + + offset += reg_size; + } + } + + /* Don't mess with the hard frame pointer. */ + if (frame_pointer_needed) + bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM); + + /* Don't mess with the fixed TOC register. 
*/ + if ((TARGET_TOC && TARGET_MINIMAL_TOC) + || (flag_pic == 1 && DEFAULT_ABI == ABI_V4) + || (flag_pic && DEFAULT_ABI == ABI_DARWIN)) + bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM); + + /* The FPRs we need saved to the frame. */ + if ((info->savres_strategy & SAVE_INLINE_FPRS) + && (info->savres_strategy & REST_INLINE_FPRS)) + { + int offset = info->fp_save_offset; + if (info->push_p) + offset += info->total_size; + + for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++) + { + if (IN_RANGE (offset, -0x8000, 0x7fff) && save_reg_p (regno)) + bitmap_set_bit (components, regno); + + offset += fp_reg_size; + } + } + + /* Optimize LR save and restore if we can. This is component 0. Any + out-of-line register save/restore routines need LR. */ + if (info->lr_save_p + && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)) + && (info->savres_strategy & SAVE_INLINE_GPRS) + && (info->savres_strategy & REST_INLINE_GPRS) + && (info->savres_strategy & SAVE_INLINE_FPRS) + && (info->savres_strategy & REST_INLINE_FPRS) + && (info->savres_strategy & SAVE_INLINE_VRS) + && (info->savres_strategy & REST_INLINE_VRS)) + { + int offset = info->lr_save_offset; + if (info->push_p) + offset += info->total_size; + if (IN_RANGE (offset, -0x8000, 0x7fff)) + bitmap_set_bit (components, 0); + } + + return components; +} + +/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */ +static sbitmap +rs6000_components_for_bb (basic_block bb) +{ + rs6000_stack_t *info = rs6000_stack_info (); + + bitmap in = DF_LIVE_IN (bb); + bitmap gen = &DF_LIVE_BB_INFO (bb)->gen; + bitmap kill = &DF_LIVE_BB_INFO (bb)->kill; + + sbitmap components = sbitmap_alloc (cfun->machine->n_components); + bitmap_clear (components); + + /* A register is used in a bb if it is in the IN, GEN, or KILL sets. */ + + /* GPRs. */ + for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++) + if (bitmap_bit_p (in, regno) + || bitmap_bit_p (gen, regno) + || bitmap_bit_p (kill, regno)) + bitmap_set_bit (components, regno); + + /* FPRs. */ + for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++) + if (bitmap_bit_p (in, regno) + || bitmap_bit_p (gen, regno) + || bitmap_bit_p (kill, regno)) + bitmap_set_bit (components, regno); + + /* The link register. */ + if (bitmap_bit_p (in, LR_REGNO) + || bitmap_bit_p (gen, LR_REGNO) + || bitmap_bit_p (kill, LR_REGNO)) + bitmap_set_bit (components, 0); + + return components; +} + +/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */ +static void +rs6000_disqualify_components (sbitmap components, edge e, + sbitmap edge_components, bool /*is_prologue*/) +{ + /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be + live where we want to place that code. */ + if (bitmap_bit_p (edge_components, 0) + && bitmap_bit_p (DF_LIVE_IN (e->dest), 0)) + { + if (dump_file) + fprintf (dump_file, "Disqualifying LR because GPR0 is live " + "on entry to bb %d\n", e->dest->index); + bitmap_clear_bit (components, 0); + } +} + +/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */ +static void +rs6000_emit_prologue_components (sbitmap components) +{ + rs6000_stack_t *info = rs6000_stack_info (); + rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed + ? HARD_FRAME_POINTER_REGNUM + : STACK_POINTER_REGNUM); + + machine_mode reg_mode = Pmode; + int reg_size = TARGET_32BIT ? 4 : 8; + machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) + ? DFmode : SFmode; + int fp_reg_size = 8; + + /* Prologue for LR. 
*/ + if (bitmap_bit_p (components, 0)) + { + rtx reg = gen_rtx_REG (reg_mode, 0); + rtx_insn *insn = emit_move_insn (reg, gen_rtx_REG (reg_mode, LR_REGNO)); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_REGISTER, NULL); + + int offset = info->lr_save_offset; + if (info->push_p) + offset += info->total_size; + + insn = emit_insn (gen_frame_store (reg, ptr_reg, offset)); + RTX_FRAME_RELATED_P (insn) = 1; + rtx lr = gen_rtx_REG (reg_mode, LR_REGNO); + rtx mem = copy_rtx (SET_DEST (single_set (insn))); + add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr)); + } + + /* Prologue for the GPRs. */ + int offset = info->gp_save_offset; + if (info->push_p) + offset += info->total_size; + + for (int i = info->first_gp_reg_save; i < 32; i++) + { + if (bitmap_bit_p (components, i)) + { + rtx reg = gen_rtx_REG (reg_mode, i); + rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset)); + RTX_FRAME_RELATED_P (insn) = 1; + rtx set = copy_rtx (single_set (insn)); + add_reg_note (insn, REG_CFA_OFFSET, set); + } + + offset += reg_size; + } + + /* Prologue for the FPRs. */ + offset = info->fp_save_offset; + if (info->push_p) + offset += info->total_size; + + for (int i = info->first_fp_reg_save; i < 64; i++) + { + if (bitmap_bit_p (components, i)) + { + rtx reg = gen_rtx_REG (fp_reg_mode, i); + rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset)); + RTX_FRAME_RELATED_P (insn) = 1; + rtx set = copy_rtx (single_set (insn)); + add_reg_note (insn, REG_CFA_OFFSET, set); + } + + offset += fp_reg_size; + } +} + +/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */ +static void +rs6000_emit_epilogue_components (sbitmap components) +{ + rs6000_stack_t *info = rs6000_stack_info (); + rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed + ? HARD_FRAME_POINTER_REGNUM + : STACK_POINTER_REGNUM); + + machine_mode reg_mode = Pmode; + int reg_size = TARGET_32BIT ? 4 : 8; + + machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) + ? DFmode : SFmode; + int fp_reg_size = 8; + + /* Epilogue for the FPRs. */ + int offset = info->fp_save_offset; + if (info->push_p) + offset += info->total_size; + + for (int i = info->first_fp_reg_save; i < 64; i++) + { + if (bitmap_bit_p (components, i)) + { + rtx reg = gen_rtx_REG (fp_reg_mode, i); + rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset)); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_RESTORE, reg); + } + + offset += fp_reg_size; + } + + /* Epilogue for the GPRs. */ + offset = info->gp_save_offset; + if (info->push_p) + offset += info->total_size; + + for (int i = info->first_gp_reg_save; i < 32; i++) + { + if (bitmap_bit_p (components, i)) + { + rtx reg = gen_rtx_REG (reg_mode, i); + rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset)); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_RESTORE, reg); + } + + offset += reg_size; + } + + /* Epilogue for LR. */ + if (bitmap_bit_p (components, 0)) + { + int offset = info->lr_save_offset; + if (info->push_p) + offset += info->total_size; + + rtx reg = gen_rtx_REG (reg_mode, 0); + rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset)); + + rtx lr = gen_rtx_REG (Pmode, LR_REGNO); + insn = emit_move_insn (lr, reg); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_RESTORE, lr); + } +} + +/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. 
*/ +static void +rs6000_set_handled_components (sbitmap components) +{ + rs6000_stack_t *info = rs6000_stack_info (); + + for (int i = info->first_gp_reg_save; i < 32; i++) + if (bitmap_bit_p (components, i)) + cfun->machine->gpr_is_wrapped_separately[i] = true; + + for (int i = info->first_fp_reg_save; i < 64; i++) + if (bitmap_bit_p (components, i)) + cfun->machine->fpr_is_wrapped_separately[i - 32] = true; + + if (bitmap_bit_p (components, 0)) + cfun->machine->lr_is_wrapped_separately = true; +} + +/* Emit function prologue as insns. */ + +void +rs6000_emit_prologue (void) +{ + rs6000_stack_t *info = rs6000_stack_info (); + machine_mode reg_mode = Pmode; + int reg_size = TARGET_32BIT ? 4 : 8; + machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) + ? DFmode : SFmode; + int fp_reg_size = 8; + rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); + rtx frame_reg_rtx = sp_reg_rtx; + unsigned int cr_save_regno; + rtx cr_save_rtx = NULL_RTX; + rtx_insn *insn; + int strategy; + int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE + && df_regs_ever_live_p (STATIC_CHAIN_REGNUM) + && call_used_regs[STATIC_CHAIN_REGNUM]); + int using_split_stack = (flag_split_stack + && (lookup_attribute ("no_split_stack", + DECL_ATTRIBUTES (cfun->decl)) + == NULL)); + + /* Offset to top of frame for frame_reg and sp respectively. */ + HOST_WIDE_INT frame_off = 0; + HOST_WIDE_INT sp_off = 0; + /* sp_adjust is the stack adjusting instruction, tracked so that the + insn setting up the split-stack arg pointer can be emitted just + prior to it, when r12 is not used here for other purposes. */ + rtx_insn *sp_adjust = 0; + +#if CHECKING_P + /* Track and check usage of r0, r11, r12. */ + int reg_inuse = using_static_chain_p ? 1 << 11 : 0; +#define START_USE(R) do \ + { \ + gcc_assert ((reg_inuse & (1 << (R))) == 0); \ + reg_inuse |= 1 << (R); \ + } while (0) +#define END_USE(R) do \ + { \ + gcc_assert ((reg_inuse & (1 << (R))) != 0); \ + reg_inuse &= ~(1 << (R)); \ + } while (0) +#define NOT_INUSE(R) do \ + { \ + gcc_assert ((reg_inuse & (1 << (R))) == 0); \ + } while (0) +#else +#define START_USE(R) do {} while (0) +#define END_USE(R) do {} while (0) +#define NOT_INUSE(R) do {} while (0) +#endif + + if (DEFAULT_ABI == ABI_ELFv2 + && !TARGET_SINGLE_PIC_BASE) + { + cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM); + + /* With -mminimal-toc we may generate an extra use of r2 below. */ + if (TARGET_TOC && TARGET_MINIMAL_TOC + && !constant_pool_empty_p ()) + cfun->machine->r2_setup_needed = true; + } + + + if (flag_stack_usage_info) + current_function_static_stack_size = info->total_size; + + if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) + { + HOST_WIDE_INT size = info->total_size; + + if (crtl->is_leaf && !cfun->calls_alloca) + { + if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) + rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, + size - STACK_CHECK_PROTECT); + } + else if (size > 0) + rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size); + } + + if (TARGET_FIX_AND_CONTINUE) + { + /* gdb on darwin arranges to forward a function from the old + address by modifying the first 5 instructions of the function + to branch to the overriding function. This is necessary to + permit function pointers that point to the old function to + actually forward to the new function. 
*/ + emit_insn (gen_nop ()); + emit_insn (gen_nop ()); + emit_insn (gen_nop ()); + emit_insn (gen_nop ()); + emit_insn (gen_nop ()); + } + + if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0) + { + reg_mode = V2SImode; + reg_size = 8; + } + + /* Handle world saves specially here. */ + if (WORLD_SAVE_P (info)) + { + int i, j, sz; + rtx treg; + rtvec p; + rtx reg0; + + /* save_world expects lr in r0. */ + reg0 = gen_rtx_REG (Pmode, 0); + if (info->lr_save_p) + { + insn = emit_move_insn (reg0, + gen_rtx_REG (Pmode, LR_REGNO)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* The SAVE_WORLD and RESTORE_WORLD routines make a number of + assumptions about the offsets of various bits of the stack + frame. */ + gcc_assert (info->gp_save_offset == -220 + && info->fp_save_offset == -144 + && info->lr_save_offset == 8 + && info->cr_save_offset == 4 + && info->push_p + && info->lr_save_p + && (!crtl->calls_eh_return + || info->ehrd_offset == -432) + && info->vrsave_save_offset == -224 + && info->altivec_save_offset == -416); + + treg = gen_rtx_REG (SImode, 11); + emit_move_insn (treg, GEN_INT (-info->total_size)); + + /* SAVE_WORLD takes the caller's LR in R0 and the frame size + in R11. It also clobbers R12, so beware! */ + + /* Preserve CR2 for save_world prologues */ + sz = 5; + sz += 32 - info->first_gp_reg_save; + sz += 64 - info->first_fp_reg_save; + sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1; + p = rtvec_alloc (sz); + j = 0; + RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode, + gen_rtx_REG (SImode, + LR_REGNO)); + RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode, + gen_rtx_SYMBOL_REF (Pmode, + "*save_world")); + /* We do floats first so that the instruction pattern matches + properly. */ + for (i = 0; i < 64 - info->first_fp_reg_save; i++) + RTVEC_ELT (p, j++) + = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT + ? DFmode : SFmode, + info->first_fp_reg_save + i), + frame_reg_rtx, + info->fp_save_offset + frame_off + 8 * i); + for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++) + RTVEC_ELT (p, j++) + = gen_frame_store (gen_rtx_REG (V4SImode, + info->first_altivec_reg_save + i), + frame_reg_rtx, + info->altivec_save_offset + frame_off + 16 * i); + for (i = 0; i < 32 - info->first_gp_reg_save; i++) + RTVEC_ELT (p, j++) + = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i), + frame_reg_rtx, + info->gp_save_offset + frame_off + reg_size * i); + + /* CR register traditionally saved as CR2. */ + RTVEC_ELT (p, j++) + = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO), + frame_reg_rtx, info->cr_save_offset + frame_off); + /* Explain about use of R0. */ + if (info->lr_save_p) + RTVEC_ELT (p, j++) + = gen_frame_store (reg0, + frame_reg_rtx, info->lr_save_offset + frame_off); + /* Explain what happens to the stack pointer. */ + { + rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg); + RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval); + } + + insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); + rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off, + treg, GEN_INT (-info->total_size)); + sp_off = frame_off = info->total_size; + } + + strategy = info->savres_strategy; + + /* For V.4, update stack before we do any saving and set back pointer. */ + if (! WORLD_SAVE_P (info) + && info->push_p + && (DEFAULT_ABI == ABI_V4 + || crtl->calls_eh_return)) + { + bool need_r11 = (TARGET_SPE + ? 
(!(strategy & SAVE_INLINE_GPRS) + && info->spe_64bit_regs_used == 0) + : (!(strategy & SAVE_INLINE_FPRS) + || !(strategy & SAVE_INLINE_GPRS) + || !(strategy & SAVE_INLINE_VRS))); + int ptr_regno = -1; + rtx ptr_reg = NULL_RTX; + int ptr_off = 0; + + if (info->total_size < 32767) + frame_off = info->total_size; + else if (need_r11) + ptr_regno = 11; + else if (info->cr_save_p + || info->lr_save_p + || info->first_fp_reg_save < 64 + || info->first_gp_reg_save < 32 + || info->altivec_size != 0 + || info->vrsave_size != 0 + || crtl->calls_eh_return) + ptr_regno = 12; + else + { + /* The prologue won't be saving any regs so there is no need + to set up a frame register to access any frame save area. + We also won't be using frame_off anywhere below, but set + the correct value anyway to protect against future + changes to this function. */ + frame_off = info->total_size; + } + if (ptr_regno != -1) + { + /* Set up the frame offset to that needed by the first + out-of-line save function. */ + START_USE (ptr_regno); + ptr_reg = gen_rtx_REG (Pmode, ptr_regno); + frame_reg_rtx = ptr_reg; + if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0) + gcc_checking_assert (info->fp_save_offset + info->fp_size == 0); + else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32) + ptr_off = info->gp_save_offset + info->gp_size; + else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0) + ptr_off = info->altivec_save_offset + info->altivec_size; + frame_off = -ptr_off; + } + sp_adjust = rs6000_emit_allocate_stack (info->total_size, + ptr_reg, ptr_off); + if (REGNO (frame_reg_rtx) == 12) + sp_adjust = 0; + sp_off = info->total_size; + if (frame_reg_rtx != sp_reg_rtx) + rs6000_emit_stack_tie (frame_reg_rtx, false); + } + + /* If we use the link register, get it into r0. */ + if (!WORLD_SAVE_P (info) && info->lr_save_p + && !cfun->machine->lr_is_wrapped_separately) + { + rtx addr, reg, mem; + + reg = gen_rtx_REG (Pmode, 0); + START_USE (0); + insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO)); + RTX_FRAME_RELATED_P (insn) = 1; + + if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR + | SAVE_NOINLINE_FPRS_SAVES_LR))) + { + addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, + GEN_INT (info->lr_save_offset + frame_off)); + mem = gen_rtx_MEM (Pmode, addr); + /* This should not be of rs6000_sr_alias_set, because of + __builtin_return_address. */ + + insn = emit_move_insn (mem, reg); + rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off, + NULL_RTX, NULL_RTX); + END_USE (0); + } + } + + /* If we need to save CR, put it into r12 or r11. Choose r12 except when + r12 will be needed by out-of-line gpr restore. */ + cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + && !(strategy & (SAVE_INLINE_GPRS + | SAVE_NOINLINE_GPRS_SAVES_LR)) + ? 11 : 12); + if (!WORLD_SAVE_P (info) + && info->cr_save_p + && REGNO (frame_reg_rtx) != cr_save_regno + && !(using_static_chain_p && cr_save_regno == 11) + && !(using_split_stack && cr_save_regno == 12 && sp_adjust)) + { + cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno); + START_USE (cr_save_regno); + rs6000_emit_move_from_cr (cr_save_rtx); + } + + /* Do any required saving of fpr's. If only one or two to save, do + it ourselves. Otherwise, call function. 
*/ + if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS)) + { + int offset = info->fp_save_offset + frame_off; + for (int i = info->first_fp_reg_save; i < 64; i++) + { + if (save_reg_p (i) + && !cfun->machine->fpr_is_wrapped_separately[i - 32]) + emit_frame_save (frame_reg_rtx, fp_reg_mode, i, offset, + sp_off - frame_off); + + offset += fp_reg_size; + } + } + else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64) + { + bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0; + int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0); + unsigned ptr_regno = ptr_regno_for_savres (sel); + rtx ptr_reg = frame_reg_rtx; + + if (REGNO (frame_reg_rtx) == ptr_regno) + gcc_checking_assert (frame_off == 0); + else + { + ptr_reg = gen_rtx_REG (Pmode, ptr_regno); + NOT_INUSE (ptr_regno); + emit_insn (gen_add3_insn (ptr_reg, + frame_reg_rtx, GEN_INT (frame_off))); + } + insn = rs6000_emit_savres_rtx (info, ptr_reg, + info->fp_save_offset, + info->lr_save_offset, + DFmode, sel); + rs6000_frame_related (insn, ptr_reg, sp_off, + NULL_RTX, NULL_RTX); + if (lr) + END_USE (0); + } + + /* Save GPRs. This is done as a PARALLEL if we are using + the store-multiple instructions. */ + if (!WORLD_SAVE_P (info) + && TARGET_SPE_ABI + && info->spe_64bit_regs_used != 0 + && info->first_gp_reg_save != 32) + { + int i; + rtx spe_save_area_ptr; + HOST_WIDE_INT save_off; + int ool_adjust = 0; + + /* Determine whether we can address all of the registers that need + to be saved with an offset from frame_reg_rtx that fits in + the small const field for SPE memory instructions. */ + int spe_regs_addressable + = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off + + reg_size * (32 - info->first_gp_reg_save - 1)) + && (strategy & SAVE_INLINE_GPRS)); + + if (spe_regs_addressable) + { + spe_save_area_ptr = frame_reg_rtx; + save_off = frame_off; + } + else + { + /* Make r11 point to the start of the SPE save area. We need + to be careful here if r11 is holding the static chain. If + it is, then temporarily save it in r0. */ + HOST_WIDE_INT offset; + + if (!(strategy & SAVE_INLINE_GPRS)) + ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO); + offset = info->spe_gp_save_offset + frame_off - ool_adjust; + spe_save_area_ptr = gen_rtx_REG (Pmode, 11); + save_off = frame_off - offset; + + if (using_static_chain_p) + { + rtx r0 = gen_rtx_REG (Pmode, 0); + + START_USE (0); + gcc_assert (info->first_gp_reg_save > 11); + + emit_move_insn (r0, spe_save_area_ptr); + } + else if (REGNO (frame_reg_rtx) != 11) + START_USE (11); + + emit_insn (gen_addsi3 (spe_save_area_ptr, + frame_reg_rtx, GEN_INT (offset))); + if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11) + frame_off = -info->spe_gp_save_offset + ool_adjust; + } + + if ((strategy & SAVE_INLINE_GPRS)) + { + for (i = 0; i < 32 - info->first_gp_reg_save; i++) + if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i)) + emit_frame_save (spe_save_area_ptr, reg_mode, + info->first_gp_reg_save + i, + (info->spe_gp_save_offset + save_off + + reg_size * i), + sp_off - save_off); + } + else + { + insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr, + info->spe_gp_save_offset + save_off, + 0, reg_mode, + SAVRES_SAVE | SAVRES_GPR); + + rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off, + NULL_RTX, NULL_RTX); + } + + /* Move the static chain pointer back. 
*/ + if (!spe_regs_addressable) + { + if (using_static_chain_p) + { + emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0)); + END_USE (0); + } + else if (REGNO (frame_reg_rtx) != 11) + END_USE (11); + } + } + else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS)) + { + bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0; + int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0); + unsigned ptr_regno = ptr_regno_for_savres (sel); + rtx ptr_reg = frame_reg_rtx; + bool ptr_set_up = REGNO (ptr_reg) == ptr_regno; + int end_save = info->gp_save_offset + info->gp_size; + int ptr_off; + + if (ptr_regno == 12) + sp_adjust = 0; + if (!ptr_set_up) + ptr_reg = gen_rtx_REG (Pmode, ptr_regno); + + /* Need to adjust r11 (r12) if we saved any FPRs. */ + if (end_save + frame_off != 0) + { + rtx offset = GEN_INT (end_save + frame_off); + + if (ptr_set_up) + frame_off = -end_save; + else + NOT_INUSE (ptr_regno); + emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset)); + } + else if (!ptr_set_up) + { + NOT_INUSE (ptr_regno); + emit_move_insn (ptr_reg, frame_reg_rtx); + } + ptr_off = -end_save; + insn = rs6000_emit_savres_rtx (info, ptr_reg, + info->gp_save_offset + ptr_off, + info->lr_save_offset + ptr_off, + reg_mode, sel); + rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off, + NULL_RTX, NULL_RTX); + if (lr) + END_USE (0); + } + else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE)) + { + rtvec p; + int i; + p = rtvec_alloc (32 - info->first_gp_reg_save); + for (i = 0; i < 32 - info->first_gp_reg_save; i++) + RTVEC_ELT (p, i) + = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i), + frame_reg_rtx, + info->gp_save_offset + frame_off + reg_size * i); + insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); + rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off, + NULL_RTX, NULL_RTX); + } + else if (!WORLD_SAVE_P (info)) + { + int offset = info->gp_save_offset + frame_off; + for (int i = info->first_gp_reg_save; i < 32; i++) + { + if (rs6000_reg_live_or_pic_offset_p (i) + && !cfun->machine->gpr_is_wrapped_separately[i]) + emit_frame_save (frame_reg_rtx, reg_mode, i, offset, + sp_off - frame_off); + + offset += reg_size; + } + } + + if (crtl->calls_eh_return) + { + unsigned int i; + rtvec p; + + for (i = 0; ; ++i) + { + unsigned int regno = EH_RETURN_DATA_REGNO (i); + if (regno == INVALID_REGNUM) + break; + } + + p = rtvec_alloc (i); + + for (i = 0; ; ++i) + { + unsigned int regno = EH_RETURN_DATA_REGNO (i); + if (regno == INVALID_REGNUM) + break; + + rtx set + = gen_frame_store (gen_rtx_REG (reg_mode, regno), + sp_reg_rtx, + info->ehrd_offset + sp_off + reg_size * (int) i); + RTVEC_ELT (p, i) = set; + RTX_FRAME_RELATED_P (set) = 1; + } + + insn = emit_insn (gen_blockage ()); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p)); + } + + /* In AIX ABI we need to make sure r2 is really saved. */ + if (TARGET_AIX && crtl->calls_eh_return) + { + rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump; + rtx join_insn, note; + rtx_insn *save_insn; + long toc_restore_insn; + + tmp_reg = gen_rtx_REG (Pmode, 11); + tmp_reg_si = gen_rtx_REG (SImode, 11); + if (using_static_chain_p) + { + START_USE (0); + emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg); + } + else + START_USE (11); + emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO)); + /* Peek at instruction to which this function returns. If it's + restoring r2, then we know we've already saved r2. 
We can't + unconditionally save r2 because the value we have will already + be updated if we arrived at this function via a plt call or + toc adjusting stub. */ + emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg)); + toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000) + + RS6000_TOC_SAVE_SLOT); + hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode); + emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi)); + compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO); + validate_condition_mode (EQ, CCUNSmode); + lo = gen_int_mode (toc_restore_insn & 0xffff, SImode); + emit_insn (gen_rtx_SET (compare_result, + gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo))); + toc_save_done = gen_label_rtx (); + jump = gen_rtx_IF_THEN_ELSE (VOIDmode, + gen_rtx_EQ (VOIDmode, compare_result, + const0_rtx), + gen_rtx_LABEL_REF (VOIDmode, toc_save_done), + pc_rtx); + jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump)); + JUMP_LABEL (jump) = toc_save_done; + LABEL_NUSES (toc_save_done) += 1; + + save_insn = emit_frame_save (frame_reg_rtx, reg_mode, + TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT, + sp_off - frame_off); + + emit_label (toc_save_done); + + /* ??? If we leave SAVE_INSN marked as saving R2, then we'll + have a CFG that has different saves along different paths. + Move the note to a dummy blockage insn, which describes that + R2 is unconditionally saved after the label. */ + /* ??? An alternate representation might be a special insn pattern + containing both the branch and the store. That might give the + code that minimizes the number of DW_CFA_advance opcodes more + freedom in placing the annotations. */ + note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL); + if (note) + remove_note (save_insn, note); + else + note = alloc_reg_note (REG_FRAME_RELATED_EXPR, + copy_rtx (PATTERN (save_insn)), NULL_RTX); + RTX_FRAME_RELATED_P (save_insn) = 0; + + join_insn = emit_insn (gen_blockage ()); + REG_NOTES (join_insn) = note; + RTX_FRAME_RELATED_P (join_insn) = 1; + + if (using_static_chain_p) + { + emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0)); + END_USE (0); + } + else + END_USE (11); + } + + /* Save CR if we use any that must be preserved. */ + if (!WORLD_SAVE_P (info) && info->cr_save_p) + { + rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, + GEN_INT (info->cr_save_offset + frame_off)); + rtx mem = gen_frame_mem (SImode, addr); + + /* If we didn't copy cr before, do so now using r0. */ + if (cr_save_rtx == NULL_RTX) + { + START_USE (0); + cr_save_rtx = gen_rtx_REG (SImode, 0); + rs6000_emit_move_from_cr (cr_save_rtx); + } + + /* Saving CR requires a two-instruction sequence: one instruction + to move the CR to a general-purpose register, and a second + instruction that stores the GPR to memory. + + We do not emit any DWARF CFI records for the first of these, + because we cannot properly represent the fact that CR is saved in + a register. One reason is that we cannot express that multiple + CR fields are saved; another reason is that on 64-bit, the size + of the CR register in DWARF (4 bytes) differs from the size of + a general-purpose register. + + This means if any intervening instruction were to clobber one of + the call-saved CR fields, we'd have incorrect CFI. To prevent + this from happening, we mark the store to memory as a use of + those CR fields, which prevents any such instruction from being + scheduled in between the two instructions.
*/ + rtx crsave_v[9]; + int n_crsave = 0; + int i; + + crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx); + for (i = 0; i < 8; i++) + if (save_reg_p (CR0_REGNO + i)) + crsave_v[n_crsave++] + = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i)); + + insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, + gen_rtvec_v (n_crsave, crsave_v))); + END_USE (REGNO (cr_save_rtx)); + + /* Now, there's no way that dwarf2out_frame_debug_expr is going to + understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)', + so we need to construct a frame expression manually. */ + RTX_FRAME_RELATED_P (insn) = 1; + + /* Update address to be stack-pointer relative, like + rs6000_frame_related would do. */ + addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM), + GEN_INT (info->cr_save_offset + sp_off)); + mem = gen_frame_mem (SImode, addr); + + if (DEFAULT_ABI == ABI_ELFv2) + { + /* In the ELFv2 ABI we generate separate CFI records for each + CR field that was actually saved. They all point to the + same 32-bit stack slot. */ + rtx crframe[8]; + int n_crframe = 0; + + for (i = 0; i < 8; i++) + if (save_reg_p (CR0_REGNO + i)) + { + crframe[n_crframe] + = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i)); + + RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1; + n_crframe++; + } + + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_PARALLEL (VOIDmode, + gen_rtvec_v (n_crframe, crframe))); + } + else + { + /* In other ABIs, by convention, we use a single CR regnum to + represent the fact that all call-saved CR fields are saved. + We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */ + rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO)); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, set); + } + } + + /* In the ELFv2 ABI we need to save all call-saved CR fields into + *separate* slots if the routine calls __builtin_eh_return, so + that they can be independently restored by the unwinder. */ + if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return) + { + int i, cr_off = info->ehcr_offset; + rtx crsave; + + /* ??? We might get better performance by using multiple mfocrf + instructions. */ + crsave = gen_rtx_REG (SImode, 0); + emit_insn (gen_movesi_from_cr (crsave)); + + for (i = 0; i < 8; i++) + if (!call_used_regs[CR0_REGNO + i]) + { + rtvec p = rtvec_alloc (2); + RTVEC_ELT (p, 0) + = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off); + RTVEC_ELT (p, 1) + = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i)); + + insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); + + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i), + sp_reg_rtx, cr_off + sp_off)); + + cr_off += reg_size; + } + } + + /* Update stack and set back pointer unless this is V.4, + for which it was done previously. */ + if (!WORLD_SAVE_P (info) && info->push_p + && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return)) + { + rtx ptr_reg = NULL; + int ptr_off = 0; + + /* If saving altivec regs we need to be able to address all save + locations using a 16-bit offset. 
*/ + if ((strategy & SAVE_INLINE_VRS) == 0 + || (info->altivec_size != 0 + && (info->altivec_save_offset + info->altivec_size - 16 + + info->total_size - frame_off) > 32767) + || (info->vrsave_size != 0 + && (info->vrsave_save_offset + + info->total_size - frame_off) > 32767)) + { + int sel = SAVRES_SAVE | SAVRES_VR; + unsigned ptr_regno = ptr_regno_for_savres (sel); + + if (using_static_chain_p + && ptr_regno == STATIC_CHAIN_REGNUM) + ptr_regno = 12; + if (REGNO (frame_reg_rtx) != ptr_regno) + START_USE (ptr_regno); + ptr_reg = gen_rtx_REG (Pmode, ptr_regno); + frame_reg_rtx = ptr_reg; + ptr_off = info->altivec_save_offset + info->altivec_size; + frame_off = -ptr_off; + } + else if (REGNO (frame_reg_rtx) == 1) + frame_off = info->total_size; + sp_adjust = rs6000_emit_allocate_stack (info->total_size, + ptr_reg, ptr_off); + if (REGNO (frame_reg_rtx) == 12) + sp_adjust = 0; + sp_off = info->total_size; + if (frame_reg_rtx != sp_reg_rtx) + rs6000_emit_stack_tie (frame_reg_rtx, false); + } + + /* Set frame pointer, if needed. */ + if (frame_pointer_needed) + { + insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM), + sp_reg_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* Save AltiVec registers if needed. Save here because the red zone does + not always include AltiVec registers. */ + if (!WORLD_SAVE_P (info) + && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0) + { + int end_save = info->altivec_save_offset + info->altivec_size; + int ptr_off; + /* Oddly, the vector save/restore functions point r0 at the end + of the save area, then use r11 or r12 to load offsets for + [reg+reg] addressing. */ + rtx ptr_reg = gen_rtx_REG (Pmode, 0); + int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR); + rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno); + + gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12); + NOT_INUSE (0); + if (scratch_regno == 12) + sp_adjust = 0; + if (end_save + frame_off != 0) + { + rtx offset = GEN_INT (end_save + frame_off); + + emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset)); + } + else + emit_move_insn (ptr_reg, frame_reg_rtx); + + ptr_off = -end_save; + insn = rs6000_emit_savres_rtx (info, scratch_reg, + info->altivec_save_offset + ptr_off, + 0, V4SImode, SAVRES_SAVE | SAVRES_VR); + rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off, + NULL_RTX, NULL_RTX); + if (REGNO (frame_reg_rtx) == REGNO (scratch_reg)) + { + /* The oddity mentioned above clobbered our frame reg. */ + emit_move_insn (frame_reg_rtx, ptr_reg); + frame_off = ptr_off; + } + } + else if (!WORLD_SAVE_P (info) + && info->altivec_size != 0) + { + int i; + + for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i) + if (info->vrsave_mask & ALTIVEC_REG_BIT (i)) + { + rtx areg, savereg, mem; + HOST_WIDE_INT offset; + + offset = (info->altivec_save_offset + frame_off + + 16 * (i - info->first_altivec_reg_save)); + + savereg = gen_rtx_REG (V4SImode, i); + + if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset)) + { + mem = gen_frame_mem (V4SImode, + gen_rtx_PLUS (Pmode, frame_reg_rtx, + GEN_INT (offset))); + insn = emit_insn (gen_rtx_SET (mem, savereg)); + areg = NULL_RTX; + } + else + { + NOT_INUSE (0); + areg = gen_rtx_REG (Pmode, 0); + emit_move_insn (areg, GEN_INT (offset)); + + /* AltiVec addressing mode is [reg+reg]. 
*/ + mem = gen_frame_mem (V4SImode, + gen_rtx_PLUS (Pmode, frame_reg_rtx, areg)); + + /* Rather than emitting a generic move, force use of the stvx + instruction, which we always want on ISA 2.07 (power8) systems. + In particular we don't want xxpermdi/stxvd2x for little + endian. */ + insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg)); + } + + rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off, + areg, GEN_INT (offset)); + } + } + + /* VRSAVE is a bit vector representing which AltiVec registers + are used. The OS uses this to determine which vector + registers to save on a context switch. We need to save + VRSAVE on the stack frame, add whatever AltiVec registers we + used in this function, and do the corresponding magic in the + epilogue. */ + + if (!WORLD_SAVE_P (info) + && info->vrsave_size != 0) + { + rtx reg, vrsave; + int offset; + int save_regno; + + /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might + be using r12 as frame_reg_rtx and r11 as the static chain + pointer for nested functions. */ + save_regno = 12; + if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + && !using_static_chain_p) + save_regno = 11; + else if (using_split_stack || REGNO (frame_reg_rtx) == 12) + { + save_regno = 11; + if (using_static_chain_p) + save_regno = 0; + } + + NOT_INUSE (save_regno); + reg = gen_rtx_REG (SImode, save_regno); + vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO); + if (TARGET_MACHO) + emit_insn (gen_get_vrsave_internal (reg)); + else + emit_insn (gen_rtx_SET (reg, vrsave)); + + /* Save VRSAVE. */ + offset = info->vrsave_save_offset + frame_off; + insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset)); + + /* Include the registers in the mask. */ + emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask))); + + insn = emit_insn (generate_set_vrsave (reg, info, 0)); + } + + /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */ + if (!TARGET_SINGLE_PIC_BASE + && ((TARGET_TOC && TARGET_MINIMAL_TOC + && !constant_pool_empty_p ()) + || (DEFAULT_ABI == ABI_V4 + && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT)) + && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM)))) + { + /* If emit_load_toc_table will use the link register, we need to save + it. We use R12 for this purpose because emit_load_toc_table + can use register 0. This allows us to use a plain 'blr' to return + from the procedure more often. */ + int save_LR_around_toc_setup = (TARGET_ELF + && DEFAULT_ABI == ABI_V4 + && flag_pic + && ! info->lr_save_p + && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0); + if (save_LR_around_toc_setup) + { + rtx lr = gen_rtx_REG (Pmode, LR_REGNO); + rtx tmp = gen_rtx_REG (Pmode, 12); + + sp_adjust = 0; + insn = emit_move_insn (tmp, lr); + RTX_FRAME_RELATED_P (insn) = 1; + + rs6000_emit_load_toc_table (TRUE); + + insn = emit_move_insn (lr, tmp); + add_reg_note (insn, REG_CFA_RESTORE, lr); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + rs6000_emit_load_toc_table (TRUE); + } + +#if TARGET_MACHO + if (!TARGET_SINGLE_PIC_BASE + && DEFAULT_ABI == ABI_DARWIN + && flag_pic && crtl->uses_pic_offset_table) + { + rtx lr = gen_rtx_REG (Pmode, LR_REGNO); + rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME); + + /* Save and restore LR locally around this call (in R0). 
*/ + if (!info->lr_save_p) + emit_move_insn (gen_rtx_REG (Pmode, 0), lr); + + emit_insn (gen_load_macho_picbase (src)); + + emit_move_insn (gen_rtx_REG (Pmode, + RS6000_PIC_OFFSET_TABLE_REGNUM), + lr); + + if (!info->lr_save_p) + emit_move_insn (lr, gen_rtx_REG (Pmode, 0)); + } +#endif + + /* If we need to, save the TOC register after doing the stack setup. + Do not emit eh frame info for this save. The unwinder wants info, + conceptually attached to instructions in this function, about + register values in the caller of this function. This R2 may have + already been changed from the value in the caller. + We don't attempt to write accurate DWARF EH frame info for R2 + because code emitted by gcc for a (non-pointer) function call + doesn't save and restore R2. Instead, R2 is managed out-of-line + by a linker generated plt call stub when the function resides in + a shared library. This behavior is costly to describe in DWARF, + both in terms of the size of DWARF info and the time taken in the + unwinder to interpret it. R2 changes, apart from the + calls_eh_return case earlier in this function, are handled by + linux-unwind.h frob_update_context. */ + if (rs6000_save_toc_in_prologue_p ()) + { + rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM); + emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT)); + } + + if (using_split_stack && split_stack_arg_pointer_used_p ()) + { + /* Set up the arg pointer (r12) for -fsplit-stack code. If + __morestack was called, it left the arg pointer to the old + stack in r29. Otherwise, the arg pointer is the top of the + current frame. */ + cfun->machine->split_stack_argp_used = true; + if (sp_adjust) + { + rtx r12 = gen_rtx_REG (Pmode, 12); + rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx); + emit_insn_before (set_r12, sp_adjust); + } + else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12) + { + rtx r12 = gen_rtx_REG (Pmode, 12); + if (frame_off == 0) + emit_move_insn (r12, frame_reg_rtx); + else + emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off))); + } + if (info->push_p) + { + rtx r12 = gen_rtx_REG (Pmode, 12); + rtx r29 = gen_rtx_REG (Pmode, 29); + rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO); + rtx not_more = gen_label_rtx (); + rtx jump; + + jump = gen_rtx_IF_THEN_ELSE (VOIDmode, + gen_rtx_GEU (VOIDmode, cr7, const0_rtx), + gen_rtx_LABEL_REF (VOIDmode, not_more), + pc_rtx); + jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump)); + JUMP_LABEL (jump) = not_more; + LABEL_NUSES (not_more) += 1; + emit_move_insn (r12, r29); + emit_label (not_more); + } + } +} + +/* Output .extern statements for the save/restore routines we use. */ + +static void +rs6000_output_savres_externs (FILE *file) +{ + rs6000_stack_t *info = rs6000_stack_info (); + + if (TARGET_DEBUG_STACK) + debug_stack_info (info); + + /* Write .extern for any function we will call to save and restore + fp values. */ + if (info->first_fp_reg_save < 64 + && !TARGET_MACHO + && !TARGET_ELF) + { + char *name; + int regno = info->first_fp_reg_save - 32; + + if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0) + { + bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0; + int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0); + name = rs6000_savres_routine_name (info, regno, sel); + fprintf (file, "\t.extern %s\n", name); + } + if ((info->savres_strategy & REST_INLINE_FPRS) == 0) + { + bool lr = (info->savres_strategy + & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0; + int sel = SAVRES_FPR | (lr ? 
SAVRES_LR : 0); + name = rs6000_savres_routine_name (info, regno, sel); + fprintf (file, "\t.extern %s\n", name); + } + } +} + +/* Write function prologue. */ + +static void +rs6000_output_function_prologue (FILE *file, + HOST_WIDE_INT size ATTRIBUTE_UNUSED) +{ + if (!cfun->is_thunk) + rs6000_output_savres_externs (file); + + /* ELFv2 ABI r2 setup code and local entry point. This must follow + immediately after the global entry point label. */ + if (rs6000_global_entry_point_needed_p ()) + { + const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0); + + (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno); + + if (TARGET_CMODEL != CMODEL_LARGE) + { + /* In the small and medium code models, we assume the TOC is less + than 2 GB away from the text section, so it can be computed via the + following two-instruction sequence. */ + char buf[256]; + + ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno); + fprintf (file, "0:\taddis 2,12,.TOC.-"); + assemble_name (file, buf); + fprintf (file, "@ha\n"); + fprintf (file, "\taddi 2,2,.TOC.-"); + assemble_name (file, buf); + fprintf (file, "@l\n"); + } + else + { + /* In the large code model, we allow arbitrary offsets between the + TOC and the text section, so we have to load the offset from + memory. The data field is emitted directly before the global + entry point in rs6000_elf_declare_function_name. */ + char buf[256]; + +#ifdef HAVE_AS_ENTRY_MARKERS + /* If supported by the linker, emit a marker relocation. If the + total code size of the final executable or shared library + happens to fit into 2 GB after all, the linker will replace + this code sequence with the sequence for the small or medium + code model. */ + fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n"); +#endif + fprintf (file, "\tld 2,"); + ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno); + assemble_name (file, buf); + fprintf (file, "-"); + ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno); + assemble_name (file, buf); + fprintf (file, "(12)\n"); + fprintf (file, "\tadd 2,2,12\n"); + } + + fputs ("\t.localentry\t", file); + assemble_name (file, name); + fputs (",.-", file); + assemble_name (file, name); + fputs ("\n", file); + } + + /* Output -mprofile-kernel code. This needs to be done here instead of + in output_function_profile since it must go after the ELFv2 ABI + local entry point. */ + if (TARGET_PROFILE_KERNEL && crtl->profile) + { + gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2); + gcc_assert (!TARGET_32BIT); + + asm_fprintf (file, "\tmflr %s\n", reg_names[0]); + + /* In the ELFv2 ABI we have no compiler stack word. It must be + the responsibility of _mcount to preserve the static chain + register if required. */ + if (DEFAULT_ABI != ABI_ELFv2 + && cfun->static_chain_decl != NULL) + { + asm_fprintf (file, "\tstd %s,24(%s)\n", + reg_names[STATIC_CHAIN_REGNUM], reg_names[1]); + fprintf (file, "\tbl %s\n", RS6000_MCOUNT); + asm_fprintf (file, "\tld %s,24(%s)\n", + reg_names[STATIC_CHAIN_REGNUM], reg_names[1]); + } + else + fprintf (file, "\tbl %s\n", RS6000_MCOUNT); + } + + rs6000_pic_labelno++; +} + +/* -mprofile-kernel code calls mcount before the function prologue, + so a profiled leaf function should stay a leaf function. */ +static bool +rs6000_keep_leaf_when_profiled () +{ + return TARGET_PROFILE_KERNEL; +} + +/* Non-zero if vmx regs are restored before the frame pop, zero if + we restore after the pop when possible.
*/ +#define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0 + +/* Restoring cr is a two step process: loading a reg from the frame + save, then moving the reg to cr. For ABI_V4 we must let the + unwinder know that the stack location is no longer valid at or + before the stack deallocation, but we can't emit a cfa_restore for + cr at the stack deallocation like we do for other registers. + The trouble is that it is possible for the move to cr to be + scheduled after the stack deallocation. So say exactly where cr + is located on each of the two insns. */ + +static rtx +load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func) +{ + rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset); + rtx reg = gen_rtx_REG (SImode, regno); + rtx_insn *insn = emit_move_insn (reg, mem); + + if (!exit_func && DEFAULT_ABI == ABI_V4) + { + rtx cr = gen_rtx_REG (SImode, CR2_REGNO); + rtx set = gen_rtx_SET (reg, cr); + + add_reg_note (insn, REG_CFA_REGISTER, set); + RTX_FRAME_RELATED_P (insn) = 1; + } + return reg; +} + +/* Reload CR from REG. */ + +static void +restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func) +{ + int count = 0; + int i; + + if (using_mfcr_multiple) + { + for (i = 0; i < 8; i++) + if (save_reg_p (CR0_REGNO + i)) + count++; + gcc_assert (count); + } + + if (using_mfcr_multiple && count > 1) + { + rtx_insn *insn; + rtvec p; + int ndx; + + p = rtvec_alloc (count); + + ndx = 0; + for (i = 0; i < 8; i++) + if (save_reg_p (CR0_REGNO + i)) + { + rtvec r = rtvec_alloc (2); + RTVEC_ELT (r, 0) = reg; + RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i)); + RTVEC_ELT (p, ndx) = + gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i), + gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR)); + ndx++; + } + insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); + gcc_assert (ndx == count); + + /* For the ELFv2 ABI we generate a CFA_RESTORE for each + CR field separately. */ + if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap) + { + for (i = 0; i < 8; i++) + if (save_reg_p (CR0_REGNO + i)) + add_reg_note (insn, REG_CFA_RESTORE, + gen_rtx_REG (SImode, CR0_REGNO + i)); + + RTX_FRAME_RELATED_P (insn) = 1; + } + } + else + for (i = 0; i < 8; i++) + if (save_reg_p (CR0_REGNO + i)) + { + rtx insn = emit_insn (gen_movsi_to_cr_one + (gen_rtx_REG (CCmode, CR0_REGNO + i), reg)); + + /* For the ELFv2 ABI we generate a CFA_RESTORE for each + CR field separately, attached to the insn that in fact + restores this particular CR field. */ + if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap) + { + add_reg_note (insn, REG_CFA_RESTORE, + gen_rtx_REG (SImode, CR0_REGNO + i)); + + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */ + if (!exit_func && DEFAULT_ABI != ABI_ELFv2 + && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)) + { + rtx_insn *insn = get_last_insn (); + rtx cr = gen_rtx_REG (SImode, CR2_REGNO); + + add_reg_note (insn, REG_CFA_RESTORE, cr); + RTX_FRAME_RELATED_P (insn) = 1; + } +} + +/* Like cr, the move to lr instruction can be scheduled after the + stack deallocation, but unlike cr, its stack frame save is still + valid. So we only need to emit the cfa_restore on the correct + instruction. 
*/ + +static void +load_lr_save (int regno, rtx frame_reg_rtx, int offset) +{ + rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset); + rtx reg = gen_rtx_REG (Pmode, regno); + + emit_move_insn (reg, mem); +} + +static void +restore_saved_lr (int regno, bool exit_func) +{ + rtx reg = gen_rtx_REG (Pmode, regno); + rtx lr = gen_rtx_REG (Pmode, LR_REGNO); + rtx_insn *insn = emit_move_insn (lr, reg); + + if (!exit_func && flag_shrink_wrap) + { + add_reg_note (insn, REG_CFA_RESTORE, lr); + RTX_FRAME_RELATED_P (insn) = 1; + } +} + +static rtx +add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores) +{ + if (DEFAULT_ABI == ABI_ELFv2) + { + int i; + for (i = 0; i < 8; i++) + if (save_reg_p (CR0_REGNO + i)) + { + rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr, + cfa_restores); + } + } + else if (info->cr_save_p) + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, CR2_REGNO), + cfa_restores); + + if (info->lr_save_p) + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (Pmode, LR_REGNO), + cfa_restores); + return cfa_restores; +} + +/* Return true if OFFSET from the stack pointer can be clobbered by signals. + V.4 doesn't have any stack cushion; AIX ABIs have 220 or 288 bytes + below the stack pointer not clobbered by signals. */ + +static inline bool +offset_below_red_zone_p (HOST_WIDE_INT offset) +{ + return offset < (DEFAULT_ABI == ABI_V4 + ? 0 + : TARGET_32BIT ? -220 : -288); +} + +/* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */ + +static void +emit_cfa_restores (rtx cfa_restores) +{ + rtx_insn *insn = get_last_insn (); + rtx *loc = &REG_NOTES (insn); + + while (*loc) + loc = &XEXP (*loc, 1); + *loc = cfa_restores; + RTX_FRAME_RELATED_P (insn) = 1; +} + +/* Emit function epilogue as insns. */ + +void +rs6000_emit_epilogue (int sibcall) +{ + rs6000_stack_t *info; + int restoring_GPRs_inline; + int restoring_FPRs_inline; + int using_load_multiple; + int using_mtcr_multiple; + int use_backchain_to_restore_sp; + int restore_lr; + int strategy; + HOST_WIDE_INT frame_off = 0; + rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1); + rtx frame_reg_rtx = sp_reg_rtx; + rtx cfa_restores = NULL_RTX; + rtx insn; + rtx cr_save_reg = NULL_RTX; + machine_mode reg_mode = Pmode; + int reg_size = TARGET_32BIT ? 4 : 8; + machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) + ? DFmode : SFmode; + int fp_reg_size = 8; + int i; + bool exit_func; + unsigned ptr_regno; + + info = rs6000_stack_info (); + + if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0) + { + reg_mode = V2SImode; + reg_size = 8; + } + + strategy = info->savres_strategy; + using_load_multiple = strategy & REST_MULTIPLE; + restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS); + restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS); + using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601 + || rs6000_cpu == PROCESSOR_PPC603 + || rs6000_cpu == PROCESSOR_PPC750 + || optimize_size); + /* Restore via the backchain when we have a large frame, since this + is more efficient than an addis, addi pair. The second condition + here will not trigger at the moment; we don't actually need a + frame pointer for alloca, but the generic parts of the compiler + give us one anyway. */ + use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p + ?
info->lr_save_offset + : 0) > 32767 + || (cfun->calls_alloca + && !frame_pointer_needed)); + restore_lr = (info->lr_save_p + && (restoring_FPRs_inline + || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR)) + && (restoring_GPRs_inline + || info->first_fp_reg_save < 64) + && !cfun->machine->lr_is_wrapped_separately); + + + if (WORLD_SAVE_P (info)) + { + int i, j; + char rname[30]; + const char *alloc_rname; + rtvec p; + + /* eh_rest_world_r10 will return to the location saved in the LR + stack slot (which is not likely to be our caller.) + Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8. + rest_world is similar, except any R10 parameter is ignored. + The exception-handling stuff that was here in 2.95 is no + longer necessary. */ + + p = rtvec_alloc (9 + + 32 - info->first_gp_reg_save + + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save + + 63 + 1 - info->first_fp_reg_save); + + strcpy (rname, ((crtl->calls_eh_return) ? + "*eh_rest_world_r10" : "*rest_world")); + alloc_rname = ggc_strdup (rname); + + j = 0; + RTVEC_ELT (p, j++) = ret_rtx; + RTVEC_ELT (p, j++) + = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname)); + /* The instruction pattern requires a clobber here; + it is shared with the restVEC helper. */ + RTVEC_ELT (p, j++) + = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11)); + + { + /* CR register traditionally saved as CR2. */ + rtx reg = gen_rtx_REG (SImode, CR2_REGNO); + RTVEC_ELT (p, j++) + = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset); + if (flag_shrink_wrap) + { + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (Pmode, LR_REGNO), + cfa_restores); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); + } + } + + for (i = 0; i < 32 - info->first_gp_reg_save; i++) + { + rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i); + RTVEC_ELT (p, j++) + = gen_frame_load (reg, + frame_reg_rtx, info->gp_save_offset + reg_size * i); + if (flag_shrink_wrap) + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); + } + for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++) + { + rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i); + RTVEC_ELT (p, j++) + = gen_frame_load (reg, + frame_reg_rtx, info->altivec_save_offset + 16 * i); + if (flag_shrink_wrap) + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); + } + for (i = 0; info->first_fp_reg_save + i <= 63; i++) + { + rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT + ? DFmode : SFmode), + info->first_fp_reg_save + i); + RTVEC_ELT (p, j++) + = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i); + if (flag_shrink_wrap) + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); + } + RTVEC_ELT (p, j++) + = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0)); + RTVEC_ELT (p, j++) + = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12)); + RTVEC_ELT (p, j++) + = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7)); + RTVEC_ELT (p, j++) + = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8)); + RTVEC_ELT (p, j++) + = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10)); + insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p)); + + if (flag_shrink_wrap) + { + REG_NOTES (insn) = cfa_restores; + add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + } + return; + } + + /* frame_reg_rtx + frame_off points to the top of this stack frame. 
*/ + if (info->push_p) + frame_off = info->total_size; + + /* Restore AltiVec registers if we must do so before adjusting the + stack. */ + if (info->altivec_size != 0 + && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP + || (DEFAULT_ABI != ABI_V4 + && offset_below_red_zone_p (info->altivec_save_offset)))) + { + int i; + int scratch_regno = ptr_regno_for_savres (SAVRES_VR); + + gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12); + if (use_backchain_to_restore_sp) + { + int frame_regno = 11; + + if ((strategy & REST_INLINE_VRS) == 0) + { + /* Of r11 and r12, select the one not clobbered by an + out-of-line restore function for the frame register. */ + frame_regno = 11 + 12 - scratch_regno; + } + frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno); + emit_move_insn (frame_reg_rtx, + gen_rtx_MEM (Pmode, sp_reg_rtx)); + frame_off = 0; + } + else if (frame_pointer_needed) + frame_reg_rtx = hard_frame_pointer_rtx; + + if ((strategy & REST_INLINE_VRS) == 0) + { + int end_save = info->altivec_save_offset + info->altivec_size; + int ptr_off; + rtx ptr_reg = gen_rtx_REG (Pmode, 0); + rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno); + + if (end_save + frame_off != 0) + { + rtx offset = GEN_INT (end_save + frame_off); + + emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset)); + } + else + emit_move_insn (ptr_reg, frame_reg_rtx); + + ptr_off = -end_save; + insn = rs6000_emit_savres_rtx (info, scratch_reg, + info->altivec_save_offset + ptr_off, + 0, V4SImode, SAVRES_VR); + } + else + { + for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i) + if (info->vrsave_mask & ALTIVEC_REG_BIT (i)) + { + rtx addr, areg, mem, insn; + rtx reg = gen_rtx_REG (V4SImode, i); + HOST_WIDE_INT offset + = (info->altivec_save_offset + frame_off + + 16 * (i - info->first_altivec_reg_save)); + + if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset)) + { + mem = gen_frame_mem (V4SImode, + gen_rtx_PLUS (Pmode, frame_reg_rtx, + GEN_INT (offset))); + insn = gen_rtx_SET (reg, mem); + } + else + { + areg = gen_rtx_REG (Pmode, 0); + emit_move_insn (areg, GEN_INT (offset)); + + /* AltiVec addressing mode is [reg+reg]. */ + addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg); + mem = gen_frame_mem (V4SImode, addr); + + /* Rather than emitting a generic move, force use of the + lvx instruction, which we always want. In particular we + don't want lxvd2x/xxpermdi for little endian. */ + insn = gen_altivec_lvx_v4si_internal (reg, mem); + } + + (void) emit_insn (insn); + } + } + + for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i) + if (((strategy & REST_INLINE_VRS) == 0 + || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0) + && (flag_shrink_wrap + || (offset_below_red_zone_p + (info->altivec_save_offset + + 16 * (i - info->first_altivec_reg_save))))) + { + rtx reg = gen_rtx_REG (V4SImode, i); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); + } + } + + /* Restore VRSAVE if we must do so before adjusting the stack. 
*/ + if (info->vrsave_size != 0 + && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP + || (DEFAULT_ABI != ABI_V4 + && offset_below_red_zone_p (info->vrsave_save_offset)))) + { + rtx reg; + + if (frame_reg_rtx == sp_reg_rtx) + { + if (use_backchain_to_restore_sp) + { + frame_reg_rtx = gen_rtx_REG (Pmode, 11); + emit_move_insn (frame_reg_rtx, + gen_rtx_MEM (Pmode, sp_reg_rtx)); + frame_off = 0; + } + else if (frame_pointer_needed) + frame_reg_rtx = hard_frame_pointer_rtx; + } + + reg = gen_rtx_REG (SImode, 12); + emit_insn (gen_frame_load (reg, frame_reg_rtx, + info->vrsave_save_offset + frame_off)); + + emit_insn (generate_set_vrsave (reg, info, 1)); + } + + insn = NULL_RTX; + /* If we have a large stack frame, restore the old stack pointer + using the backchain. */ + if (use_backchain_to_restore_sp) + { + if (frame_reg_rtx == sp_reg_rtx) + { + /* Under V.4, don't reset the stack pointer until after we're done + loading the saved registers. */ + if (DEFAULT_ABI == ABI_V4) + frame_reg_rtx = gen_rtx_REG (Pmode, 11); + + insn = emit_move_insn (frame_reg_rtx, + gen_rtx_MEM (Pmode, sp_reg_rtx)); + frame_off = 0; + } + else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP + && DEFAULT_ABI == ABI_V4) + /* frame_reg_rtx has been set up by the altivec restore. */ + ; + else + { + insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx); + frame_reg_rtx = sp_reg_rtx; + } + } + /* If we have a frame pointer, we can restore the old stack pointer + from it. */ + else if (frame_pointer_needed) + { + frame_reg_rtx = sp_reg_rtx; + if (DEFAULT_ABI == ABI_V4) + frame_reg_rtx = gen_rtx_REG (Pmode, 11); + /* Prevent reordering memory accesses against stack pointer restore. */ + else if (cfun->calls_alloca + || offset_below_red_zone_p (-info->total_size)) + rs6000_emit_stack_tie (frame_reg_rtx, true); + + insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx, + GEN_INT (info->total_size))); + frame_off = 0; + } + else if (info->push_p + && DEFAULT_ABI != ABI_V4 + && !crtl->calls_eh_return) + { + /* Prevent reordering memory accesses against stack pointer restore. */ + if (cfun->calls_alloca + || offset_below_red_zone_p (-info->total_size)) + rs6000_emit_stack_tie (frame_reg_rtx, false); + insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, + GEN_INT (info->total_size))); + frame_off = 0; + } + if (insn && frame_reg_rtx == sp_reg_rtx) + { + if (cfa_restores) + { + REG_NOTES (insn) = cfa_restores; + cfa_restores = NULL_RTX; + } + add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* Restore AltiVec registers if we have not done so already. */ + if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP + && info->altivec_size != 0 + && (DEFAULT_ABI == ABI_V4 + || !offset_below_red_zone_p (info->altivec_save_offset))) + { + int i; + + if ((strategy & REST_INLINE_VRS) == 0) + { + int end_save = info->altivec_save_offset + info->altivec_size; + int ptr_off; + rtx ptr_reg = gen_rtx_REG (Pmode, 0); + int scratch_regno = ptr_regno_for_savres (SAVRES_VR); + rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno); + + if (end_save + frame_off != 0) + { + rtx offset = GEN_INT (end_save + frame_off); + + emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset)); + } + else + emit_move_insn (ptr_reg, frame_reg_rtx); + + ptr_off = -end_save; + insn = rs6000_emit_savres_rtx (info, scratch_reg, + info->altivec_save_offset + ptr_off, + 0, V4SImode, SAVRES_VR); + if (REGNO (frame_reg_rtx) == REGNO (scratch_reg)) + { + /* Frame reg was clobbered by out-of-line save. 
Restore it + from ptr_reg, and if we are calling out-of-line gpr or + fpr restore set up the correct pointer and offset. */ + unsigned newptr_regno = 1; + if (!restoring_GPRs_inline) + { + bool lr = info->gp_save_offset + info->gp_size == 0; + int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0); + newptr_regno = ptr_regno_for_savres (sel); + end_save = info->gp_save_offset + info->gp_size; + } + else if (!restoring_FPRs_inline) + { + bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR); + int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0); + newptr_regno = ptr_regno_for_savres (sel); + end_save = info->fp_save_offset + info->fp_size; + } + + if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno) + frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno); + + if (end_save + ptr_off != 0) + { + rtx offset = GEN_INT (end_save + ptr_off); + + frame_off = -end_save; + if (TARGET_32BIT) + emit_insn (gen_addsi3_carry (frame_reg_rtx, + ptr_reg, offset)); + else + emit_insn (gen_adddi3_carry (frame_reg_rtx, + ptr_reg, offset)); + } + else + { + frame_off = ptr_off; + emit_move_insn (frame_reg_rtx, ptr_reg); + } + } + } + else + { + for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i) + if (info->vrsave_mask & ALTIVEC_REG_BIT (i)) + { + rtx addr, areg, mem, insn; + rtx reg = gen_rtx_REG (V4SImode, i); + HOST_WIDE_INT offset + = (info->altivec_save_offset + frame_off + + 16 * (i - info->first_altivec_reg_save)); + + if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset)) + { + mem = gen_frame_mem (V4SImode, + gen_rtx_PLUS (Pmode, frame_reg_rtx, + GEN_INT (offset))); + insn = gen_rtx_SET (reg, mem); + } + else + { + areg = gen_rtx_REG (Pmode, 0); + emit_move_insn (areg, GEN_INT (offset)); + + /* AltiVec addressing mode is [reg+reg]. */ + addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg); + mem = gen_frame_mem (V4SImode, addr); + + /* Rather than emitting a generic move, force use of the + lvx instruction, which we always want. In particular we + don't want lxvd2x/xxpermdi for little endian. */ + insn = gen_altivec_lvx_v4si_internal (reg, mem); + } + + (void) emit_insn (insn); + } + } + + for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i) + if (((strategy & REST_INLINE_VRS) == 0 + || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0) + && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)) + { + rtx reg = gen_rtx_REG (V4SImode, i); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); + } + } + + /* Restore VRSAVE if we have not done so already. */ + if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP + && info->vrsave_size != 0 + && (DEFAULT_ABI == ABI_V4 + || !offset_below_red_zone_p (info->vrsave_save_offset))) + { + rtx reg; + + reg = gen_rtx_REG (SImode, 12); + emit_insn (gen_frame_load (reg, frame_reg_rtx, + info->vrsave_save_offset + frame_off)); + + emit_insn (generate_set_vrsave (reg, info, 1)); + } + + /* If we exit by an out-of-line restore function on ABI_V4 then that + function will deallocate the stack, so we don't need to worry + about the unwinder restoring cr from an invalid stack frame + location. */ + exit_func = (!restoring_FPRs_inline + || (!restoring_GPRs_inline + && info->first_fp_reg_save == 64)); + + /* In the ELFv2 ABI we need to restore all call-saved CR fields from + *separate* slots if the routine calls __builtin_eh_return, so + that they can be independently restored by the unwinder. 
*/ + if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return) + { + int i, cr_off = info->ehcr_offset; + + for (i = 0; i < 8; i++) + if (!call_used_regs[CR0_REGNO + i]) + { + rtx reg = gen_rtx_REG (SImode, 0); + emit_insn (gen_frame_load (reg, frame_reg_rtx, + cr_off + frame_off)); + + insn = emit_insn (gen_movsi_to_cr_one + (gen_rtx_REG (CCmode, CR0_REGNO + i), reg)); + + if (!exit_func && flag_shrink_wrap) + { + add_reg_note (insn, REG_CFA_RESTORE, + gen_rtx_REG (SImode, CR0_REGNO + i)); + + RTX_FRAME_RELATED_P (insn) = 1; + } + + cr_off += reg_size; + } + } + + /* Get the old lr if we saved it. If we are restoring registers + out-of-line, then the out-of-line routines can do this for us. */ + if (restore_lr && restoring_GPRs_inline) + load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off); + + /* Get the old cr if we saved it. */ + if (info->cr_save_p) + { + unsigned cr_save_regno = 12; + + if (!restoring_GPRs_inline) + { + /* Ensure we don't use the register used by the out-of-line + gpr register restore below. */ + bool lr = info->gp_save_offset + info->gp_size == 0; + int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0); + int gpr_ptr_regno = ptr_regno_for_savres (sel); + + if (gpr_ptr_regno == 12) + cr_save_regno = 11; + gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno); + } + else if (REGNO (frame_reg_rtx) == 12) + cr_save_regno = 11; + + cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx, + info->cr_save_offset + frame_off, + exit_func); + } + + /* Set LR here to try to overlap restores below. */ + if (restore_lr && restoring_GPRs_inline) + restore_saved_lr (0, exit_func); + + /* Load exception handler data registers, if needed. */ + if (crtl->calls_eh_return) + { + unsigned int i, regno; + + if (TARGET_AIX) + { + rtx reg = gen_rtx_REG (reg_mode, 2); + emit_insn (gen_frame_load (reg, frame_reg_rtx, + frame_off + RS6000_TOC_SAVE_SLOT)); + } + + for (i = 0; ; ++i) + { + rtx mem; + + regno = EH_RETURN_DATA_REGNO (i); + if (regno == INVALID_REGNUM) + break; + + /* Note: possible use of r0 here to address SPE regs. */ + mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx, + info->ehrd_offset + frame_off + + reg_size * (int) i); + + emit_move_insn (gen_rtx_REG (reg_mode, regno), mem); + } + } + + /* Restore GPRs. This is done as a PARALLEL if we are using + the load-multiple instructions. */ + if (TARGET_SPE_ABI + && info->spe_64bit_regs_used + && info->first_gp_reg_save != 32) + { + /* Determine whether we can address all of the registers that need + to be saved with an offset from frame_reg_rtx that fits in + the small const field for SPE memory instructions. */ + int spe_regs_addressable + = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off + + reg_size * (32 - info->first_gp_reg_save - 1)) + && restoring_GPRs_inline); + + if (!spe_regs_addressable) + { + int ool_adjust = 0; + rtx old_frame_reg_rtx = frame_reg_rtx; + /* Make r11 point to the start of the SPE save area. We worried about + not clobbering it when we were saving registers in the prologue. + There's no need to worry here because the static chain is passed + anew to every function. */ + + if (!restoring_GPRs_inline) + ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO); + frame_reg_rtx = gen_rtx_REG (Pmode, 11); + emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx, + GEN_INT (info->spe_gp_save_offset + + frame_off + - ool_adjust))); + /* Keep the invariant that frame_reg_rtx + frame_off points + at the top of the stack frame. 
*/ + frame_off = -info->spe_gp_save_offset + ool_adjust; + } + + if (restoring_GPRs_inline) + { + HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off; + + for (i = 0; i < 32 - info->first_gp_reg_save; i++) + if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i)) + { + rtx offset, addr, mem, reg; + + /* We're doing all this to ensure that the immediate offset + fits into the immediate field of 'evldd'. */ + gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i)); + + offset = GEN_INT (spe_offset + reg_size * i); + addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset); + mem = gen_rtx_MEM (V2SImode, addr); + reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i); + + emit_move_insn (reg, mem); + } + } + else + rs6000_emit_savres_rtx (info, frame_reg_rtx, + info->spe_gp_save_offset + frame_off, + info->lr_save_offset + frame_off, + reg_mode, + SAVRES_GPR | SAVRES_LR); + } + else if (!restoring_GPRs_inline) + { + /* We are jumping to an out-of-line function. */ + rtx ptr_reg; + int end_save = info->gp_save_offset + info->gp_size; + bool can_use_exit = end_save == 0; + int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0); + int ptr_off; + + /* Emit stack reset code if we need it. */ + ptr_regno = ptr_regno_for_savres (sel); + ptr_reg = gen_rtx_REG (Pmode, ptr_regno); + if (can_use_exit) + rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno); + else if (end_save + frame_off != 0) + emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, + GEN_INT (end_save + frame_off))); + else if (REGNO (frame_reg_rtx) != ptr_regno) + emit_move_insn (ptr_reg, frame_reg_rtx); + if (REGNO (frame_reg_rtx) == ptr_regno) + frame_off = -end_save; + + if (can_use_exit && info->cr_save_p) + restore_saved_cr (cr_save_reg, using_mtcr_multiple, true); + + ptr_off = -end_save; + rs6000_emit_savres_rtx (info, ptr_reg, + info->gp_save_offset + ptr_off, + info->lr_save_offset + ptr_off, + reg_mode, sel); + } + else if (using_load_multiple) + { + rtvec p; + p = rtvec_alloc (32 - info->first_gp_reg_save); + for (i = 0; i < 32 - info->first_gp_reg_save; i++) + RTVEC_ELT (p, i) + = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i), + frame_reg_rtx, + info->gp_save_offset + frame_off + reg_size * i); + emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); + } + else + { + int offset = info->gp_save_offset + frame_off; + for (i = info->first_gp_reg_save; i < 32; i++) + { + if (rs6000_reg_live_or_pic_offset_p (i) + && !cfun->machine->gpr_is_wrapped_separately[i]) + { + rtx reg = gen_rtx_REG (reg_mode, i); + emit_insn (gen_frame_load (reg, frame_reg_rtx, offset)); + } + + offset += reg_size; + } + } + + if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap) + { + /* If the frame pointer was used then we can't delay emitting + a REG_CFA_DEF_CFA note. This must happen on the insn that + restores the frame pointer, r31. We may have already emitted + a REG_CFA_DEF_CFA note, but that's OK; A duplicate is + discarded by dwarf2cfi.c/dwarf2out.c, and in any case would + be harmless if emitted. */ + if (frame_pointer_needed) + { + insn = get_last_insn (); + add_reg_note (insn, REG_CFA_DEF_CFA, + plus_constant (Pmode, frame_reg_rtx, frame_off)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* Set up cfa_restores. We always need these when + shrink-wrapping. If not shrink-wrapping then we only need + the cfa_restore when the stack location is no longer valid. 
+ The cfa_restores must be emitted on or before the insn that + invalidates the stack, and of course must not be emitted + before the insn that actually does the restore. The latter + is why it is a bad idea to emit the cfa_restores as a group + on the last instruction here that actually does a restore: + That insn may be reordered with respect to others doing + restores. */ + if (flag_shrink_wrap + && !restoring_GPRs_inline + && info->first_fp_reg_save == 64) + cfa_restores = add_crlr_cfa_restore (info, cfa_restores); + + for (i = info->first_gp_reg_save; i < 32; i++) + if (!restoring_GPRs_inline + || using_load_multiple + || rs6000_reg_live_or_pic_offset_p (i)) + { + if (cfun->machine->gpr_is_wrapped_separately[i]) + continue; + + rtx reg = gen_rtx_REG (reg_mode, i); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); + } + } + + if (!restoring_GPRs_inline + && info->first_fp_reg_save == 64) + { + /* We are jumping to an out-of-line function. */ + if (cfa_restores) + emit_cfa_restores (cfa_restores); + return; + } + + if (restore_lr && !restoring_GPRs_inline) + { + load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off); + restore_saved_lr (0, exit_func); + } + + /* Restore fpr's if we need to do it without calling a function. */ + if (restoring_FPRs_inline) + { + int offset = info->fp_save_offset + frame_off; + for (i = info->first_fp_reg_save; i < 64; i++) + { + if (save_reg_p (i) + && !cfun->machine->fpr_is_wrapped_separately[i - 32]) + { + rtx reg = gen_rtx_REG (fp_reg_mode, i); + emit_insn (gen_frame_load (reg, frame_reg_rtx, offset)); + if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap) + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, + cfa_restores); + } + + offset += fp_reg_size; + } + } + + /* If we saved cr, restore it here. Just those that were used. */ + if (info->cr_save_p) + restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func); + + /* If this is V.4, unwind the stack pointer after all of the loads + have been done, or set up r11 if we are restoring fp out of line. */ + ptr_regno = 1; + if (!restoring_FPRs_inline) + { + bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0; + int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0); + ptr_regno = ptr_regno_for_savres (sel); + } + + insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno); + if (REGNO (frame_reg_rtx) == ptr_regno) + frame_off = 0; + + if (insn && restoring_FPRs_inline) + { + if (cfa_restores) + { + REG_NOTES (insn) = cfa_restores; + cfa_restores = NULL_RTX; + } + add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (crtl->calls_eh_return) + { + rtx sa = EH_RETURN_STACKADJ_RTX; + emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa)); + } + + if (!sibcall && restoring_FPRs_inline) + { + if (cfa_restores) + { + /* We can't hang the cfa_restores off a simple return, + since the shrink-wrap code sometimes uses an existing + return. This means there might be a path from + pre-prologue code to this return, and dwarf2cfi code + wants the eh_frame unwinder state to be the same on + all paths to any point. So we need to emit the + cfa_restores before the return. For -m64 we really + don't need epilogue cfa_restores at all, except for + this irritating dwarf2cfi with shrink-wrap + requirement; The stack red-zone means eh_frame info + from the prologue telling the unwinder to restore + from the stack is perfectly good right to the end of + the function. 
*/ + emit_insn (gen_blockage ()); + emit_cfa_restores (cfa_restores); + cfa_restores = NULL_RTX; + } + + emit_jump_insn (targetm.gen_simple_return ()); + } + + if (!sibcall && !restoring_FPRs_inline) + { + bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0; + rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save); + int elt = 0; + RTVEC_ELT (p, elt++) = ret_rtx; + if (lr) + RTVEC_ELT (p, elt++) + = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO)); + + /* We have to restore more than two FP registers, so branch to the + restore function. It will return to our caller. */ + int i; + int reg; + rtx sym; + + if (flag_shrink_wrap) + cfa_restores = add_crlr_cfa_restore (info, cfa_restores); + + sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0)); + RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym); + reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11; + RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg)); + + for (i = 0; i < 64 - info->first_fp_reg_save; i++) + { + rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i); + + RTVEC_ELT (p, elt++) + = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i); + if (flag_shrink_wrap) + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); + } + + emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p)); + } + + if (cfa_restores) + { + if (sibcall) + /* Ensure the cfa_restores are hung off an insn that won't + be reordered above other restores. */ + emit_insn (gen_blockage ()); + + emit_cfa_restores (cfa_restores); + } +} + +/* Write function epilogue. */ + +static void +rs6000_output_function_epilogue (FILE *file, + HOST_WIDE_INT size ATTRIBUTE_UNUSED) +{ +#if TARGET_MACHO + macho_branch_islands (); + + { + rtx_insn *insn = get_last_insn (); + rtx_insn *deleted_debug_label = NULL; + + /* Mach-O doesn't support labels at the end of objects, so if + it looks like we might want one, take special action. + + First, collect any sequence of deleted debug labels. */ + while (insn + && NOTE_P (insn) + && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL) + { + /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL + notes only, instead set their CODE_LABEL_NUMBER to -1, + otherwise there would be code generation differences + in between -g and -g0. */ + if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL) + deleted_debug_label = insn; + insn = PREV_INSN (insn); + } + + /* Second, if we have: + label: + barrier + then this needs to be detected, so skip past the barrier. */ + + if (insn && BARRIER_P (insn)) + insn = PREV_INSN (insn); + + /* Up to now we've only seen notes or barriers. */ + if (insn) + { + if (LABEL_P (insn) + || (NOTE_P (insn) + && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)) + /* Trailing label: <barrier>. */ + fputs ("\tnop\n", file); + else + { + /* Lastly, see if we have a completely empty function body. */ + while (insn && ! INSN_P (insn)) + insn = PREV_INSN (insn); + /* If we don't find any insns, we've got an empty function body; + I.e. completely empty - without a return or branch. This is + taken as the case where a function body has been removed + because it contains an inline __builtin_unreachable(). GCC + states that reaching __builtin_unreachable() means UB so we're + not obliged to do anything special; however, we want + non-zero-sized function bodies. To meet this, and help the + user out, let's trap the case. 
*/ + if (insn == NULL) + fputs ("\ttrap\n", file); + } + } + else if (deleted_debug_label) + for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn)) + if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL) + CODE_LABEL_NUMBER (insn) = -1; + } +#endif + + /* Output a traceback table here. See /usr/include/sys/debug.h for info + on its format. + + We don't output a traceback table if -finhibit-size-directive was + used. The documentation for -finhibit-size-directive reads + ``don't output a @code{.size} assembler directive, or anything + else that would cause trouble if the function is split in the + middle, and the two halves are placed at locations far apart in + memory.'' The traceback table has this property, since it + includes the offset from the start of the function to the + traceback table itself. + + System V.4 Powerpc's (and the embedded ABI derived from it) use a + different traceback table. */ + if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + && ! flag_inhibit_size_directive + && rs6000_traceback != traceback_none && !cfun->is_thunk) + { + const char *fname = NULL; + const char *language_string = lang_hooks.name; + int fixed_parms = 0, float_parms = 0, parm_info = 0; + int i; + int optional_tbtab; + rs6000_stack_t *info = rs6000_stack_info (); + + if (rs6000_traceback == traceback_full) + optional_tbtab = 1; + else if (rs6000_traceback == traceback_part) + optional_tbtab = 0; + else + optional_tbtab = !optimize_size && !TARGET_ELF; + + if (optional_tbtab) + { + fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0); + while (*fname == '.') /* V.4 encodes . in the name */ + fname++; + + /* Need label immediately before tbtab, so we can compute + its offset from the function start. */ + ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT"); + ASM_OUTPUT_LABEL (file, fname); + } + + /* The .tbtab pseudo-op can only be used for the first eight + expressions, since it can't handle the possibly variable + length fields that follow. However, if you omit the optional + fields, the assembler outputs zeros for all optional fields + anyways, giving each variable length field is minimum length + (as defined in sys/debug.h). Thus we can not use the .tbtab + pseudo-op at all. */ + + /* An all-zero word flags the start of the tbtab, for debuggers + that have to find it by searching forward from the entry + point or from the current pc. */ + fputs ("\t.long 0\n", file); + + /* Tbtab format type. Use format type 0. */ + fputs ("\t.byte 0,", file); + + /* Language type. Unfortunately, there does not seem to be any + official way to discover the language being compiled, so we + use language_string. + C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9. + Java is 13. Objective-C is 14. Objective-C++ isn't assigned + a number, so for now use 9. LTO, Go and JIT aren't assigned numbers + either, so for now use 0. */ + if (lang_GNU_C () + || ! strcmp (language_string, "GNU GIMPLE") + || ! strcmp (language_string, "GNU Go") + || ! strcmp (language_string, "libgccjit")) + i = 0; + else if (! strcmp (language_string, "GNU F77") + || lang_GNU_Fortran ()) + i = 1; + else if (! strcmp (language_string, "GNU Pascal")) + i = 2; + else if (! strcmp (language_string, "GNU Ada")) + i = 3; + else if (lang_GNU_CXX () + || ! strcmp (language_string, "GNU Objective-C++")) + i = 9; + else if (! strcmp (language_string, "GNU Java")) + i = 13; + else if (! 
strcmp (language_string, "GNU Objective-C")) + i = 14; + else + gcc_unreachable (); + fprintf (file, "%d,", i); + + /* 8 single bit fields: global linkage (not set for C extern linkage, + apparently a PL/I convention?), out-of-line epilogue/prologue, offset + from start of procedure stored in tbtab, internal function, function + has controlled storage, function has no toc, function uses fp, + function logs/aborts fp operations. */ + /* Assume that fp operations are used if any fp reg must be saved. */ + fprintf (file, "%d,", + (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1)); + + /* 6 bitfields: function is interrupt handler, name present in + proc table, function calls alloca, on condition directives + (controls stack walks, 3 bits), saves condition reg, saves + link reg. */ + /* The `function calls alloca' bit seems to be set whenever reg 31 is + set up as a frame pointer, even when there is no alloca call. */ + fprintf (file, "%d,", + ((optional_tbtab << 6) + | ((optional_tbtab & frame_pointer_needed) << 5) + | (info->cr_save_p << 1) + | (info->lr_save_p))); + + /* 3 bitfields: saves backchain, fixup code, number of fpr saved + (6 bits). */ + fprintf (file, "%d,", + (info->push_p << 7) | (64 - info->first_fp_reg_save)); + + /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */ + fprintf (file, "%d,", (32 - first_reg_to_save ())); + + if (optional_tbtab) + { + /* Compute the parameter info from the function decl argument + list. */ + tree decl; + int next_parm_info_bit = 31; + + for (decl = DECL_ARGUMENTS (current_function_decl); + decl; decl = DECL_CHAIN (decl)) + { + rtx parameter = DECL_INCOMING_RTL (decl); + machine_mode mode = GET_MODE (parameter); + + if (GET_CODE (parameter) == REG) + { + if (SCALAR_FLOAT_MODE_P (mode)) + { + int bits; + + float_parms++; + + switch (mode) + { + case SFmode: + case SDmode: + bits = 0x2; + break; + + case DFmode: + case DDmode: + case TFmode: + case TDmode: + case IFmode: + case KFmode: + bits = 0x3; + break; + + default: + gcc_unreachable (); + } + + /* If only one bit will fit, don't or in this entry. */ + if (next_parm_info_bit > 0) + parm_info |= (bits << (next_parm_info_bit - 1)); + next_parm_info_bit -= 2; + } + else + { + fixed_parms += ((GET_MODE_SIZE (mode) + + (UNITS_PER_WORD - 1)) + / UNITS_PER_WORD); + next_parm_info_bit -= 1; + } + } + } + } + + /* Number of fixed point parameters. */ + /* This is actually the number of words of fixed point parameters; thus + an 8 byte struct counts as 2; and thus the maximum value is 8. */ + fprintf (file, "%d,", fixed_parms); + + /* 2 bitfields: number of floating point parameters (7 bits), parameters + all on stack. */ + /* This is actually the number of fp registers that hold parameters; + and thus the maximum value is 13. */ + /* Set parameters on stack bit if parameters are not in their original + registers, regardless of whether they are on the stack? Xlc + seems to set the bit when not optimizing. */ + fprintf (file, "%d\n", ((float_parms << 1) | (! optimize))); + + if (optional_tbtab) + { + /* Optional fields follow. Some are variable length. */ + + /* Parameter types, left adjusted bit fields: 0 fixed, 10 single + float, 11 double float. */ + /* There is an entry for each parameter in a register, in the order + that they occur in the parameter list. Any intervening arguments + on the stack are ignored. If the list overflows a long (max + possible length 34 bits) then completely leave off all elements + that don't fit. 
*/ + /* Only emit this long if there was at least one parameter. */ + if (fixed_parms || float_parms) + fprintf (file, "\t.long %d\n", parm_info); + + /* Offset from start of code to tb table. */ + fputs ("\t.long ", file); + ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT"); + RS6000_OUTPUT_BASENAME (file, fname); + putc ('-', file); + rs6000_output_function_entry (file, fname); + putc ('\n', file); + + /* Interrupt handler mask. */ + /* Omit this long, since we never set the interrupt handler bit + above. */ + + /* Number of CTL (controlled storage) anchors. */ + /* Omit this long, since the has_ctl bit is never set above. */ + + /* Displacement into stack of each CTL anchor. */ + /* Omit this list of longs, because there are no CTL anchors. */ + + /* Length of function name. */ + if (*fname == '*') + ++fname; + fprintf (file, "\t.short %d\n", (int) strlen (fname)); + + /* Function name. */ + assemble_string (fname, strlen (fname)); + + /* Register for alloca automatic storage; this is always reg 31. + Only emit this if the alloca bit was set above. */ + if (frame_pointer_needed) + fputs ("\t.byte 31\n", file); + + fputs ("\t.align 2\n", file); + } + } + + /* Arrange to define .LCTOC1 label, if not already done. */ + if (need_toc_init) + { + need_toc_init = 0; + if (!toc_initialized) + { + switch_to_section (toc_section); + switch_to_section (current_function_section ()); + } + } +} + +/* -fsplit-stack support. */ + +/* A SYMBOL_REF for __morestack. */ +static GTY(()) rtx morestack_ref; + +static rtx +gen_add3_const (rtx rt, rtx ra, long c) +{ + if (TARGET_64BIT) + return gen_adddi3 (rt, ra, GEN_INT (c)); + else + return gen_addsi3 (rt, ra, GEN_INT (c)); +} + +/* Emit -fsplit-stack prologue, which goes before the regular function + prologue (at local entry point in the case of ELFv2). */ + +void +rs6000_expand_split_stack_prologue (void) +{ + rs6000_stack_t *info = rs6000_stack_info (); + unsigned HOST_WIDE_INT allocate; + long alloc_hi, alloc_lo; + rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage; + rtx_insn *insn; + + gcc_assert (flag_split_stack && reload_completed); + + if (!info->push_p) + return; + + if (global_regs[29]) + { + error ("-fsplit-stack uses register r29"); + inform (DECL_SOURCE_LOCATION (global_regs_decl[29]), + "conflicts with %qD", global_regs_decl[29]); + } + + allocate = info->total_size; + if (allocate > (unsigned HOST_WIDE_INT) 1 << 31) + { + sorry ("Stack frame larger than 2G is not supported for -fsplit-stack"); + return; + } + if (morestack_ref == NULL_RTX) + { + morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack"); + SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL + | SYMBOL_FLAG_FUNCTION); + } + + r0 = gen_rtx_REG (Pmode, 0); + r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); + r12 = gen_rtx_REG (Pmode, 12); + emit_insn (gen_load_split_stack_limit (r0)); + /* Always emit two insns here to calculate the requested stack, + so that the linker can edit them when adjusting size for calling + non-split-stack code. 
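For reference, the high/low split computed just below can be checked in isolation. This is an editorial sketch, not part of the patch; the frame size value is made up:

  #include <assert.h>

  int main (void)
  {
    long allocate = 0x12345678;                         // hypothetical frame size
    long alloc_hi = (-allocate + 0x8000) & ~0xffffL;    // addis operand, multiple of 0x10000
    long alloc_lo = -allocate - alloc_hi;               // addi operand

    assert (alloc_lo >= -0x8000 && alloc_lo <= 0x7fff); // fits a signed 16-bit D field
    assert (alloc_hi + alloc_lo == -allocate);          // the two adds recreate r1 - allocate
    return 0;
  }

The rounding by 0x8000 before masking is what keeps the low part in signed 16-bit range, so the requested stack is always computed by exactly the two instructions the comment above promises.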
*/ + alloc_hi = (-allocate + 0x8000) & ~0xffffL; + alloc_lo = -allocate - alloc_hi; + if (alloc_hi != 0) + { + emit_insn (gen_add3_const (r12, r1, alloc_hi)); + if (alloc_lo != 0) + emit_insn (gen_add3_const (r12, r12, alloc_lo)); + else + emit_insn (gen_nop ()); + } + else + { + emit_insn (gen_add3_const (r12, r1, alloc_lo)); + emit_insn (gen_nop ()); + } + + compare = gen_rtx_REG (CCUNSmode, CR7_REGNO); + emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0))); + ok_label = gen_label_rtx (); + jump = gen_rtx_IF_THEN_ELSE (VOIDmode, + gen_rtx_GEU (VOIDmode, compare, const0_rtx), + gen_rtx_LABEL_REF (VOIDmode, ok_label), + pc_rtx); + insn = emit_jump_insn (gen_rtx_SET (pc_rtx, jump)); + JUMP_LABEL (insn) = ok_label; + /* Mark the jump as very likely to be taken. */ + add_int_reg_note (insn, REG_BR_PROB, + REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100); + + lr = gen_rtx_REG (Pmode, LR_REGNO); + insn = emit_move_insn (r0, lr); + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset)); + RTX_FRAME_RELATED_P (insn) = 1; + + insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref), + const0_rtx, const0_rtx)); + call_fusage = NULL_RTX; + use_reg (&call_fusage, r12); + /* Say the call uses r0, even though it doesn't, to stop regrename + from twiddling with the insns saving lr, trashing args for cfun. + The insns restoring lr are similarly protected by making + split_stack_return use r0. */ + use_reg (&call_fusage, r0); + add_function_usage_to (insn, call_fusage); + /* Indicate that this function can't jump to non-local gotos. */ + make_reg_eh_region_note_nothrow_nononlocal (insn); + emit_insn (gen_frame_load (r0, r1, info->lr_save_offset)); + insn = emit_move_insn (lr, r0); + add_reg_note (insn, REG_CFA_RESTORE, lr); + RTX_FRAME_RELATED_P (insn) = 1; + emit_insn (gen_split_stack_return ()); + + emit_label (ok_label); + LABEL_NUSES (ok_label) = 1; +} + +/* Return the internal arg pointer used for function incoming + arguments. When -fsplit-stack, the arg pointer is r12 so we need + to copy it to a pseudo in order for it to be preserved over calls + and suchlike. We'd really like to use a pseudo here for the + internal arg pointer but data-flow analysis is not prepared to + accept pseudos as live at the beginning of a function. */ + +static rtx +rs6000_internal_arg_pointer (void) +{ + if (flag_split_stack + && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl)) + == NULL)) + + { + if (cfun->machine->split_stack_arg_pointer == NULL_RTX) + { + rtx pat; + + cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode); + REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1; + + /* Put the pseudo initialization right after the note at the + beginning of the function. */ + pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer, + gen_rtx_REG (Pmode, 12)); + push_topmost_sequence (); + emit_insn_after (pat, get_insns ()); + pop_topmost_sequence (); + } + return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer, + FIRST_PARM_OFFSET (current_function_decl)); + } + return virtual_incoming_args_rtx; +} + +/* We may have to tell the dataflow pass that the split stack prologue + is initializing a register. */ + +static void +rs6000_live_on_entry (bitmap regs) +{ + if (flag_split_stack) + bitmap_set_bit (regs, 12); +} + +/* Emit -fsplit-stack dynamic stack allocation space check. 
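The compare-and-branch emitted by the function that follows boils down to a single unsigned test; a minimal sketch of that condition (helper name invented, not from the patch):

  // "Enough stack" when sp minus the requested size is still at or above
  // the split-stack limit; CCUNSmode/GEU makes this an unsigned comparison.
  static int split_stack_space_ok (unsigned long sp, unsigned long size,
                                   unsigned long limit)
  {
    return sp - size >= limit;
  }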
*/ + +void +rs6000_split_stack_space_check (rtx size, rtx label) +{ + rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); + rtx limit = gen_reg_rtx (Pmode); + rtx requested = gen_reg_rtx (Pmode); + rtx cmp = gen_reg_rtx (CCUNSmode); + rtx jump; + + emit_insn (gen_load_split_stack_limit (limit)); + if (CONST_INT_P (size)) + emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size)))); + else + { + size = force_reg (Pmode, size); + emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size)); + } + emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit))); + jump = gen_rtx_IF_THEN_ELSE (VOIDmode, + gen_rtx_GEU (VOIDmode, cmp, const0_rtx), + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx); + jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump)); + JUMP_LABEL (jump) = label; +} + +/* A C compound statement that outputs the assembler code for a thunk + function, used to implement C++ virtual function calls with + multiple inheritance. The thunk acts as a wrapper around a virtual + function, adjusting the implicit object parameter before handing + control off to the real function. + + First, emit code to add the integer DELTA to the location that + contains the incoming first argument. Assume that this argument + contains a pointer, and is the one used to pass the `this' pointer + in C++. This is the incoming argument *before* the function + prologue, e.g. `%o0' on a sparc. The addition must preserve the + values of all other incoming arguments. + + After the addition, emit code to jump to FUNCTION, which is a + `FUNCTION_DECL'. This is a direct pure jump, not a call, and does + not touch the return address. Hence returning from FUNCTION will + return to whoever called the current `thunk'. + + The effect must be as if FUNCTION had been called directly with the + adjusted first argument. This macro is responsible for emitting + all of the code for a thunk function; output_function_prologue() + and output_function_epilogue() are not invoked. + + The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already + been extracted from it.) It might possibly be useful on some + targets, but probably not. + + If you do not define this macro, the target-independent code in the + C++ frontend will generate a less efficient heavyweight thunk that + calls FUNCTION instead of jumping to it. The generic approach does + not support varargs. */ + +static void +rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, + tree function) +{ + rtx this_rtx, funexp; + rtx_insn *insn; + + reload_completed = 1; + epilogue_completed = 1; + + /* Mark the end of the (empty) prologue. */ + emit_note (NOTE_INSN_PROLOGUE_END); + + /* Find the "this" pointer. If the function returns a structure, + the structure return pointer is in r3. */ + if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) + this_rtx = gen_rtx_REG (Pmode, 4); + else + this_rtx = gen_rtx_REG (Pmode, 3); + + /* Apply the constant offset, if required. */ + if (delta) + emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta))); + + /* Apply the offset from the vtable, if required. 
*/ + if (vcall_offset) + { + rtx vcall_offset_rtx = GEN_INT (vcall_offset); + rtx tmp = gen_rtx_REG (Pmode, 12); + + emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx)); + if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000) + { + emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx)); + emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp)); + } + else + { + rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx); + + emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc)); + } + emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp)); + } + + /* Generate a tail call to the target function. */ + if (!TREE_USED (function)) + { + assemble_external (function); + TREE_USED (function) = 1; + } + funexp = XEXP (DECL_RTL (function), 0); + funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); + +#if TARGET_MACHO + if (MACHOPIC_INDIRECT) + funexp = machopic_indirect_call_target (funexp); +#endif + + /* gen_sibcall expects reload to convert scratch pseudo to LR so we must + generate sibcall RTL explicitly. */ + insn = emit_call_insn ( + gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (3, + gen_rtx_CALL (VOIDmode, + funexp, const0_rtx), + gen_rtx_USE (VOIDmode, const0_rtx), + simple_return_rtx))); + SIBLING_CALL_P (insn) = 1; + emit_barrier (); + + /* Run just enough of rest_of_compilation to get the insns emitted. + There's not really enough bulk here to make other passes such as + instruction scheduling worth while. Note that use_thunk calls + assemble_start_function and assemble_end_function. */ + insn = get_insns (); + shorten_branches (insn); + final_start_function (insn, file, 1); + final (insn, file, 1); + final_end_function (); + + reload_completed = 0; + epilogue_completed = 0; +} + +/* A quick summary of the various types of 'constant-pool tables' + under PowerPC: + + Target Flags Name One table per + AIX (none) AIX TOC object file + AIX -mfull-toc AIX TOC object file + AIX -mminimal-toc AIX minimal TOC translation unit + SVR4/EABI (none) SVR4 SDATA object file + SVR4/EABI -fpic SVR4 pic object file + SVR4/EABI -fPIC SVR4 PIC translation unit + SVR4/EABI -mrelocatable EABI TOC function + SVR4/EABI -maix AIX TOC object file + SVR4/EABI -maix -mminimal-toc + AIX minimal TOC translation unit + + Name Reg. Set by entries contains: + made by addrs? fp? sum? + + AIX TOC 2 crt0 as Y option option + AIX minimal TOC 30 prolog gcc Y Y option + SVR4 SDATA 13 crt0 gcc N Y N + SVR4 pic 30 prolog ld Y not yet N + SVR4 PIC 30 prolog gcc Y option option + EABI TOC 30 prolog gcc Y option option + +*/ + +/* Hash functions for the hash table. 
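The constant hashing below mixes operands with the multipliers 613 and 1231; for the string ('s') case the recurrence is a plain multiply-and-add rolling hash, sketched here as a standalone helper (not from the sources):

  #include <string.h>

  // h = h*613 + strlen(s), then h = h*613 + c for each character c,
  // mirroring the 's' format case of the function below.
  static unsigned hash_string_613 (unsigned h, const char *s)
  {
    unsigned len = (unsigned) strlen (s);
    h = h * 613 + len;
    for (unsigned i = 0; i < len; i++)
      h = h * 613 + (unsigned) s[i];
    return h;
  }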
*/ + +static unsigned +rs6000_hash_constant (rtx k) +{ + enum rtx_code code = GET_CODE (k); + machine_mode mode = GET_MODE (k); + unsigned result = (code << 3) ^ mode; + const char *format; + int flen, fidx; + + format = GET_RTX_FORMAT (code); + flen = strlen (format); + fidx = 0; + + switch (code) + { + case LABEL_REF: + return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0)); + + case CONST_WIDE_INT: + { + int i; + flen = CONST_WIDE_INT_NUNITS (k); + for (i = 0; i < flen; i++) + result = result * 613 + CONST_WIDE_INT_ELT (k, i); + return result; + } + + case CONST_DOUBLE: + if (mode != VOIDmode) + return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result; + flen = 2; + break; + + case CODE_LABEL: + fidx = 3; + break; + + default: + break; + } + + for (; fidx < flen; fidx++) + switch (format[fidx]) + { + case 's': + { + unsigned i, len; + const char *str = XSTR (k, fidx); + len = strlen (str); + result = result * 613 + len; + for (i = 0; i < len; i++) + result = result * 613 + (unsigned) str[i]; + break; + } + case 'u': + case 'e': + result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx)); + break; + case 'i': + case 'n': + result = result * 613 + (unsigned) XINT (k, fidx); + break; + case 'w': + if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT)) + result = result * 613 + (unsigned) XWINT (k, fidx); + else + { + size_t i; + for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++) + result = result * 613 + (unsigned) (XWINT (k, fidx) + >> CHAR_BIT * i); + } + break; + case '0': + break; + default: + gcc_unreachable (); + } + + return result; +} + +hashval_t +toc_hasher::hash (toc_hash_struct *thc) +{ + return rs6000_hash_constant (thc->key) ^ thc->key_mode; +} + +/* Compare H1 and H2 for equivalence. */ + +bool +toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2) +{ + rtx r1 = h1->key; + rtx r2 = h2->key; + + if (h1->key_mode != h2->key_mode) + return 0; + + return rtx_equal_p (r1, r2); +} + +/* These are the names given by the C++ front-end to vtables, and + vtable-like objects. Ideally, this logic should not be here; + instead, there should be some programmatic way of inquiring as + to whether or not an object is a vtable. */ + +#define VTABLE_NAME_P(NAME) \ + (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \ + || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \ + || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \ + || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \ + || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0) + +#ifdef NO_DOLLAR_IN_LABEL +/* Return a GGC-allocated character string translating dollar signs in + input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */ + +const char * +rs6000_xcoff_strip_dollar (const char *name) +{ + char *strip, *p; + const char *q; + size_t len; + + q = (const char *) strchr (name, '$'); + + if (q == 0 || q == name) + return name; + + len = strlen (name); + strip = XALLOCAVEC (char, len + 1); + strcpy (strip, name); + p = strip + (q - name); + while (p) + { + *p = '_'; + p = strchr (p + 1, '$'); + } + + return ggc_alloc_string (strip, len); +} +#endif + +void +rs6000_output_symbol_ref (FILE *file, rtx x) +{ + const char *name = XSTR (x, 0); + + /* Currently C++ toc references to vtables can be emitted before it + is decided whether the vtable is public or private. If this is + the case, then the linker will eventually complain that there is + a reference to an unknown section. Thus, for vtables only, + we emit the TOC reference to reference the identifier and not the + symbol. 
*/ + if (VTABLE_NAME_P (name)) + { + RS6000_OUTPUT_BASENAME (file, name); + } + else + assemble_name (file, name); +} + +/* Output a TOC entry. We derive the entry name from what is being + written. */ + +void +output_toc (FILE *file, rtx x, int labelno, machine_mode mode) +{ + char buf[256]; + const char *name = buf; + rtx base = x; + HOST_WIDE_INT offset = 0; + + gcc_assert (!TARGET_NO_TOC); + + /* When the linker won't eliminate them, don't output duplicate + TOC entries (this happens on AIX if there is any kind of TOC, + and on SVR4 under -fPIC or -mrelocatable). Don't do this for + CODE_LABELs. */ + if (TARGET_TOC && GET_CODE (x) != LABEL_REF) + { + struct toc_hash_struct *h; + + /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE + time because GGC is not initialized at that point. */ + if (toc_hash_table == NULL) + toc_hash_table = hash_table<toc_hasher>::create_ggc (1021); + + h = ggc_alloc<toc_hash_struct> (); + h->key = x; + h->key_mode = mode; + h->labelno = labelno; + + toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT); + if (*found == NULL) + *found = h; + else /* This is indeed a duplicate. + Set this label equal to that label. */ + { + fputs ("\t.set ", file); + ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC"); + fprintf (file, "%d,", labelno); + ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC"); + fprintf (file, "%d\n", ((*found)->labelno)); + +#ifdef HAVE_AS_TLS + if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF + && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC + || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)) + { + fputs ("\t.set ", file); + ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM"); + fprintf (file, "%d,", labelno); + ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM"); + fprintf (file, "%d\n", ((*found)->labelno)); + } +#endif + return; + } + } + + /* If we're going to put a double constant in the TOC, make sure it's + aligned properly when strict alignment is on. */ + if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x)) + && STRICT_ALIGNMENT + && GET_MODE_BITSIZE (mode) >= 64 + && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) { + ASM_OUTPUT_ALIGN (file, 3); + } + + (*targetm.asm_out.internal_label) (file, "LC", labelno); + + /* Handle FP constants specially. Note that if we have a minimal + TOC, things we put here aren't actually in the TOC, so we can allow + FP constants. */ + if (GET_CODE (x) == CONST_DOUBLE && + (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode + || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode)) + { + long k[4]; + + if (DECIMAL_FLOAT_MODE_P (GET_MODE (x))) + REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k); + else + REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k); + + if (TARGET_64BIT) + { + if (TARGET_ELF || TARGET_MINIMAL_TOC) + fputs (DOUBLE_INT_ASM_OP, file); + else + fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],", + k[0] & 0xffffffff, k[1] & 0xffffffff, + k[2] & 0xffffffff, k[3] & 0xffffffff); + fprintf (file, "0x%lx%08lx,0x%lx%08lx\n", + k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff, + k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff, + k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff, + k[WORDS_BIG_ENDIAN ? 
3 : 2] & 0xffffffff); + return; + } + else + { + if (TARGET_ELF || TARGET_MINIMAL_TOC) + fputs ("\t.long ", file); + else + fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],", + k[0] & 0xffffffff, k[1] & 0xffffffff, + k[2] & 0xffffffff, k[3] & 0xffffffff); + fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n", + k[0] & 0xffffffff, k[1] & 0xffffffff, + k[2] & 0xffffffff, k[3] & 0xffffffff); + return; + } + } + else if (GET_CODE (x) == CONST_DOUBLE && + (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode)) + { + long k[2]; + + if (DECIMAL_FLOAT_MODE_P (GET_MODE (x))) + REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k); + else + REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k); + + if (TARGET_64BIT) + { + if (TARGET_ELF || TARGET_MINIMAL_TOC) + fputs (DOUBLE_INT_ASM_OP, file); + else + fprintf (file, "\t.tc FD_%lx_%lx[TC],", + k[0] & 0xffffffff, k[1] & 0xffffffff); + fprintf (file, "0x%lx%08lx\n", + k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff, + k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff); + return; + } + else + { + if (TARGET_ELF || TARGET_MINIMAL_TOC) + fputs ("\t.long ", file); + else + fprintf (file, "\t.tc FD_%lx_%lx[TC],", + k[0] & 0xffffffff, k[1] & 0xffffffff); + fprintf (file, "0x%lx,0x%lx\n", + k[0] & 0xffffffff, k[1] & 0xffffffff); + return; + } + } + else if (GET_CODE (x) == CONST_DOUBLE && + (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode)) + { + long l; + + if (DECIMAL_FLOAT_MODE_P (GET_MODE (x))) + REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l); + else + REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l); + + if (TARGET_64BIT) + { + if (TARGET_ELF || TARGET_MINIMAL_TOC) + fputs (DOUBLE_INT_ASM_OP, file); + else + fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff); + if (WORDS_BIG_ENDIAN) + fprintf (file, "0x%lx00000000\n", l & 0xffffffff); + else + fprintf (file, "0x%lx\n", l & 0xffffffff); + return; + } + else + { + if (TARGET_ELF || TARGET_MINIMAL_TOC) + fputs ("\t.long ", file); + else + fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff); + fprintf (file, "0x%lx\n", l & 0xffffffff); + return; + } + } + else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT) + { + unsigned HOST_WIDE_INT low; + HOST_WIDE_INT high; + + low = INTVAL (x) & 0xffffffff; + high = (HOST_WIDE_INT) INTVAL (x) >> 32; + + /* TOC entries are always Pmode-sized, so when big-endian + smaller integer constants in the TOC need to be padded. + (This is still a win over putting the constants in + a separate constant pool, because then we'd have + to have both a TOC entry _and_ the actual constant.) + + For a 32-bit target, CONST_INT values are loaded and shifted + entirely within `low' and can be stored in one TOC entry. */ + + /* It would be easy to make this work, but it doesn't now. 
*/ + gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode)); + + if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode)) + { + low |= high << 32; + low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode); + high = (HOST_WIDE_INT) low >> 32; + low &= 0xffffffff; + } + + if (TARGET_64BIT) + { + if (TARGET_ELF || TARGET_MINIMAL_TOC) + fputs (DOUBLE_INT_ASM_OP, file); + else + fprintf (file, "\t.tc ID_%lx_%lx[TC],", + (long) high & 0xffffffff, (long) low & 0xffffffff); + fprintf (file, "0x%lx%08lx\n", + (long) high & 0xffffffff, (long) low & 0xffffffff); + return; + } + else + { + if (POINTER_SIZE < GET_MODE_BITSIZE (mode)) + { + if (TARGET_ELF || TARGET_MINIMAL_TOC) + fputs ("\t.long ", file); + else + fprintf (file, "\t.tc ID_%lx_%lx[TC],", + (long) high & 0xffffffff, (long) low & 0xffffffff); + fprintf (file, "0x%lx,0x%lx\n", + (long) high & 0xffffffff, (long) low & 0xffffffff); + } + else + { + if (TARGET_ELF || TARGET_MINIMAL_TOC) + fputs ("\t.long ", file); + else + fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff); + fprintf (file, "0x%lx\n", (long) low & 0xffffffff); + } + return; + } + } + + if (GET_CODE (x) == CONST) + { + gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT); + + base = XEXP (XEXP (x, 0), 0); + offset = INTVAL (XEXP (XEXP (x, 0), 1)); + } + + switch (GET_CODE (base)) + { + case SYMBOL_REF: + name = XSTR (base, 0); + break; + + case LABEL_REF: + ASM_GENERATE_INTERNAL_LABEL (buf, "L", + CODE_LABEL_NUMBER (XEXP (base, 0))); + break; + + case CODE_LABEL: + ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base)); + break; + + default: + gcc_unreachable (); + } + + if (TARGET_ELF || TARGET_MINIMAL_TOC) + fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file); + else + { + fputs ("\t.tc ", file); + RS6000_OUTPUT_BASENAME (file, name); + + if (offset < 0) + fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset); + else if (offset) + fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset); + + /* Mark large TOC symbols on AIX with [TE] so they are mapped + after other TOC symbols, reducing overflow of small TOC access + to [TC] symbols. */ + fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL + ? "[TE]," : "[TC],", file); + } + + /* Currently C++ toc references to vtables can be emitted before it + is decided whether the vtable is public or private. If this is + the case, then the linker will eventually complain that there is + a TOC reference to an unknown section. Thus, for vtables only, + we emit the TOC reference to reference the symbol and not the + section. */ + if (VTABLE_NAME_P (name)) + { + RS6000_OUTPUT_BASENAME (file, name); + if (offset < 0) + fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset); + else if (offset > 0) + fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset); + } + else + output_addr_const (file, x); + +#if HAVE_AS_TLS + if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF) + { + switch (SYMBOL_REF_TLS_MODEL (base)) + { + case 0: + break; + case TLS_MODEL_LOCAL_EXEC: + fputs ("@le", file); + break; + case TLS_MODEL_INITIAL_EXEC: + fputs ("@ie", file); + break; + /* Use global-dynamic for local-dynamic. 
*/ + case TLS_MODEL_GLOBAL_DYNAMIC: + case TLS_MODEL_LOCAL_DYNAMIC: + putc ('\n', file); + (*targetm.asm_out.internal_label) (file, "LCM", labelno); + fputs ("\t.tc .", file); + RS6000_OUTPUT_BASENAME (file, name); + fputs ("[TC],", file); + output_addr_const (file, x); + fputs ("@m", file); + break; + default: + gcc_unreachable (); + } + } +#endif + + putc ('\n', file); +} + +/* Output an assembler pseudo-op to write an ASCII string of N characters + starting at P to FILE. + + On the RS/6000, we have to do this using the .byte operation and + write out special characters outside the quoted string. + Also, the assembler is broken; very long strings are truncated, + so we must artificially break them up early. */ + +void +output_ascii (FILE *file, const char *p, int n) +{ + char c; + int i, count_string; + const char *for_string = "\t.byte \""; + const char *for_decimal = "\t.byte "; + const char *to_close = NULL; + + count_string = 0; + for (i = 0; i < n; i++) + { + c = *p++; + if (c >= ' ' && c < 0177) + { + if (for_string) + fputs (for_string, file); + putc (c, file); + + /* Write two quotes to get one. */ + if (c == '"') + { + putc (c, file); + ++count_string; + } + + for_string = NULL; + for_decimal = "\"\n\t.byte "; + to_close = "\"\n"; + ++count_string; + + if (count_string >= 512) + { + fputs (to_close, file); + + for_string = "\t.byte \""; + for_decimal = "\t.byte "; + to_close = NULL; + count_string = 0; + } + } + else + { + if (for_decimal) + fputs (for_decimal, file); + fprintf (file, "%d", c); + + for_string = "\n\t.byte \""; + for_decimal = ", "; + to_close = "\n"; + count_string = 0; + } + } + + /* Now close the string if we have written one. Then end the line. */ + if (to_close) + fputs (to_close, file); +} + +/* Generate a unique section name for FILENAME for a section type + represented by SECTION_DESC. Output goes into BUF. + + SECTION_DESC can be any string, as long as it is different for each + possible section type. + + We name the section in the same manner as xlc. The name begins with an + underscore followed by the filename (after stripping any leading directory + names) with the last period replaced by the string SECTION_DESC. If + FILENAME does not contain a period, SECTION_DESC is appended to the end of + the name. */ + +void +rs6000_gen_section_name (char **buf, const char *filename, + const char *section_desc) +{ + const char *q, *after_last_slash, *last_period = 0; + char *p; + int len; + + after_last_slash = filename; + for (q = filename; *q; q++) + { + if (*q == '/') + after_last_slash = q + 1; + else if (*q == '.') + last_period = q; + } + + len = strlen (after_last_slash) + strlen (section_desc) + 2; + *buf = (char *) xmalloc (len); + + p = *buf; + *p++ = '_'; + + for (q = after_last_slash; *q; q++) + { + if (q == last_period) + { + strcpy (p, section_desc); + p += strlen (section_desc); + break; + } + + else if (ISALNUM (*q)) + *p++ = *q; + } + + if (last_period == 0) + strcpy (p, section_desc); + else + *p = '\0'; +} + +/* Emit profile function. */ + +void +output_profile_hook (int labelno ATTRIBUTE_UNUSED) +{ + /* Non-standard profiling for kernels, which just saves LR then calls + _mcount without worrying about arg saves. The idea is to change + the function prologue as little as possible as it isn't easy to + account for arg save/restore code added just for _mcount. 
*/ + if (TARGET_PROFILE_KERNEL) + return; + + if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + { +#ifndef NO_PROFILE_COUNTERS +# define NO_PROFILE_COUNTERS 0 +#endif + if (NO_PROFILE_COUNTERS) + emit_library_call (init_one_libfunc (RS6000_MCOUNT), + LCT_NORMAL, VOIDmode, 0); + else + { + char buf[30]; + const char *label_name; + rtx fun; + + ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); + label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf)); + fun = gen_rtx_SYMBOL_REF (Pmode, label_name); + + emit_library_call (init_one_libfunc (RS6000_MCOUNT), + LCT_NORMAL, VOIDmode, 1, fun, Pmode); + } + } + else if (DEFAULT_ABI == ABI_DARWIN) + { + const char *mcount_name = RS6000_MCOUNT; + int caller_addr_regno = LR_REGNO; + + /* Be conservative and always set this, at least for now. */ + crtl->uses_pic_offset_table = 1; + +#if TARGET_MACHO + /* For PIC code, set up a stub and collect the caller's address + from r0, which is where the prologue puts it. */ + if (MACHOPIC_INDIRECT + && crtl->uses_pic_offset_table) + caller_addr_regno = 0; +#endif + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name), + LCT_NORMAL, VOIDmode, 1, + gen_rtx_REG (Pmode, caller_addr_regno), Pmode); + } +} + +/* Write function profiler code. */ + +void +output_function_profiler (FILE *file, int labelno) +{ + char buf[100]; + + switch (DEFAULT_ABI) + { + default: + gcc_unreachable (); + + case ABI_V4: + if (!TARGET_32BIT) + { + warning (0, "no profiling of 64-bit code for this ABI"); + return; + } + ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); + fprintf (file, "\tmflr %s\n", reg_names[0]); + if (NO_PROFILE_COUNTERS) + { + asm_fprintf (file, "\tstw %s,4(%s)\n", + reg_names[0], reg_names[1]); + } + else if (TARGET_SECURE_PLT && flag_pic) + { + if (TARGET_LINK_STACK) + { + char name[32]; + get_ppc476_thunk_name (name); + asm_fprintf (file, "\tbl %s\n", name); + } + else + asm_fprintf (file, "\tbcl 20,31,1f\n1:\n"); + asm_fprintf (file, "\tstw %s,4(%s)\n", + reg_names[0], reg_names[1]); + asm_fprintf (file, "\tmflr %s\n", reg_names[12]); + asm_fprintf (file, "\taddis %s,%s,", + reg_names[12], reg_names[12]); + assemble_name (file, buf); + asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]); + assemble_name (file, buf); + asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]); + } + else if (flag_pic == 1) + { + fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file); + asm_fprintf (file, "\tstw %s,4(%s)\n", + reg_names[0], reg_names[1]); + asm_fprintf (file, "\tmflr %s\n", reg_names[12]); + asm_fprintf (file, "\tlwz %s,", reg_names[0]); + assemble_name (file, buf); + asm_fprintf (file, "@got(%s)\n", reg_names[12]); + } + else if (flag_pic > 1) + { + asm_fprintf (file, "\tstw %s,4(%s)\n", + reg_names[0], reg_names[1]); + /* Now, we need to get the address of the label. 
*/ + if (TARGET_LINK_STACK) + { + char name[32]; + get_ppc476_thunk_name (name); + asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name); + assemble_name (file, buf); + fputs ("-.\n1:", file); + asm_fprintf (file, "\tmflr %s\n", reg_names[11]); + asm_fprintf (file, "\taddi %s,%s,4\n", + reg_names[11], reg_names[11]); + } + else + { + fputs ("\tbcl 20,31,1f\n\t.long ", file); + assemble_name (file, buf); + fputs ("-.\n1:", file); + asm_fprintf (file, "\tmflr %s\n", reg_names[11]); + } + asm_fprintf (file, "\tlwz %s,0(%s)\n", + reg_names[0], reg_names[11]); + asm_fprintf (file, "\tadd %s,%s,%s\n", + reg_names[0], reg_names[0], reg_names[11]); + } + else + { + asm_fprintf (file, "\tlis %s,", reg_names[12]); + assemble_name (file, buf); + fputs ("@ha\n", file); + asm_fprintf (file, "\tstw %s,4(%s)\n", + reg_names[0], reg_names[1]); + asm_fprintf (file, "\tla %s,", reg_names[0]); + assemble_name (file, buf); + asm_fprintf (file, "@l(%s)\n", reg_names[12]); + } + + /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */ + fprintf (file, "\tbl %s%s\n", + RS6000_MCOUNT, flag_pic ? "@plt" : ""); + break; + + case ABI_AIX: + case ABI_ELFv2: + case ABI_DARWIN: + /* Don't do anything, done in output_profile_hook (). */ + break; + } +} + + + +/* The following variable value is the last issued insn. */ + +static rtx_insn *last_scheduled_insn; + +/* The following variable helps to balance issuing of load and + store instructions */ + +static int load_store_pendulum; + +/* The following variable helps pair divide insns during scheduling. */ +static int divide_cnt; +/* The following variable helps pair and alternate vector and vector load + insns during scheduling. */ +static int vec_pairing; + + +/* Power4 load update and store update instructions are cracked into a + load or store and an integer insn which are executed in the same cycle. + Branches have their own dispatch slot which does not count against the + GCC issue rate, but it changes the program flow so there are no other + instructions to issue in this cycle. */ + +static int +rs6000_variable_issue_1 (rtx_insn *insn, int more) +{ + last_scheduled_insn = insn; + if (GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + { + cached_can_issue_more = more; + return cached_can_issue_more; + } + + if (insn_terminates_group_p (insn, current_group)) + { + cached_can_issue_more = 0; + return cached_can_issue_more; + } + + /* If no reservation, but reach here */ + if (recog_memoized (insn) < 0) + return more; + + if (rs6000_sched_groups) + { + if (is_microcoded_insn (insn)) + cached_can_issue_more = 0; + else if (is_cracked_insn (insn)) + cached_can_issue_more = more > 2 ? more - 2 : 0; + else + cached_can_issue_more = more - 1; + + return cached_can_issue_more; + } + + if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn)) + return 0; + + cached_can_issue_more = more - 1; + return cached_can_issue_more; +} + +static int +rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more) +{ + int r = rs6000_variable_issue_1 (insn, more); + if (verbose) + fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r); + return r; +} + +/* Adjust the cost of a scheduling dependency. Return the new cost of + a dependency LINK or INSN on DEP_INSN. COST is the current cost. 
*/ + +static int +rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, + unsigned int) +{ + enum attr_type attr_type; + + if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0) + return cost; + + switch (dep_type) + { + case REG_DEP_TRUE: + { + /* Data dependency; DEP_INSN writes a register that INSN reads + some cycles later. */ + + /* Separate a load from a narrower, dependent store. */ + if ((rs6000_sched_groups || rs6000_cpu_attr == CPU_POWER9) + && GET_CODE (PATTERN (insn)) == SET + && GET_CODE (PATTERN (dep_insn)) == SET + && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM + && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM + && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1))) + > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0))))) + return cost + 14; + + attr_type = get_attr_type (insn); + + switch (attr_type) + { + case TYPE_JMPREG: + /* Tell the first scheduling pass about the latency between + a mtctr and bctr (and mtlr and br/blr). The first + scheduling pass will not know about this latency since + the mtctr instruction, which has the latency associated + to it, will be generated by reload. */ + return 4; + case TYPE_BRANCH: + /* Leave some extra cycles between a compare and its + dependent branch, to inhibit expensive mispredicts. */ + if ((rs6000_cpu_attr == CPU_PPC603 + || rs6000_cpu_attr == CPU_PPC604 + || rs6000_cpu_attr == CPU_PPC604E + || rs6000_cpu_attr == CPU_PPC620 + || rs6000_cpu_attr == CPU_PPC630 + || rs6000_cpu_attr == CPU_PPC750 + || rs6000_cpu_attr == CPU_PPC7400 + || rs6000_cpu_attr == CPU_PPC7450 + || rs6000_cpu_attr == CPU_PPCE5500 + || rs6000_cpu_attr == CPU_PPCE6500 + || rs6000_cpu_attr == CPU_POWER4 + || rs6000_cpu_attr == CPU_POWER5 + || rs6000_cpu_attr == CPU_POWER7 + || rs6000_cpu_attr == CPU_POWER8 + || rs6000_cpu_attr == CPU_POWER9 + || rs6000_cpu_attr == CPU_CELL) + && recog_memoized (dep_insn) + && (INSN_CODE (dep_insn) >= 0)) + + switch (get_attr_type (dep_insn)) + { + case TYPE_CMP: + case TYPE_FPCOMPARE: + case TYPE_CR_LOGICAL: + case TYPE_DELAYED_CR: + return cost + 2; + case TYPE_EXTS: + case TYPE_MUL: + if (get_attr_dot (dep_insn) == DOT_YES) + return cost + 2; + else + break; + case TYPE_SHIFT: + if (get_attr_dot (dep_insn) == DOT_YES + && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO) + return cost + 2; + else + break; + default: + break; + } + break; + + case TYPE_STORE: + case TYPE_FPSTORE: + if ((rs6000_cpu == PROCESSOR_POWER6) + && recog_memoized (dep_insn) + && (INSN_CODE (dep_insn) >= 0)) + { + + if (GET_CODE (PATTERN (insn)) != SET) + /* If this happens, we have to extend this to schedule + optimally. Return default for now. */ + return cost; + + /* Adjust the cost for the case where the value written + by a fixed point operation is used as the address + gen value on a store. */ + switch (get_attr_type (dep_insn)) + { + case TYPE_LOAD: + case TYPE_CNTLZ: + { + if (! rs6000_store_data_bypass_p (dep_insn, insn)) + return get_attr_sign_extend (dep_insn) + == SIGN_EXTEND_YES ? 6 : 4; + break; + } + case TYPE_SHIFT: + { + if (! rs6000_store_data_bypass_p (dep_insn, insn)) + return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ? + 6 : 3; + break; + } + case TYPE_INTEGER: + case TYPE_ADD: + case TYPE_LOGICAL: + case TYPE_EXTS: + case TYPE_INSERT: + { + if (! rs6000_store_data_bypass_p (dep_insn, insn)) + return 3; + break; + } + case TYPE_STORE: + case TYPE_FPLOAD: + case TYPE_FPSTORE: + { + if (get_attr_update (dep_insn) == UPDATE_YES + && ! 
rs6000_store_data_bypass_p (dep_insn, insn)) + return 3; + break; + } + case TYPE_MUL: + { + if (! rs6000_store_data_bypass_p (dep_insn, insn)) + return 17; + break; + } + case TYPE_DIV: + { + if (! rs6000_store_data_bypass_p (dep_insn, insn)) + return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57; + break; + } + default: + break; + } + } + break; + + case TYPE_LOAD: + if ((rs6000_cpu == PROCESSOR_POWER6) + && recog_memoized (dep_insn) + && (INSN_CODE (dep_insn) >= 0)) + { + + /* Adjust the cost for the case where the value written + by a fixed point instruction is used within the address + gen portion of a subsequent load(u)(x) */ + switch (get_attr_type (dep_insn)) + { + case TYPE_LOAD: + case TYPE_CNTLZ: + { + if (set_to_load_agen (dep_insn, insn)) + return get_attr_sign_extend (dep_insn) + == SIGN_EXTEND_YES ? 6 : 4; + break; + } + case TYPE_SHIFT: + { + if (set_to_load_agen (dep_insn, insn)) + return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ? + 6 : 3; + break; + } + case TYPE_INTEGER: + case TYPE_ADD: + case TYPE_LOGICAL: + case TYPE_EXTS: + case TYPE_INSERT: + { + if (set_to_load_agen (dep_insn, insn)) + return 3; + break; + } + case TYPE_STORE: + case TYPE_FPLOAD: + case TYPE_FPSTORE: + { + if (get_attr_update (dep_insn) == UPDATE_YES + && set_to_load_agen (dep_insn, insn)) + return 3; + break; + } + case TYPE_MUL: + { + if (set_to_load_agen (dep_insn, insn)) + return 17; + break; + } + case TYPE_DIV: + { + if (set_to_load_agen (dep_insn, insn)) + return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57; + break; + } + default: + break; + } + } + break; + + case TYPE_FPLOAD: + if ((rs6000_cpu == PROCESSOR_POWER6) + && get_attr_update (insn) == UPDATE_NO + && recog_memoized (dep_insn) + && (INSN_CODE (dep_insn) >= 0) + && (get_attr_type (dep_insn) == TYPE_MFFGPR)) + return 2; + + default: + break; + } + + /* Fall out to return default cost. */ + } + break; + + case REG_DEP_OUTPUT: + /* Output dependency; DEP_INSN writes a register that INSN writes some + cycles later. */ + if ((rs6000_cpu == PROCESSOR_POWER6) + && recog_memoized (dep_insn) + && (INSN_CODE (dep_insn) >= 0)) + { + attr_type = get_attr_type (insn); + + switch (attr_type) + { + case TYPE_FP: + case TYPE_FPSIMPLE: + if (get_attr_type (dep_insn) == TYPE_FP + || get_attr_type (dep_insn) == TYPE_FPSIMPLE) + return 1; + break; + case TYPE_FPLOAD: + if (get_attr_update (insn) == UPDATE_NO + && get_attr_type (dep_insn) == TYPE_MFFGPR) + return 2; + break; + default: + break; + } + } + /* Fall through, no cost for output dependency. */ + /* FALLTHRU */ + + case REG_DEP_ANTI: + /* Anti dependency; DEP_INSN reads a register that INSN writes some + cycles later. */ + return 0; + + default: + gcc_unreachable (); + } + + return cost; +} + +/* Debug version of rs6000_adjust_cost. */ + +static int +rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, + int cost, unsigned int dw) +{ + int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw); + + if (ret != cost) + { + const char *dep; + + switch (dep_type) + { + default: dep = "unknown depencency"; break; + case REG_DEP_TRUE: dep = "data dependency"; break; + case REG_DEP_OUTPUT: dep = "output dependency"; break; + case REG_DEP_ANTI: dep = "anti depencency"; break; + } + + fprintf (stderr, + "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, " + "%s, insn:\n", ret, cost, dep); + + debug_rtx (insn); + } + + return ret; +} + +/* The function returns a true if INSN is microcoded. + Return false otherwise. 
*/ + +static bool +is_microcoded_insn (rtx_insn *insn) +{ + if (!insn || !NONDEBUG_INSN_P (insn) + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + return false; + + if (rs6000_cpu_attr == CPU_CELL) + return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS; + + if (rs6000_sched_groups + && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5)) + { + enum attr_type type = get_attr_type (insn); + if ((type == TYPE_LOAD + && get_attr_update (insn) == UPDATE_YES + && get_attr_sign_extend (insn) == SIGN_EXTEND_YES) + || ((type == TYPE_LOAD || type == TYPE_STORE) + && get_attr_update (insn) == UPDATE_YES + && get_attr_indexed (insn) == INDEXED_YES) + || type == TYPE_MFCR) + return true; + } + + return false; +} + +/* The function returns true if INSN is cracked into 2 instructions + by the processor (and therefore occupies 2 issue slots). */ + +static bool +is_cracked_insn (rtx_insn *insn) +{ + if (!insn || !NONDEBUG_INSN_P (insn) + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + return false; + + if (rs6000_sched_groups + && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5)) + { + enum attr_type type = get_attr_type (insn); + if ((type == TYPE_LOAD + && get_attr_sign_extend (insn) == SIGN_EXTEND_YES + && get_attr_update (insn) == UPDATE_NO) + || (type == TYPE_LOAD + && get_attr_sign_extend (insn) == SIGN_EXTEND_NO + && get_attr_update (insn) == UPDATE_YES + && get_attr_indexed (insn) == INDEXED_NO) + || (type == TYPE_STORE + && get_attr_update (insn) == UPDATE_YES + && get_attr_indexed (insn) == INDEXED_NO) + || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE) + && get_attr_update (insn) == UPDATE_YES) + || type == TYPE_DELAYED_CR + || (type == TYPE_EXTS + && get_attr_dot (insn) == DOT_YES) + || (type == TYPE_SHIFT + && get_attr_dot (insn) == DOT_YES + && get_attr_var_shift (insn) == VAR_SHIFT_NO) + || (type == TYPE_MUL + && get_attr_dot (insn) == DOT_YES) + || type == TYPE_DIV + || (type == TYPE_INSERT + && get_attr_size (insn) == SIZE_32)) + return true; + } + + return false; +} + +/* The function returns true if INSN can be issued only from + the branch slot. */ + +static bool +is_branch_slot_insn (rtx_insn *insn) +{ + if (!insn || !NONDEBUG_INSN_P (insn) + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + return false; + + if (rs6000_sched_groups) + { + enum attr_type type = get_attr_type (insn); + if (type == TYPE_BRANCH || type == TYPE_JMPREG) + return true; + return false; + } + + return false; +} + +/* The function returns true if out_inst sets a value that is + used in the address generation computation of in_insn */ +static bool +set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn) +{ + rtx out_set, in_set; + + /* For performance reasons, only handle the simple case where + both loads are a single_set. */ + out_set = single_set (out_insn); + if (out_set) + { + in_set = single_set (in_insn); + if (in_set) + return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set)); + } + + return false; +} + +/* Try to determine base/offset/size parts of the given MEM. + Return true if successful, false if all the values couldn't + be determined. + + This function only looks for REG or REG+CONST address forms. + REG+REG address form will return false. 
*/ + +static bool +get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset, + HOST_WIDE_INT *size) +{ + rtx addr_rtx; + if MEM_SIZE_KNOWN_P (mem) + *size = MEM_SIZE (mem); + else + return false; + + addr_rtx = (XEXP (mem, 0)); + if (GET_CODE (addr_rtx) == PRE_MODIFY) + addr_rtx = XEXP (addr_rtx, 1); + + *offset = 0; + while (GET_CODE (addr_rtx) == PLUS + && CONST_INT_P (XEXP (addr_rtx, 1))) + { + *offset += INTVAL (XEXP (addr_rtx, 1)); + addr_rtx = XEXP (addr_rtx, 0); + } + if (!REG_P (addr_rtx)) + return false; + + *base = addr_rtx; + return true; +} + +/* The function returns true if the target storage location of + mem1 is adjacent to the target storage location of mem2 */ +/* Return 1 if memory locations are adjacent. */ + +static bool +adjacent_mem_locations (rtx mem1, rtx mem2) +{ + rtx reg1, reg2; + HOST_WIDE_INT off1, size1, off2, size2; + + if (get_memref_parts (mem1, &reg1, &off1, &size1) + && get_memref_parts (mem2, &reg2, &off2, &size2)) + return ((REGNO (reg1) == REGNO (reg2)) + && ((off1 + size1 == off2) + || (off2 + size2 == off1))); + + return false; +} + +/* This function returns true if it can be determined that the two MEM + locations overlap by at least 1 byte based on base reg/offset/size. */ + +static bool +mem_locations_overlap (rtx mem1, rtx mem2) +{ + rtx reg1, reg2; + HOST_WIDE_INT off1, size1, off2, size2; + + if (get_memref_parts (mem1, &reg1, &off1, &size1) + && get_memref_parts (mem2, &reg2, &off2, &size2)) + return ((REGNO (reg1) == REGNO (reg2)) + && (((off1 <= off2) && (off1 + size1 > off2)) + || ((off2 <= off1) && (off2 + size2 > off1)))); + + return false; +} + +/* A C statement (sans semicolon) to update the integer scheduling + priority INSN_PRIORITY (INSN). Increase the priority to execute the + INSN earlier, reduce the priority to execute INSN later. Do not + define this macro if you do not need to adjust the scheduling + priorities of insns. */ + +static int +rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority) +{ + rtx load_mem, str_mem; + /* On machines (like the 750) which have asymmetric integer units, + where one integer unit can do multiply and divides and the other + can't, reduce the priority of multiply/divide so it is scheduled + before other integer operations. */ + +#if 0 + if (! INSN_P (insn)) + return priority; + + if (GET_CODE (PATTERN (insn)) == USE) + return priority; + + switch (rs6000_cpu_attr) { + case CPU_PPC750: + switch (get_attr_type (insn)) + { + default: + break; + + case TYPE_MUL: + case TYPE_DIV: + fprintf (stderr, "priority was %#x (%d) before adjustment\n", + priority, priority); + if (priority >= 0 && priority < 0x01000000) + priority >>= 3; + break; + } + } +#endif + + if (insn_must_be_first_in_group (insn) + && reload_completed + && current_sched_info->sched_max_insns_priority + && rs6000_sched_restricted_insns_priority) + { + + /* Prioritize insns that can be dispatched only in the first + dispatch slot. */ + if (rs6000_sched_restricted_insns_priority == 1) + /* Attach highest priority to insn. This means that in + haifa-sched.c:ready_sort(), dispatch-slot restriction considerations + precede 'priority' (critical path) considerations. */ + return current_sched_info->sched_max_insns_priority; + else if (rs6000_sched_restricted_insns_priority == 2) + /* Increase priority of insn by a minimal amount. This means that in + haifa-sched.c:ready_sort(), only 'priority' (critical path) + considerations precede dispatch-slot restriction considerations. 
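adjacent_mem_locations and mem_locations_overlap above reduce two memory references to (base register, offset, size) triples via get_memref_parts and then do plain interval arithmetic. Here is a standalone sketch of just that arithmetic, with a small struct of integers standing in for the RTL; the struct and the sample values are illustrative, not GCC types.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for what get_memref_parts extracts.  */
struct memref { int base_regno; long offset; long size; };

/* True if B starts exactly where A ends, or vice versa.  */
static bool
adjacent (struct memref a, struct memref b)
{
  return a.base_regno == b.base_regno
         && (a.offset + a.size == b.offset
             || b.offset + b.size == a.offset);
}

/* True if the two byte ranges share at least one byte.  */
static bool
overlap (struct memref a, struct memref b)
{
  return a.base_regno == b.base_regno
         && ((a.offset <= b.offset && a.offset + a.size > b.offset)
             || (b.offset <= a.offset && b.offset + b.size > a.offset));
}

int
main (void)
{
  struct memref w0 = { 9, 0, 4 };   /* 4 bytes at r9+0 */
  struct memref w1 = { 9, 4, 4 };   /* 4 bytes at r9+4 */
  struct memref w2 = { 9, 2, 4 };   /* 4 bytes at r9+2 */

  printf ("%d %d\n", adjacent (w0, w1), overlap (w0, w1)); /* 1 0 */
  printf ("%d %d\n", adjacent (w0, w2), overlap (w0, w2)); /* 0 1 */
  return 0;
}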
*/ + return (priority + 1); + } + + if (rs6000_cpu == PROCESSOR_POWER6 + && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem)) + || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem)))) + /* Attach highest priority to insn if the scheduler has just issued two + stores and this instruction is a load, or two loads and this instruction + is a store. Power6 wants loads and stores scheduled alternately + when possible */ + return current_sched_info->sched_max_insns_priority; + + return priority; +} + +/* Return true if the instruction is nonpipelined on the Cell. */ +static bool +is_nonpipeline_insn (rtx_insn *insn) +{ + enum attr_type type; + if (!insn || !NONDEBUG_INSN_P (insn) + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + return false; + + type = get_attr_type (insn); + if (type == TYPE_MUL + || type == TYPE_DIV + || type == TYPE_SDIV + || type == TYPE_DDIV + || type == TYPE_SSQRT + || type == TYPE_DSQRT + || type == TYPE_MFCR + || type == TYPE_MFCRF + || type == TYPE_MFJMPR) + { + return true; + } + return false; +} + + +/* Return how many instructions the machine can issue per cycle. */ + +static int +rs6000_issue_rate (void) +{ + /* Unless scheduling for register pressure, use issue rate of 1 for + first scheduling pass to decrease degradation. */ + if (!reload_completed && !flag_sched_pressure) + return 1; + + switch (rs6000_cpu_attr) { + case CPU_RS64A: + case CPU_PPC601: /* ? */ + case CPU_PPC7450: + return 3; + case CPU_PPC440: + case CPU_PPC603: + case CPU_PPC750: + case CPU_PPC7400: + case CPU_PPC8540: + case CPU_PPC8548: + case CPU_CELL: + case CPU_PPCE300C2: + case CPU_PPCE300C3: + case CPU_PPCE500MC: + case CPU_PPCE500MC64: + case CPU_PPCE5500: + case CPU_PPCE6500: + case CPU_TITAN: + return 2; + case CPU_PPC476: + case CPU_PPC604: + case CPU_PPC604E: + case CPU_PPC620: + case CPU_PPC630: + return 4; + case CPU_POWER4: + case CPU_POWER5: + case CPU_POWER6: + case CPU_POWER7: + return 5; + case CPU_POWER8: + return 7; + case CPU_POWER9: + return 6; + default: + return 1; + } +} + +/* Return how many instructions to look ahead for better insn + scheduling. */ + +static int +rs6000_use_sched_lookahead (void) +{ + switch (rs6000_cpu_attr) + { + case CPU_PPC8540: + case CPU_PPC8548: + return 4; + + case CPU_CELL: + return (reload_completed ? 8 : 0); + + default: + return 0; + } +} + +/* We are choosing insn from the ready queue. Return zero if INSN can be + chosen. */ +static int +rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index) +{ + if (ready_index == 0) + return 0; + + if (rs6000_cpu_attr != CPU_CELL) + return 0; + + gcc_assert (insn != NULL_RTX && INSN_P (insn)); + + if (!reload_completed + || is_nonpipeline_insn (insn) + || is_microcoded_insn (insn)) + return 1; + + return 0; +} + +/* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx + and return true. */ + +static bool +find_mem_ref (rtx pat, rtx *mem_ref) +{ + const char * fmt; + int i, j; + + /* stack_tie does not produce any real memory traffic. */ + if (tie_operand (pat, VOIDmode)) + return false; + + if (GET_CODE (pat) == MEM) + { + *mem_ref = pat; + return true; + } + + /* Recursively process the pattern. 
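find_mem_ref, whose recursive case continues just below, walks an arbitrary pattern by its RTL format string, descending into sub-expressions and vectors until it finds a MEM. The same idea over a simplified, self-contained expression tree is sketched here; the node type is invented purely for illustration and is not an RTL structure.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* A toy expression node: either a leaf that is (or is not) a memory
   reference, or an interior node with children.  Real RTL is walked via
   GET_RTX_FORMAT/XEXP/XVECEXP instead of a fixed child array.  */
struct expr
{
  bool is_mem;
  struct expr *kids[3];
  int n_kids;
};

/* Depth-first search for a memory reference, mirroring the shape of
   find_mem_ref: stop at the first hit and report it through *found.  */
static bool
find_mem (struct expr *e, struct expr **found)
{
  if (e == NULL)
    return false;
  if (e->is_mem)
    {
      *found = e;
      return true;
    }
  for (int i = 0; i < e->n_kids; i++)
    if (find_mem (e->kids[i], found))
      return true;
  return false;
}

int
main (void)
{
  struct expr mem = { true, { NULL }, 0 };
  struct expr reg = { false, { NULL }, 0 };
  struct expr plus = { false, { &reg, &mem }, 2 };
  struct expr *hit = NULL;

  printf ("%d\n", find_mem (&plus, &hit));   /* 1: the MEM leaf is found */
  return 0;
}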
*/ + fmt = GET_RTX_FORMAT (GET_CODE (pat)); + + for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--) + { + if (fmt[i] == 'e') + { + if (find_mem_ref (XEXP (pat, i), mem_ref)) + return true; + } + else if (fmt[i] == 'E') + for (j = XVECLEN (pat, i) - 1; j >= 0; j--) + { + if (find_mem_ref (XVECEXP (pat, i, j), mem_ref)) + return true; + } + } + + return false; +} + +/* Determine if PAT is a PATTERN of a load insn. */ + +static bool +is_load_insn1 (rtx pat, rtx *load_mem) +{ + if (!pat || pat == NULL_RTX) + return false; + + if (GET_CODE (pat) == SET) + return find_mem_ref (SET_SRC (pat), load_mem); + + if (GET_CODE (pat) == PARALLEL) + { + int i; + + for (i = 0; i < XVECLEN (pat, 0); i++) + if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem)) + return true; + } + + return false; +} + +/* Determine if INSN loads from memory. */ + +static bool +is_load_insn (rtx insn, rtx *load_mem) +{ + if (!insn || !INSN_P (insn)) + return false; + + if (CALL_P (insn)) + return false; + + return is_load_insn1 (PATTERN (insn), load_mem); +} + +/* Determine if PAT is a PATTERN of a store insn. */ + +static bool +is_store_insn1 (rtx pat, rtx *str_mem) +{ + if (!pat || pat == NULL_RTX) + return false; + + if (GET_CODE (pat) == SET) + return find_mem_ref (SET_DEST (pat), str_mem); + + if (GET_CODE (pat) == PARALLEL) + { + int i; + + for (i = 0; i < XVECLEN (pat, 0); i++) + if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem)) + return true; + } + + return false; +} + +/* Determine if INSN stores to memory. */ + +static bool +is_store_insn (rtx insn, rtx *str_mem) +{ + if (!insn || !INSN_P (insn)) + return false; + + return is_store_insn1 (PATTERN (insn), str_mem); +} + +/* Return whether TYPE is a Power9 pairable vector instruction type. */ + +static bool +is_power9_pairable_vec_type (enum attr_type type) +{ + switch (type) + { + case TYPE_VECSIMPLE: + case TYPE_VECCOMPLEX: + case TYPE_VECDIV: + case TYPE_VECCMP: + case TYPE_VECPERM: + case TYPE_VECFLOAT: + case TYPE_VECFDIV: + case TYPE_VECDOUBLE: + return true; + default: + break; + } + return false; +} + +/* Returns whether the dependence between INSN and NEXT is considered + costly by the given target. */ + +static bool +rs6000_is_costly_dependence (dep_t dep, int cost, int distance) +{ + rtx insn; + rtx next; + rtx load_mem, str_mem; + + /* If the flag is not enabled - no dependence is considered costly; + allow all dependent insns in the same group. + This is the most aggressive option. */ + if (rs6000_sched_costly_dep == no_dep_costly) + return false; + + /* If the flag is set to 1 - a dependence is always considered costly; + do not allow dependent instructions in the same group. + This is the most conservative option. */ + if (rs6000_sched_costly_dep == all_deps_costly) + return true; + + insn = DEP_PRO (dep); + next = DEP_CON (dep); + + if (rs6000_sched_costly_dep == store_to_load_dep_costly + && is_load_insn (next, &load_mem) + && is_store_insn (insn, &str_mem)) + /* Prevent load after store in the same group. */ + return true; + + if (rs6000_sched_costly_dep == true_store_to_load_dep_costly + && is_load_insn (next, &load_mem) + && is_store_insn (insn, &str_mem) + && DEP_TYPE (dep) == REG_DEP_TRUE + && mem_locations_overlap(str_mem, load_mem)) + /* Prevent load after store in the same group if it is a true + dependence. */ + return true; + + /* The flag is set to X; dependences with latency >= X are considered costly, + and will not be scheduled in the same group. 
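rs6000_is_costly_dependence, whose final latency test follows just below, combines a policy setting (rs6000_sched_costly_dep) with the dependence itself: store-to-load dependences can be kept out of a dispatch group outright, and otherwise a dependence counts as costly once its remaining latency reaches the configured threshold. A small sketch of that decision with plain ints in place of dep_t/rtx; the helper name and values are illustrative, and the two store-to-load policy variants are collapsed into one flag here.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative policy values, in the spirit of the rs6000 settings:
   0 = no dependence is costly, 1 = every dependence is costly,
   any larger value is a latency threshold.  */
enum { NO_DEP_COSTLY = 0, ALL_DEPS_COSTLY = 1 };

static bool
costly_dep_sketch (int policy, bool store_to_load, int cost, int distance)
{
  if (policy == NO_DEP_COSTLY)
    return false;
  if (policy == ALL_DEPS_COSTLY)
    return true;
  if (store_to_load)
    return true;                      /* keep a load out of its store's group */
  return cost - distance >= policy;   /* remaining latency reaches threshold */
}

int
main (void)
{
  /* Latency 5, already 1 group apart, threshold 4: still costly.  */
  printf ("%d\n", costly_dep_sketch (4, false, 5, 1));  /* 1 */
  /* Same dependence 2 groups apart: no longer costly.  */
  printf ("%d\n", costly_dep_sketch (4, false, 5, 2));  /* 0 */
  return 0;
}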
*/ + if (rs6000_sched_costly_dep <= max_dep_latency + && ((cost - distance) >= (int)rs6000_sched_costly_dep)) + return true; + + return false; +} + +/* Return the next insn after INSN that is found before TAIL is reached, + skipping any "non-active" insns - insns that will not actually occupy + an issue slot. Return NULL_RTX if such an insn is not found. */ + +static rtx_insn * +get_next_active_insn (rtx_insn *insn, rtx_insn *tail) +{ + if (insn == NULL_RTX || insn == tail) + return NULL; + + while (1) + { + insn = NEXT_INSN (insn); + if (insn == NULL_RTX || insn == tail) + return NULL; + + if (CALL_P (insn) + || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) + || (NONJUMP_INSN_P (insn) + && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER + && INSN_CODE (insn) != CODE_FOR_stack_tie)) + break; + } + return insn; +} + +/* Do Power9 specific sched_reorder2 reordering of ready list. */ + +static int +power9_sched_reorder2 (rtx_insn **ready, int lastpos) +{ + int pos; + int i; + rtx_insn *tmp; + enum attr_type type, type2; + + type = get_attr_type (last_scheduled_insn); + + /* Try to issue fixed point divides back-to-back in pairs so they will be + routed to separate execution units and execute in parallel. */ + if (type == TYPE_DIV && divide_cnt == 0) + { + /* First divide has been scheduled. */ + divide_cnt = 1; + + /* Scan the ready list looking for another divide, if found move it + to the end of the list so it is chosen next. */ + pos = lastpos; + while (pos >= 0) + { + if (recog_memoized (ready[pos]) >= 0 + && get_attr_type (ready[pos]) == TYPE_DIV) + { + tmp = ready[pos]; + for (i = pos; i < lastpos; i++) + ready[i] = ready[i + 1]; + ready[lastpos] = tmp; + break; + } + pos--; + } + } + else + { + /* Last insn was the 2nd divide or not a divide, reset the counter. */ + divide_cnt = 0; + + /* The best dispatch throughput for vector and vector load insns can be + achieved by interleaving a vector and vector load such that they'll + dispatch to the same superslice. If this pairing cannot be achieved + then it is best to pair vector insns together and vector load insns + together. + + To aid in this pairing, vec_pairing maintains the current state with + the following values: + + 0 : Initial state, no vecload/vector pairing has been started. + + 1 : A vecload or vector insn has been issued and a candidate for + pairing has been found and moved to the end of the ready + list. */ + if (type == TYPE_VECLOAD) + { + /* Issued a vecload. */ + if (vec_pairing == 0) + { + int vecload_pos = -1; + /* We issued a single vecload, look for a vector insn to pair it + with. If one isn't found, try to pair another vecload. */ + pos = lastpos; + while (pos >= 0) + { + if (recog_memoized (ready[pos]) >= 0) + { + type2 = get_attr_type (ready[pos]); + if (is_power9_pairable_vec_type (type2)) + { + /* Found a vector insn to pair with, move it to the + end of the ready list so it is scheduled next. */ + tmp = ready[pos]; + for (i = pos; i < lastpos; i++) + ready[i] = ready[i + 1]; + ready[lastpos] = tmp; + vec_pairing = 1; + return cached_can_issue_more; + } + else if (type2 == TYPE_VECLOAD && vecload_pos == -1) + /* Remember position of first vecload seen. */ + vecload_pos = pos; + } + pos--; + } + if (vecload_pos >= 0) + { + /* Didn't find a vector to pair with but did find a vecload, + move it to the end of the ready list. 
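power9_sched_reorder2 (and the Power6 reorder code further down) repeatedly uses the same idiom: find a candidate in the ready array, slide the elements after it down by one, and place the candidate at the end, which is the slot the scheduler picks from next. A standalone sketch of that rotation, with plain ints instead of rtx_insn pointers:

#include <stdio.h>

/* Move ready[pos] to ready[lastpos], sliding the elements in between
   down by one slot, as the reordering hooks above do with insns.  */
static void
move_to_end (int *ready, int pos, int lastpos)
{
  int tmp = ready[pos];
  for (int i = pos; i < lastpos; i++)
    ready[i] = ready[i + 1];
  ready[lastpos] = tmp;
}

int
main (void)
{
  int ready[5] = { 10, 11, 12, 13, 14 };

  move_to_end (ready, 1, 4);          /* pull element 11 to the end */
  for (int i = 0; i < 5; i++)
    printf ("%d ", ready[i]);         /* 10 12 13 14 11 */
  printf ("\n");
  return 0;
}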
*/ + tmp = ready[vecload_pos]; + for (i = vecload_pos; i < lastpos; i++) + ready[i] = ready[i + 1]; + ready[lastpos] = tmp; + vec_pairing = 1; + return cached_can_issue_more; + } + } + } + else if (is_power9_pairable_vec_type (type)) + { + /* Issued a vector operation. */ + if (vec_pairing == 0) + { + int vec_pos = -1; + /* We issued a single vector insn, look for a vecload to pair it + with. If one isn't found, try to pair another vector. */ + pos = lastpos; + while (pos >= 0) + { + if (recog_memoized (ready[pos]) >= 0) + { + type2 = get_attr_type (ready[pos]); + if (type2 == TYPE_VECLOAD) + { + /* Found a vecload insn to pair with, move it to the + end of the ready list so it is scheduled next. */ + tmp = ready[pos]; + for (i = pos; i < lastpos; i++) + ready[i] = ready[i + 1]; + ready[lastpos] = tmp; + vec_pairing = 1; + return cached_can_issue_more; + } + else if (is_power9_pairable_vec_type (type2) + && vec_pos == -1) + /* Remember position of first vector insn seen. */ + vec_pos = pos; + } + pos--; + } + if (vec_pos >= 0) + { + /* Didn't find a vecload to pair with but did find a vector + insn, move it to the end of the ready list. */ + tmp = ready[vec_pos]; + for (i = vec_pos; i < lastpos; i++) + ready[i] = ready[i + 1]; + ready[lastpos] = tmp; + vec_pairing = 1; + return cached_can_issue_more; + } + } + } + + /* We've either finished a vec/vecload pair, couldn't find an insn to + continue the current pair, or the last insn had nothing to do with + with pairing. In any case, reset the state. */ + vec_pairing = 0; + } + + return cached_can_issue_more; +} + +/* We are about to begin issuing insns for this clock cycle. */ + +static int +rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose, + rtx_insn **ready ATTRIBUTE_UNUSED, + int *pn_ready ATTRIBUTE_UNUSED, + int clock_var ATTRIBUTE_UNUSED) +{ + int n_ready = *pn_ready; + + if (sched_verbose) + fprintf (dump, "// rs6000_sched_reorder :\n"); + + /* Reorder the ready list, if the second to last ready insn + is a nonepipeline insn. */ + if (rs6000_cpu_attr == CPU_CELL && n_ready > 1) + { + if (is_nonpipeline_insn (ready[n_ready - 1]) + && (recog_memoized (ready[n_ready - 2]) > 0)) + /* Simply swap first two insns. */ + std::swap (ready[n_ready - 1], ready[n_ready - 2]); + } + + if (rs6000_cpu == PROCESSOR_POWER6) + load_store_pendulum = 0; + + return rs6000_issue_rate (); +} + +/* Like rs6000_sched_reorder, but called after issuing each insn. */ + +static int +rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready, + int *pn_ready, int clock_var ATTRIBUTE_UNUSED) +{ + if (sched_verbose) + fprintf (dump, "// rs6000_sched_reorder2 :\n"); + + /* For Power6, we need to handle some special cases to try and keep the + store queue from overflowing and triggering expensive flushes. + + This code monitors how load and store instructions are being issued + and skews the ready list one way or the other to increase the likelihood + that a desired instruction is issued at the proper time. + + A couple of things are done. First, we maintain a "load_store_pendulum" + to track the current state of load/store issue. + + - If the pendulum is at zero, then no loads or stores have been + issued in the current cycle so we do nothing. + + - If the pendulum is 1, then a single load has been issued in this + cycle and we attempt to locate another load in the ready list to + issue with it. 
+ + - If the pendulum is -2, then two stores have already been + issued in this cycle, so we increase the priority of the first load + in the ready list to increase it's likelihood of being chosen first + in the next cycle. + + - If the pendulum is -1, then a single store has been issued in this + cycle and we attempt to locate another store in the ready list to + issue with it, preferring a store to an adjacent memory location to + facilitate store pairing in the store queue. + + - If the pendulum is 2, then two loads have already been + issued in this cycle, so we increase the priority of the first store + in the ready list to increase it's likelihood of being chosen first + in the next cycle. + + - If the pendulum < -2 or > 2, then do nothing. + + Note: This code covers the most common scenarios. There exist non + load/store instructions which make use of the LSU and which + would need to be accounted for to strictly model the behavior + of the machine. Those instructions are currently unaccounted + for to help minimize compile time overhead of this code. + */ + if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn) + { + int pos; + int i; + rtx_insn *tmp; + rtx load_mem, str_mem; + + if (is_store_insn (last_scheduled_insn, &str_mem)) + /* Issuing a store, swing the load_store_pendulum to the left */ + load_store_pendulum--; + else if (is_load_insn (last_scheduled_insn, &load_mem)) + /* Issuing a load, swing the load_store_pendulum to the right */ + load_store_pendulum++; + else + return cached_can_issue_more; + + /* If the pendulum is balanced, or there is only one instruction on + the ready list, then all is well, so return. */ + if ((load_store_pendulum == 0) || (*pn_ready <= 1)) + return cached_can_issue_more; + + if (load_store_pendulum == 1) + { + /* A load has been issued in this cycle. Scan the ready list + for another load to issue with it */ + pos = *pn_ready-1; + + while (pos >= 0) + { + if (is_load_insn (ready[pos], &load_mem)) + { + /* Found a load. Move it to the head of the ready list, + and adjust it's priority so that it is more likely to + stay there */ + tmp = ready[pos]; + for (i=pos; i<*pn_ready-1; i++) + ready[i] = ready[i + 1]; + ready[*pn_ready-1] = tmp; + + if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp)) + INSN_PRIORITY (tmp)++; + break; + } + pos--; + } + } + else if (load_store_pendulum == -2) + { + /* Two stores have been issued in this cycle. Increase the + priority of the first load in the ready list to favor it for + issuing in the next cycle. */ + pos = *pn_ready-1; + + while (pos >= 0) + { + if (is_load_insn (ready[pos], &load_mem) + && !sel_sched_p () + && INSN_PRIORITY_KNOWN (ready[pos])) + { + INSN_PRIORITY (ready[pos])++; + + /* Adjust the pendulum to account for the fact that a load + was found and increased in priority. This is to prevent + increasing the priority of multiple loads */ + load_store_pendulum--; + + break; + } + pos--; + } + } + else if (load_store_pendulum == -1) + { + /* A store has been issued in this cycle. Scan the ready list for + another store to issue with it, preferring a store to an adjacent + memory location */ + int first_store_pos = -1; + + pos = *pn_ready-1; + + while (pos >= 0) + { + if (is_store_insn (ready[pos], &str_mem)) + { + rtx str_mem2; + /* Maintain the index of the first store found on the + list */ + if (first_store_pos == -1) + first_store_pos = pos; + + if (is_store_insn (last_scheduled_insn, &str_mem2) + && adjacent_mem_locations (str_mem, str_mem2)) + { + /* Found an adjacent store. 
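The Power6 code above tracks this with a single counter, load_store_pendulum: each store swings it one step negative, each load one step positive, and the magnitude tells the reorder hook whether to look for a partner of the same kind this cycle or to boost the other kind for the next cycle. A tiny sketch of just the counter behaviour; the enum and helper are illustrative.

#include <stdio.h>

enum mem_op { OP_LOAD, OP_STORE, OP_OTHER };

/* Update the pendulum as the hook above does: loads swing it right (+1),
   stores swing it left (-1), anything else leaves it alone.  */
static int
swing (int pendulum, enum mem_op op)
{
  if (op == OP_LOAD)
    return pendulum + 1;
  if (op == OP_STORE)
    return pendulum - 1;
  return pendulum;
}

int
main (void)
{
  int p = 0;                      /* reset at the start of each cycle */

  p = swing (p, OP_STORE);        /* -1: look for a second, adjacent store */
  p = swing (p, OP_STORE);        /* -2: next cycle, favor the first load  */
  printf ("%d\n", p);
  return 0;
}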
Move it to the head of the + ready list, and adjust it's priority so that it is + more likely to stay there */ + tmp = ready[pos]; + for (i=pos; i<*pn_ready-1; i++) + ready[i] = ready[i + 1]; + ready[*pn_ready-1] = tmp; + + if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp)) + INSN_PRIORITY (tmp)++; + + first_store_pos = -1; + + break; + }; + } + pos--; + } + + if (first_store_pos >= 0) + { + /* An adjacent store wasn't found, but a non-adjacent store was, + so move the non-adjacent store to the front of the ready + list, and adjust its priority so that it is more likely to + stay there. */ + tmp = ready[first_store_pos]; + for (i=first_store_pos; i<*pn_ready-1; i++) + ready[i] = ready[i + 1]; + ready[*pn_ready-1] = tmp; + if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp)) + INSN_PRIORITY (tmp)++; + } + } + else if (load_store_pendulum == 2) + { + /* Two loads have been issued in this cycle. Increase the priority + of the first store in the ready list to favor it for issuing in + the next cycle. */ + pos = *pn_ready-1; + + while (pos >= 0) + { + if (is_store_insn (ready[pos], &str_mem) + && !sel_sched_p () + && INSN_PRIORITY_KNOWN (ready[pos])) + { + INSN_PRIORITY (ready[pos])++; + + /* Adjust the pendulum to account for the fact that a store + was found and increased in priority. This is to prevent + increasing the priority of multiple stores */ + load_store_pendulum++; + + break; + } + pos--; + } + } + } + + /* Do Power9 dependent reordering if necessary. */ + if (rs6000_cpu == PROCESSOR_POWER9 && last_scheduled_insn + && recog_memoized (last_scheduled_insn) >= 0) + return power9_sched_reorder2 (ready, *pn_ready - 1); + + return cached_can_issue_more; +} + +/* Return whether the presence of INSN causes a dispatch group termination + of group WHICH_GROUP. + + If WHICH_GROUP == current_group, this function will return true if INSN + causes the termination of the current group (i.e, the dispatch group to + which INSN belongs). This means that INSN will be the last insn in the + group it belongs to. + + If WHICH_GROUP == previous_group, this function will return true if INSN + causes the termination of the previous group (i.e, the dispatch group that + precedes the group to which INSN belongs). This means that INSN will be + the first insn in the group it belongs to). */ + +static bool +insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group) +{ + bool first, last; + + if (! 
insn) + return false; + + first = insn_must_be_first_in_group (insn); + last = insn_must_be_last_in_group (insn); + + if (first && last) + return true; + + if (which_group == current_group) + return last; + else if (which_group == previous_group) + return first; + + return false; +} + + +static bool +insn_must_be_first_in_group (rtx_insn *insn) +{ + enum attr_type type; + + if (!insn + || NOTE_P (insn) + || DEBUG_INSN_P (insn) + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + return false; + + switch (rs6000_cpu) + { + case PROCESSOR_POWER5: + if (is_cracked_insn (insn)) + return true; + /* FALLTHRU */ + case PROCESSOR_POWER4: + if (is_microcoded_insn (insn)) + return true; + + if (!rs6000_sched_groups) + return false; + + type = get_attr_type (insn); + + switch (type) + { + case TYPE_MFCR: + case TYPE_MFCRF: + case TYPE_MTCR: + case TYPE_DELAYED_CR: + case TYPE_CR_LOGICAL: + case TYPE_MTJMPR: + case TYPE_MFJMPR: + case TYPE_DIV: + case TYPE_LOAD_L: + case TYPE_STORE_C: + case TYPE_ISYNC: + case TYPE_SYNC: + return true; + default: + break; + } + break; + case PROCESSOR_POWER6: + type = get_attr_type (insn); + + switch (type) + { + case TYPE_EXTS: + case TYPE_CNTLZ: + case TYPE_TRAP: + case TYPE_MUL: + case TYPE_INSERT: + case TYPE_FPCOMPARE: + case TYPE_MFCR: + case TYPE_MTCR: + case TYPE_MFJMPR: + case TYPE_MTJMPR: + case TYPE_ISYNC: + case TYPE_SYNC: + case TYPE_LOAD_L: + case TYPE_STORE_C: + return true; + case TYPE_SHIFT: + if (get_attr_dot (insn) == DOT_NO + || get_attr_var_shift (insn) == VAR_SHIFT_NO) + return true; + else + break; + case TYPE_DIV: + if (get_attr_size (insn) == SIZE_32) + return true; + else + break; + case TYPE_LOAD: + case TYPE_STORE: + case TYPE_FPLOAD: + case TYPE_FPSTORE: + if (get_attr_update (insn) == UPDATE_YES) + return true; + else + break; + default: + break; + } + break; + case PROCESSOR_POWER7: + type = get_attr_type (insn); + + switch (type) + { + case TYPE_CR_LOGICAL: + case TYPE_MFCR: + case TYPE_MFCRF: + case TYPE_MTCR: + case TYPE_DIV: + case TYPE_ISYNC: + case TYPE_LOAD_L: + case TYPE_STORE_C: + case TYPE_MFJMPR: + case TYPE_MTJMPR: + return true; + case TYPE_MUL: + case TYPE_SHIFT: + case TYPE_EXTS: + if (get_attr_dot (insn) == DOT_YES) + return true; + else + break; + case TYPE_LOAD: + if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES + || get_attr_update (insn) == UPDATE_YES) + return true; + else + break; + case TYPE_STORE: + case TYPE_FPLOAD: + case TYPE_FPSTORE: + if (get_attr_update (insn) == UPDATE_YES) + return true; + else + break; + default: + break; + } + break; + case PROCESSOR_POWER8: + type = get_attr_type (insn); + + switch (type) + { + case TYPE_CR_LOGICAL: + case TYPE_DELAYED_CR: + case TYPE_MFCR: + case TYPE_MFCRF: + case TYPE_MTCR: + case TYPE_SYNC: + case TYPE_ISYNC: + case TYPE_LOAD_L: + case TYPE_STORE_C: + case TYPE_VECSTORE: + case TYPE_MFJMPR: + case TYPE_MTJMPR: + return true; + case TYPE_SHIFT: + case TYPE_EXTS: + case TYPE_MUL: + if (get_attr_dot (insn) == DOT_YES) + return true; + else + break; + case TYPE_LOAD: + if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES + || get_attr_update (insn) == UPDATE_YES) + return true; + else + break; + case TYPE_STORE: + if (get_attr_update (insn) == UPDATE_YES + && get_attr_indexed (insn) == INDEXED_YES) + return true; + else + break; + default: + break; + } + break; + default: + break; + } + + return false; +} + +static bool +insn_must_be_last_in_group (rtx_insn *insn) +{ + enum attr_type type; + + if (!insn + || NOTE_P (insn) + || DEBUG_INSN_P 
(insn) + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + return false; + + switch (rs6000_cpu) { + case PROCESSOR_POWER4: + case PROCESSOR_POWER5: + if (is_microcoded_insn (insn)) + return true; + + if (is_branch_slot_insn (insn)) + return true; + + break; + case PROCESSOR_POWER6: + type = get_attr_type (insn); + + switch (type) + { + case TYPE_EXTS: + case TYPE_CNTLZ: + case TYPE_TRAP: + case TYPE_MUL: + case TYPE_FPCOMPARE: + case TYPE_MFCR: + case TYPE_MTCR: + case TYPE_MFJMPR: + case TYPE_MTJMPR: + case TYPE_ISYNC: + case TYPE_SYNC: + case TYPE_LOAD_L: + case TYPE_STORE_C: + return true; + case TYPE_SHIFT: + if (get_attr_dot (insn) == DOT_NO + || get_attr_var_shift (insn) == VAR_SHIFT_NO) + return true; + else + break; + case TYPE_DIV: + if (get_attr_size (insn) == SIZE_32) + return true; + else + break; + default: + break; + } + break; + case PROCESSOR_POWER7: + type = get_attr_type (insn); + + switch (type) + { + case TYPE_ISYNC: + case TYPE_SYNC: + case TYPE_LOAD_L: + case TYPE_STORE_C: + return true; + case TYPE_LOAD: + if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES + && get_attr_update (insn) == UPDATE_YES) + return true; + else + break; + case TYPE_STORE: + if (get_attr_update (insn) == UPDATE_YES + && get_attr_indexed (insn) == INDEXED_YES) + return true; + else + break; + default: + break; + } + break; + case PROCESSOR_POWER8: + type = get_attr_type (insn); + + switch (type) + { + case TYPE_MFCR: + case TYPE_MTCR: + case TYPE_ISYNC: + case TYPE_SYNC: + case TYPE_LOAD_L: + case TYPE_STORE_C: + return true; + case TYPE_LOAD: + if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES + && get_attr_update (insn) == UPDATE_YES) + return true; + else + break; + case TYPE_STORE: + if (get_attr_update (insn) == UPDATE_YES + && get_attr_indexed (insn) == INDEXED_YES) + return true; + else + break; + default: + break; + } + break; + default: + break; + } + + return false; +} + +/* Return true if it is recommended to keep NEXT_INSN "far" (in a separate + dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */ + +static bool +is_costly_group (rtx *group_insns, rtx next_insn) +{ + int i; + int issue_rate = rs6000_issue_rate (); + + for (i = 0; i < issue_rate; i++) + { + sd_iterator_def sd_it; + dep_t dep; + rtx insn = group_insns[i]; + + if (!insn) + continue; + + FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep) + { + rtx next = DEP_CON (dep); + + if (next == next_insn + && rs6000_is_costly_dependence (dep, dep_cost (dep), 0)) + return true; + } + } + + return false; +} + +/* Utility of the function redefine_groups. + Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS + in the same dispatch group. If so, insert nops before NEXT_INSN, in order + to keep it "far" (in a separate group) from GROUP_INSNS, following + one of the following schemes, depending on the value of the flag + -minsert_sched_nops = X: + (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed + in order to force NEXT_INSN into a separate group. + (2) X < sched_finish_regroup_exact: insert exactly X nops. + GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop + insertion (has a group just ended, how many vacant issue slots remain in the + last group, and how many dispatch groups were encountered so far). 
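The comment above describes two -minsert-sched-nops schemes: the regroup_exact mode inserts however many nops are needed to push NEXT_INSN into a fresh dispatch group, while a plain numeric value inserts exactly that many nops. A deliberately simplified sketch of choosing the nop count follows; it ignores the branch-slot special case and the group bookkeeping that force_new_group below also handles, and the sentinel value is illustrative.

#include <stdio.h>

/* Illustrative marker for -minsert-sched-nops=regroup_exact; any smaller
   value is taken as a literal nop count, as in the comment above.  */
#define REGROUP_EXACT 1000

/* How many nops to emit before the next insn: either enough to use up
   every vacant issue slot (forcing a new group), or a fixed number.  */
static int
nops_to_insert (int scheme, int vacant_slots)
{
  if (scheme == REGROUP_EXACT)
    return vacant_slots;
  return scheme;
}

int
main (void)
{
  printf ("%d\n", nops_to_insert (REGROUP_EXACT, 3)); /* 3: close the group */
  printf ("%d\n", nops_to_insert (2, 3));             /* 2: fixed count     */
  return 0;
}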
*/ + +static int +force_new_group (int sched_verbose, FILE *dump, rtx *group_insns, + rtx_insn *next_insn, bool *group_end, int can_issue_more, + int *group_count) +{ + rtx nop; + bool force; + int issue_rate = rs6000_issue_rate (); + bool end = *group_end; + int i; + + if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn)) + return can_issue_more; + + if (rs6000_sched_insert_nops > sched_finish_regroup_exact) + return can_issue_more; + + force = is_costly_group (group_insns, next_insn); + if (!force) + return can_issue_more; + + if (sched_verbose > 6) + fprintf (dump,"force: group count = %d, can_issue_more = %d\n", + *group_count ,can_issue_more); + + if (rs6000_sched_insert_nops == sched_finish_regroup_exact) + { + if (*group_end) + can_issue_more = 0; + + /* Since only a branch can be issued in the last issue_slot, it is + sufficient to insert 'can_issue_more - 1' nops if next_insn is not + a branch. If next_insn is a branch, we insert 'can_issue_more' nops; + in this case the last nop will start a new group and the branch + will be forced to the new group. */ + if (can_issue_more && !is_branch_slot_insn (next_insn)) + can_issue_more--; + + /* Do we have a special group ending nop? */ + if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7 + || rs6000_cpu_attr == CPU_POWER8) + { + nop = gen_group_ending_nop (); + emit_insn_before (nop, next_insn); + can_issue_more = 0; + } + else + while (can_issue_more > 0) + { + nop = gen_nop (); + emit_insn_before (nop, next_insn); + can_issue_more--; + } + + *group_end = true; + return 0; + } + + if (rs6000_sched_insert_nops < sched_finish_regroup_exact) + { + int n_nops = rs6000_sched_insert_nops; + + /* Nops can't be issued from the branch slot, so the effective + issue_rate for nops is 'issue_rate - 1'. */ + if (can_issue_more == 0) + can_issue_more = issue_rate; + can_issue_more--; + if (can_issue_more == 0) + { + can_issue_more = issue_rate - 1; + (*group_count)++; + end = true; + for (i = 0; i < issue_rate; i++) + { + group_insns[i] = 0; + } + } + + while (n_nops > 0) + { + nop = gen_nop (); + emit_insn_before (nop, next_insn); + if (can_issue_more == issue_rate - 1) /* new group begins */ + end = false; + can_issue_more--; + if (can_issue_more == 0) + { + can_issue_more = issue_rate - 1; + (*group_count)++; + end = true; + for (i = 0; i < issue_rate; i++) + { + group_insns[i] = 0; + } + } + n_nops--; + } + + /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */ + can_issue_more++; + + /* Is next_insn going to start a new group? */ + *group_end + = (end + || (can_issue_more == 1 && !is_branch_slot_insn (next_insn)) + || (can_issue_more <= 2 && is_cracked_insn (next_insn)) + || (can_issue_more < issue_rate && + insn_terminates_group_p (next_insn, previous_group))); + if (*group_end && end) + (*group_count)--; + + if (sched_verbose > 6) + fprintf (dump, "done force: group count = %d, can_issue_more = %d\n", + *group_count, can_issue_more); + return can_issue_more; + } + + return can_issue_more; +} + +/* This function tries to synch the dispatch groups that the compiler "sees" + with the dispatch groups that the processor dispatcher is expected to + form in practice. It tries to achieve this synchronization by forcing the + estimated processor grouping on the compiler (as opposed to the function + 'pad_goups' which tries to force the scheduler's grouping on the processor). 
+ + The function scans the insn sequence between PREV_HEAD_INSN and TAIL and + examines the (estimated) dispatch groups that will be formed by the processor + dispatcher. It marks these group boundaries to reflect the estimated + processor grouping, overriding the grouping that the scheduler had marked. + Depending on the value of the flag '-minsert-sched-nops' this function can + force certain insns into separate groups or force a certain distance between + them by inserting nops, for example, if there exists a "costly dependence" + between the insns. + + The function estimates the group boundaries that the processor will form as + follows: It keeps track of how many vacant issue slots are available after + each insn. A subsequent insn will start a new group if one of the following + 4 cases applies: + - no more vacant issue slots remain in the current dispatch group. + - only the last issue slot, which is the branch slot, is vacant, but the next + insn is not a branch. + - only the last 2 or less issue slots, including the branch slot, are vacant, + which means that a cracked insn (which occupies two issue slots) can't be + issued in this group. + - less than 'issue_rate' slots are vacant, and the next insn always needs to + start a new group. */ + +static int +redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn, + rtx_insn *tail) +{ + rtx_insn *insn, *next_insn; + int issue_rate; + int can_issue_more; + int slot, i; + bool group_end; + int group_count = 0; + rtx *group_insns; + + /* Initialize. */ + issue_rate = rs6000_issue_rate (); + group_insns = XALLOCAVEC (rtx, issue_rate); + for (i = 0; i < issue_rate; i++) + { + group_insns[i] = 0; + } + can_issue_more = issue_rate; + slot = 0; + insn = get_next_active_insn (prev_head_insn, tail); + group_end = false; + + while (insn != NULL_RTX) + { + slot = (issue_rate - can_issue_more); + group_insns[slot] = insn; + can_issue_more = + rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more); + if (insn_terminates_group_p (insn, current_group)) + can_issue_more = 0; + + next_insn = get_next_active_insn (insn, tail); + if (next_insn == NULL_RTX) + return group_count + 1; + + /* Is next_insn going to start a new group? */ + group_end + = (can_issue_more == 0 + || (can_issue_more == 1 && !is_branch_slot_insn (next_insn)) + || (can_issue_more <= 2 && is_cracked_insn (next_insn)) + || (can_issue_more < issue_rate && + insn_terminates_group_p (next_insn, previous_group))); + + can_issue_more = force_new_group (sched_verbose, dump, group_insns, + next_insn, &group_end, can_issue_more, + &group_count); + + if (group_end) + { + group_count++; + can_issue_more = 0; + for (i = 0; i < issue_rate; i++) + { + group_insns[i] = 0; + } + } + + if (GET_MODE (next_insn) == TImode && can_issue_more) + PUT_MODE (next_insn, VOIDmode); + else if (!can_issue_more && GET_MODE (next_insn) != TImode) + PUT_MODE (next_insn, TImode); + + insn = next_insn; + if (can_issue_more == 0) + can_issue_more = issue_rate; + } /* while */ + + return group_count; +} + +/* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the + dispatch group boundaries that the scheduler had marked. Pad with nops + any dispatch groups which have vacant issue slots, in order to force the + scheduler's grouping on the processor dispatcher. The function + returns the number of dispatch groups found. 
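redefine_groups above decides whether the next insn begins a new dispatch group using the four conditions listed in its comment. As a standalone boolean sketch, with plain flags replacing the insn attribute queries (illustrative only):

#include <stdbool.h>
#include <stdio.h>

/* Does the next insn begin a new dispatch group?  Mirrors the four cases
   from the redefine_groups comment: no slots left; only the branch slot
   left but the insn is not a branch; too few slots for a cracked insn;
   or the insn must be first in its group anyway.  */
static bool
starts_new_group (int vacant_slots, int issue_rate,
                  bool next_is_branch, bool next_is_cracked,
                  bool next_must_be_first)
{
  return vacant_slots == 0
         || (vacant_slots == 1 && !next_is_branch)
         || (vacant_slots <= 2 && next_is_cracked)
         || (vacant_slots < issue_rate && next_must_be_first);
}

int
main (void)
{
  /* One vacant slot (the branch slot) and a non-branch insn: new group.  */
  printf ("%d\n", starts_new_group (1, 5, false, false, false)); /* 1 */
  /* Three vacant slots and an ordinary insn: same group.  */
  printf ("%d\n", starts_new_group (3, 5, false, false, false)); /* 0 */
  return 0;
}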
*/ + +static int +pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn, + rtx_insn *tail) +{ + rtx_insn *insn, *next_insn; + rtx nop; + int issue_rate; + int can_issue_more; + int group_end; + int group_count = 0; + + /* Initialize issue_rate. */ + issue_rate = rs6000_issue_rate (); + can_issue_more = issue_rate; + + insn = get_next_active_insn (prev_head_insn, tail); + next_insn = get_next_active_insn (insn, tail); + + while (insn != NULL_RTX) + { + can_issue_more = + rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more); + + group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode); + + if (next_insn == NULL_RTX) + break; + + if (group_end) + { + /* If the scheduler had marked group termination at this location + (between insn and next_insn), and neither insn nor next_insn will + force group termination, pad the group with nops to force group + termination. */ + if (can_issue_more + && (rs6000_sched_insert_nops == sched_finish_pad_groups) + && !insn_terminates_group_p (insn, current_group) + && !insn_terminates_group_p (next_insn, previous_group)) + { + if (!is_branch_slot_insn (next_insn)) + can_issue_more--; + + while (can_issue_more) + { + nop = gen_nop (); + emit_insn_before (nop, next_insn); + can_issue_more--; + } + } + + can_issue_more = issue_rate; + group_count++; + } + + insn = next_insn; + next_insn = get_next_active_insn (insn, tail); + } + + return group_count; +} + +/* We're beginning a new block. Initialize data structures as necessary. */ + +static void +rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED, + int sched_verbose ATTRIBUTE_UNUSED, + int max_ready ATTRIBUTE_UNUSED) +{ + last_scheduled_insn = NULL; + load_store_pendulum = 0; + divide_cnt = 0; + vec_pairing = 0; +} + +/* The following function is called at the end of scheduling BB. + After reload, it inserts nops at insn group bundling. */ + +static void +rs6000_sched_finish (FILE *dump, int sched_verbose) +{ + int n_groups; + + if (sched_verbose) + fprintf (dump, "=== Finishing schedule.\n"); + + if (reload_completed && rs6000_sched_groups) + { + /* Do not run sched_finish hook when selective scheduling enabled. */ + if (sel_sched_p ()) + return; + + if (rs6000_sched_insert_nops == sched_finish_none) + return; + + if (rs6000_sched_insert_nops == sched_finish_pad_groups) + n_groups = pad_groups (dump, sched_verbose, + current_sched_info->prev_head, + current_sched_info->next_tail); + else + n_groups = redefine_groups (dump, sched_verbose, + current_sched_info->prev_head, + current_sched_info->next_tail); + + if (sched_verbose >= 6) + { + fprintf (dump, "ngroups = %d\n", n_groups); + print_rtl (dump, current_sched_info->prev_head); + fprintf (dump, "Done finish_sched\n"); + } + } +} + +struct rs6000_sched_context +{ + short cached_can_issue_more; + rtx_insn *last_scheduled_insn; + int load_store_pendulum; + int divide_cnt; + int vec_pairing; +}; + +typedef struct rs6000_sched_context rs6000_sched_context_def; +typedef rs6000_sched_context_def *rs6000_sched_context_t; + +/* Allocate store for new scheduling context. */ +static void * +rs6000_alloc_sched_context (void) +{ + return xmalloc (sizeof (rs6000_sched_context_def)); +} + +/* If CLEAN_P is true then initializes _SC with clean data, + and from the global context otherwise. 
*/ +static void +rs6000_init_sched_context (void *_sc, bool clean_p) +{ + rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc; + + if (clean_p) + { + sc->cached_can_issue_more = 0; + sc->last_scheduled_insn = NULL; + sc->load_store_pendulum = 0; + sc->divide_cnt = 0; + sc->vec_pairing = 0; + } + else + { + sc->cached_can_issue_more = cached_can_issue_more; + sc->last_scheduled_insn = last_scheduled_insn; + sc->load_store_pendulum = load_store_pendulum; + sc->divide_cnt = divide_cnt; + sc->vec_pairing = vec_pairing; + } +} + +/* Sets the global scheduling context to the one pointed to by _SC. */ +static void +rs6000_set_sched_context (void *_sc) +{ + rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc; + + gcc_assert (sc != NULL); + + cached_can_issue_more = sc->cached_can_issue_more; + last_scheduled_insn = sc->last_scheduled_insn; + load_store_pendulum = sc->load_store_pendulum; + divide_cnt = sc->divide_cnt; + vec_pairing = sc->vec_pairing; +} + +/* Free _SC. */ +static void +rs6000_free_sched_context (void *_sc) +{ + gcc_assert (_sc != NULL); + + free (_sc); +} + +static bool +rs6000_sched_can_speculate_insn (rtx_insn *insn) +{ + switch (get_attr_type (insn)) + { + case TYPE_DIV: + case TYPE_SDIV: + case TYPE_DDIV: + case TYPE_VECDIV: + case TYPE_SSQRT: + case TYPE_DSQRT: + return false; + + default: + return true; + } +} + +/* Length in units of the trampoline for entering a nested function. */ + +int +rs6000_trampoline_size (void) +{ + int ret = 0; + + switch (DEFAULT_ABI) + { + default: + gcc_unreachable (); + + case ABI_AIX: + ret = (TARGET_32BIT) ? 12 : 24; + break; + + case ABI_ELFv2: + gcc_assert (!TARGET_32BIT); + ret = 32; + break; + + case ABI_DARWIN: + case ABI_V4: + ret = (TARGET_32BIT) ? 40 : 48; + break; + } + + return ret; +} + +/* Emit RTL insns to initialize the variable parts of a trampoline. + FNADDR is an RTX for the address of the function's pure code. + CXT is an RTX for the static chain value for the function. */ + +static void +rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt) +{ + int regsize = (TARGET_32BIT) ? 4 : 8; + rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); + rtx ctx_reg = force_reg (Pmode, cxt); + rtx addr = force_reg (Pmode, XEXP (m_tramp, 0)); + + switch (DEFAULT_ABI) + { + default: + gcc_unreachable (); + + /* Under AIX, just build the 3 word function descriptor */ + case ABI_AIX: + { + rtx fnmem, fn_reg, toc_reg; + + if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS) + error ("You cannot take the address of a nested function if you use " + "the -mno-pointers-to-nested-functions option."); + + fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr)); + fn_reg = gen_reg_rtx (Pmode); + toc_reg = gen_reg_rtx (Pmode); + + /* Macro to shorten the code expansions below. */ +# define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET) + + m_tramp = replace_equiv_address (m_tramp, addr); + + emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0)); + emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize)); + emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg); + emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg); + emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg); + +# undef MEM_PLUS + } + break; + + /* Under V.4/eabi/darwin, __trampoline_setup does the real work. 
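For the AIX ABI, rs6000_trampoline_init above fills a three-word block: the target's code address and TOC pointer copied from its function descriptor, followed by the static chain value. A sketch of that layout as a plain struct; the type and field names are illustrative, and GCC emits the three stores as RTL rather than through any such struct.

#include <stdint.h>
#include <stdio.h>

/* Illustrative layout of the 3-word AIX trampoline/descriptor, one word
   per field (4 bytes for -m32, 8 bytes for -m64).  */
struct aix_descriptor
{
  uintptr_t code_addr;     /* entry point of the nested function       */
  uintptr_t toc;           /* its TOC pointer                          */
  uintptr_t static_chain;  /* context value for the nested function    */
};

int
main (void)
{
  /* Matches rs6000_trampoline_size for ABI_AIX: 3 words of 4 or 8 bytes,
     i.e. 12 or 24 bytes depending on the word size.  */
  printf ("%zu bytes\n", sizeof (struct aix_descriptor));
  return 0;
}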
*/ + case ABI_ELFv2: + case ABI_DARWIN: + case ABI_V4: + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"), + LCT_NORMAL, VOIDmode, 4, + addr, Pmode, + GEN_INT (rs6000_trampoline_size ()), SImode, + fnaddr, Pmode, + ctx_reg, Pmode); + break; + } +} + + +/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain + identifier as an argument, so the front end shouldn't look it up. */ + +static bool +rs6000_attribute_takes_identifier_p (const_tree attr_id) +{ + return is_attribute_p ("altivec", attr_id); +} + +/* Handle the "altivec" attribute. The attribute may have + arguments as follows: + + __attribute__((altivec(vector__))) + __attribute__((altivec(pixel__))) (always followed by 'unsigned short') + __attribute__((altivec(bool__))) (always followed by 'unsigned') + + and may appear more than once (e.g., 'vector bool char') in a + given declaration. */ + +static tree +rs6000_handle_altivec_attribute (tree *node, + tree name ATTRIBUTE_UNUSED, + tree args, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + tree type = *node, result = NULL_TREE; + machine_mode mode; + int unsigned_p; + char altivec_type + = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args) + && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE) + ? *IDENTIFIER_POINTER (TREE_VALUE (args)) + : '?'); + + while (POINTER_TYPE_P (type) + || TREE_CODE (type) == FUNCTION_TYPE + || TREE_CODE (type) == METHOD_TYPE + || TREE_CODE (type) == ARRAY_TYPE) + type = TREE_TYPE (type); + + mode = TYPE_MODE (type); + + /* Check for invalid AltiVec type qualifiers. */ + if (type == long_double_type_node) + error ("use of %<long double%> in AltiVec types is invalid"); + else if (type == boolean_type_node) + error ("use of boolean types in AltiVec types is invalid"); + else if (TREE_CODE (type) == COMPLEX_TYPE) + error ("use of %<complex%> in AltiVec types is invalid"); + else if (DECIMAL_FLOAT_MODE_P (mode)) + error ("use of decimal floating point types in AltiVec types is invalid"); + else if (!TARGET_VSX) + { + if (type == long_unsigned_type_node || type == long_integer_type_node) + { + if (TARGET_64BIT) + error ("use of %<long%> in AltiVec types is invalid for " + "64-bit code without -mvsx"); + else if (rs6000_warn_altivec_long) + warning (0, "use of %<long%> in AltiVec types is deprecated; " + "use %<int%>"); + } + else if (type == long_long_unsigned_type_node + || type == long_long_integer_type_node) + error ("use of %<long long%> in AltiVec types is invalid without " + "-mvsx"); + else if (type == double_type_node) + error ("use of %<double%> in AltiVec types is invalid without -mvsx"); + } + + switch (altivec_type) + { + case 'v': + unsigned_p = TYPE_UNSIGNED (type); + switch (mode) + { + case TImode: + result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node); + break; + case DImode: + result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node); + break; + case SImode: + result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node); + break; + case HImode: + result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node); + break; + case QImode: + result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node); + break; + case SFmode: result = V4SF_type_node; break; + case DFmode: result = V2DF_type_node; break; + /* If the user says 'vector int bool', we may be handed the 'bool' + attribute _before_ the 'vector' attribute, and so select the + proper type in the 'b' case below. 
*/ + case V4SImode: case V8HImode: case V16QImode: case V4SFmode: + case V2DImode: case V2DFmode: + result = type; + default: break; + } + break; + case 'b': + switch (mode) + { + case DImode: case V2DImode: result = bool_V2DI_type_node; break; + case SImode: case V4SImode: result = bool_V4SI_type_node; break; + case HImode: case V8HImode: result = bool_V8HI_type_node; break; + case QImode: case V16QImode: result = bool_V16QI_type_node; + default: break; + } + break; + case 'p': + switch (mode) + { + case V8HImode: result = pixel_V8HI_type_node; + default: break; + } + default: break; + } + + /* Propagate qualifiers attached to the element type + onto the vector type. */ + if (result && result != type && TYPE_QUALS (type)) + result = build_qualified_type (result, TYPE_QUALS (type)); + + *no_add_attrs = true; /* No need to hang on to the attribute. */ + + if (result) + *node = lang_hooks.types.reconstruct_complex_type (*node, result); + + return NULL_TREE; +} + +/* AltiVec defines four built-in scalar types that serve as vector + elements; we must teach the compiler how to mangle them. */ + +static const char * +rs6000_mangle_type (const_tree type) +{ + type = TYPE_MAIN_VARIANT (type); + + if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE + && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) + return NULL; + + if (type == bool_char_type_node) return "U6__boolc"; + if (type == bool_short_type_node) return "U6__bools"; + if (type == pixel_type_node) return "u7__pixel"; + if (type == bool_int_type_node) return "U6__booli"; + if (type == bool_long_type_node) return "U6__booll"; + + /* Use a unique name for __float128 rather than trying to use "e" or "g". Use + "g" for IBM extended double, no matter whether it is long double (using + -mabi=ibmlongdouble) or the distinct __ibm128 type. */ + if (TARGET_FLOAT128_TYPE) + { + if (type == ieee128_float_type_node) + return "U10__float128"; + + if (type == ibm128_float_type_node) + return "g"; + + if (type == long_double_type_node && TARGET_LONG_DOUBLE_128) + return (TARGET_IEEEQUAD) ? "U10__float128" : "g"; + } + + /* Mangle IBM extended float long double as `g' (__float128) on + powerpc*-linux where long-double-64 previously was the default. */ + if (TYPE_MAIN_VARIANT (type) == long_double_type_node + && TARGET_ELF + && TARGET_LONG_DOUBLE_128 + && !TARGET_IEEEQUAD) + return "g"; + + /* For all other types, use normal C++ mangling. */ + return NULL; +} + +/* Handle a "longcall" or "shortcall" attribute; arguments as in + struct attribute_spec.handler. */ + +static tree +rs6000_handle_longcall_attribute (tree *node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_TYPE + && TREE_CODE (*node) != FIELD_DECL + && TREE_CODE (*node) != TYPE_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + +/* Set longcall attributes on all functions declared when + rs6000_default_long_calls is true. */ +static void +rs6000_set_default_type_attributes (tree type) +{ + if (rs6000_default_long_calls + && (TREE_CODE (type) == FUNCTION_TYPE + || TREE_CODE (type) == METHOD_TYPE)) + TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"), + NULL_TREE, + TYPE_ATTRIBUTES (type)); + +#if TARGET_MACHO + darwin_set_default_type_attributes (type); +#endif +} + +/* Return a reference suitable for calling a function with the + longcall attribute. 
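rs6000_handle_altivec_attribute above is what gives the internal altivec attribute its meaning: applied to a scalar element type it yields the corresponding 128-bit vector type ('v'), a boolean vector ('b'), or the pixel type ('p'). A hypothetical user-level illustration of the three spellings documented in the comment follows; normally these attributes are produced by the vector/__vector, __bool and __pixel keywords from altivec.h rather than written by hand, and the snippet compiles only for a PowerPC target with AltiVec enabled. The typedef names are invented.

/* Assumes a powerpc*-*-* compiler with -maltivec; illustrative only.  */
typedef __attribute__ ((altivec (vector__))) int            vec_int;    /* V4SI       */
typedef __attribute__ ((altivec (vector__))) float          vec_float;  /* V4SF       */
typedef __attribute__ ((altivec (bool__)))   unsigned int   vec_bool;   /* bool V4SI  */
typedef __attribute__ ((altivec (pixel__)))  unsigned short vec_pixel;  /* pixel V8HI */

int
check_sizes (void)
{
  /* Each of these should be a full 16-byte AltiVec vector.  */
  return sizeof (vec_int) == 16 && sizeof (vec_float) == 16
         && sizeof (vec_bool) == 16 && sizeof (vec_pixel) == 16;
}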
*/ + +rtx +rs6000_longcall_ref (rtx call_ref) +{ + const char *call_name; + tree node; + + if (GET_CODE (call_ref) != SYMBOL_REF) + return call_ref; + + /* System V adds '.' to the internal name, so skip them. */ + call_name = XSTR (call_ref, 0); + if (*call_name == '.') + { + while (*call_name == '.') + call_name++; + + node = get_identifier (call_name); + call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node)); + } + + return force_reg (Pmode, call_ref); +} + +#ifndef TARGET_USE_MS_BITFIELD_LAYOUT +#define TARGET_USE_MS_BITFIELD_LAYOUT 0 +#endif + +/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in + struct attribute_spec.handler. */ +static tree +rs6000_handle_struct_attribute (tree *node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) +{ + tree *type = NULL; + if (DECL_P (*node)) + { + if (TREE_CODE (*node) == TYPE_DECL) + type = &TREE_TYPE (*node); + } + else + type = node; + + if (!(type && (TREE_CODE (*type) == RECORD_TYPE + || TREE_CODE (*type) == UNION_TYPE))) + { + warning (OPT_Wattributes, "%qE attribute ignored", name); + *no_add_attrs = true; + } + + else if ((is_attribute_p ("ms_struct", name) + && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type))) + || ((is_attribute_p ("gcc_struct", name) + && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))) + { + warning (OPT_Wattributes, "%qE incompatible attribute ignored", + name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + +static bool +rs6000_ms_bitfield_layout_p (const_tree record_type) +{ + return (TARGET_USE_MS_BITFIELD_LAYOUT && + !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type))) + || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)); +} + +#ifdef USING_ELFOS_H + +/* A get_unnamed_section callback, used for switching to toc_section. */ + +static void +rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED) +{ + if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + && TARGET_MINIMAL_TOC) + { + if (!toc_initialized) + { + fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP); + ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2); + (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0); + fprintf (asm_out_file, "\t.tc "); + ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],"); + ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1"); + fprintf (asm_out_file, "\n"); + + fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP); + ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2); + ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1"); + fprintf (asm_out_file, " = .+32768\n"); + toc_initialized = 1; + } + else + fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP); + } + else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + { + fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP); + if (!toc_initialized) + { + ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2); + toc_initialized = 1; + } + } + else + { + fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP); + if (!toc_initialized) + { + ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2); + ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1"); + fprintf (asm_out_file, " = .+32768\n"); + toc_initialized = 1; + } + } +} + +/* Implement TARGET_ASM_INIT_SECTIONS. 
*/ + +static void +rs6000_elf_asm_init_sections (void) +{ + toc_section + = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL); + + sdata2_section + = get_unnamed_section (SECTION_WRITE, output_section_asm_op, + SDATA2_SECTION_ASM_OP); +} + +/* Implement TARGET_SELECT_RTX_SECTION. */ + +static section * +rs6000_elf_select_rtx_section (machine_mode mode, rtx x, + unsigned HOST_WIDE_INT align) +{ + if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode)) + return toc_section; + else + return default_elf_select_rtx_section (mode, x, align); +} + +/* For a SYMBOL_REF, set generic flags and then perform some + target-specific processing. + + When the AIX ABI is requested on a non-AIX system, replace the + function name with the real name (with a leading .) rather than the + function descriptor name. This saves a lot of overriding code to + read the prefixes. */ + +static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED; +static void +rs6000_elf_encode_section_info (tree decl, rtx rtl, int first) +{ + default_encode_section_info (decl, rtl, first); + + if (first + && TREE_CODE (decl) == FUNCTION_DECL + && !TARGET_AIX + && DEFAULT_ABI == ABI_AIX) + { + rtx sym_ref = XEXP (rtl, 0); + size_t len = strlen (XSTR (sym_ref, 0)); + char *str = XALLOCAVEC (char, len + 2); + str[0] = '.'; + memcpy (str + 1, XSTR (sym_ref, 0), len + 1); + XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1); + } +} + +static inline bool +compare_section_name (const char *section, const char *templ) +{ + int len; + + len = strlen (templ); + return (strncmp (section, templ, len) == 0 + && (section[len] == 0 || section[len] == '.')); +} + +bool +rs6000_elf_in_small_data_p (const_tree decl) +{ + if (rs6000_sdata == SDATA_NONE) + return false; + + /* We want to merge strings, so we never consider them small data. */ + if (TREE_CODE (decl) == STRING_CST) + return false; + + /* Functions are never in the small data area. */ + if (TREE_CODE (decl) == FUNCTION_DECL) + return false; + + if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl)) + { + const char *section = DECL_SECTION_NAME (decl); + if (compare_section_name (section, ".sdata") + || compare_section_name (section, ".sdata2") + || compare_section_name (section, ".gnu.linkonce.s") + || compare_section_name (section, ".sbss") + || compare_section_name (section, ".sbss2") + || compare_section_name (section, ".gnu.linkonce.sb") + || strcmp (section, ".PPC.EMB.sdata0") == 0 + || strcmp (section, ".PPC.EMB.sbss0") == 0) + return true; + } + else + { + HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl)); + + if (size > 0 + && size <= g_switch_value + /* If it's not public, and we're not going to reference it there, + there's no need to put it in the small data section. */ + && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl))) + return true; + } + + return false; +} + +#endif /* USING_ELFOS_H */ + +/* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */ + +static bool +rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x) +{ + return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode); +} + +/* Do not place thread-local symbols refs in the object blocks. */ + +static bool +rs6000_use_blocks_for_decl_p (const_tree decl) +{ + return !DECL_THREAD_LOCAL_P (decl); +} + +/* Return a REG that occurs in ADDR with coefficient 1. + ADDR can be effectively incremented by incrementing REG. 
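compare_section_name above treats a section name as matching when it starts with the template and the next character is either the end of the string or a '.', so ".sdata" matches ".sdata" and ".sdata.foo" but not ".sdata2" (which rs6000_elf_in_small_data_p checks separately). A standalone sketch of that check; the function name here is illustrative.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Prefix match where the section name must either end right after the
   template or continue with a '.', as compare_section_name does.  */
static bool
section_matches (const char *section, const char *templ)
{
  size_t len = strlen (templ);
  return strncmp (section, templ, len) == 0
         && (section[len] == '\0' || section[len] == '.');
}

int
main (void)
{
  printf ("%d\n", section_matches (".sdata.mydata", ".sdata")); /* 1 */
  printf ("%d\n", section_matches (".sdata2", ".sdata"));       /* 0 */
  return 0;
}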
+ + r0 is special and we must not select it as an address + register by this routine since our caller will try to + increment the returned register via an "la" instruction. */ + +rtx +find_addr_reg (rtx addr) +{ + while (GET_CODE (addr) == PLUS) + { + if (GET_CODE (XEXP (addr, 0)) == REG + && REGNO (XEXP (addr, 0)) != 0) + addr = XEXP (addr, 0); + else if (GET_CODE (XEXP (addr, 1)) == REG + && REGNO (XEXP (addr, 1)) != 0) + addr = XEXP (addr, 1); + else if (CONSTANT_P (XEXP (addr, 0))) + addr = XEXP (addr, 1); + else if (CONSTANT_P (XEXP (addr, 1))) + addr = XEXP (addr, 0); + else + gcc_unreachable (); + } + gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0); + return addr; +} + +void +rs6000_fatal_bad_address (rtx op) +{ + fatal_insn ("bad address", op); +} + +#if TARGET_MACHO + +typedef struct branch_island_d { + tree function_name; + tree label_name; + int line_number; +} branch_island; + + +static vec<branch_island, va_gc> *branch_islands; + +/* Remember to generate a branch island for far calls to the given + function. */ + +static void +add_compiler_branch_island (tree label_name, tree function_name, + int line_number) +{ + branch_island bi = {function_name, label_name, line_number}; + vec_safe_push (branch_islands, bi); +} + +/* Generate far-jump branch islands for everything recorded in + branch_islands. Invoked immediately after the last instruction of + the epilogue has been emitted; the branch islands must be appended + to, and contiguous with, the function body. Mach-O stubs are + generated in machopic_output_stub(). */ + +static void +macho_branch_islands (void) +{ + char tmp_buf[512]; + + while (!vec_safe_is_empty (branch_islands)) + { + branch_island *bi = &branch_islands->last (); + const char *label = IDENTIFIER_POINTER (bi->label_name); + const char *name = IDENTIFIER_POINTER (bi->function_name); + char name_buf[512]; + /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). 
*/ + if (name[0] == '*' || name[0] == '&') + strcpy (name_buf, name+1); + else + { + name_buf[0] = '_'; + strcpy (name_buf+1, name); + } + strcpy (tmp_buf, "\n"); + strcat (tmp_buf, label); +#if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO) + if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG) + dbxout_stabd (N_SLINE, bi->line_number); +#endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */ + if (flag_pic) + { + if (TARGET_LINK_STACK) + { + char name[32]; + get_ppc476_thunk_name (name); + strcat (tmp_buf, ":\n\tmflr r0\n\tbl "); + strcat (tmp_buf, name); + strcat (tmp_buf, "\n"); + strcat (tmp_buf, label); + strcat (tmp_buf, "_pic:\n\tmflr r11\n"); + } + else + { + strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,"); + strcat (tmp_buf, label); + strcat (tmp_buf, "_pic\n"); + strcat (tmp_buf, label); + strcat (tmp_buf, "_pic:\n\tmflr r11\n"); + } + + strcat (tmp_buf, "\taddis r11,r11,ha16("); + strcat (tmp_buf, name_buf); + strcat (tmp_buf, " - "); + strcat (tmp_buf, label); + strcat (tmp_buf, "_pic)\n"); + + strcat (tmp_buf, "\tmtlr r0\n"); + + strcat (tmp_buf, "\taddi r12,r11,lo16("); + strcat (tmp_buf, name_buf); + strcat (tmp_buf, " - "); + strcat (tmp_buf, label); + strcat (tmp_buf, "_pic)\n"); + + strcat (tmp_buf, "\tmtctr r12\n\tbctr\n"); + } + else + { + strcat (tmp_buf, ":\nlis r12,hi16("); + strcat (tmp_buf, name_buf); + strcat (tmp_buf, ")\n\tori r12,r12,lo16("); + strcat (tmp_buf, name_buf); + strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr"); + } + output_asm_insn (tmp_buf, 0); +#if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO) + if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG) + dbxout_stabd (N_SLINE, bi->line_number); +#endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */ + branch_islands->pop (); + } +} + +/* NO_PREVIOUS_DEF checks in the link list whether the function name is + already there or not. */ + +static int +no_previous_def (tree function_name) +{ + branch_island *bi; + unsigned ix; + + FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi) + if (function_name == bi->function_name) + return 0; + return 1; +} + +/* GET_PREV_LABEL gets the label name from the previous definition of + the function. */ + +static tree +get_prev_label (tree function_name) +{ + branch_island *bi; + unsigned ix; + + FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi) + if (function_name == bi->function_name) + return bi->label_name; + return NULL_TREE; +} + +/* INSN is either a function call or a millicode call. It may have an + unconditional jump in its delay slot. + + CALL_DEST is the routine we are calling. */ + +char * +output_call (rtx_insn *insn, rtx *operands, int dest_operand_number, + int cookie_operand_number) +{ + static char buf[256]; + if (darwin_emit_branch_islands + && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF + && (INTVAL (operands[cookie_operand_number]) & CALL_LONG)) + { + tree labelname; + tree funname = get_identifier (XSTR (operands[dest_operand_number], 0)); + + if (no_previous_def (funname)) + { + rtx label_rtx = gen_label_rtx (); + char *label_buf, temp_buf[256]; + ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L", + CODE_LABEL_NUMBER (label_rtx)); + label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf; + labelname = get_identifier (label_buf); + add_compiler_branch_island (labelname, funname, insn_line (insn)); + } + else + labelname = get_prev_label (funname); + + /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl' + instruction will reach 'foo', otherwise link as 'bl L42'". 
+ "L42" should be a 'branch island', that will do a far jump to + 'foo'. Branch islands are generated in + macho_branch_islands(). */ + sprintf (buf, "jbsr %%z%d,%.246s", + dest_operand_number, IDENTIFIER_POINTER (labelname)); + } + else + sprintf (buf, "bl %%z%d", dest_operand_number); + return buf; +} + +/* Generate PIC and indirect symbol stubs. */ + +void +machopic_output_stub (FILE *file, const char *symb, const char *stub) +{ + unsigned int length; + char *symbol_name, *lazy_ptr_name; + char *local_label_0; + static int label = 0; + + /* Lose our funky encoding stuff so it doesn't contaminate the stub. */ + symb = (*targetm.strip_name_encoding) (symb); + + + length = strlen (symb); + symbol_name = XALLOCAVEC (char, length + 32); + GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); + + lazy_ptr_name = XALLOCAVEC (char, length + 32); + GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length); + + if (flag_pic == 2) + switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]); + else + switch_to_section (darwin_sections[machopic_symbol_stub1_section]); + + if (flag_pic == 2) + { + fprintf (file, "\t.align 5\n"); + + fprintf (file, "%s:\n", stub); + fprintf (file, "\t.indirect_symbol %s\n", symbol_name); + + label++; + local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\"")); + sprintf (local_label_0, "\"L%011d$spb\"", label); + + fprintf (file, "\tmflr r0\n"); + if (TARGET_LINK_STACK) + { + char name[32]; + get_ppc476_thunk_name (name); + fprintf (file, "\tbl %s\n", name); + fprintf (file, "%s:\n\tmflr r11\n", local_label_0); + } + else + { + fprintf (file, "\tbcl 20,31,%s\n", local_label_0); + fprintf (file, "%s:\n\tmflr r11\n", local_label_0); + } + fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n", + lazy_ptr_name, local_label_0); + fprintf (file, "\tmtlr r0\n"); + fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n", + (TARGET_64BIT ? "ldu" : "lwzu"), + lazy_ptr_name, local_label_0); + fprintf (file, "\tmtctr r12\n"); + fprintf (file, "\tbctr\n"); + } + else + { + fprintf (file, "\t.align 4\n"); + + fprintf (file, "%s:\n", stub); + fprintf (file, "\t.indirect_symbol %s\n", symbol_name); + + fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name); + fprintf (file, "\t%s r12,lo16(%s)(r11)\n", + (TARGET_64BIT ? "ldu" : "lwzu"), + lazy_ptr_name); + fprintf (file, "\tmtctr r12\n"); + fprintf (file, "\tbctr\n"); + } + + switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]); + fprintf (file, "%s:\n", lazy_ptr_name); + fprintf (file, "\t.indirect_symbol %s\n", symbol_name); + fprintf (file, "%sdyld_stub_binding_helper\n", + (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t")); +} + +/* Legitimize PIC addresses. If the address is already + position-independent, we return ORIG. Newly generated + position-independent addresses go into a reg. This is REG if non + zero, otherwise we allocate register(s) as necessary. */ + +#define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000) + +rtx +rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode, + rtx reg) +{ + rtx base, offset; + + if (reg == NULL && ! reload_in_progress && ! reload_completed) + reg = gen_reg_rtx (Pmode); + + if (GET_CODE (orig) == CONST) + { + rtx reg_temp; + + if (GET_CODE (XEXP (orig, 0)) == PLUS + && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx) + return orig; + + gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS); + + /* Use a different reg for the intermediate value, as + it will be marked UNCHANGING. */ + reg_temp = !can_create_pseudo_p () ? 
reg : gen_reg_rtx (Pmode); + base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), + Pmode, reg_temp); + offset = + rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1), + Pmode, reg); + + if (GET_CODE (offset) == CONST_INT) + { + if (SMALL_INT (offset)) + return plus_constant (Pmode, base, INTVAL (offset)); + else if (! reload_in_progress && ! reload_completed) + offset = force_reg (Pmode, offset); + else + { + rtx mem = force_const_mem (Pmode, orig); + return machopic_legitimize_pic_address (mem, Pmode, reg); + } + } + return gen_rtx_PLUS (Pmode, base, offset); + } + + /* Fall back on generic machopic code. */ + return machopic_legitimize_pic_address (orig, mode, reg); +} + +/* Output a .machine directive for the Darwin assembler, and call + the generic start_file routine. */ + +static void +rs6000_darwin_file_start (void) +{ + static const struct + { + const char *arg; + const char *name; + HOST_WIDE_INT if_set; + } mapping[] = { + { "ppc64", "ppc64", MASK_64BIT }, + { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 }, + { "power4", "ppc970", 0 }, + { "G5", "ppc970", 0 }, + { "7450", "ppc7450", 0 }, + { "7400", "ppc7400", MASK_ALTIVEC }, + { "G4", "ppc7400", 0 }, + { "750", "ppc750", 0 }, + { "740", "ppc750", 0 }, + { "G3", "ppc750", 0 }, + { "604e", "ppc604e", 0 }, + { "604", "ppc604", 0 }, + { "603e", "ppc603", 0 }, + { "603", "ppc603", 0 }, + { "601", "ppc601", 0 }, + { NULL, "ppc", 0 } }; + const char *cpu_id = ""; + size_t i; + + rs6000_file_start (); + darwin_file_start (); + + /* Determine the argument to -mcpu=. Default to G3 if not specified. */ + + if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0') + cpu_id = rs6000_default_cpu; + + if (global_options_set.x_rs6000_cpu_index) + cpu_id = processor_target_table[rs6000_cpu_index].name; + + /* Look through the mapping array. Pick the first name that either + matches the argument, has a bit set in IF_SET that is also set + in the target flags, or has a NULL name. */ + + i = 0; + while (mapping[i].arg != NULL + && strcmp (mapping[i].arg, cpu_id) != 0 + && (mapping[i].if_set & rs6000_isa_flags) == 0) + i++; + + fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name); +} + +#endif /* TARGET_MACHO */ + +#if TARGET_ELF +static int +rs6000_elf_reloc_rw_mask (void) +{ + if (flag_pic) + return 3; + else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + return 2; + else + return 0; +} + +/* Record an element in the table of global constructors. SYMBOL is + a SYMBOL_REF of the function to be called; PRIORITY is a number + between 0 and MAX_INIT_PRIORITY. + + This differs from default_named_section_asm_out_constructor in + that we have special handling for -mrelocatable. */ + +static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED; +static void +rs6000_elf_asm_out_constructor (rtx symbol, int priority) +{ + const char *section = ".ctors"; + char buf[18]; + + if (priority != DEFAULT_INIT_PRIORITY) + { + sprintf (buf, ".ctors.%.5u", + /* Invert the numbering so the linker puts us in the proper + order; constructors are run from right to left, and the + linker sorts in increasing order. 
*/ + MAX_INIT_PRIORITY - priority); + section = buf; + } + + switch_to_section (get_section (section, SECTION_WRITE, NULL)); + assemble_align (POINTER_SIZE); + + if (DEFAULT_ABI == ABI_V4 + && (TARGET_RELOCATABLE || flag_pic > 1)) + { + fputs ("\t.long (", asm_out_file); + output_addr_const (asm_out_file, symbol); + fputs (")@fixup\n", asm_out_file); + } + else + assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1); +} + +static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED; +static void +rs6000_elf_asm_out_destructor (rtx symbol, int priority) +{ + const char *section = ".dtors"; + char buf[18]; + + if (priority != DEFAULT_INIT_PRIORITY) + { + sprintf (buf, ".dtors.%.5u", + /* Invert the numbering so the linker puts us in the proper + order; constructors are run from right to left, and the + linker sorts in increasing order. */ + MAX_INIT_PRIORITY - priority); + section = buf; + } + + switch_to_section (get_section (section, SECTION_WRITE, NULL)); + assemble_align (POINTER_SIZE); + + if (DEFAULT_ABI == ABI_V4 + && (TARGET_RELOCATABLE || flag_pic > 1)) + { + fputs ("\t.long (", asm_out_file); + output_addr_const (asm_out_file, symbol); + fputs (")@fixup\n", asm_out_file); + } + else + assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1); +} + +void +rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl) +{ + if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2) + { + fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file); + ASM_OUTPUT_LABEL (file, name); + fputs (DOUBLE_INT_ASM_OP, file); + rs6000_output_function_entry (file, name); + fputs (",.TOC.@tocbase,0\n\t.previous\n", file); + if (DOT_SYMBOLS) + { + fputs ("\t.size\t", file); + assemble_name (file, name); + fputs (",24\n\t.type\t.", file); + assemble_name (file, name); + fputs (",@function\n", file); + if (TREE_PUBLIC (decl) && ! 
DECL_WEAK (decl)) + { + fputs ("\t.globl\t.", file); + assemble_name (file, name); + putc ('\n', file); + } + } + else + ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function"); + ASM_DECLARE_RESULT (file, DECL_RESULT (decl)); + rs6000_output_function_entry (file, name); + fputs (":\n", file); + return; + } + + if (DEFAULT_ABI == ABI_V4 + && (TARGET_RELOCATABLE || flag_pic > 1) + && !TARGET_SECURE_PLT + && (!constant_pool_empty_p () || crtl->profile) + && uses_TOC ()) + { + char buf[256]; + + (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno); + + fprintf (file, "\t.long "); + assemble_name (file, toc_label_name); + need_toc_init = 1; + putc ('-', file); + ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno); + assemble_name (file, buf); + putc ('\n', file); + } + + ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function"); + ASM_DECLARE_RESULT (file, DECL_RESULT (decl)); + + if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ()) + { + char buf[256]; + + (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno); + + fprintf (file, "\t.quad .TOC.-"); + ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno); + assemble_name (file, buf); + putc ('\n', file); + } + + if (DEFAULT_ABI == ABI_AIX) + { + const char *desc_name, *orig_name; + + orig_name = (*targetm.strip_name_encoding) (name); + desc_name = orig_name; + while (*desc_name == '.') + desc_name++; + + if (TREE_PUBLIC (decl)) + fprintf (file, "\t.globl %s\n", desc_name); + + fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP); + fprintf (file, "%s:\n", desc_name); + fprintf (file, "\t.long %s\n", orig_name); + fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file); + fputs ("\t.long 0\n", file); + fprintf (file, "\t.previous\n"); + } + ASM_OUTPUT_LABEL (file, name); +} + +static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED; +static void +rs6000_elf_file_end (void) +{ +#ifdef HAVE_AS_GNU_ATTRIBUTE + /* ??? The value emitted depends on options active at file end. + Assume anyone using #pragma or attributes that might change + options knows what they are doing. */ + if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4) + && rs6000_passes_float) + { + int fp; + + if (TARGET_DF_FPR | TARGET_DF_SPE) + fp = 1; + else if (TARGET_SF_FPR | TARGET_SF_SPE) + fp = 3; + else + fp = 2; + if (rs6000_passes_long_double) + { + if (!TARGET_LONG_DOUBLE_128) + fp |= 2 * 4; + else if (TARGET_IEEEQUAD) + fp |= 3 * 4; + else + fp |= 1 * 4; + } + fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp); + } + if (TARGET_32BIT && DEFAULT_ABI == ABI_V4) + { + if (rs6000_passes_vector) + fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n", + (TARGET_ALTIVEC_ABI ? 2 + : TARGET_SPE_ABI ? 3 + : 1)); + if (rs6000_returns_struct) + fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n", + aix_struct_return ? 2 : 1); + } +#endif +#if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD) + if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2) + file_end_indicate_exec_stack (); +#endif + + if (flag_split_stack) + file_end_indicate_split_stack (); + + if (cpu_builtin_p) + { + /* We have expanded a CPU builtin, so we need to emit a reference to + the special symbol that LIBC uses to declare it supports the + AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */ + switch_to_section (data_section); + fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3); + fprintf (asm_out_file, "\t%s %s\n", + TARGET_32BIT ? 
".long" : ".quad", tcb_verification_symbol); + } +} +#endif + +#if TARGET_XCOFF + +#ifndef HAVE_XCOFF_DWARF_EXTRAS +#define HAVE_XCOFF_DWARF_EXTRAS 0 +#endif + +static enum unwind_info_type +rs6000_xcoff_debug_unwind_info (void) +{ + return UI_NONE; +} + +static void +rs6000_xcoff_asm_output_anchor (rtx symbol) +{ + char buffer[100]; + + sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC, + SYMBOL_REF_BLOCK_OFFSET (symbol)); + fprintf (asm_out_file, "%s", SET_ASM_OP); + RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0)); + fprintf (asm_out_file, ","); + RS6000_OUTPUT_BASENAME (asm_out_file, buffer); + fprintf (asm_out_file, "\n"); +} + +static void +rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name) +{ + fputs (GLOBAL_ASM_OP, stream); + RS6000_OUTPUT_BASENAME (stream, name); + putc ('\n', stream); +} + +/* A get_unnamed_decl callback, used for read-only sections. PTR + points to the section string variable. */ + +static void +rs6000_xcoff_output_readonly_section_asm_op (const void *directive) +{ + fprintf (asm_out_file, "\t.csect %s[RO],%s\n", + *(const char *const *) directive, + XCOFF_CSECT_DEFAULT_ALIGNMENT_STR); +} + +/* Likewise for read-write sections. */ + +static void +rs6000_xcoff_output_readwrite_section_asm_op (const void *directive) +{ + fprintf (asm_out_file, "\t.csect %s[RW],%s\n", + *(const char *const *) directive, + XCOFF_CSECT_DEFAULT_ALIGNMENT_STR); +} + +static void +rs6000_xcoff_output_tls_section_asm_op (const void *directive) +{ + fprintf (asm_out_file, "\t.csect %s[TL],%s\n", + *(const char *const *) directive, + XCOFF_CSECT_DEFAULT_ALIGNMENT_STR); +} + +/* A get_unnamed_section callback, used for switching to toc_section. */ + +static void +rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED) +{ + if (TARGET_MINIMAL_TOC) + { + /* toc_section is always selected at least once from + rs6000_xcoff_file_start, so this is guaranteed to + always be defined once and only once in each file. */ + if (!toc_initialized) + { + fputs ("\t.toc\nLCTOC..1:\n", asm_out_file); + fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file); + toc_initialized = 1; + } + fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n", + (TARGET_32BIT ? "" : ",3")); + } + else + fputs ("\t.toc\n", asm_out_file); +} + +/* Implement TARGET_ASM_INIT_SECTIONS. 
*/ + +static void +rs6000_xcoff_asm_init_sections (void) +{ + read_only_data_section + = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op, + &xcoff_read_only_section_name); + + private_data_section + = get_unnamed_section (SECTION_WRITE, + rs6000_xcoff_output_readwrite_section_asm_op, + &xcoff_private_data_section_name); + + tls_data_section + = get_unnamed_section (SECTION_TLS, + rs6000_xcoff_output_tls_section_asm_op, + &xcoff_tls_data_section_name); + + tls_private_data_section + = get_unnamed_section (SECTION_TLS, + rs6000_xcoff_output_tls_section_asm_op, + &xcoff_private_data_section_name); + + read_only_private_data_section + = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op, + &xcoff_private_data_section_name); + + toc_section + = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL); + + readonly_data_section = read_only_data_section; +} + +static int +rs6000_xcoff_reloc_rw_mask (void) +{ + return 3; +} + +static void +rs6000_xcoff_asm_named_section (const char *name, unsigned int flags, + tree decl ATTRIBUTE_UNUSED) +{ + int smclass; + static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" }; + + if (flags & SECTION_EXCLUDE) + smclass = 4; + else if (flags & SECTION_DEBUG) + { + fprintf (asm_out_file, "\t.dwsect %s\n", name); + return; + } + else if (flags & SECTION_CODE) + smclass = 0; + else if (flags & SECTION_TLS) + smclass = 3; + else if (flags & SECTION_WRITE) + smclass = 2; + else + smclass = 1; + + fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n", + (flags & SECTION_CODE) ? "." : "", + name, suffix[smclass], flags & SECTION_ENTSIZE); +} + +#define IN_NAMED_SECTION(DECL) \ + ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \ + && DECL_SECTION_NAME (DECL) != NULL) + +static section * +rs6000_xcoff_select_section (tree decl, int reloc, + unsigned HOST_WIDE_INT align) +{ + /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into + named section. */ + if (align > BIGGEST_ALIGNMENT) + { + resolve_unique_section (decl, reloc, true); + if (IN_NAMED_SECTION (decl)) + return get_named_section (decl, NULL, reloc); + } + + if (decl_readonly_section (decl, reloc)) + { + if (TREE_PUBLIC (decl)) + return read_only_data_section; + else + return read_only_private_data_section; + } + else + { +#if HAVE_AS_TLS + if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl)) + { + if (TREE_PUBLIC (decl)) + return tls_data_section; + else if (bss_initializer_p (decl)) + { + /* Convert to COMMON to emit in BSS. */ + DECL_COMMON (decl) = 1; + return tls_comm_section; + } + else + return tls_private_data_section; + } + else +#endif + if (TREE_PUBLIC (decl)) + return data_section; + else + return private_data_section; + } +} + +static void +rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED) +{ + const char *name; + + /* Use select_section for private data and uninitialized data with + alignment <= BIGGEST_ALIGNMENT. */ + if (!TREE_PUBLIC (decl) + || DECL_COMMON (decl) + || (DECL_INITIAL (decl) == NULL_TREE + && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT) + || DECL_INITIAL (decl) == error_mark_node + || (flag_zero_initialized_in_bss + && initializer_zerop (DECL_INITIAL (decl)))) + return; + + name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); + name = (*targetm.strip_name_encoding) (name); + set_decl_section_name (decl, name); +} + +/* Select section for constant in constant pool. + + On RS/6000, all constants are in the private read-only data area. 
+ However, if this is being placed in the TOC it must be output as a + toc entry. */ + +static section * +rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x, + unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED) +{ + if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode)) + return toc_section; + else + return read_only_private_data_section; +} + +/* Remove any trailing [DS] or the like from the symbol name. */ + +static const char * +rs6000_xcoff_strip_name_encoding (const char *name) +{ + size_t len; + if (*name == '*') + name++; + len = strlen (name); + if (name[len - 1] == ']') + return ggc_alloc_string (name, len - 4); + else + return name; +} + +/* Section attributes. AIX is always PIC. */ + +static unsigned int +rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc) +{ + unsigned int align; + unsigned int flags = default_section_type_flags (decl, name, reloc); + + /* Align to at least UNIT size. */ + if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl)) + align = MIN_UNITS_PER_WORD; + else + /* Increase alignment of large objects if not already stricter. */ + align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT), + int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD + ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD); + + return flags | (exact_log2 (align) & SECTION_ENTSIZE); +} + +/* Output at beginning of assembler file. + + Initialize the section names for the RS/6000 at this point. + + Specify filename, including full path, to assembler. + + We want to go into the TOC section so at least one .toc will be emitted. + Also, in order to output proper .bs/.es pairs, we need at least one static + [RW] section emitted. + + Finally, declare mcount when profiling to make the assembler happy. */ + +static void +rs6000_xcoff_file_start (void) +{ + rs6000_gen_section_name (&xcoff_bss_section_name, + main_input_filename, ".bss_"); + rs6000_gen_section_name (&xcoff_private_data_section_name, + main_input_filename, ".rw_"); + rs6000_gen_section_name (&xcoff_read_only_section_name, + main_input_filename, ".ro_"); + rs6000_gen_section_name (&xcoff_tls_data_section_name, + main_input_filename, ".tls_"); + rs6000_gen_section_name (&xcoff_tbss_section_name, + main_input_filename, ".tbss_[UL]"); + + fputs ("\t.file\t", asm_out_file); + output_quoted_string (asm_out_file, main_input_filename); + fputc ('\n', asm_out_file); + if (write_symbols != NO_DEBUG) + switch_to_section (private_data_section); + switch_to_section (toc_section); + switch_to_section (text_section); + if (profile_flag) + fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT); + rs6000_file_start (); +} + +/* Output at end of assembler file. + On the RS/6000, referencing data should automatically pull in text. */ + +static void +rs6000_xcoff_file_end (void) +{ + switch_to_section (text_section); + fputs ("_section_.text:\n", asm_out_file); + switch_to_section (data_section); + fputs (TARGET_32BIT + ? "\t.long _section_.text\n" : "\t.llong _section_.text\n", + asm_out_file); +} + +struct declare_alias_data +{ + FILE *file; + bool function_descriptor; +}; + +/* Declare alias N. A helper function for for_node_and_aliases. */ + +static bool +rs6000_declare_alias (struct symtab_node *n, void *d) +{ + struct declare_alias_data *data = (struct declare_alias_data *)d; + /* Main symbol is output specially, because varasm machinery does part of + the job for us - we do not need to declare .globl/lglobs and such. 
*/ + if (!n->alias || n->weakref) + return false; + + if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl))) + return false; + + /* Prevent assemble_alias from trying to use .set pseudo operation + that does not behave as expected by the middle-end. */ + TREE_ASM_WRITTEN (n->decl) = true; + + const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl)); + char *buffer = (char *) alloca (strlen (name) + 2); + char *p; + int dollar_inside = 0; + + strcpy (buffer, name); + p = strchr (buffer, '$'); + while (p) { + *p = '_'; + dollar_inside++; + p = strchr (p + 1, '$'); + } + if (TREE_PUBLIC (n->decl)) + { + if (!RS6000_WEAK || !DECL_WEAK (n->decl)) + { + if (dollar_inside) { + if (data->function_descriptor) + fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name); + fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name); + } + if (data->function_descriptor) + { + fputs ("\t.globl .", data->file); + RS6000_OUTPUT_BASENAME (data->file, buffer); + putc ('\n', data->file); + } + fputs ("\t.globl ", data->file); + RS6000_OUTPUT_BASENAME (data->file, buffer); + putc ('\n', data->file); + } +#ifdef ASM_WEAKEN_DECL + else if (DECL_WEAK (n->decl) && !data->function_descriptor) + ASM_WEAKEN_DECL (data->file, n->decl, name, NULL); +#endif + } + else + { + if (dollar_inside) + { + if (data->function_descriptor) + fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name); + fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name); + } + if (data->function_descriptor) + { + fputs ("\t.lglobl .", data->file); + RS6000_OUTPUT_BASENAME (data->file, buffer); + putc ('\n', data->file); + } + fputs ("\t.lglobl ", data->file); + RS6000_OUTPUT_BASENAME (data->file, buffer); + putc ('\n', data->file); + } + if (data->function_descriptor) + fputs (".", data->file); + RS6000_OUTPUT_BASENAME (data->file, buffer); + fputs (":\n", data->file); + return false; +} + + +#ifdef HAVE_GAS_HIDDEN +/* Helper function to calculate visibility of a DECL + and return the value as a const string. */ + +static const char * +rs6000_xcoff_visibility (tree decl) +{ + static const char * const visibility_types[] = { + "", ",protected", ",hidden", ",internal" + }; + + enum symbol_visibility vis = DECL_VISIBILITY (decl); + + if (TREE_CODE (decl) == FUNCTION_DECL + && cgraph_node::get (decl) + && cgraph_node::get (decl)->instrumentation_clone + && cgraph_node::get (decl)->instrumented_version) + vis = DECL_VISIBILITY (cgraph_node::get (decl)->instrumented_version->decl); + + return visibility_types[vis]; +} +#endif + + +/* This macro produces the initial definition of a function name. + On the RS/6000, we need to place an extra '.' in the function name and + output the function descriptor. + Dollar signs are converted to underscores. + + The csect for the function will have already been created when + text_section was selected. We do have to go back to that csect, however. + + The third and fourth parameters to the .function pseudo-op (16 and 044) + are placeholders which no longer have any use. + + Because AIX assembler's .set command has unexpected semantics, we output + all aliases as alternative labels in front of the definition. 
*/ + +void +rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl) +{ + char *buffer = (char *) alloca (strlen (name) + 1); + char *p; + int dollar_inside = 0; + struct declare_alias_data data = {file, false}; + + strcpy (buffer, name); + p = strchr (buffer, '$'); + while (p) { + *p = '_'; + dollar_inside++; + p = strchr (p + 1, '$'); + } + if (TREE_PUBLIC (decl)) + { + if (!RS6000_WEAK || !DECL_WEAK (decl)) + { + if (dollar_inside) { + fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name); + fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name); + } + fputs ("\t.globl .", file); + RS6000_OUTPUT_BASENAME (file, buffer); +#ifdef HAVE_GAS_HIDDEN + fputs (rs6000_xcoff_visibility (decl), file); +#endif + putc ('\n', file); + } + } + else + { + if (dollar_inside) { + fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name); + fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name); + } + fputs ("\t.lglobl .", file); + RS6000_OUTPUT_BASENAME (file, buffer); + putc ('\n', file); + } + fputs ("\t.csect ", file); + RS6000_OUTPUT_BASENAME (file, buffer); + fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file); + RS6000_OUTPUT_BASENAME (file, buffer); + fputs (":\n", file); + symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, + &data, true); + fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file); + RS6000_OUTPUT_BASENAME (file, buffer); + fputs (", TOC[tc0], 0\n", file); + in_section = NULL; + switch_to_section (function_section (decl)); + putc ('.', file); + RS6000_OUTPUT_BASENAME (file, buffer); + fputs (":\n", file); + data.function_descriptor = true; + symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, + &data, true); + if (!DECL_IGNORED_P (decl)) + { + if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG) + xcoffout_declare_function (file, decl, buffer); + else if (write_symbols == DWARF2_DEBUG) + { + name = (*targetm.strip_name_encoding) (name); + fprintf (file, "\t.function .%s,.%s,2,0\n", name, name); + } + } + return; +} + + +/* Output assembly language to globalize a symbol from a DECL, + possibly with visibility. */ + +void +rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl) +{ + const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0); + fputs (GLOBAL_ASM_OP, stream); + RS6000_OUTPUT_BASENAME (stream, name); +#ifdef HAVE_GAS_HIDDEN + fputs (rs6000_xcoff_visibility (decl), stream); +#endif + putc ('\n', stream); +} + +/* Output assembly language to define a symbol as COMMON from a DECL, + possibly with visibility. */ + +void +rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream, + tree decl ATTRIBUTE_UNUSED, + const char *name, + unsigned HOST_WIDE_INT size, + unsigned HOST_WIDE_INT align) +{ + unsigned HOST_WIDE_INT align2 = 2; + + if (align > 32) + align2 = floor_log2 (align / BITS_PER_UNIT); + else if (size > 4) + align2 = 3; + + fputs (COMMON_ASM_OP, stream); + RS6000_OUTPUT_BASENAME (stream, name); + + fprintf (stream, + "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED, + size, align2); + +#ifdef HAVE_GAS_HIDDEN + fputs (rs6000_xcoff_visibility (decl), stream); +#endif + putc ('\n', stream); +} + +/* This macro produces the initial definition of a object (variable) name. + Because AIX assembler's .set command has unexpected semantics, we output + all aliases as alternative labels in front of the definition. 
*/ + +void +rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl) +{ + struct declare_alias_data data = {file, false}; + RS6000_OUTPUT_BASENAME (file, name); + fputs (":\n", file); + symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, + &data, true); +} + +/* Override the default 'SYMBOL-.' syntax with AIX-compatible 'SYMBOL-$'. */ + +void +rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label) +{ + fputs (integer_asm_op (size, FALSE), file); + assemble_name (file, label); + fputs ("-$", file); +} + +/* Output a symbol offset relative to the dbase for the current object. + We use __gcc_unwind_dbase as an arbitrary base for dbase and assume + signed offsets. + + __gcc_unwind_dbase is embedded in all executables/libraries through + libgcc/config/rs6000/crtdbase.S. */ + +void +rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label) +{ + fputs (integer_asm_op (size, FALSE), file); + assemble_name (file, label); + fputs("-__gcc_unwind_dbase", file); +} + +#ifdef HAVE_AS_TLS +static void +rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first) +{ + rtx symbol; + int flags; + const char *symname; + + default_encode_section_info (decl, rtl, first); + + /* Careful not to prod global register variables. */ + if (!MEM_P (rtl)) + return; + symbol = XEXP (rtl, 0); + if (GET_CODE (symbol) != SYMBOL_REF) + return; + + flags = SYMBOL_REF_FLAGS (symbol); + + if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl)) + flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO; + + SYMBOL_REF_FLAGS (symbol) = flags; + + /* Append mapping class to extern decls. */ + symname = XSTR (symbol, 0); + if (decl /* sync condition with assemble_external () */ + && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl) + && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl)) + || TREE_CODE (decl) == FUNCTION_DECL) + && symname[strlen (symname) - 1] != ']') + { + char *newname = (char *) alloca (strlen (symname) + 5); + strcpy (newname, symname); + strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL + ? "[DS]" : "[UA]")); + XSTR (symbol, 0) = ggc_strdup (newname); + } +} +#endif /* HAVE_AS_TLS */ +#endif /* TARGET_XCOFF */ + +void +rs6000_asm_weaken_decl (FILE *stream, tree decl, + const char *name, const char *val) +{ + fputs ("\t.weak\t", stream); + RS6000_OUTPUT_BASENAME (stream, name); + if (decl && TREE_CODE (decl) == FUNCTION_DECL + && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS) + { + if (TARGET_XCOFF) + fputs ("[DS]", stream); +#if TARGET_XCOFF && HAVE_GAS_HIDDEN + if (TARGET_XCOFF) + fputs (rs6000_xcoff_visibility (decl), stream); +#endif + fputs ("\n\t.weak\t.", stream); + RS6000_OUTPUT_BASENAME (stream, name); + } +#if TARGET_XCOFF && HAVE_GAS_HIDDEN + if (TARGET_XCOFF) + fputs (rs6000_xcoff_visibility (decl), stream); +#endif + fputc ('\n', stream); + if (val) + { +#ifdef ASM_OUTPUT_DEF + ASM_OUTPUT_DEF (stream, name, val); +#endif + if (decl && TREE_CODE (decl) == FUNCTION_DECL + && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS) + { + fputs ("\t.set\t.", stream); + RS6000_OUTPUT_BASENAME (stream, name); + fputs (",.", stream); + RS6000_OUTPUT_BASENAME (stream, val); + fputc ('\n', stream); + } + } +} + + +/* Return true if INSN should not be copied. */ + +static bool +rs6000_cannot_copy_insn_p (rtx_insn *insn) +{ + return recog_memoized (insn) >= 0 + && get_attr_cannot_copy (insn); +} + +/* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. 
In either case, *TOTAL contains the cost result. */ + +static bool +rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code, + int opno ATTRIBUTE_UNUSED, int *total, bool speed) +{ + int code = GET_CODE (x); + + switch (code) + { + /* On the RS/6000, if it is valid in the insn, it is free. */ + case CONST_INT: + if (((outer_code == SET + || outer_code == PLUS + || outer_code == MINUS) + && (satisfies_constraint_I (x) + || satisfies_constraint_L (x))) + || (outer_code == AND + && (satisfies_constraint_K (x) + || (mode == SImode + ? satisfies_constraint_L (x) + : satisfies_constraint_J (x)))) + || ((outer_code == IOR || outer_code == XOR) + && (satisfies_constraint_K (x) + || (mode == SImode + ? satisfies_constraint_L (x) + : satisfies_constraint_J (x)))) + || outer_code == ASHIFT + || outer_code == ASHIFTRT + || outer_code == LSHIFTRT + || outer_code == ROTATE + || outer_code == ROTATERT + || outer_code == ZERO_EXTRACT + || (outer_code == MULT + && satisfies_constraint_I (x)) + || ((outer_code == DIV || outer_code == UDIV + || outer_code == MOD || outer_code == UMOD) + && exact_log2 (INTVAL (x)) >= 0) + || (outer_code == COMPARE + && (satisfies_constraint_I (x) + || satisfies_constraint_K (x))) + || ((outer_code == EQ || outer_code == NE) + && (satisfies_constraint_I (x) + || satisfies_constraint_K (x) + || (mode == SImode + ? satisfies_constraint_L (x) + : satisfies_constraint_J (x)))) + || (outer_code == GTU + && satisfies_constraint_I (x)) + || (outer_code == LTU + && satisfies_constraint_P (x))) + { + *total = 0; + return true; + } + else if ((outer_code == PLUS + && reg_or_add_cint_operand (x, VOIDmode)) + || (outer_code == MINUS + && reg_or_sub_cint_operand (x, VOIDmode)) + || ((outer_code == SET + || outer_code == IOR + || outer_code == XOR) + && (INTVAL (x) + & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0)) + { + *total = COSTS_N_INSNS (1); + return true; + } + /* FALLTHRU */ + + case CONST_DOUBLE: + case CONST_WIDE_INT: + case CONST: + case HIGH: + case SYMBOL_REF: + *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2); + return true; + + case MEM: + /* When optimizing for size, MEM should be slightly more expensive + than generating address, e.g., (plus (reg) (const)). + L1 cache latency is about two instructions. */ + *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2); + if (SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (x))) + *total += COSTS_N_INSNS (100); + return true; + + case LABEL_REF: + *total = 0; + return true; + + case PLUS: + case MINUS: + if (FLOAT_MODE_P (mode)) + *total = rs6000_cost->fp; + else + *total = COSTS_N_INSNS (1); + return false; + + case MULT: + if (GET_CODE (XEXP (x, 1)) == CONST_INT + && satisfies_constraint_I (XEXP (x, 1))) + { + if (INTVAL (XEXP (x, 1)) >= -256 + && INTVAL (XEXP (x, 1)) <= 255) + *total = rs6000_cost->mulsi_const9; + else + *total = rs6000_cost->mulsi_const; + } + else if (mode == SFmode) + *total = rs6000_cost->fp; + else if (FLOAT_MODE_P (mode)) + *total = rs6000_cost->dmul; + else if (mode == DImode) + *total = rs6000_cost->muldi; + else + *total = rs6000_cost->mulsi; + return false; + + case FMA: + if (mode == SFmode) + *total = rs6000_cost->fp; + else + *total = rs6000_cost->dmul; + break; + + case DIV: + case MOD: + if (FLOAT_MODE_P (mode)) + { + *total = mode == DFmode ? 
rs6000_cost->ddiv + : rs6000_cost->sdiv; + return false; + } + /* FALLTHRU */ + + case UDIV: + case UMOD: + if (GET_CODE (XEXP (x, 1)) == CONST_INT + && exact_log2 (INTVAL (XEXP (x, 1))) >= 0) + { + if (code == DIV || code == MOD) + /* Shift, addze */ + *total = COSTS_N_INSNS (2); + else + /* Shift */ + *total = COSTS_N_INSNS (1); + } + else + { + if (GET_MODE (XEXP (x, 1)) == DImode) + *total = rs6000_cost->divdi; + else + *total = rs6000_cost->divsi; + } + /* Add in shift and subtract for MOD unless we have a mod instruction. */ + if (!TARGET_MODULO && (code == MOD || code == UMOD)) + *total += COSTS_N_INSNS (2); + return false; + + case CTZ: + *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4); + return false; + + case FFS: + *total = COSTS_N_INSNS (4); + return false; + + case POPCOUNT: + *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6); + return false; + + case PARITY: + *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6); + return false; + + case NOT: + if (outer_code == AND || outer_code == IOR || outer_code == XOR) + *total = 0; + else + *total = COSTS_N_INSNS (1); + return false; + + case AND: + if (CONST_INT_P (XEXP (x, 1))) + { + rtx left = XEXP (x, 0); + rtx_code left_code = GET_CODE (left); + + /* rotate-and-mask: 1 insn. */ + if ((left_code == ROTATE + || left_code == ASHIFT + || left_code == LSHIFTRT) + && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode)) + { + *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed); + if (!CONST_INT_P (XEXP (left, 1))) + *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed); + *total += COSTS_N_INSNS (1); + return true; + } + + /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */ + HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); + if (rs6000_is_valid_and_mask (XEXP (x, 1), mode) + || (val & 0xffff) == val + || (val & 0xffff0000) == val + || ((val & 0xffff) == 0 && mode == SImode)) + { + *total = rtx_cost (left, mode, AND, 0, speed); + *total += COSTS_N_INSNS (1); + return true; + } + + /* 2 insns. */ + if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode)) + { + *total = rtx_cost (left, mode, AND, 0, speed); + *total += COSTS_N_INSNS (2); + return true; + } + } + + *total = COSTS_N_INSNS (1); + return false; + + case IOR: + /* FIXME */ + *total = COSTS_N_INSNS (1); + return true; + + case CLZ: + case XOR: + case ZERO_EXTRACT: + *total = COSTS_N_INSNS (1); + return false; + + case ASHIFT: + /* The EXTSWSLI instruction is a combined instruction. Don't count both + the sign extend and shift separately within the insn. */ + if (TARGET_EXTSWSLI && mode == DImode + && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND + && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode) + { + *total = 0; + return false; + } + /* fall through */ + + case ASHIFTRT: + case LSHIFTRT: + case ROTATE: + case ROTATERT: + /* Handle mul_highpart. 
*/ + if (outer_code == TRUNCATE + && GET_CODE (XEXP (x, 0)) == MULT) + { + if (mode == DImode) + *total = rs6000_cost->muldi; + else + *total = rs6000_cost->mulsi; + return true; + } + else if (outer_code == AND) + *total = 0; + else + *total = COSTS_N_INSNS (1); + return false; + + case SIGN_EXTEND: + case ZERO_EXTEND: + if (GET_CODE (XEXP (x, 0)) == MEM) + *total = 0; + else + *total = COSTS_N_INSNS (1); + return false; + + case COMPARE: + case NEG: + case ABS: + if (!FLOAT_MODE_P (mode)) + { + *total = COSTS_N_INSNS (1); + return false; + } + /* FALLTHRU */ + + case FLOAT: + case UNSIGNED_FLOAT: + case FIX: + case UNSIGNED_FIX: + case FLOAT_TRUNCATE: + *total = rs6000_cost->fp; + return false; + + case FLOAT_EXTEND: + if (mode == DFmode) + *total = rs6000_cost->sfdf_convert; + else + *total = rs6000_cost->fp; + return false; + + case UNSPEC: + switch (XINT (x, 1)) + { + case UNSPEC_FRSP: + *total = rs6000_cost->fp; + return true; + + default: + break; + } + break; + + case CALL: + case IF_THEN_ELSE: + if (!speed) + { + *total = COSTS_N_INSNS (1); + return true; + } + else if (FLOAT_MODE_P (mode) + && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS) + { + *total = rs6000_cost->fp; + return false; + } + break; + + case NE: + case EQ: + case GTU: + case LTU: + /* Carry bit requires mode == Pmode. + NEG or PLUS already counted so only add one. */ + if (mode == Pmode + && (outer_code == NEG || outer_code == PLUS)) + { + *total = COSTS_N_INSNS (1); + return true; + } + if (outer_code == SET) + { + if (XEXP (x, 1) == const0_rtx) + { + if (TARGET_ISEL && !TARGET_MFCRF) + *total = COSTS_N_INSNS (8); + else + *total = COSTS_N_INSNS (2); + return true; + } + else + { + *total = COSTS_N_INSNS (3); + return false; + } + } + /* FALLTHRU */ + + case GT: + case LT: + case UNORDERED: + if (outer_code == SET && (XEXP (x, 1) == const0_rtx)) + { + if (TARGET_ISEL && !TARGET_MFCRF) + *total = COSTS_N_INSNS (8); + else + *total = COSTS_N_INSNS (2); + return true; + } + /* CC COMPARE. */ + if (outer_code == COMPARE) + { + *total = 0; + return true; + } + break; + + default: + break; + } + + return false; +} + +/* Debug form of r6000_rtx_costs that is selected if -mdebug=cost. */ + +static bool +rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code, + int opno, int *total, bool speed) +{ + bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed); + + fprintf (stderr, + "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, " + "opno = %d, total = %d, speed = %s, x:\n", + ret ? "complete" : "scan inner", + GET_MODE_NAME (mode), + GET_RTX_NAME (outer_code), + opno, + *total, + speed ? "true" : "false"); + + debug_rtx (x); + + return ret; +} + +/* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */ + +static int +rs6000_debug_address_cost (rtx x, machine_mode mode, + addr_space_t as, bool speed) +{ + int ret = TARGET_ADDRESS_COST (x, mode, as, speed); + + fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n", + ret, speed ? "true" : "false"); + debug_rtx (x); + + return ret; +} + + +/* A C expression returning the cost of moving data from a register of class + CLASS1 to one of CLASS2. */ + +static int +rs6000_register_move_cost (machine_mode mode, + reg_class_t from, reg_class_t to) +{ + int ret; + + if (TARGET_DEBUG_COST) + dbg_cost_ctrl++; + + /* Moves from/to GENERAL_REGS. */ + if (reg_classes_intersect_p (to, GENERAL_REGS) + || reg_classes_intersect_p (from, GENERAL_REGS)) + { + reg_class_t rclass = from; + + if (! 
reg_classes_intersect_p (to, GENERAL_REGS)) + rclass = to; + + if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS) + ret = (rs6000_memory_move_cost (mode, rclass, false) + + rs6000_memory_move_cost (mode, GENERAL_REGS, false)); + + /* It's more expensive to move CR_REGS than CR0_REGS because of the + shift. */ + else if (rclass == CR_REGS) + ret = 4; + + /* For those processors that have slow LR/CTR moves, make them more + expensive than memory in order to bias spills to memory. */ + else if ((rs6000_cpu == PROCESSOR_POWER6 + || rs6000_cpu == PROCESSOR_POWER7 + || rs6000_cpu == PROCESSOR_POWER8 + || rs6000_cpu == PROCESSOR_POWER9) + && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS)) + ret = 6 * hard_regno_nregs[0][mode]; + + else + /* A move will cost one instruction per GPR moved. */ + ret = 2 * hard_regno_nregs[0][mode]; + } + + /* If we have VSX, we can easily move between FPR or Altivec registers. */ + else if (VECTOR_MEM_VSX_P (mode) + && reg_classes_intersect_p (to, VSX_REGS) + && reg_classes_intersect_p (from, VSX_REGS)) + ret = 2 * hard_regno_nregs[FIRST_FPR_REGNO][mode]; + + /* Moving between two similar registers is just one instruction. */ + else if (reg_classes_intersect_p (to, from)) + ret = (FLOAT128_2REG_P (mode)) ? 4 : 2; + + /* Everything else has to go through GENERAL_REGS. */ + else + ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to) + + rs6000_register_move_cost (mode, from, GENERAL_REGS)); + + if (TARGET_DEBUG_COST) + { + if (dbg_cost_ctrl == 1) + fprintf (stderr, + "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n", + ret, GET_MODE_NAME (mode), reg_class_names[from], + reg_class_names[to]); + dbg_cost_ctrl--; + } + + return ret; +} + +/* A C expression returning the cost of moving data of MODE from a register to + or from memory. */ + +static int +rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass, + bool in ATTRIBUTE_UNUSED) +{ + int ret; + + if (TARGET_DEBUG_COST) + dbg_cost_ctrl++; + + if (reg_classes_intersect_p (rclass, GENERAL_REGS)) + ret = 4 * hard_regno_nregs[0][mode]; + else if ((reg_classes_intersect_p (rclass, FLOAT_REGS) + || reg_classes_intersect_p (rclass, VSX_REGS))) + ret = 4 * hard_regno_nregs[32][mode]; + else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS)) + ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode]; + else + ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS); + + if (TARGET_DEBUG_COST) + { + if (dbg_cost_ctrl == 1) + fprintf (stderr, + "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n", + ret, GET_MODE_NAME (mode), reg_class_names[rclass], in); + dbg_cost_ctrl--; + } + + return ret; +} + +/* Returns a code for a target-specific builtin that implements + reciprocal of the function, or NULL_TREE if not available. */ + +static tree +rs6000_builtin_reciprocal (tree fndecl) +{ + switch (DECL_FUNCTION_CODE (fndecl)) + { + case VSX_BUILTIN_XVSQRTDP: + if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode)) + return NULL_TREE; + + return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF]; + + case VSX_BUILTIN_XVSQRTSP: + if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode)) + return NULL_TREE; + + return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF]; + + default: + return NULL_TREE; + } +} + +/* Load up a constant. If the mode is a vector mode, splat the value across + all of the vector elements. 
*/ + +static rtx +rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst) +{ + rtx reg; + + if (mode == SFmode || mode == DFmode) + { + rtx d = const_double_from_real_value (dconst, mode); + reg = force_reg (mode, d); + } + else if (mode == V4SFmode) + { + rtx d = const_double_from_real_value (dconst, SFmode); + rtvec v = gen_rtvec (4, d, d, d, d); + reg = gen_reg_rtx (mode); + rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v)); + } + else if (mode == V2DFmode) + { + rtx d = const_double_from_real_value (dconst, DFmode); + rtvec v = gen_rtvec (2, d, d); + reg = gen_reg_rtx (mode); + rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v)); + } + else + gcc_unreachable (); + + return reg; +} + +/* Generate an FMA instruction. */ + +static void +rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a) +{ + machine_mode mode = GET_MODE (target); + rtx dst; + + dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0); + gcc_assert (dst != NULL); + + if (dst != target) + emit_move_insn (target, dst); +} + +/* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */ + +static void +rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a) +{ + machine_mode mode = GET_MODE (dst); + rtx r; + + /* This is a tad more complicated, since the fnma_optab is for + a different expression: fma(-m1, m2, a), which is the same + thing except in the case of signed zeros. + + Fortunately we know that if FMA is supported that FNMSUB is + also supported in the ISA. Just expand it directly. */ + + gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing); + + r = gen_rtx_NEG (mode, a); + r = gen_rtx_FMA (mode, m1, m2, r); + r = gen_rtx_NEG (mode, r); + emit_insn (gen_rtx_SET (dst, r)); +} + +/* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P, + add a reg_note saying that this was a division. Support both scalar and + vector divide. Assumes no trapping math and finite arguments. */ + +void +rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p) +{ + machine_mode mode = GET_MODE (dst); + rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v; + int i; + + /* Low precision estimates guarantee 5 bits of accuracy. High + precision estimates guarantee 14 bits of accuracy. SFmode + requires 23 bits of accuracy. DFmode requires 52 bits of + accuracy. Each pass at least doubles the accuracy, leading + to the following. */ + int passes = (TARGET_RECIP_PRECISION) ? 1 : 3; + if (mode == DFmode || mode == V2DFmode) + passes++; + + enum insn_code code = optab_handler (smul_optab, mode); + insn_gen_fn gen_mul = GEN_FCN (code); + + gcc_assert (code != CODE_FOR_nothing); + + one = rs6000_load_constant_and_splat (mode, dconst1); + + /* x0 = 1./d estimate */ + x0 = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d), + UNSPEC_FRES))); + + /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */ + if (passes > 1) { + + /* e0 = 1. - d * x0 */ + e0 = gen_reg_rtx (mode); + rs6000_emit_nmsub (e0, d, x0, one); + + /* x1 = x0 + e0 * x0 */ + x1 = gen_reg_rtx (mode); + rs6000_emit_madd (x1, e0, x0, x0); + + for (i = 0, xprev = x1, eprev = e0; i < passes - 2; + ++i, xprev = xnext, eprev = enext) { + + /* enext = eprev * eprev */ + enext = gen_reg_rtx (mode); + emit_insn (gen_mul (enext, eprev, eprev)); + + /* xnext = xprev + enext * xprev */ + xnext = gen_reg_rtx (mode); + rs6000_emit_madd (xnext, enext, xprev, xprev); + } + + } else + xprev = x0; + + /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). 
*/ + + /* u = n * xprev */ + u = gen_reg_rtx (mode); + emit_insn (gen_mul (u, n, xprev)); + + /* v = n - (d * u) */ + v = gen_reg_rtx (mode); + rs6000_emit_nmsub (v, d, u, n); + + /* dst = (v * xprev) + u */ + rs6000_emit_madd (dst, v, xprev, u); + + if (note_p) + add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d)); +} + +/* Goldschmidt's Algorithm for single/double-precision floating point + sqrt and rsqrt. Assumes no trapping math and finite arguments. */ + +void +rs6000_emit_swsqrt (rtx dst, rtx src, bool recip) +{ + machine_mode mode = GET_MODE (src); + rtx e = gen_reg_rtx (mode); + rtx g = gen_reg_rtx (mode); + rtx h = gen_reg_rtx (mode); + + /* Low precision estimates guarantee 5 bits of accuracy. High + precision estimates guarantee 14 bits of accuracy. SFmode + requires 23 bits of accuracy. DFmode requires 52 bits of + accuracy. Each pass at least doubles the accuracy, leading + to the following. */ + int passes = (TARGET_RECIP_PRECISION) ? 1 : 3; + if (mode == DFmode || mode == V2DFmode) + passes++; + + int i; + rtx mhalf; + enum insn_code code = optab_handler (smul_optab, mode); + insn_gen_fn gen_mul = GEN_FCN (code); + + gcc_assert (code != CODE_FOR_nothing); + + mhalf = rs6000_load_constant_and_splat (mode, dconsthalf); + + /* e = rsqrt estimate */ + emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src), + UNSPEC_RSQRT))); + + /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */ + if (!recip) + { + rtx zero = force_reg (mode, CONST0_RTX (mode)); + + if (mode == SFmode) + { + rtx target = emit_conditional_move (e, GT, src, zero, mode, + e, zero, mode, 0); + if (target != e) + emit_move_insn (e, target); + } + else + { + rtx cond = gen_rtx_GT (VOIDmode, e, zero); + rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero); + } + } + + /* g = sqrt estimate. */ + emit_insn (gen_mul (g, e, src)); + /* h = 1/(2*sqrt) estimate. */ + emit_insn (gen_mul (h, e, mhalf)); + + if (recip) + { + if (passes == 1) + { + rtx t = gen_reg_rtx (mode); + rs6000_emit_nmsub (t, g, h, mhalf); + /* Apply correction directly to 1/rsqrt estimate. */ + rs6000_emit_madd (dst, e, t, e); + } + else + { + for (i = 0; i < passes; i++) + { + rtx t1 = gen_reg_rtx (mode); + rtx g1 = gen_reg_rtx (mode); + rtx h1 = gen_reg_rtx (mode); + + rs6000_emit_nmsub (t1, g, h, mhalf); + rs6000_emit_madd (g1, g, t1, g); + rs6000_emit_madd (h1, h, t1, h); + + g = g1; + h = h1; + } + /* Multiply by 2 for 1/rsqrt. */ + emit_insn (gen_add3_insn (dst, h, h)); + } + } + else + { + rtx t = gen_reg_rtx (mode); + rs6000_emit_nmsub (t, g, h, mhalf); + rs6000_emit_madd (dst, g, t, g); + } + + return; +} + +/* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD + (Power7) targets. DST is the target, and SRC is the argument operand. */ + +void +rs6000_emit_popcount (rtx dst, rtx src) +{ + machine_mode mode = GET_MODE (dst); + rtx tmp1, tmp2; + + /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. 
*/ + if (TARGET_POPCNTD) + { + if (mode == SImode) + emit_insn (gen_popcntdsi2 (dst, src)); + else + emit_insn (gen_popcntddi2 (dst, src)); + return; + } + + tmp1 = gen_reg_rtx (mode); + + if (mode == SImode) + { + emit_insn (gen_popcntbsi2 (tmp1, src)); + tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101), + NULL_RTX, 0); + tmp2 = force_reg (SImode, tmp2); + emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24))); + } + else + { + emit_insn (gen_popcntbdi2 (tmp1, src)); + tmp2 = expand_mult (DImode, tmp1, + GEN_INT ((HOST_WIDE_INT) + 0x01010101 << 32 | 0x01010101), + NULL_RTX, 0); + tmp2 = force_reg (DImode, tmp2); + emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56))); + } +} + + +/* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the + target, and SRC is the argument operand. */ + +void +rs6000_emit_parity (rtx dst, rtx src) +{ + machine_mode mode = GET_MODE (dst); + rtx tmp; + + tmp = gen_reg_rtx (mode); + + /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */ + if (TARGET_CMPB) + { + if (mode == SImode) + { + emit_insn (gen_popcntbsi2 (tmp, src)); + emit_insn (gen_paritysi2_cmpb (dst, tmp)); + } + else + { + emit_insn (gen_popcntbdi2 (tmp, src)); + emit_insn (gen_paritydi2_cmpb (dst, tmp)); + } + return; + } + + if (mode == SImode) + { + /* Is mult+shift >= shift+xor+shift+xor? */ + if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3)) + { + rtx tmp1, tmp2, tmp3, tmp4; + + tmp1 = gen_reg_rtx (SImode); + emit_insn (gen_popcntbsi2 (tmp1, src)); + + tmp2 = gen_reg_rtx (SImode); + emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16))); + tmp3 = gen_reg_rtx (SImode); + emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2)); + + tmp4 = gen_reg_rtx (SImode); + emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8))); + emit_insn (gen_xorsi3 (tmp, tmp3, tmp4)); + } + else + rs6000_emit_popcount (tmp, src); + emit_insn (gen_andsi3 (dst, tmp, const1_rtx)); + } + else + { + /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */ + if (rs6000_cost->muldi >= COSTS_N_INSNS (5)) + { + rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; + + tmp1 = gen_reg_rtx (DImode); + emit_insn (gen_popcntbdi2 (tmp1, src)); + + tmp2 = gen_reg_rtx (DImode); + emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32))); + tmp3 = gen_reg_rtx (DImode); + emit_insn (gen_xordi3 (tmp3, tmp1, tmp2)); + + tmp4 = gen_reg_rtx (DImode); + emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16))); + tmp5 = gen_reg_rtx (DImode); + emit_insn (gen_xordi3 (tmp5, tmp3, tmp4)); + + tmp6 = gen_reg_rtx (DImode); + emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8))); + emit_insn (gen_xordi3 (tmp, tmp5, tmp6)); + } + else + rs6000_emit_popcount (tmp, src); + emit_insn (gen_anddi3 (dst, tmp, const1_rtx)); + } +} + +/* Expand an Altivec constant permutation for little endian mode. + There are two issues: First, the two input operands must be + swapped so that together they form a double-wide array in LE + order. Second, the vperm instruction has surprising behavior + in LE mode: it interprets the elements of the source vectors + in BE mode ("left to right") and interprets the elements of + the destination vector in LE mode ("right to left"). To + correct for this, we must subtract each element of the permute + control vector from 31. + + For example, suppose we want to concatenate vr10 = {0, 1, 2, 3} + with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm. + We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to + serve as the permute control vector. Then, in BE mode, + + vperm 9,10,11,12 + + places the desired result in vr9. 
However, in LE mode the + vector contents will be + + vr10 = 00000003 00000002 00000001 00000000 + vr11 = 00000007 00000006 00000005 00000004 + + The result of the vperm using the same permute control vector is + + vr9 = 05000000 07000000 01000000 03000000 + + That is, the leftmost 4 bytes of vr10 are interpreted as the + source for the rightmost 4 bytes of vr9, and so on. + + If we change the permute control vector to + + vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4} + + and issue + + vperm 9,11,10,12 + + we get the desired + + vr9 = 00000006 00000004 00000002 00000000. */ + +void +altivec_expand_vec_perm_const_le (rtx operands[4]) +{ + unsigned int i; + rtx perm[16]; + rtx constv, unspec; + rtx target = operands[0]; + rtx op0 = operands[1]; + rtx op1 = operands[2]; + rtx sel = operands[3]; + + /* Unpack and adjust the constant selector. */ + for (i = 0; i < 16; ++i) + { + rtx e = XVECEXP (sel, 0, i); + unsigned int elt = 31 - (INTVAL (e) & 31); + perm[i] = GEN_INT (elt); + } + + /* Expand to a permute, swapping the inputs and using the + adjusted selector. */ + if (!REG_P (op0)) + op0 = force_reg (V16QImode, op0); + if (!REG_P (op1)) + op1 = force_reg (V16QImode, op1); + + constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)); + constv = force_reg (V16QImode, constv); + unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv), + UNSPEC_VPERM); + if (!REG_P (target)) + { + rtx tmp = gen_reg_rtx (V16QImode); + emit_move_insn (tmp, unspec); + unspec = tmp; + } + + emit_move_insn (target, unspec); +} + +/* Similarly to altivec_expand_vec_perm_const_le, we must adjust the + permute control vector. But here it's not a constant, so we must + generate a vector NAND or NOR to do the adjustment. */ + +void +altivec_expand_vec_perm_le (rtx operands[4]) +{ + rtx notx, iorx, unspec; + rtx target = operands[0]; + rtx op0 = operands[1]; + rtx op1 = operands[2]; + rtx sel = operands[3]; + rtx tmp = target; + rtx norreg = gen_reg_rtx (V16QImode); + machine_mode mode = GET_MODE (target); + + /* Get everything in regs so the pattern matches. */ + if (!REG_P (op0)) + op0 = force_reg (mode, op0); + if (!REG_P (op1)) + op1 = force_reg (mode, op1); + if (!REG_P (sel)) + sel = force_reg (V16QImode, sel); + if (!REG_P (target)) + tmp = gen_reg_rtx (mode); + + if (TARGET_P9_VECTOR) + { + unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel), + UNSPEC_VPERMR); + } + else + { + /* Invert the selector with a VNAND if available, else a VNOR. + The VNAND is preferred for future fusion opportunities. */ + notx = gen_rtx_NOT (V16QImode, sel); + iorx = (TARGET_P8_VECTOR + ? gen_rtx_IOR (V16QImode, notx, notx) + : gen_rtx_AND (V16QImode, notx, notx)); + emit_insn (gen_rtx_SET (norreg, iorx)); + + /* Permute with operands reversed and adjusted selector. */ + unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg), + UNSPEC_VPERM); + } + + /* Copy into target, possibly by way of a register. */ + if (!REG_P (target)) + { + emit_move_insn (tmp, unspec); + unspec = tmp; + } + + emit_move_insn (target, unspec); +} + +/* Expand an Altivec constant permutation. Return true if we match + an efficient implementation; false to fall back to VPERM.
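+   The efficient cases are the single-instruction forms: whole-vector
+   splats (vspltb/vsplth/vspltw) and the merge and pack patterns listed
+   in the table below.  For instance, a selector that repeats the byte
+   pair {0,1} across all sixteen positions is matched by the vsplth
+   path instead of being expanded to a vperm.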
*/ + +bool +altivec_expand_vec_perm_const (rtx operands[4]) +{ + struct altivec_perm_insn { + HOST_WIDE_INT mask; + enum insn_code impl; + unsigned char perm[16]; + }; + static const struct altivec_perm_insn patterns[] = { + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct, + { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } }, + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct, + { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } }, + { OPTION_MASK_ALTIVEC, + (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct + : CODE_FOR_altivec_vmrglb_direct), + { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } }, + { OPTION_MASK_ALTIVEC, + (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct + : CODE_FOR_altivec_vmrglh_direct), + { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } }, + { OPTION_MASK_ALTIVEC, + (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct + : CODE_FOR_altivec_vmrglw_direct), + { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } }, + { OPTION_MASK_ALTIVEC, + (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct + : CODE_FOR_altivec_vmrghb_direct), + { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } }, + { OPTION_MASK_ALTIVEC, + (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct + : CODE_FOR_altivec_vmrghh_direct), + { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } }, + { OPTION_MASK_ALTIVEC, + (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct + : CODE_FOR_altivec_vmrghw_direct), + { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }, + { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew, + { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } }, + { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow, + { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } } + }; + + unsigned int i, j, elt, which; + unsigned char perm[16]; + rtx target, op0, op1, sel, x; + bool one_vec; + + target = operands[0]; + op0 = operands[1]; + op1 = operands[2]; + sel = operands[3]; + + /* Unpack the constant selector. */ + for (i = which = 0; i < 16; ++i) + { + rtx e = XVECEXP (sel, 0, i); + elt = INTVAL (e) & 31; + which |= (elt < 16 ? 1 : 2); + perm[i] = elt; + } + + /* Simplify the constant selector based on operands. */ + switch (which) + { + default: + gcc_unreachable (); + + case 3: + one_vec = false; + if (!rtx_equal_p (op0, op1)) + break; + /* FALLTHRU */ + + case 2: + for (i = 0; i < 16; ++i) + perm[i] &= 15; + op0 = op1; + one_vec = true; + break; + + case 1: + op1 = op0; + one_vec = true; + break; + } + + /* Look for splat patterns. */ + if (one_vec) + { + elt = perm[0]; + + for (i = 0; i < 16; ++i) + if (perm[i] != elt) + break; + if (i == 16) + { + if (!BYTES_BIG_ENDIAN) + elt = 15 - elt; + emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt))); + return true; + } + + if (elt % 2 == 0) + { + for (i = 0; i < 16; i += 2) + if (perm[i] != elt || perm[i + 1] != elt + 1) + break; + if (i == 16) + { + int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2; + x = gen_reg_rtx (V8HImode); + emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0), + GEN_INT (field))); + emit_move_insn (target, gen_lowpart (V16QImode, x)); + return true; + } + } + + if (elt % 4 == 0) + { + for (i = 0; i < 16; i += 4) + if (perm[i] != elt + || perm[i + 1] != elt + 1 + || perm[i + 2] != elt + 2 + || perm[i + 3] != elt + 3) + break; + if (i == 16) + { + int field = BYTES_BIG_ENDIAN ? 
elt / 4 : 3 - elt / 4; + x = gen_reg_rtx (V4SImode); + emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0), + GEN_INT (field))); + emit_move_insn (target, gen_lowpart (V16QImode, x)); + return true; + } + } + } + + /* Look for merge and pack patterns. */ + for (j = 0; j < ARRAY_SIZE (patterns); ++j) + { + bool swapped; + + if ((patterns[j].mask & rs6000_isa_flags) == 0) + continue; + + elt = patterns[j].perm[0]; + if (perm[0] == elt) + swapped = false; + else if (perm[0] == elt + 16) + swapped = true; + else + continue; + for (i = 1; i < 16; ++i) + { + elt = patterns[j].perm[i]; + if (swapped) + elt = (elt >= 16 ? elt - 16 : elt + 16); + else if (one_vec && elt >= 16) + elt -= 16; + if (perm[i] != elt) + break; + } + if (i == 16) + { + enum insn_code icode = patterns[j].impl; + machine_mode omode = insn_data[icode].operand[0].mode; + machine_mode imode = insn_data[icode].operand[1].mode; + + /* For little-endian, don't use vpkuwum and vpkuhum if the + underlying vector type is not V4SI and V8HI, respectively. + For example, using vpkuwum with a V8HI picks up the even + halfwords (BE numbering) when the even halfwords (LE + numbering) are what we need. */ + if (!BYTES_BIG_ENDIAN + && icode == CODE_FOR_altivec_vpkuwum_direct + && ((GET_CODE (op0) == REG + && GET_MODE (op0) != V4SImode) + || (GET_CODE (op0) == SUBREG + && GET_MODE (XEXP (op0, 0)) != V4SImode))) + continue; + if (!BYTES_BIG_ENDIAN + && icode == CODE_FOR_altivec_vpkuhum_direct + && ((GET_CODE (op0) == REG + && GET_MODE (op0) != V8HImode) + || (GET_CODE (op0) == SUBREG + && GET_MODE (XEXP (op0, 0)) != V8HImode))) + continue; + + /* For little-endian, the two input operands must be swapped + (or swapped back) to ensure proper right-to-left numbering + from 0 to 2N-1. */ + if (swapped ^ !BYTES_BIG_ENDIAN) + std::swap (op0, op1); + if (imode != V16QImode) + { + op0 = gen_lowpart (imode, op0); + op1 = gen_lowpart (imode, op1); + } + if (omode == V16QImode) + x = target; + else + x = gen_reg_rtx (omode); + emit_insn (GEN_FCN (icode) (x, op0, op1)); + if (omode != V16QImode) + emit_move_insn (target, gen_lowpart (V16QImode, x)); + return true; + } + } + + if (!BYTES_BIG_ENDIAN) + { + altivec_expand_vec_perm_const_le (operands); + return true; + } + + return false; +} + +/* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation. + Return true if we match an efficient implementation. */ + +static bool +rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1, + unsigned char perm0, unsigned char perm1) +{ + rtx x; + + /* If both selectors come from the same operand, fold to single op. */ + if ((perm0 & 2) == (perm1 & 2)) + { + if (perm0 & 2) + op0 = op1; + else + op1 = op0; + } + /* If both operands are equal, fold to simpler permutation. */ + if (rtx_equal_p (op0, op1)) + { + perm0 = perm0 & 1; + perm1 = (perm1 & 1) + 2; + } + /* If the first selector comes from the second operand, swap. */ + else if (perm0 & 2) + { + if (perm1 & 2) + return false; + perm0 -= 2; + perm1 += 2; + std::swap (op0, op1); + } + /* If the second selector does not come from the second operand, fail. */ + else if ((perm1 & 2) == 0) + return false; + + /* Success! 
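+     At this point perm0 selects within op0 and perm1 within op1, so the
+     permutation can be emitted (below) as a vec_select from the
+     double-wide vec_concat of the two inputs; e.g. perm0 = 0 and
+     perm1 = 3 picks element 0 of op0 and element 1 of op1.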
*/ + if (target != NULL) + { + machine_mode vmode, dmode; + rtvec v; + + vmode = GET_MODE (target); + gcc_assert (GET_MODE_NUNITS (vmode) == 2); + dmode = mode_for_vector (GET_MODE_INNER (vmode), 4); + x = gen_rtx_VEC_CONCAT (dmode, op0, op1); + v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1)); + x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (target, x)); + } + return true; +} + +bool +rs6000_expand_vec_perm_const (rtx operands[4]) +{ + rtx target, op0, op1, sel; + unsigned char perm0, perm1; + + target = operands[0]; + op0 = operands[1]; + op1 = operands[2]; + sel = operands[3]; + + /* Unpack the constant selector. */ + perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3; + perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3; + + return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1); +} + +/* Test whether a constant permutation is supported. */ + +static bool +rs6000_vectorize_vec_perm_const_ok (machine_mode vmode, + const unsigned char *sel) +{ + /* AltiVec (and thus VSX) can handle arbitrary permutations. */ + if (TARGET_ALTIVEC) + return true; + + /* Check for ps_merge* or evmerge* insns. */ + if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode) + || (TARGET_SPE && vmode == V2SImode)) + { + rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1); + rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2); + return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]); + } + + return false; +} + +/* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */ + +static void +rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1, + machine_mode vmode, unsigned nelt, rtx perm[]) +{ + machine_mode imode; + rtx x; + + imode = vmode; + if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT) + { + imode = mode_for_size (GET_MODE_UNIT_BITSIZE (vmode), MODE_INT, 0); + imode = mode_for_vector (imode, nelt); + } + + x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm)); + x = expand_vec_perm (vmode, op0, op1, x, target); + if (x != target) + emit_move_insn (target, x); +} + +/* Expand an extract even operation. */ + +void +rs6000_expand_extract_even (rtx target, rtx op0, rtx op1) +{ + machine_mode vmode = GET_MODE (target); + unsigned i, nelt = GET_MODE_NUNITS (vmode); + rtx perm[16]; + + for (i = 0; i < nelt; i++) + perm[i] = GEN_INT (i * 2); + + rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm); +} + +/* Expand a vector interleave operation. */ + +void +rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp) +{ + machine_mode vmode = GET_MODE (target); + unsigned i, high, nelt = GET_MODE_NUNITS (vmode); + rtx perm[16]; + + high = (highp ? 0 : nelt / 2); + for (i = 0; i < nelt / 2; i++) + { + perm[i * 2] = GEN_INT (i + high); + perm[i * 2 + 1] = GEN_INT (i + nelt + high); + } + + rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm); +} + +/* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */ +void +rs6000_scale_v2df (rtx tgt, rtx src, int scale) +{ + HOST_WIDE_INT hwi_scale (scale); + REAL_VALUE_TYPE r_pow; + rtvec v = rtvec_alloc (2); + rtx elt; + rtx scale_vec = gen_reg_rtx (V2DFmode); + (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale); + elt = const_double_from_real_value (r_pow, DFmode); + RTVEC_ELT (v, 0) = elt; + RTVEC_ELT (v, 1) = elt; + rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v)); + emit_insn (gen_mulv2df3 (tgt, src, scale_vec)); +} + +/* Return an RTX representing where to find the function value of a + function returning MODE. 
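+   For complex modes whose halves do not fill a full register, the value
+   is described as a PARALLEL spreading the real and imaginary parts over
+   two consecutive registers; e.g. with hard float an SCmode result comes
+   back as two SFmode pieces in f1 and f2 at byte offsets 0 and 4.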
*/ +static rtx +rs6000_complex_function_value (machine_mode mode) +{ + unsigned int regno; + rtx r1, r2; + machine_mode inner = GET_MODE_INNER (mode); + unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode); + + if (TARGET_FLOAT128_TYPE + && (mode == KCmode + || (mode == TCmode && TARGET_IEEEQUAD))) + regno = ALTIVEC_ARG_RETURN; + + else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS) + regno = FP_ARG_RETURN; + + else + { + regno = GP_ARG_RETURN; + + /* 32-bit is OK since it'll go in r3/r4. */ + if (TARGET_32BIT && inner_bytes >= 4) + return gen_rtx_REG (mode, regno); + } + + if (inner_bytes >= 8) + return gen_rtx_REG (mode, regno); + + r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno), + const0_rtx); + r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1), + GEN_INT (inner_bytes)); + return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2)); +} + +/* Return an rtx describing a return value of MODE as a PARALLEL + in N_ELTS registers, each of mode ELT_MODE, starting at REGNO, + stride REG_STRIDE. */ + +static rtx +rs6000_parallel_return (machine_mode mode, + int n_elts, machine_mode elt_mode, + unsigned int regno, unsigned int reg_stride) +{ + rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts)); + + int i; + for (i = 0; i < n_elts; i++) + { + rtx r = gen_rtx_REG (elt_mode, regno); + rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode)); + XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off); + regno += reg_stride; + } + + return par; +} + +/* Target hook for TARGET_FUNCTION_VALUE. + + On the SPE, both FPs and vectors are returned in r3. + + On RS/6000 an integer value is in r3 and a floating-point value is in + fp1, unless -msoft-float. */ + +static rtx +rs6000_function_value (const_tree valtype, + const_tree fn_decl_or_type ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + machine_mode mode; + unsigned int regno; + machine_mode elt_mode; + int n_elts; + + /* Special handling for structs in darwin64. */ + if (TARGET_MACHO + && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype)) + { + CUMULATIVE_ARGS valcum; + rtx valret; + + valcum.words = 0; + valcum.fregno = FP_ARG_MIN_REG; + valcum.vregno = ALTIVEC_ARG_MIN_REG; + /* Do a trial code generation as if this were going to be passed as + an argument; if any part goes in memory, we return NULL. */ + valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true); + if (valret) + return valret; + /* Otherwise fall through to standard ABI rules. */ + } + + mode = TYPE_MODE (valtype); + + /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */ + if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts)) + { + int first_reg, n_regs; + + if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode)) + { + /* _Decimal128 must use even/odd register pairs. */ + first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN; + n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3; + } + else + { + first_reg = ALTIVEC_ARG_RETURN; + n_regs = 1; + } + + return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs); + } + + /* Some return value types need be split in -mpowerpc64, 32bit ABI. */ + if (TARGET_32BIT && TARGET_POWERPC64) + switch (mode) + { + default: + break; + case DImode: + case SCmode: + case DCmode: + case TCmode: + int count = GET_MODE_SIZE (mode) / 4; + return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1); + } + + if ((INTEGRAL_TYPE_P (valtype) + && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 
32 : 64)) + || POINTER_TYPE_P (valtype)) + mode = TARGET_32BIT ? SImode : DImode; + + if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS) + /* _Decimal128 must use an even/odd register pair. */ + regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN; + else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS + && !FLOAT128_VECTOR_P (mode) + && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT)) + regno = FP_ARG_RETURN; + else if (TREE_CODE (valtype) == COMPLEX_TYPE + && targetm.calls.split_complex_arg) + return rs6000_complex_function_value (mode); + /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same + return register is used in both cases, and we won't see V2DImode/V2DFmode + for pure altivec, combine the two cases. */ + else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode)) + && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI + && ALTIVEC_OR_VSX_VECTOR_MODE (mode)) + regno = ALTIVEC_ARG_RETURN; + else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT + && (mode == DFmode || mode == DCmode + || FLOAT128_IBM_P (mode) || mode == TCmode)) + return spe_build_register_parallel (mode, GP_ARG_RETURN); + else + regno = GP_ARG_RETURN; + + return gen_rtx_REG (mode, regno); +} + +/* Define how to find the value returned by a library function + assuming the value has mode MODE. */ +rtx +rs6000_libcall_value (machine_mode mode) +{ + unsigned int regno; + + /* Long long return value need be split in -mpowerpc64, 32bit ABI. */ + if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode) + return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1); + + if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS) + /* _Decimal128 must use an even/odd register pair. */ + regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN; + else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) + && TARGET_HARD_FLOAT && TARGET_FPRS + && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT)) + regno = FP_ARG_RETURN; + /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same + return register is used in both cases, and we won't see V2DImode/V2DFmode + for pure altivec, combine the two cases. */ + else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) + && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI) + regno = ALTIVEC_ARG_RETURN; + else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg) + return rs6000_complex_function_value (mode); + else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT + && (mode == DFmode || mode == DCmode + || FLOAT128_IBM_P (mode) || mode == TCmode)) + return spe_build_register_parallel (mode, GP_ARG_RETURN); + else + regno = GP_ARG_RETURN; + + return gen_rtx_REG (mode, regno); +} + + +/* Return true if we use LRA instead of reload pass. */ +static bool +rs6000_lra_p (void) +{ + return TARGET_LRA; +} + +/* Compute register pressure classes. We implement the target hook to avoid + IRA picking something like NON_SPECIAL_REGS as a pressure class, which can + lead to incorrect estimates of number of available registers and therefore + increased register pressure/spill.
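+   For example, with VSX enabled the classes reported below are
+   GENERAL_REGS, VSX_REGS, CR_REGS and SPECIAL_REGS; without VSX,
+   ALTIVEC_REGS and/or FLOAT_REGS take the place of VSX_REGS.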
*/ +static int +rs6000_compute_pressure_classes (enum reg_class *pressure_classes) +{ + int n; + + n = 0; + pressure_classes[n++] = GENERAL_REGS; + if (TARGET_VSX) + pressure_classes[n++] = VSX_REGS; + else + { + if (TARGET_ALTIVEC) + pressure_classes[n++] = ALTIVEC_REGS; + if (TARGET_HARD_FLOAT && TARGET_FPRS) + pressure_classes[n++] = FLOAT_REGS; + } + pressure_classes[n++] = CR_REGS; + pressure_classes[n++] = SPECIAL_REGS; + + return n; +} + +/* Given FROM and TO register numbers, say whether this elimination is allowed. + Frame pointer elimination is automatically handled. + + For the RS/6000, if frame pointer elimination is being done, we would like + to convert ap into fp, not sp. + + We need r30 if -mminimal-toc was specified, and there are constant pool + references. */ + +static bool +rs6000_can_eliminate (const int from, const int to) +{ + return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM + ? ! frame_pointer_needed + : from == RS6000_PIC_OFFSET_TABLE_REGNUM + ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC + || constant_pool_empty_p () + : true); +} + +/* Define the offset between two registers, FROM to be eliminated and its + replacement TO, at the start of a routine. */ +HOST_WIDE_INT +rs6000_initial_elimination_offset (int from, int to) +{ + rs6000_stack_t *info = rs6000_stack_info (); + HOST_WIDE_INT offset; + + if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + offset = info->push_p ? 0 : -info->total_size; + else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + { + offset = info->push_p ? 0 : -info->total_size; + if (FRAME_GROWS_DOWNWARD) + offset += info->fixed_size + info->vars_size + info->parm_size; + } + else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) + offset = FRAME_GROWS_DOWNWARD + ? info->fixed_size + info->vars_size + info->parm_size + : 0; + else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) + offset = info->total_size; + else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + offset = info->push_p ? info->total_size : 0; + else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM) + offset = 0; + else + gcc_unreachable (); + + return offset; +} + +static rtx +rs6000_dwarf_register_span (rtx reg) +{ + rtx parts[8]; + int i, words; + unsigned regno = REGNO (reg); + machine_mode mode = GET_MODE (reg); + + if (TARGET_SPE + && regno < 32 + && (SPE_VECTOR_MODE (GET_MODE (reg)) + || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) + && mode != SFmode && mode != SDmode && mode != SCmode))) + ; + else + return NULL_RTX; + + regno = REGNO (reg); + + /* The duality of the SPE register size wreaks all kinds of havoc. + This is a way of distinguishing r0 in 32-bits from r0 in + 64-bits. */ + words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD; + gcc_assert (words <= 4); + for (i = 0; i < words; i++, regno++) + { + if (BYTES_BIG_ENDIAN) + { + parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO); + parts[2 * i + 1] = gen_rtx_REG (SImode, regno); + } + else + { + parts[2 * i] = gen_rtx_REG (SImode, regno); + parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO); + } + } + + return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts)); +} + +/* Fill in sizes for SPE register high parts in table used by unwinder. 
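+   Each high part holds the upper 32 bits of a 64-bit SPE register, so
+   the size recorded for each of them below is simply 4 bytes.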
*/ + +static void +rs6000_init_dwarf_reg_sizes_extra (tree address) +{ + if (TARGET_SPE) + { + int i; + machine_mode mode = TYPE_MODE (char_type_node); + rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL); + rtx mem = gen_rtx_MEM (BLKmode, addr); + rtx value = gen_int_mode (4, mode); + + for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++) + { + int column = DWARF_REG_TO_UNWIND_COLUMN + (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true)); + HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode); + + emit_move_insn (adjust_address (mem, mode, offset), value); + } + } + + if (TARGET_MACHO && ! TARGET_ALTIVEC) + { + int i; + machine_mode mode = TYPE_MODE (char_type_node); + rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL); + rtx mem = gen_rtx_MEM (BLKmode, addr); + rtx value = gen_int_mode (16, mode); + + /* On Darwin, libgcc may be built to run on both G3 and G4/5. + The unwinder still needs to know the size of Altivec registers. */ + + for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++) + { + int column = DWARF_REG_TO_UNWIND_COLUMN + (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true)); + HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode); + + emit_move_insn (adjust_address (mem, mode, offset), value); + } + } +} + +/* Map internal gcc register numbers to debug format register numbers. + FORMAT specifies the type of debug register number to use: + 0 -- debug information, except for frame-related sections + 1 -- DWARF .debug_frame section + 2 -- DWARF .eh_frame section */ + +unsigned int +rs6000_dbx_register_number (unsigned int regno, unsigned int format) +{ + /* We never use the GCC internal number for SPE high registers. + Those are mapped to the 1200..1231 range for all debug formats. */ + if (SPE_HIGH_REGNO_P (regno)) + return regno - FIRST_SPE_HIGH_REGNO + 1200; + + /* Except for the above, we use the internal number for non-DWARF + debug information, and also for .eh_frame. */ + if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2) + return regno; + + /* On some platforms, we use the standard DWARF register + numbering for .debug_info and .debug_frame. */ +#ifdef RS6000_USE_DWARF_NUMBERING + if (regno <= 63) + return regno; + if (regno == LR_REGNO) + return 108; + if (regno == CTR_REGNO) + return 109; + /* Special handling for CR for .debug_frame: rs6000_emit_prologue has + translated any combination of CR2, CR3, CR4 saves to a save of CR2. + The actual code emitted saves the whole of CR, so we map CR2_REGNO + to the DWARF reg for CR. */ + if (format == 1 && regno == CR2_REGNO) + return 64; + if (CR_REGNO_P (regno)) + return regno - CR0_REGNO + 86; + if (regno == CA_REGNO) + return 101; /* XER */ + if (ALTIVEC_REGNO_P (regno)) + return regno - FIRST_ALTIVEC_REGNO + 1124; + if (regno == VRSAVE_REGNO) + return 356; + if (regno == VSCR_REGNO) + return 67; + if (regno == SPE_ACC_REGNO) + return 99; + if (regno == SPEFSCR_REGNO) + return 612; +#endif + return regno; +} + +/* target hook eh_return_filter_mode */ +static machine_mode +rs6000_eh_return_filter_mode (void) +{ + return TARGET_32BIT ? SImode : word_mode; +} + +/* Target hook for scalar_mode_supported_p. */ +static bool +rs6000_scalar_mode_supported_p (machine_mode mode) +{ + /* -m32 does not support TImode. This is the default, from + default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the + same ABI as for -m32. But default_scalar_mode_supported_p allows + integer modes of precision 2 * BITS_PER_WORD, which matches TImode + for -mpowerpc64. 
*/ + if (TARGET_32BIT && mode == TImode) + return false; + + if (DECIMAL_FLOAT_MODE_P (mode)) + return default_decimal_float_supported_p (); + else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode)) + return true; + else + return default_scalar_mode_supported_p (mode); +} + +/* Target hook for vector_mode_supported_p. */ +static bool +rs6000_vector_mode_supported_p (machine_mode mode) +{ + + if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode)) + return true; + + if (TARGET_SPE && SPE_VECTOR_MODE (mode)) + return true; + + /* There is no vector form for IEEE 128-bit. If we return true for IEEE + 128-bit, the compiler might try to widen IEEE 128-bit to IBM + double-double. */ + else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode)) + return true; + + else + return false; +} + +/* Target hook for floatn_mode. */ +static machine_mode +rs6000_floatn_mode (int n, bool extended) +{ + if (extended) + { + switch (n) + { + case 32: + return DFmode; + + case 64: + if (TARGET_FLOAT128_KEYWORD) + return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode; + else + return VOIDmode; + + case 128: + return VOIDmode; + + default: + /* Those are the only valid _FloatNx types. */ + gcc_unreachable (); + } + } + else + { + switch (n) + { + case 32: + return SFmode; + + case 64: + return DFmode; + + case 128: + if (TARGET_FLOAT128_KEYWORD) + return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode; + else + return VOIDmode; + + default: + return VOIDmode; + } + } + +} + +/* Target hook for c_mode_for_suffix. */ +static machine_mode +rs6000_c_mode_for_suffix (char suffix) +{ + if (TARGET_FLOAT128_TYPE) + { + if (suffix == 'q' || suffix == 'Q') + return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode; + + /* At the moment, we are not defining a suffix for IBM extended double. + If/when the default for -mabi=ieeelongdouble is changed, and we want + to support __ibm128 constants in legacy library code, we may need to + re-evalaute this decision. Currently, c-lex.c only supports 'w' and + 'q' as machine dependent suffixes. The x86_64 port uses 'w' for + __float80 constants. */ + } + + return VOIDmode; +} + +/* Target hook for invalid_arg_for_unprototyped_fn. */ +static const char * +invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val) +{ + return (!rs6000_darwin64_abi + && typelist == 0 + && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE + && (funcdecl == NULL_TREE + || (TREE_CODE (funcdecl) == FUNCTION_DECL + && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD))) + ? N_("AltiVec argument passed to unprototyped function") + : NULL; +} + +/* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register + setup by using __stack_chk_fail_local hidden function instead of + calling __stack_chk_fail directly. Otherwise it is better to call + __stack_chk_fail directly. */ + +static tree ATTRIBUTE_UNUSED +rs6000_stack_protect_fail (void) +{ + return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic) + ? default_hidden_stack_protect_fail () + : default_external_stack_protect_fail (); +} + +void +rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED, + int num_operands ATTRIBUTE_UNUSED) +{ + if (rs6000_warn_cell_microcode) + { + const char *temp; + int insn_code_number = recog_memoized (insn); + location_t location = INSN_LOCATION (insn); + + /* Punt on insns we cannot recognize. */ + if (insn_code_number < 0) + return; + + /* get_insn_template can modify recog_data, so save and restore it. 
*/ + struct recog_data_d recog_data_save = recog_data; + for (int i = 0; i < recog_data.n_operands; i++) + recog_data.operand[i] = copy_rtx (recog_data.operand[i]); + temp = get_insn_template (insn_code_number, insn); + recog_data = recog_data_save; + + if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS) + warning_at (location, OPT_mwarn_cell_microcode, + "emitting microcode insn %s\t[%s] #%d", + temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn)); + else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL) + warning_at (location, OPT_mwarn_cell_microcode, + "emitting conditional microcode insn %s\t[%s] #%d", + temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn)); + } +} + +/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ + +#if TARGET_ELF +static unsigned HOST_WIDE_INT +rs6000_asan_shadow_offset (void) +{ + return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29); +} +#endif + +/* Mask options that we want to support inside of attribute((target)) and + #pragma GCC target operations. Note, we do not include things like + 64/32-bit, endianness, hard/soft floating point, etc. that would have + different calling sequences. */ + +struct rs6000_opt_mask { + const char *name; /* option name */ + HOST_WIDE_INT mask; /* mask to set */ + bool invert; /* invert sense of mask */ + bool valid_target; /* option is a target option */ +}; + +static struct rs6000_opt_mask const rs6000_opt_masks[] = +{ + { "altivec", OPTION_MASK_ALTIVEC, false, true }, + { "cmpb", OPTION_MASK_CMPB, false, true }, + { "crypto", OPTION_MASK_CRYPTO, false, true }, + { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true }, + { "dlmzb", OPTION_MASK_DLMZB, false, true }, + { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX, + false, true }, + { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, false }, + { "float128-type", OPTION_MASK_FLOAT128_TYPE, false, false }, + { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, false }, + { "fprnd", OPTION_MASK_FPRND, false, true }, + { "hard-dfp", OPTION_MASK_DFP, false, true }, + { "htm", OPTION_MASK_HTM, false, true }, + { "isel", OPTION_MASK_ISEL, false, true }, + { "mfcrf", OPTION_MASK_MFCRF, false, true }, + { "mfpgpr", OPTION_MASK_MFPGPR, false, true }, + { "modulo", OPTION_MASK_MODULO, false, true }, + { "mulhw", OPTION_MASK_MULHW, false, true }, + { "multiple", OPTION_MASK_MULTIPLE, false, true }, + { "popcntb", OPTION_MASK_POPCNTB, false, true }, + { "popcntd", OPTION_MASK_POPCNTD, false, true }, + { "power8-fusion", OPTION_MASK_P8_FUSION, false, true }, + { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true }, + { "power8-vector", OPTION_MASK_P8_VECTOR, false, true }, + { "power9-dform-scalar", OPTION_MASK_P9_DFORM_SCALAR, false, true }, + { "power9-dform-vector", OPTION_MASK_P9_DFORM_VECTOR, false, true }, + { "power9-fusion", OPTION_MASK_P9_FUSION, false, true }, + { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true }, + { "power9-misc", OPTION_MASK_P9_MISC, false, true }, + { "power9-vector", OPTION_MASK_P9_VECTOR, false, true }, + { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true }, + { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true }, + { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true }, + { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true }, + { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true }, + { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true }, + { "string", OPTION_MASK_STRING, false, true }, + { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true }, + { 
"update", OPTION_MASK_NO_UPDATE, true , true }, + { "upper-regs-di", OPTION_MASK_UPPER_REGS_DI, false, true }, + { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true }, + { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true }, + { "vsx", OPTION_MASK_VSX, false, true }, + { "vsx-small-integer", OPTION_MASK_VSX_SMALL_INTEGER, false, true }, + { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true }, +#ifdef OPTION_MASK_64BIT +#if TARGET_AIX_OS + { "aix64", OPTION_MASK_64BIT, false, false }, + { "aix32", OPTION_MASK_64BIT, true, false }, +#else + { "64", OPTION_MASK_64BIT, false, false }, + { "32", OPTION_MASK_64BIT, true, false }, +#endif +#endif +#ifdef OPTION_MASK_EABI + { "eabi", OPTION_MASK_EABI, false, false }, +#endif +#ifdef OPTION_MASK_LITTLE_ENDIAN + { "little", OPTION_MASK_LITTLE_ENDIAN, false, false }, + { "big", OPTION_MASK_LITTLE_ENDIAN, true, false }, +#endif +#ifdef OPTION_MASK_RELOCATABLE + { "relocatable", OPTION_MASK_RELOCATABLE, false, false }, +#endif +#ifdef OPTION_MASK_STRICT_ALIGN + { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false }, +#endif + { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false }, + { "string", OPTION_MASK_STRING, false, false }, +}; + +/* Builtin mask mapping for printing the flags. */ +static struct rs6000_opt_mask const rs6000_builtin_mask_names[] = +{ + { "altivec", RS6000_BTM_ALTIVEC, false, false }, + { "vsx", RS6000_BTM_VSX, false, false }, + { "spe", RS6000_BTM_SPE, false, false }, + { "paired", RS6000_BTM_PAIRED, false, false }, + { "fre", RS6000_BTM_FRE, false, false }, + { "fres", RS6000_BTM_FRES, false, false }, + { "frsqrte", RS6000_BTM_FRSQRTE, false, false }, + { "frsqrtes", RS6000_BTM_FRSQRTES, false, false }, + { "popcntd", RS6000_BTM_POPCNTD, false, false }, + { "cell", RS6000_BTM_CELL, false, false }, + { "power8-vector", RS6000_BTM_P8_VECTOR, false, false }, + { "power9-vector", RS6000_BTM_P9_VECTOR, false, false }, + { "power9-misc", RS6000_BTM_P9_MISC, false, false }, + { "crypto", RS6000_BTM_CRYPTO, false, false }, + { "htm", RS6000_BTM_HTM, false, false }, + { "hard-dfp", RS6000_BTM_DFP, false, false }, + { "hard-float", RS6000_BTM_HARD_FLOAT, false, false }, + { "long-double-128", RS6000_BTM_LDBL128, false, false }, + { "float128", RS6000_BTM_FLOAT128, false, false }, +}; + +/* Option variables that we want to support inside attribute((target)) and + #pragma GCC target operations. */ + +struct rs6000_opt_var { + const char *name; /* option name */ + size_t global_offset; /* offset of the option in global_options. */ + size_t target_offset; /* offset of the option in target options. 
*/ +}; + +static struct rs6000_opt_var const rs6000_opt_vars[] = +{ + { "friz", + offsetof (struct gcc_options, x_TARGET_FRIZ), + offsetof (struct cl_target_option, x_TARGET_FRIZ), }, + { "avoid-indexed-addresses", + offsetof (struct gcc_options, x_TARGET_AVOID_XFORM), + offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) }, + { "paired", + offsetof (struct gcc_options, x_rs6000_paired_float), + offsetof (struct cl_target_option, x_rs6000_paired_float), }, + { "longcall", + offsetof (struct gcc_options, x_rs6000_default_long_calls), + offsetof (struct cl_target_option, x_rs6000_default_long_calls), }, + { "optimize-swaps", + offsetof (struct gcc_options, x_rs6000_optimize_swaps), + offsetof (struct cl_target_option, x_rs6000_optimize_swaps), }, + { "allow-movmisalign", + offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN), + offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), }, + { "allow-df-permute", + offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE), + offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), }, + { "sched-groups", + offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS), + offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), }, + { "always-hint", + offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT), + offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), }, + { "align-branch-targets", + offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS), + offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), }, + { "vectorize-builtins", + offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS), + offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), }, + { "tls-markers", + offsetof (struct gcc_options, x_tls_markers), + offsetof (struct cl_target_option, x_tls_markers), }, + { "sched-prolog", + offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG), + offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), }, + { "sched-epilog", + offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG), + offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), }, + { "gen-cell-microcode", + offsetof (struct gcc_options, x_rs6000_gen_cell_microcode), + offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), }, + { "warn-cell-microcode", + offsetof (struct gcc_options, x_rs6000_warn_cell_microcode), + offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), }, +}; + +/* Inner function to handle attribute((target("..."))) and #pragma GCC target + parsing. Return true if there were no errors. 
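+   The string is split at commas; each piece is either "cpu=<name>",
+   "tune=<name>", or one of the option names from the tables above,
+   optionally prefixed with "no-".  For example, a (hypothetical)
+   declaration such as
+
+     __attribute__ ((target ("cpu=power8,no-vsx")))
+     void f (void);
+
+   selects the power8 processor for that function and clears the VSX
+   mask.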
*/ + +static bool +rs6000_inner_target_options (tree args, bool attr_p) +{ + bool ret = true; + + if (args == NULL_TREE) + ; + + else if (TREE_CODE (args) == STRING_CST) + { + char *p = ASTRDUP (TREE_STRING_POINTER (args)); + char *q; + + while ((q = strtok (p, ",")) != NULL) + { + bool error_p = false; + bool not_valid_p = false; + const char *cpu_opt = NULL; + + p = NULL; + if (strncmp (q, "cpu=", 4) == 0) + { + int cpu_index = rs6000_cpu_name_lookup (q+4); + if (cpu_index >= 0) + rs6000_cpu_index = cpu_index; + else + { + error_p = true; + cpu_opt = q+4; + } + } + else if (strncmp (q, "tune=", 5) == 0) + { + int tune_index = rs6000_cpu_name_lookup (q+5); + if (tune_index >= 0) + rs6000_tune_index = tune_index; + else + { + error_p = true; + cpu_opt = q+5; + } + } + else + { + size_t i; + bool invert = false; + char *r = q; + + error_p = true; + if (strncmp (r, "no-", 3) == 0) + { + invert = true; + r += 3; + } + + for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++) + if (strcmp (r, rs6000_opt_masks[i].name) == 0) + { + HOST_WIDE_INT mask = rs6000_opt_masks[i].mask; + + if (!rs6000_opt_masks[i].valid_target) + not_valid_p = true; + else + { + error_p = false; + rs6000_isa_flags_explicit |= mask; + + /* VSX needs altivec, so -mvsx automagically sets + altivec and disables -mavoid-indexed-addresses. */ + if (!invert) + { + if (mask == OPTION_MASK_VSX) + { + mask |= OPTION_MASK_ALTIVEC; + TARGET_AVOID_XFORM = 0; + } + } + + if (rs6000_opt_masks[i].invert) + invert = !invert; + + if (invert) + rs6000_isa_flags &= ~mask; + else + rs6000_isa_flags |= mask; + } + break; + } + + if (error_p && !not_valid_p) + { + for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++) + if (strcmp (r, rs6000_opt_vars[i].name) == 0) + { + size_t j = rs6000_opt_vars[i].global_offset; + *((int *) ((char *)&global_options + j)) = !invert; + error_p = false; + not_valid_p = false; + break; + } + } + } + + if (error_p) + { + const char *eprefix, *esuffix; + + ret = false; + if (attr_p) + { + eprefix = "__attribute__((__target__("; + esuffix = ")))"; + } + else + { + eprefix = "#pragma GCC target "; + esuffix = ""; + } + + if (cpu_opt) + error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix, + q, esuffix); + else if (not_valid_p) + error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix); + else + error ("%s\"%s\"%s is invalid", eprefix, q, esuffix); + } + } + } + + else if (TREE_CODE (args) == TREE_LIST) + { + do + { + tree value = TREE_VALUE (args); + if (value) + { + bool ret2 = rs6000_inner_target_options (value, attr_p); + if (!ret2) + ret = false; + } + args = TREE_CHAIN (args); + } + while (args != NULL_TREE); + } + + else + { + error ("attribute %<target%> argument not a string"); + return false; + } + + return ret; +} + +/* Print out the target options as a list for -mdebug=target. */ + +static void +rs6000_debug_target_options (tree args, const char *prefix) +{ + if (args == NULL_TREE) + fprintf (stderr, "%s<NULL>", prefix); + + else if (TREE_CODE (args) == STRING_CST) + { + char *p = ASTRDUP (TREE_STRING_POINTER (args)); + char *q; + + while ((q = strtok (p, ",")) != NULL) + { + p = NULL; + fprintf (stderr, "%s\"%s\"", prefix, q); + prefix = ", "; + } + } + + else if (TREE_CODE (args) == TREE_LIST) + { + do + { + tree value = TREE_VALUE (args); + if (value) + { + rs6000_debug_target_options (value, prefix); + prefix = ", "; + } + args = TREE_CHAIN (args); + } + while (args != NULL_TREE); + } + + else + gcc_unreachable (); + + return; +} + + +/* Hook to validate attribute((target("..."))). 
*/ + +static bool +rs6000_valid_attribute_p (tree fndecl, + tree ARG_UNUSED (name), + tree args, + int flags) +{ + struct cl_target_option cur_target; + bool ret; + tree old_optimize = build_optimization_node (&global_options); + tree new_target, new_optimize; + tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); + + gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE)); + + if (TARGET_DEBUG_TARGET) + { + tree tname = DECL_NAME (fndecl); + fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n"); + if (tname) + fprintf (stderr, "function: %.*s\n", + (int) IDENTIFIER_LENGTH (tname), + IDENTIFIER_POINTER (tname)); + else + fprintf (stderr, "function: unknown\n"); + + fprintf (stderr, "args:"); + rs6000_debug_target_options (args, " "); + fprintf (stderr, "\n"); + + if (flags) + fprintf (stderr, "flags: 0x%x\n", flags); + + fprintf (stderr, "--------------------\n"); + } + + old_optimize = build_optimization_node (&global_options); + func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); + + /* If the function changed the optimization levels as well as setting target + options, start with the optimizations specified. */ + if (func_optimize && func_optimize != old_optimize) + cl_optimization_restore (&global_options, + TREE_OPTIMIZATION (func_optimize)); + + /* The target attributes may also change some optimization flags, so update + the optimization options if necessary. */ + cl_target_option_save (&cur_target, &global_options); + rs6000_cpu_index = rs6000_tune_index = -1; + ret = rs6000_inner_target_options (args, true); + + /* Set up any additional state. */ + if (ret) + { + ret = rs6000_option_override_internal (false); + new_target = build_target_option_node (&global_options); + } + else + new_target = NULL; + + new_optimize = build_optimization_node (&global_options); + + if (!new_target) + ret = false; + + else if (fndecl) + { + DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target; + + if (old_optimize != new_optimize) + DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize; + } + + cl_target_option_restore (&global_options, &cur_target); + + if (old_optimize != new_optimize) + cl_optimization_restore (&global_options, + TREE_OPTIMIZATION (old_optimize)); + + return ret; +} + + +/* Hook to validate the current #pragma GCC target and set the state, and + update the macros based on what was changed. If ARGS is NULL, then + POP_TARGET is used to reset the options. */ + +bool +rs6000_pragma_target_parse (tree args, tree pop_target) +{ + tree prev_tree = build_target_option_node (&global_options); + tree cur_tree; + struct cl_target_option *prev_opt, *cur_opt; + HOST_WIDE_INT prev_flags, cur_flags, diff_flags; + HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask; + + if (TARGET_DEBUG_TARGET) + { + fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n"); + fprintf (stderr, "args:"); + rs6000_debug_target_options (args, " "); + fprintf (stderr, "\n"); + + if (pop_target) + { + fprintf (stderr, "pop_target:\n"); + debug_tree (pop_target); + } + else + fprintf (stderr, "pop_target: <NULL>\n"); + + fprintf (stderr, "--------------------\n"); + } + + if (! args) + { + cur_tree = ((pop_target) + ? 
pop_target + : target_option_default_node); + cl_target_option_restore (&global_options, + TREE_TARGET_OPTION (cur_tree)); + } + else + { + rs6000_cpu_index = rs6000_tune_index = -1; + if (!rs6000_inner_target_options (args, false) + || !rs6000_option_override_internal (false) + || (cur_tree = build_target_option_node (&global_options)) + == NULL_TREE) + { + if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET) + fprintf (stderr, "invalid pragma\n"); + + return false; + } + } + + target_option_current_node = cur_tree; + + /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly + change the macros that are defined. */ + if (rs6000_target_modify_macros_ptr) + { + prev_opt = TREE_TARGET_OPTION (prev_tree); + prev_bumask = prev_opt->x_rs6000_builtin_mask; + prev_flags = prev_opt->x_rs6000_isa_flags; + + cur_opt = TREE_TARGET_OPTION (cur_tree); + cur_flags = cur_opt->x_rs6000_isa_flags; + cur_bumask = cur_opt->x_rs6000_builtin_mask; + + diff_bumask = (prev_bumask ^ cur_bumask); + diff_flags = (prev_flags ^ cur_flags); + + if ((diff_flags != 0) || (diff_bumask != 0)) + { + /* Delete old macros. */ + rs6000_target_modify_macros_ptr (false, + prev_flags & diff_flags, + prev_bumask & diff_bumask); + + /* Define new macros. */ + rs6000_target_modify_macros_ptr (true, + cur_flags & diff_flags, + cur_bumask & diff_bumask); + } + } + + return true; +} + + +/* Remember the last target of rs6000_set_current_function. */ +static GTY(()) tree rs6000_previous_fndecl; + +/* Establish appropriate back-end context for processing the function + FNDECL. The argument might be NULL to indicate processing at top + level, outside of any function scope. */ +static void +rs6000_set_current_function (tree fndecl) +{ + tree old_tree = (rs6000_previous_fndecl + ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl) + : NULL_TREE); + + tree new_tree = (fndecl + ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl) + : NULL_TREE); + + if (TARGET_DEBUG_TARGET) + { + bool print_final = false; + fprintf (stderr, "\n==================== rs6000_set_current_function"); + + if (fndecl) + fprintf (stderr, ", fndecl %s (%p)", + (DECL_NAME (fndecl) + ? IDENTIFIER_POINTER (DECL_NAME (fndecl)) + : "<unknown>"), (void *)fndecl); + + if (rs6000_previous_fndecl) + fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl); + + fprintf (stderr, "\n"); + if (new_tree) + { + fprintf (stderr, "\nnew fndecl target specific options:\n"); + debug_tree (new_tree); + print_final = true; + } + + if (old_tree) + { + fprintf (stderr, "\nold fndecl target specific options:\n"); + debug_tree (old_tree); + print_final = true; + } + + if (print_final) + fprintf (stderr, "--------------------\n"); + } + + /* Only change the context if the function changes. This hook is called + several times in the course of compiling a function, and we don't want to + slow things down too much or call target_reinit when it isn't safe. 
*/ + if (fndecl && fndecl != rs6000_previous_fndecl) + { + rs6000_previous_fndecl = fndecl; + if (old_tree == new_tree) + ; + + else if (new_tree && new_tree != target_option_default_node) + { + cl_target_option_restore (&global_options, + TREE_TARGET_OPTION (new_tree)); + if (TREE_TARGET_GLOBALS (new_tree)) + restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); + else + TREE_TARGET_GLOBALS (new_tree) + = save_target_globals_default_opts (); + } + + else if (old_tree && old_tree != target_option_default_node) + { + new_tree = target_option_current_node; + cl_target_option_restore (&global_options, + TREE_TARGET_OPTION (new_tree)); + if (TREE_TARGET_GLOBALS (new_tree)) + restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); + else if (new_tree == target_option_default_node) + restore_target_globals (&default_target_globals); + else + TREE_TARGET_GLOBALS (new_tree) + = save_target_globals_default_opts (); + } + } +} + + +/* Save the current options */ + +static void +rs6000_function_specific_save (struct cl_target_option *ptr, + struct gcc_options *opts) +{ + ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags; + ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit; +} + +/* Restore the current options */ + +static void +rs6000_function_specific_restore (struct gcc_options *opts, + struct cl_target_option *ptr) + +{ + opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags; + opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit; + (void) rs6000_option_override_internal (false); +} + +/* Print the current options */ + +static void +rs6000_function_specific_print (FILE *file, int indent, + struct cl_target_option *ptr) +{ + rs6000_print_isa_options (file, indent, "Isa options set", + ptr->x_rs6000_isa_flags); + + rs6000_print_isa_options (file, indent, "Isa options explicit", + ptr->x_rs6000_isa_flags_explicit); +} + +/* Helper function to print the current isa or misc options on a line. */ + +static void +rs6000_print_options_internal (FILE *file, + int indent, + const char *string, + HOST_WIDE_INT flags, + const char *prefix, + const struct rs6000_opt_mask *opts, + size_t num_elements) +{ + size_t i; + size_t start_column = 0; + size_t cur_column; + size_t max_column = 120; + size_t prefix_len = strlen (prefix); + size_t comma_len = 0; + const char *comma = ""; + + if (indent) + start_column += fprintf (file, "%*s", indent, ""); + + if (!flags) + { + fprintf (stderr, DEBUG_FMT_S, string, "<none>"); + return; + } + + start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags); + + /* Print the various mask options. */ + cur_column = start_column; + for (i = 0; i < num_elements; i++) + { + bool invert = opts[i].invert; + const char *name = opts[i].name; + const char *no_str = ""; + HOST_WIDE_INT mask = opts[i].mask; + size_t len = comma_len + prefix_len + strlen (name); + + if (!invert) + { + if ((flags & mask) == 0) + { + no_str = "no-"; + len += sizeof ("no-") - 1; + } + + flags &= ~mask; + } + + else + { + if ((flags & mask) != 0) + { + no_str = "no-"; + len += sizeof ("no-") - 1; + } + + flags |= mask; + } + + cur_column += len; + if (cur_column > max_column) + { + fprintf (stderr, ", \\\n%*s", (int)start_column, ""); + cur_column = start_column + len; + comma = ""; + } + + fprintf (file, "%s%s%s%s", comma, prefix, no_str, name); + comma = ", "; + comma_len = sizeof (", ") - 1; + } + + fputs ("\n", file); +} + +/* Helper function to print the current isa options on a line. 
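+   The masks are printed as a comma-separated list with a "-m" prefix,
+   using a "no-" marker for bits that are clear (e.g. "-maltivec,
+   -mno-cmpb, -mno-crypto, ..."), and lines that grow past the column
+   limit are wrapped with a trailing backslash.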
*/ + +static void +rs6000_print_isa_options (FILE *file, int indent, const char *string, + HOST_WIDE_INT flags) +{ + rs6000_print_options_internal (file, indent, string, flags, "-m", + &rs6000_opt_masks[0], + ARRAY_SIZE (rs6000_opt_masks)); +} + +static void +rs6000_print_builtin_options (FILE *file, int indent, const char *string, + HOST_WIDE_INT flags) +{ + rs6000_print_options_internal (file, indent, string, flags, "", + &rs6000_builtin_mask_names[0], + ARRAY_SIZE (rs6000_builtin_mask_names)); +} + +/* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06, + 2.07, and 3.0 options that relate to the vector unit (-mdirect-move, + -mvsx-timode, -mupper-regs-df). + + If the user used -mno-power8-vector, we need to turn off all of the implicit + ISA 2.07 and 3.0 options that relate to the vector unit. + + If the user used -mno-power9-vector, we need to turn off all of the implicit + ISA 3.0 options that relate to the vector unit. + + This function does not handle explicit options such as the user specifying + -mdirect-move. These are handled in rs6000_option_override_internal, and + the appropriate error is given if needed. + + We return a mask of all of the implicit options that should not be enabled + by default. */ + +static HOST_WIDE_INT +rs6000_disable_incompatible_switches (void) +{ + HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit; + size_t i, j; + + static const struct { + const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */ + const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */ + const char *const name; /* name of the switch. */ + } flags[] = { + { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" }, + { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" }, + { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" }, + }; + + for (i = 0; i < ARRAY_SIZE (flags); i++) + { + HOST_WIDE_INT no_flag = flags[i].no_flag; + + if ((rs6000_isa_flags & no_flag) == 0 + && (rs6000_isa_flags_explicit & no_flag) != 0) + { + HOST_WIDE_INT dep_flags = flags[i].dep_flags; + HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit + & rs6000_isa_flags + & dep_flags); + + if (set_flags) + { + for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++) + if ((set_flags & rs6000_opt_masks[j].mask) != 0) + { + set_flags &= ~rs6000_opt_masks[j].mask; + error ("-mno-%s turns off -m%s", + flags[i].name, + rs6000_opt_masks[j].name); + } + + gcc_assert (!set_flags); + } + + rs6000_isa_flags &= ~dep_flags; + ignore_masks |= no_flag | dep_flags; + } + } + + if (!TARGET_P9_VECTOR + && (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) != 0 + && TARGET_P9_DFORM_BOTH > 0) + { + error ("-mno-power9-vector turns off -mpower9-dform"); + TARGET_P9_DFORM_BOTH = 0; + } + + return ignore_masks; +} + + +/* Hook to determine if one function can safely inline another. */ + +static bool +rs6000_can_inline_p (tree caller, tree callee) +{ + bool ret = false; + tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); + tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); + + /* If callee has no option attributes, then it is ok to inline. */ + if (!callee_tree) + ret = true; + + /* If caller has no option attributes, but callee does then it is not ok to + inline. */ + else if (!caller_tree) + ret = false; + + else + { + struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); + struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); + + /* Callee's options should a subset of the caller's, i.e. 
a vsx function + can inline an altivec function but a non-vsx function can't inline a + vsx function. */ + if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags) + == callee_opts->x_rs6000_isa_flags) + ret = true; + } + + if (TARGET_DEBUG_TARGET) + fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n", + (DECL_NAME (caller) + ? IDENTIFIER_POINTER (DECL_NAME (caller)) + : "<unknown>"), + (DECL_NAME (callee) + ? IDENTIFIER_POINTER (DECL_NAME (callee)) + : "<unknown>"), + (ret ? "can" : "cannot")); + + return ret; +} + +/* Allocate a stack temp and fix up the address so it meets the particular + memory requirements (either offsettable or REG+REG addressing). */ + +rtx +rs6000_allocate_stack_temp (machine_mode mode, + bool offsettable_p, + bool reg_reg_p) +{ + rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode)); + rtx addr = XEXP (stack, 0); + int strict_p = (reload_in_progress || reload_completed); + + if (!legitimate_indirect_address_p (addr, strict_p)) + { + if (offsettable_p + && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true)) + stack = replace_equiv_address (stack, copy_addr_to_reg (addr)); + + else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p)) + stack = replace_equiv_address (stack, copy_addr_to_reg (addr)); + } + + return stack; +} + +/* Given a memory reference, if it is not a reg or reg+reg addressing, convert + to such a form to deal with memory reference instructions like STFIWX that + only take reg+reg addressing. */ + +rtx +rs6000_address_for_fpconvert (rtx x) +{ + int strict_p = (reload_in_progress || reload_completed); + rtx addr; + + gcc_assert (MEM_P (x)); + addr = XEXP (x, 0); + if (! legitimate_indirect_address_p (addr, strict_p) + && ! legitimate_indexed_address_p (addr, strict_p)) + { + if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) + { + rtx reg = XEXP (addr, 0); + HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x)); + rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size); + gcc_assert (REG_P (reg)); + emit_insn (gen_add3_insn (reg, reg, size_rtx)); + addr = reg; + } + else if (GET_CODE (addr) == PRE_MODIFY) + { + rtx reg = XEXP (addr, 0); + rtx expr = XEXP (addr, 1); + gcc_assert (REG_P (reg)); + gcc_assert (GET_CODE (expr) == PLUS); + emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1))); + addr = reg; + } + + x = replace_equiv_address (x, copy_addr_to_reg (addr)); + } + + return x; +} + +/* Given a memory reference, if it is not in the form for altivec memory + reference instructions (i.e. reg or reg+reg addressing with AND of -16), + convert to the altivec format. */ + +rtx +rs6000_address_for_altivec (rtx x) +{ + gcc_assert (MEM_P (x)); + if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x))) + { + rtx addr = XEXP (x, 0); + int strict_p = (reload_in_progress || reload_completed); + + if (!legitimate_indexed_address_p (addr, strict_p) + && !legitimate_indirect_address_p (addr, strict_p)) + addr = copy_to_mode_reg (Pmode, addr); + + addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16)); + x = change_address (x, GET_MODE (x), addr); + } + + return x; +} + +/* Implement TARGET_LEGITIMATE_CONSTANT_P. + + On the RS/6000, all integer constants are acceptable, most won't be valid + for particular insns, though. Only easy FP constants are acceptable.
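+   "Easy" here means a floating point or vector constant that the
+   compiler can materialize directly in a register (0.0 or a small
+   vector splat, for instance) rather than loading it from the constant
+   pool; see easy_fp_constant and easy_vector_constant.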
*/ + +static bool +rs6000_legitimate_constant_p (machine_mode mode, rtx x) +{ + if (TARGET_ELF && tls_referenced_p (x)) + return false; + + return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR) + || GET_MODE (x) == VOIDmode + || (TARGET_POWERPC64 && mode == DImode) + || easy_fp_constant (x, mode) + || easy_vector_constant (x, mode)); +} + + +/* Return TRUE iff the sequence ending in LAST sets the static chain. */ + +static bool +chain_already_loaded (rtx_insn *last) +{ + for (; last != NULL; last = PREV_INSN (last)) + { + if (NONJUMP_INSN_P (last)) + { + rtx patt = PATTERN (last); + + if (GET_CODE (patt) == SET) + { + rtx lhs = XEXP (patt, 0); + + if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM) + return true; + } + } + } + return false; +} + +/* Expand code to perform a call under the AIX or ELFv2 ABI. */ + +void +rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie) +{ + const bool direct_call_p + = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc); + rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM); + rtx toc_load = NULL_RTX; + rtx toc_restore = NULL_RTX; + rtx func_addr; + rtx abi_reg = NULL_RTX; + rtx call[4]; + int n_call; + rtx insn; + + /* Handle longcall attributes. */ + if (INTVAL (cookie) & CALL_LONG) + func_desc = rs6000_longcall_ref (func_desc); + + /* Handle indirect calls. */ + if (GET_CODE (func_desc) != SYMBOL_REF + || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc))) + { + /* Save the TOC into its reserved slot before the call, + and prepare to restore it after the call. */ + rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); + rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT); + rtx stack_toc_mem = gen_frame_mem (Pmode, + gen_rtx_PLUS (Pmode, stack_ptr, + stack_toc_offset)); + rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode, + gen_rtvec (1, stack_toc_offset), + UNSPEC_TOCSLOT); + toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec); + + /* Can we optimize saving the TOC in the prologue or + do we need to do it at every call? */ + if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca) + cfun->machine->save_toc_in_prologue = true; + else + { + MEM_VOLATILE_P (stack_toc_mem) = 1; + emit_move_insn (stack_toc_mem, toc_reg); + } + + if (DEFAULT_ABI == ABI_ELFv2) + { + /* A function pointer in the ELFv2 ABI is just a plain address, but + the ABI requires it to be loaded into r12 before the call. */ + func_addr = gen_rtx_REG (Pmode, 12); + emit_move_insn (func_addr, func_desc); + abi_reg = func_addr; + } + else + { + /* A function pointer under AIX is a pointer to a data area whose + first word contains the actual address of the function, whose + second word contains a pointer to its TOC, and whose third word + contains a value to place in the static chain register (r11). + Note that if we load the static chain, our "trampoline" need + not have any executable code. */ + + /* Load up address of the actual function. */ + func_desc = force_reg (Pmode, func_desc); + func_addr = gen_reg_rtx (Pmode); + emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc)); + + /* Prepare to load the TOC of the called function. Note that the + TOC load must happen immediately before the actual call so + that unwinding the TOC registers works correctly. See the + comment in frob_update_context. 
*/ + rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode)); + rtx func_toc_mem = gen_rtx_MEM (Pmode, + gen_rtx_PLUS (Pmode, func_desc, + func_toc_offset)); + toc_load = gen_rtx_USE (VOIDmode, func_toc_mem); + + /* If we have a static chain, load it up. But, if the call was + originally direct, the 3rd word has not been written since no + trampoline has been built, so we ought not to load it, lest we + override a static chain value. */ + if (!direct_call_p + && TARGET_POINTERS_TO_NESTED_FUNCTIONS + && !chain_already_loaded (get_current_sequence ()->next->last)) + { + rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM); + rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode)); + rtx func_sc_mem = gen_rtx_MEM (Pmode, + gen_rtx_PLUS (Pmode, func_desc, + func_sc_offset)); + emit_move_insn (sc_reg, func_sc_mem); + abi_reg = sc_reg; + } + } + } + else + { + /* Direct calls use the TOC: for local calls, the callee will + assume the TOC register is set; for non-local calls, the + PLT stub needs the TOC register. */ + abi_reg = toc_reg; + func_addr = func_desc; + } + + /* Create the call. */ + call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag); + if (value != NULL_RTX) + call[0] = gen_rtx_SET (value, call[0]); + n_call = 1; + + if (toc_load) + call[n_call++] = toc_load; + if (toc_restore) + call[n_call++] = toc_restore; + + call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO)); + + insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call)); + insn = emit_call_insn (insn); + + /* Mention all registers defined by the ABI to hold information + as uses in CALL_INSN_FUNCTION_USAGE. */ + if (abi_reg) + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg); +} + +/* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */ + +void +rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie) +{ + rtx call[2]; + rtx insn; + + gcc_assert (INTVAL (cookie) == 0); + + /* Create the call. */ + call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag); + if (value != NULL_RTX) + call[0] = gen_rtx_SET (value, call[0]); + + call[1] = simple_return_rtx; + + insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call)); + insn = emit_call_insn (insn); + + /* Note use of the TOC register. */ + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM)); +} + +/* Return whether we need to always update the saved TOC pointer when we update + the stack pointer. */ + +static bool +rs6000_save_toc_in_prologue_p (void) +{ + return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue); +} + +#ifdef HAVE_GAS_HIDDEN +# define USE_HIDDEN_LINKONCE 1 +#else +# define USE_HIDDEN_LINKONCE 0 +#endif + +/* Fills in the label name that should be used for a 476 link stack thunk. */ + +void +get_ppc476_thunk_name (char name[32]) +{ + gcc_assert (TARGET_LINK_STACK); + + if (USE_HIDDEN_LINKONCE) + sprintf (name, "__ppc476.get_thunk"); + else + ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0); +} + +/* This function emits the simple thunk routine that is used to preserve + the link stack on the 476 cpu. 
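+   As an illustration, with the hidden-linkonce name chosen by
+   get_ppc476_thunk_name the entire emitted thunk is just
+
+	__ppc476.get_thunk:
+		blr
+
+   (section, weak and visibility directives omitted).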
*/ + +static void rs6000_code_end (void) ATTRIBUTE_UNUSED; +static void +rs6000_code_end (void) +{ + char name[32]; + tree decl; + + if (!TARGET_LINK_STACK) + return; + + get_ppc476_thunk_name (name); + + decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name), + build_function_type_list (void_type_node, NULL_TREE)); + DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, + NULL_TREE, void_type_node); + TREE_PUBLIC (decl) = 1; + TREE_STATIC (decl) = 1; + +#if RS6000_WEAK + if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF) + { + cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); + targetm.asm_out.unique_section (decl, 0); + switch_to_section (get_named_section (decl, NULL, 0)); + DECL_WEAK (decl) = 1; + ASM_WEAKEN_DECL (asm_out_file, decl, name, 0); + targetm.asm_out.globalize_label (asm_out_file, name); + targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN); + ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); + } + else +#endif + { + switch_to_section (text_section); + ASM_OUTPUT_LABEL (asm_out_file, name); + } + + DECL_INITIAL (decl) = make_node (BLOCK); + current_function_decl = decl; + allocate_struct_function (decl, false); + init_function_start (decl); + first_function_block_is_cold = false; + /* Make sure unwind info is emitted for the thunk if needed. */ + final_start_function (emit_barrier (), asm_out_file, 1); + + fputs ("\tblr\n", asm_out_file); + + final_end_function (); + init_insn_lengths (); + free_after_compilation (cfun); + set_cfun (NULL); + current_function_decl = NULL; +} + +/* Add r30 to hard reg set if the prologue sets it up and it is not + pic_offset_table_rtx. */ + +static void +rs6000_set_up_by_prologue (struct hard_reg_set_container *set) +{ + if (!TARGET_SINGLE_PIC_BASE + && TARGET_TOC + && TARGET_MINIMAL_TOC + && !constant_pool_empty_p ()) + add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM); + if (cfun->machine->split_stack_argp_used) + add_to_hard_reg_set (&set->set, Pmode, 12); +} + + +/* Helper function for rs6000_split_logical to emit a logical instruction after + spliting the operation to single GPR registers. + + DEST is the destination register. + OP1 and OP2 are the input source registers. + CODE is the base operation (AND, IOR, XOR, NOT). + MODE is the machine mode. + If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. + If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. + If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */ + +static void +rs6000_split_logical_inner (rtx dest, + rtx op1, + rtx op2, + enum rtx_code code, + machine_mode mode, + bool complement_final_p, + bool complement_op1_p, + bool complement_op2_p) +{ + rtx bool_rtx; + + /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */ + if (op2 && GET_CODE (op2) == CONST_INT + && (mode == SImode || (mode == DImode && TARGET_POWERPC64)) + && !complement_final_p && !complement_op1_p && !complement_op2_p) + { + HOST_WIDE_INT mask = GET_MODE_MASK (mode); + HOST_WIDE_INT value = INTVAL (op2) & mask; + + /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */ + if (code == AND) + { + if (value == 0) + { + emit_insn (gen_rtx_SET (dest, const0_rtx)); + return; + } + + else if (value == mask) + { + if (!rtx_equal_p (dest, op1)) + emit_insn (gen_rtx_SET (dest, op1)); + return; + } + } + + /* Optimize IOR/XOR of 0 to be a simple move. Split large operations + into separate ORI/ORIS or XORI/XORIS instrucitons. 
*/ + else if (code == IOR || code == XOR) + { + if (value == 0) + { + if (!rtx_equal_p (dest, op1)) + emit_insn (gen_rtx_SET (dest, op1)); + return; + } + } + } + + if (code == AND && mode == SImode + && !complement_final_p && !complement_op1_p && !complement_op2_p) + { + emit_insn (gen_andsi3 (dest, op1, op2)); + return; + } + + if (complement_op1_p) + op1 = gen_rtx_NOT (mode, op1); + + if (complement_op2_p) + op2 = gen_rtx_NOT (mode, op2); + + /* For canonical RTL, if only one arm is inverted it is the first. */ + if (!complement_op1_p && complement_op2_p) + std::swap (op1, op2); + + bool_rtx = ((code == NOT) + ? gen_rtx_NOT (mode, op1) + : gen_rtx_fmt_ee (code, mode, op1, op2)); + + if (complement_final_p) + bool_rtx = gen_rtx_NOT (mode, bool_rtx); + + emit_insn (gen_rtx_SET (dest, bool_rtx)); +} + +/* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These + operations are split immediately during RTL generation to allow for more + optimizations of the AND/IOR/XOR. + + OPERANDS is an array containing the destination and two input operands. + CODE is the base operation (AND, IOR, XOR, NOT). + MODE is the machine mode. + If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. + If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. + If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. + CLOBBER_REG is either NULL or a scratch register of type CC to allow + formation of the AND instructions. */ + +static void +rs6000_split_logical_di (rtx operands[3], + enum rtx_code code, + bool complement_final_p, + bool complement_op1_p, + bool complement_op2_p) +{ + const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff); + const HOST_WIDE_INT upper_32bits = ~ lower_32bits; + const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000); + enum hi_lo { hi = 0, lo = 1 }; + rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2]; + size_t i; + + op0_hi_lo[hi] = gen_highpart (SImode, operands[0]); + op1_hi_lo[hi] = gen_highpart (SImode, operands[1]); + op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]); + op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]); + + if (code == NOT) + op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX; + else + { + if (GET_CODE (operands[2]) != CONST_INT) + { + op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]); + op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]); + } + else + { + HOST_WIDE_INT value = INTVAL (operands[2]); + HOST_WIDE_INT value_hi_lo[2]; + + gcc_assert (!complement_final_p); + gcc_assert (!complement_op1_p); + gcc_assert (!complement_op2_p); + + value_hi_lo[hi] = value >> 32; + value_hi_lo[lo] = value & lower_32bits; + + for (i = 0; i < 2; i++) + { + HOST_WIDE_INT sub_value = value_hi_lo[i]; + + if (sub_value & sign_bit) + sub_value |= upper_32bits; + + op2_hi_lo[i] = GEN_INT (sub_value); + + /* If this is an AND instruction, check to see if we need to load + the value in a register. */ + if (code == AND && sub_value != -1 && sub_value != 0 + && !and_operand (op2_hi_lo[i], SImode)) + op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]); + } + } + } + + for (i = 0; i < 2; i++) + { + /* Split large IOR/XOR operations. 
*/ + if ((code == IOR || code == XOR) + && GET_CODE (op2_hi_lo[i]) == CONST_INT + && !complement_final_p + && !complement_op1_p + && !complement_op2_p + && !logical_const_operand (op2_hi_lo[i], SImode)) + { + HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]); + HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000); + HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff); + rtx tmp = gen_reg_rtx (SImode); + + /* Make sure the constant is sign extended. */ + if ((hi_16bits & sign_bit) != 0) + hi_16bits |= upper_32bits; + + rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits), + code, SImode, false, false, false); + + rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits), + code, SImode, false, false, false); + } + else + rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i], + code, SImode, complement_final_p, + complement_op1_p, complement_op2_p); + } + + return; +} + +/* Split the insns that make up boolean operations operating on multiple GPR + registers. The boolean MD patterns ensure that the inputs either are + exactly the same as the output registers, or there is no overlap. + + OPERANDS is an array containing the destination and two input operands. + CODE is the base operation (AND, IOR, XOR, NOT). + If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. + If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. + If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */ + +void +rs6000_split_logical (rtx operands[3], + enum rtx_code code, + bool complement_final_p, + bool complement_op1_p, + bool complement_op2_p) +{ + machine_mode mode = GET_MODE (operands[0]); + machine_mode sub_mode; + rtx op0, op1, op2; + int sub_size, regno0, regno1, nregs, i; + + /* If this is DImode, use the specialized version that can run before + register allocation. */ + if (mode == DImode && !TARGET_POWERPC64) + { + rs6000_split_logical_di (operands, code, complement_final_p, + complement_op1_p, complement_op2_p); + return; + } + + op0 = operands[0]; + op1 = operands[1]; + op2 = (code == NOT) ? NULL_RTX : operands[2]; + sub_mode = (TARGET_POWERPC64) ? DImode : SImode; + sub_size = GET_MODE_SIZE (sub_mode); + regno0 = REGNO (op0); + regno1 = REGNO (op1); + + gcc_assert (reload_completed); + gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO)); + gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO)); + + nregs = rs6000_hard_regno_nregs[(int)mode][regno0]; + gcc_assert (nregs > 1); + + if (op2 && REG_P (op2)) + gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO)); + + for (i = 0; i < nregs; i++) + { + int offset = i * sub_size; + rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset); + rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset); + rtx sub_op2 = ((code == NOT) + ? NULL_RTX + : simplify_subreg (sub_mode, op2, mode, offset)); + + rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode, + complement_final_p, complement_op1_p, + complement_op2_p); + } + + return; +} + + +/* Return true if the peephole2 can combine a load involving a combination of + an addis instruction and a load with an offset that can be fused together on + a power8. */ + +bool +fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */ + rtx addis_value, /* addis value. */ + rtx target, /* target register that is loaded. */ + rtx mem) /* bottom part of the memory addr. */ +{ + rtx addr; + rtx base_reg; + + /* Validate arguments. 
*/ + if (!base_reg_operand (addis_reg, GET_MODE (addis_reg))) + return false; + + if (!base_reg_operand (target, GET_MODE (target))) + return false; + + if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value))) + return false; + + /* Allow sign/zero extension. */ + if (GET_CODE (mem) == ZERO_EXTEND + || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN)) + mem = XEXP (mem, 0); + + if (!MEM_P (mem)) + return false; + + if (!fusion_gpr_mem_load (mem, GET_MODE (mem))) + return false; + + addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */ + if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) + return false; + + /* Validate that the register used to load the high value is either the + register being loaded, or we can safely replace its use. + + This function is only called from the peephole2 pass and we assume that + there are 2 instructions in the peephole (addis and load), so we want to + check if the target register was not used in the memory address and the + register to hold the addis result is dead after the peephole. */ + if (REGNO (addis_reg) != REGNO (target)) + { + if (reg_mentioned_p (target, mem)) + return false; + + if (!peep2_reg_dead_p (2, addis_reg)) + return false; + + /* If the target register being loaded is the stack pointer, we must + avoid loading any other value into it, even temporarily. */ + if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM) + return false; + } + + base_reg = XEXP (addr, 0); + return REGNO (addis_reg) == REGNO (base_reg); +} + +/* During the peephole2 pass, adjust and expand the insns for a load fusion + sequence. We adjust the addis register to use the target register. If the + load sign extends, we adjust the code to do the zero extending load, and an + explicit sign extension later since the fusion only covers zero extending + loads. + + The operands are: + operands[0] register set with addis (to be replaced with target) + operands[1] value set via addis + operands[2] target register being loaded + operands[3] D-form memory reference using operands[0]. */ + +void +expand_fusion_gpr_load (rtx *operands) +{ + rtx addis_value = operands[1]; + rtx target = operands[2]; + rtx orig_mem = operands[3]; + rtx new_addr, new_mem, orig_addr, offset; + enum rtx_code plus_or_lo_sum; + machine_mode target_mode = GET_MODE (target); + machine_mode extend_mode = target_mode; + machine_mode ptr_mode = Pmode; + enum rtx_code extend = UNKNOWN; + + if (GET_CODE (orig_mem) == ZERO_EXTEND + || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND)) + { + extend = GET_CODE (orig_mem); + orig_mem = XEXP (orig_mem, 0); + target_mode = GET_MODE (orig_mem); + } + + gcc_assert (MEM_P (orig_mem)); + + orig_addr = XEXP (orig_mem, 0); + plus_or_lo_sum = GET_CODE (orig_addr); + gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM); + + offset = XEXP (orig_addr, 1); + new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset); + new_mem = replace_equiv_address_nv (orig_mem, new_addr, false); + + if (extend != UNKNOWN) + new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem); + + new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem), + UNSPEC_FUSION_GPR); + emit_insn (gen_rtx_SET (target, new_mem)); + + if (extend == SIGN_EXTEND) + { + int sub_off = ((BYTES_BIG_ENDIAN) + ? 
GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode) + : 0); + rtx sign_reg + = simplify_subreg (target_mode, target, extend_mode, sub_off); + + emit_insn (gen_rtx_SET (target, + gen_rtx_SIGN_EXTEND (extend_mode, sign_reg))); + } + + return; +} + +/* Emit the addis instruction that will be part of a fused instruction + sequence. */ + +void +emit_fusion_addis (rtx target, rtx addis_value, const char *comment, + const char *mode_name) +{ + rtx fuse_ops[10]; + char insn_template[80]; + const char *addis_str = NULL; + const char *comment_str = ASM_COMMENT_START; + + if (*comment_str == ' ') + comment_str++; + + /* Emit the addis instruction. */ + fuse_ops[0] = target; + if (satisfies_constraint_L (addis_value)) + { + fuse_ops[1] = addis_value; + addis_str = "lis %0,%v1"; + } + + else if (GET_CODE (addis_value) == PLUS) + { + rtx op0 = XEXP (addis_value, 0); + rtx op1 = XEXP (addis_value, 1); + + if (REG_P (op0) && CONST_INT_P (op1) + && satisfies_constraint_L (op1)) + { + fuse_ops[1] = op0; + fuse_ops[2] = op1; + addis_str = "addis %0,%1,%v2"; + } + } + + else if (GET_CODE (addis_value) == HIGH) + { + rtx value = XEXP (addis_value, 0); + if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL) + { + fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */ + fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */ + if (TARGET_ELF) + addis_str = "addis %0,%2,%1@toc@ha"; + + else if (TARGET_XCOFF) + addis_str = "addis %0,%1@u(%2)"; + + else + gcc_unreachable (); + } + + else if (GET_CODE (value) == PLUS) + { + rtx op0 = XEXP (value, 0); + rtx op1 = XEXP (value, 1); + + if (GET_CODE (op0) == UNSPEC + && XINT (op0, 1) == UNSPEC_TOCREL + && CONST_INT_P (op1)) + { + fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */ + fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */ + fuse_ops[3] = op1; + if (TARGET_ELF) + addis_str = "addis %0,%2,%1+%3@toc@ha"; + + else if (TARGET_XCOFF) + addis_str = "addis %0,%1+%3@u(%2)"; + + else + gcc_unreachable (); + } + } + + else if (satisfies_constraint_L (value)) + { + fuse_ops[1] = value; + addis_str = "lis %0,%v1"; + } + + else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value)) + { + fuse_ops[1] = value; + addis_str = "lis %0,%1@ha"; + } + } + + if (!addis_str) + fatal_insn ("Could not generate addis value for fusion", addis_value); + + sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str, + comment, mode_name); + output_asm_insn (insn_template, fuse_ops); +} + +/* Emit a D-form load or store instruction that is the second instruction + of a fusion sequence. 
*/ + +void +emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset, + const char *insn_str) +{ + rtx fuse_ops[10]; + char insn_template[80]; + + fuse_ops[0] = load_store_reg; + fuse_ops[1] = addis_reg; + + if (CONST_INT_P (offset) && satisfies_constraint_I (offset)) + { + sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str); + fuse_ops[2] = offset; + output_asm_insn (insn_template, fuse_ops); + } + + else if (GET_CODE (offset) == UNSPEC + && XINT (offset, 1) == UNSPEC_TOCREL) + { + if (TARGET_ELF) + sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str); + + else if (TARGET_XCOFF) + sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str); + + else + gcc_unreachable (); + + fuse_ops[2] = XVECEXP (offset, 0, 0); + output_asm_insn (insn_template, fuse_ops); + } + + else if (GET_CODE (offset) == PLUS + && GET_CODE (XEXP (offset, 0)) == UNSPEC + && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL + && CONST_INT_P (XEXP (offset, 1))) + { + rtx tocrel_unspec = XEXP (offset, 0); + if (TARGET_ELF) + sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str); + + else if (TARGET_XCOFF) + sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str); + + else + gcc_unreachable (); + + fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0); + fuse_ops[3] = XEXP (offset, 1); + output_asm_insn (insn_template, fuse_ops); + } + + else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset)) + { + sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str); + + fuse_ops[2] = offset; + output_asm_insn (insn_template, fuse_ops); + } + + else + fatal_insn ("Unable to generate load/store offset for fusion", offset); + + return; +} + +/* Wrap a TOC address that can be fused to indicate that special fusion + processing is needed. */ + +rtx +fusion_wrap_memory_address (rtx old_mem) +{ + rtx old_addr = XEXP (old_mem, 0); + rtvec v = gen_rtvec (1, old_addr); + rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS); + return replace_equiv_address_nv (old_mem, new_addr, false); +} + +/* Given an address, convert it into the addis and load offset parts. Addresses + created during the peephole2 process look like: + (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL)) + (unspec [(...)] UNSPEC_TOCREL)) + + Addresses created via toc fusion look like: + (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */ + +static void +fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo) +{ + rtx hi, lo; + + if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS) + { + lo = XVECEXP (addr, 0, 0); + hi = gen_rtx_HIGH (Pmode, lo); + } + else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM) + { + hi = XEXP (addr, 0); + lo = XEXP (addr, 1); + } + else + gcc_unreachable (); + + *p_hi = hi; + *p_lo = lo; +} + +/* Return a string to fuse an addis instruction with a gpr load to the same + register that we loaded up the addis instruction. The address that is used + is the logical address that was formed during peephole2: + (lo_sum (high) (low-part)) + + Or the address is the TOC address that is wrapped before register allocation: + (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS) + + The code is complicated, so we call output_asm_insn directly, and just + return "". 
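+   For example, a TOC-relative SImode load fused this way would be printed
+   on ELF targets roughly as (register number and symbol are illustrative):
+
+	addis 9,2,sym@toc@ha
+	lwz 9,sym@toc@l(9)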
*/ + +const char * +emit_fusion_gpr_load (rtx target, rtx mem) +{ + rtx addis_value; + rtx addr; + rtx load_offset; + const char *load_str = NULL; + const char *mode_name = NULL; + machine_mode mode; + + if (GET_CODE (mem) == ZERO_EXTEND) + mem = XEXP (mem, 0); + + gcc_assert (REG_P (target) && MEM_P (mem)); + + addr = XEXP (mem, 0); + fusion_split_address (addr, &addis_value, &load_offset); + + /* Now emit the load instruction to the same register. */ + mode = GET_MODE (mem); + switch (mode) + { + case QImode: + mode_name = "char"; + load_str = "lbz"; + break; + + case HImode: + mode_name = "short"; + load_str = "lhz"; + break; + + case SImode: + case SFmode: + mode_name = (mode == SFmode) ? "float" : "int"; + load_str = "lwz"; + break; + + case DImode: + case DFmode: + gcc_assert (TARGET_POWERPC64); + mode_name = (mode == DFmode) ? "double" : "long"; + load_str = "ld"; + break; + + default: + fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem)); + } + + /* Emit the addis instruction. */ + emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name); + + /* Emit the D-form load instruction. */ + emit_fusion_load_store (target, target, load_offset, load_str); + + return ""; +} + + +/* Return true if the peephole2 can combine a load/store involving a + combination of an addis instruction and the memory operation. This was + added to the ISA 3.0 (power9) hardware. */ + +bool +fusion_p9_p (rtx addis_reg, /* register set via addis. */ + rtx addis_value, /* addis value. */ + rtx dest, /* destination (memory or register). */ + rtx src) /* source (register or memory). */ +{ + rtx addr, mem, offset; + enum machine_mode mode = GET_MODE (src); + + /* Validate arguments. */ + if (!base_reg_operand (addis_reg, GET_MODE (addis_reg))) + return false; + + if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value))) + return false; + + /* Ignore extend operations that are part of the load. */ + if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND) + src = XEXP (src, 0); + + /* Test for memory<-register or register<-memory. */ + if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode)) + { + if (!MEM_P (dest)) + return false; + + mem = dest; + } + + else if (MEM_P (src)) + { + if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode)) + return false; + + mem = src; + } + + else + return false; + + addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */ + if (GET_CODE (addr) == PLUS) + { + if (!rtx_equal_p (addis_reg, XEXP (addr, 0))) + return false; + + return satisfies_constraint_I (XEXP (addr, 1)); + } + + else if (GET_CODE (addr) == LO_SUM) + { + if (!rtx_equal_p (addis_reg, XEXP (addr, 0))) + return false; + + offset = XEXP (addr, 1); + if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64)) + return small_toc_ref (offset, GET_MODE (offset)); + + else if (TARGET_ELF && !TARGET_POWERPC64) + return CONSTANT_P (offset); + } + + return false; +} + +/* During the peephole2 pass, adjust and expand the insns for an extended fusion + load sequence. + + The operands are: + operands[0] register set with addis + operands[1] value set via addis + operands[2] target register being loaded + operands[3] D-form memory reference using operands[0]. + + This is similar to the fusion introduced with power8, except it scales to + both loads/stores and does not require the result register to be the same as + the base register. At the moment, we only do this if register set with addis + is dead. 
*/ + +void +expand_fusion_p9_load (rtx *operands) +{ + rtx tmp_reg = operands[0]; + rtx addis_value = operands[1]; + rtx target = operands[2]; + rtx orig_mem = operands[3]; + rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn; + enum rtx_code plus_or_lo_sum; + machine_mode target_mode = GET_MODE (target); + machine_mode extend_mode = target_mode; + machine_mode ptr_mode = Pmode; + enum rtx_code extend = UNKNOWN; + + if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND) + { + extend = GET_CODE (orig_mem); + orig_mem = XEXP (orig_mem, 0); + target_mode = GET_MODE (orig_mem); + } + + gcc_assert (MEM_P (orig_mem)); + + orig_addr = XEXP (orig_mem, 0); + plus_or_lo_sum = GET_CODE (orig_addr); + gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM); + + offset = XEXP (orig_addr, 1); + new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset); + new_mem = replace_equiv_address_nv (orig_mem, new_addr, false); + + if (extend != UNKNOWN) + new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem); + + new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem), + UNSPEC_FUSION_P9); + + set = gen_rtx_SET (target, new_mem); + clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg); + insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)); + emit_insn (insn); + + return; +} + +/* During the peephole2 pass, adjust and expand the insns for an extended fusion + store sequence. + + The operands are: + operands[0] register set with addis + operands[1] value set via addis + operands[2] target D-form memory being stored to + operands[3] register being stored + + This is similar to the fusion introduced with power8, except it scales to + both loads/stores and does not require the result register to be the same as + the base register. At the moment, we only do this if register set with addis + is dead. */ + +void +expand_fusion_p9_store (rtx *operands) +{ + rtx tmp_reg = operands[0]; + rtx addis_value = operands[1]; + rtx orig_mem = operands[2]; + rtx src = operands[3]; + rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src; + enum rtx_code plus_or_lo_sum; + machine_mode target_mode = GET_MODE (orig_mem); + machine_mode ptr_mode = Pmode; + + gcc_assert (MEM_P (orig_mem)); + + orig_addr = XEXP (orig_mem, 0); + plus_or_lo_sum = GET_CODE (orig_addr); + gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM); + + offset = XEXP (orig_addr, 1); + new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset); + new_mem = replace_equiv_address_nv (orig_mem, new_addr, false); + + new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src), + UNSPEC_FUSION_P9); + + set = gen_rtx_SET (new_mem, new_src); + clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg); + insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)); + emit_insn (insn); + + return; +} + +/* Return a string to fuse an addis instruction with a load using extended + fusion. The address that is used is the logical address that was formed + during peephole2: (lo_sum (high) (low-part)) + + The code is complicated, so we call output_asm_insn directly, and just + return "". 
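+   For example, a TOC-relative DFmode load into a floating-point register
+   would be printed on ELF targets roughly as (register numbers and symbol
+   are illustrative):
+
+	addis 9,2,sym@toc@ha
+	lfd 0,sym@toc@l(9)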
*/ + +const char * +emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg) +{ + enum machine_mode mode = GET_MODE (reg); + rtx hi; + rtx lo; + rtx addr; + const char *load_string; + int r; + + if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND) + { + mem = XEXP (mem, 0); + mode = GET_MODE (mem); + } + + if (GET_CODE (reg) == SUBREG) + { + gcc_assert (SUBREG_BYTE (reg) == 0); + reg = SUBREG_REG (reg); + } + + if (!REG_P (reg)) + fatal_insn ("emit_fusion_p9_load, bad reg #1", reg); + + r = REGNO (reg); + if (FP_REGNO_P (r)) + { + if (mode == SFmode) + load_string = "lfs"; + else if (mode == DFmode || mode == DImode) + load_string = "lfd"; + else + gcc_unreachable (); + } + else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR) + { + if (mode == SFmode) + load_string = "lxssp"; + else if (mode == DFmode || mode == DImode) + load_string = "lxsd"; + else + gcc_unreachable (); + } + else if (INT_REGNO_P (r)) + { + switch (mode) + { + case QImode: + load_string = "lbz"; + break; + case HImode: + load_string = "lhz"; + break; + case SImode: + case SFmode: + load_string = "lwz"; + break; + case DImode: + case DFmode: + if (!TARGET_POWERPC64) + gcc_unreachable (); + load_string = "ld"; + break; + default: + gcc_unreachable (); + } + } + else + fatal_insn ("emit_fusion_p9_load, bad reg #2", reg); + + if (!MEM_P (mem)) + fatal_insn ("emit_fusion_p9_load not MEM", mem); + + addr = XEXP (mem, 0); + fusion_split_address (addr, &hi, &lo); + + /* Emit the addis instruction. */ + emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode)); + + /* Emit the D-form load instruction. */ + emit_fusion_load_store (reg, tmp_reg, lo, load_string); + + return ""; +} + +/* Return a string to fuse an addis instruction with a store using extended + fusion. The address that is used is the logical address that was formed + during peephole2: (lo_sum (high) (low-part)) + + The code is complicated, so we call output_asm_insn directly, and just + return "". */ + +const char * +emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg) +{ + enum machine_mode mode = GET_MODE (reg); + rtx hi; + rtx lo; + rtx addr; + const char *store_string; + int r; + + if (GET_CODE (reg) == SUBREG) + { + gcc_assert (SUBREG_BYTE (reg) == 0); + reg = SUBREG_REG (reg); + } + + if (!REG_P (reg)) + fatal_insn ("emit_fusion_p9_store, bad reg #1", reg); + + r = REGNO (reg); + if (FP_REGNO_P (r)) + { + if (mode == SFmode) + store_string = "stfs"; + else if (mode == DFmode) + store_string = "stfd"; + else + gcc_unreachable (); + } + else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR) + { + if (mode == SFmode) + store_string = "stxssp"; + else if (mode == DFmode || mode == DImode) + store_string = "stxsd"; + else + gcc_unreachable (); + } + else if (INT_REGNO_P (r)) + { + switch (mode) + { + case QImode: + store_string = "stb"; + break; + case HImode: + store_string = "sth"; + break; + case SImode: + case SFmode: + store_string = "stw"; + break; + case DImode: + case DFmode: + if (!TARGET_POWERPC64) + gcc_unreachable (); + store_string = "std"; + break; + default: + gcc_unreachable (); + } + } + else + fatal_insn ("emit_fusion_p9_store, bad reg #2", reg); + + if (!MEM_P (mem)) + fatal_insn ("emit_fusion_p9_store not MEM", mem); + + addr = XEXP (mem, 0); + fusion_split_address (addr, &hi, &lo); + + /* Emit the addis instruction. */ + emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode)); + + /* Emit the D-form load instruction. 
*/ + emit_fusion_load_store (reg, tmp_reg, lo, store_string); + + return ""; +} + + +/* Analyze vector computations and remove unnecessary doubleword + swaps (xxswapdi instructions). This pass is performed only + for little-endian VSX code generation. + + For this specific case, loads and stores of 4x32 and 2x64 vectors + are inefficient. These are implemented using the lvx2dx and + stvx2dx instructions, which invert the order of doublewords in + a vector register. Thus the code generation inserts an xxswapdi + after each such load, and prior to each such store. (For spill + code after register assignment, an additional xxswapdi is inserted + following each store in order to return a hard register to its + unpermuted value.) + + The extra xxswapdi instructions reduce performance. This can be + particularly bad for vectorized code. The purpose of this pass + is to reduce the number of xxswapdi instructions required for + correctness. + + The primary insight is that much code that operates on vectors + does not care about the relative order of elements in a register, + so long as the correct memory order is preserved. If we have + a computation where all input values are provided by lvxd2x/xxswapdi + sequences, all outputs are stored using xxswapdi/stvxd2x sequences, + and all intermediate computations are pure SIMD (independent of + element order), then all the xxswapdi's associated with the loads + and stores may be removed. + + This pass uses some of the infrastructure and logical ideas from + the "web" pass in web.c. We create maximal webs of computations + fitting the description above using union-find. Each such web is + then optimized by removing its unnecessary xxswapdi instructions. + + The pass is placed prior to global optimization so that we can + perform the optimization in the safest and simplest way possible; + that is, by replacing each xxswapdi insn with a register copy insn. + Subsequent forward propagation will remove copies where possible. + + There are some operations sensitive to element order for which we + can still allow the operation, provided we modify those operations. + These include CONST_VECTORs, for which we must swap the first and + second halves of the constant vector; and SUBREGs, for which we + must adjust the byte offset to account for the swapped doublewords. + A remaining opportunity would be non-immediate-form splats, for + which we should adjust the selected lane of the input. We should + also make code generation adjustments for sum-across operations, + since this is a common vectorizer reduction. + + Because we run prior to the first split, we can see loads and stores + here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla + vector loads and stores that have not yet been split into a permuting + load/store and a swap. (One way this can happen is with a builtin + call to vec_vsx_{ld,st}.) We can handle these as well, but rather + than deleting a swap, we convert the load/store into a permuting + load/store (which effectively removes the swap). */ + +/* Notes on Permutes + + We do not currently handle computations that contain permutes. There + is a general transformation that can be performed correctly, but it + may introduce more expensive code than it replaces. To handle these + would require a cost model to determine when to perform the optimization. + This commentary records how this could be done if desired. 
+ + The most general permute is something like this (example for V16QI): + + (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI)) + (parallel [(const_int a0) (const_int a1) + ... + (const_int a14) (const_int a15)])) + + where a0,...,a15 are in [0,31] and select elements from op1 and op2 + to produce in the result. + + Regardless of mode, we can convert the PARALLEL to a mask of 16 + byte-element selectors. Let's call this M, with M[i] representing + the ith byte-element selector value. Then if we swap doublewords + throughout the computation, we can get correct behavior by replacing + M with M' as follows: + + M'[i] = { (M[i]+8)%16 : M[i] in [0,15] + { ((M[i]+8)%16)+16 : M[i] in [16,31] + + This seems promising at first, since we are just replacing one mask + with another. But certain masks are preferable to others. If M + is a mask that matches a vmrghh pattern, for example, M' certainly + will not. Instead of a single vmrghh, we would generate a load of + M' and a vperm. So we would need to know how many xxswapd's we can + remove as a result of this transformation to determine if it's + profitable; and preferably the logic would need to be aware of all + the special preferable masks. + + Another form of permute is an UNSPEC_VPERM, in which the mask is + already in a register. In some cases, this mask may be a constant + that we can discover with ud-chains, in which case the above + transformation is ok. However, the common usage here is for the + mask to be produced by an UNSPEC_LVSL, in which case the mask + cannot be known at compile time. In such a case we would have to + generate several instructions to compute M' as above at run time, + and a cost model is needed again. + + However, when the mask M for an UNSPEC_VPERM is loaded from the + constant pool, we can replace M with M' as above at no cost + beyond adding a constant pool entry. */ + +/* This is based on the union-find logic in web.c. web_entry_base is + defined in df.h. */ +class swap_web_entry : public web_entry_base +{ + public: + /* Pointer to the insn. */ + rtx_insn *insn; + /* Set if insn contains a mention of a vector register. All other + fields are undefined if this field is unset. */ + unsigned int is_relevant : 1; + /* Set if insn is a load. */ + unsigned int is_load : 1; + /* Set if insn is a store. */ + unsigned int is_store : 1; + /* Set if insn is a doubleword swap. This can either be a register swap + or a permuting load or store (test is_load and is_store for this). */ + unsigned int is_swap : 1; + /* Set if the insn has a live-in use of a parameter register. */ + unsigned int is_live_in : 1; + /* Set if the insn has a live-out def of a return register. */ + unsigned int is_live_out : 1; + /* Set if the insn contains a subreg reference of a vector register. */ + unsigned int contains_subreg : 1; + /* Set if the insn contains a 128-bit integer operand. */ + unsigned int is_128_int : 1; + /* Set if this is a call-insn. */ + unsigned int is_call : 1; + /* Set if this insn does not perform a vector operation for which + element order matters, or if we know how to fix it up if it does. + Undefined if is_swap is set. */ + unsigned int is_swappable : 1; + /* A nonzero value indicates what kind of special handling for this + insn is required if doublewords are swapped. Undefined if + is_swappable is not set. */ + unsigned int special_handling : 4; + /* Set if the web represented by this entry cannot be optimized. */ + unsigned int web_not_optimizable : 1; + /* Set if this insn should be deleted. 
*/ + unsigned int will_delete : 1; +}; + +enum special_handling_values { + SH_NONE = 0, + SH_CONST_VECTOR, + SH_SUBREG, + SH_NOSWAP_LD, + SH_NOSWAP_ST, + SH_EXTRACT, + SH_SPLAT, + SH_XXPERMDI, + SH_CONCAT, + SH_VPERM +}; + +/* Union INSN with all insns containing definitions that reach USE. + Detect whether USE is live-in to the current function. */ +static void +union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use) +{ + struct df_link *link = DF_REF_CHAIN (use); + + if (!link) + insn_entry[INSN_UID (insn)].is_live_in = 1; + + while (link) + { + if (DF_REF_IS_ARTIFICIAL (link->ref)) + insn_entry[INSN_UID (insn)].is_live_in = 1; + + if (DF_REF_INSN_INFO (link->ref)) + { + rtx def_insn = DF_REF_INSN (link->ref); + (void)unionfind_union (insn_entry + INSN_UID (insn), + insn_entry + INSN_UID (def_insn)); + } + + link = link->next; + } +} + +/* Union INSN with all insns containing uses reached from DEF. + Detect whether DEF is live-out from the current function. */ +static void +union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def) +{ + struct df_link *link = DF_REF_CHAIN (def); + + if (!link) + insn_entry[INSN_UID (insn)].is_live_out = 1; + + while (link) + { + /* This could be an eh use or some other artificial use; + we treat these all the same (killing the optimization). */ + if (DF_REF_IS_ARTIFICIAL (link->ref)) + insn_entry[INSN_UID (insn)].is_live_out = 1; + + if (DF_REF_INSN_INFO (link->ref)) + { + rtx use_insn = DF_REF_INSN (link->ref); + (void)unionfind_union (insn_entry + INSN_UID (insn), + insn_entry + INSN_UID (use_insn)); + } + + link = link->next; + } +} + +/* Return 1 iff INSN is a load insn, including permuting loads that + represent an lvxd2x instruction; else return 0. */ +static unsigned int +insn_is_load_p (rtx insn) +{ + rtx body = PATTERN (insn); + + if (GET_CODE (body) == SET) + { + if (GET_CODE (SET_SRC (body)) == MEM) + return 1; + + if (GET_CODE (SET_SRC (body)) == VEC_SELECT + && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM) + return 1; + + return 0; + } + + if (GET_CODE (body) != PARALLEL) + return 0; + + rtx set = XVECEXP (body, 0, 0); + + if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM) + return 1; + + return 0; +} + +/* Return 1 iff INSN is a store insn, including permuting stores that + represent an stvxd2x instruction; else return 0. */ +static unsigned int +insn_is_store_p (rtx insn) +{ + rtx body = PATTERN (insn); + if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM) + return 1; + if (GET_CODE (body) != PARALLEL) + return 0; + rtx set = XVECEXP (body, 0, 0); + if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM) + return 1; + return 0; +} + +/* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap, + a permuting load, or a permuting store. 
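+   For example, a V4SI register swap has the form
+
+	(set (reg:V4SI A) (vec_select:V4SI (reg:V4SI B)
+			   (parallel [(const_int 2) (const_int 3)
+				      (const_int 0) (const_int 1)])))
+
+   where A and B are arbitrary vector registers; a permuting load or store
+   looks the same with a MEM in place of one of the registers.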
*/ +static unsigned int +insn_is_swap_p (rtx insn) +{ + rtx body = PATTERN (insn); + if (GET_CODE (body) != SET) + return 0; + rtx rhs = SET_SRC (body); + if (GET_CODE (rhs) != VEC_SELECT) + return 0; + rtx parallel = XEXP (rhs, 1); + if (GET_CODE (parallel) != PARALLEL) + return 0; + unsigned int len = XVECLEN (parallel, 0); + if (len != 2 && len != 4 && len != 8 && len != 16) + return 0; + for (unsigned int i = 0; i < len / 2; ++i) + { + rtx op = XVECEXP (parallel, 0, i); + if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i) + return 0; + } + for (unsigned int i = len / 2; i < len; ++i) + { + rtx op = XVECEXP (parallel, 0, i); + if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2) + return 0; + } + return 1; +} + +/* Return TRUE if insn is a swap fed by a load from the constant pool. */ +static bool +const_load_sequence_p (swap_web_entry *insn_entry, rtx insn) +{ + unsigned uid = INSN_UID (insn); + if (!insn_entry[uid].is_swap || insn_entry[uid].is_load) + return false; + + /* Find the unique use in the swap and locate its def. If the def + isn't unique, punt. */ + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + df_ref use; + FOR_EACH_INSN_INFO_USE (use, insn_info) + { + struct df_link *def_link = DF_REF_CHAIN (use); + if (!def_link || def_link->next) + return false; + + rtx def_insn = DF_REF_INSN (def_link->ref); + unsigned uid2 = INSN_UID (def_insn); + if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap) + return false; + + rtx body = PATTERN (def_insn); + if (GET_CODE (body) != SET + || GET_CODE (SET_SRC (body)) != VEC_SELECT + || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM) + return false; + + rtx mem = XEXP (SET_SRC (body), 0); + rtx base_reg = XEXP (mem, 0); + + df_ref base_use; + insn_info = DF_INSN_INFO_GET (def_insn); + FOR_EACH_INSN_INFO_USE (base_use, insn_info) + { + if (!rtx_equal_p (DF_REF_REG (base_use), base_reg)) + continue; + + struct df_link *base_def_link = DF_REF_CHAIN (base_use); + if (!base_def_link || base_def_link->next) + return false; + + rtx tocrel_insn = DF_REF_INSN (base_def_link->ref); + rtx tocrel_body = PATTERN (tocrel_insn); + rtx base, offset; + if (GET_CODE (tocrel_body) != SET) + return false; + /* There is an extra level of indirection for small/large + code models. */ + rtx tocrel_expr = SET_SRC (tocrel_body); + if (GET_CODE (tocrel_expr) == MEM) + tocrel_expr = XEXP (tocrel_expr, 0); + if (!toc_relative_expr_p (tocrel_expr, false)) + return false; + split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset); + if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base)) + return false; + } + } + return true; +} + +/* Return TRUE iff OP matches a V2DF reduction pattern. See the + definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. 
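+   Schematically, the checks below accept
+
+	(plus:V2DF (vec_concat:V2DF
+		     (vec_select:DF (reg:V2DF x) (parallel [(const_int 1)]))
+		     (vec_select:DF (reg:V2DF x) (parallel [(const_int 0)])))
+		   ...)
+
+   and likewise with smin or smax in place of plus; the remaining operand
+   is not examined here.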
*/ +static bool +v2df_reduction_p (rtx op) +{ + if (GET_MODE (op) != V2DFmode) + return false; + + enum rtx_code code = GET_CODE (op); + if (code != PLUS && code != SMIN && code != SMAX) + return false; + + rtx concat = XEXP (op, 0); + if (GET_CODE (concat) != VEC_CONCAT) + return false; + + rtx select0 = XEXP (concat, 0); + rtx select1 = XEXP (concat, 1); + if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT) + return false; + + rtx reg0 = XEXP (select0, 0); + rtx reg1 = XEXP (select1, 0); + if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0)) + return false; + + rtx parallel0 = XEXP (select0, 1); + rtx parallel1 = XEXP (select1, 1); + if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL) + return false; + + if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx) + || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx)) + return false; + + return true; +} + +/* Return 1 iff OP is an operand that will not be affected by having + vector doublewords swapped in memory. */ +static unsigned int +rtx_is_swappable_p (rtx op, unsigned int *special) +{ + enum rtx_code code = GET_CODE (op); + int i, j; + rtx parallel; + + switch (code) + { + case LABEL_REF: + case SYMBOL_REF: + case CLOBBER: + case REG: + return 1; + + case VEC_CONCAT: + case ASM_INPUT: + case ASM_OPERANDS: + return 0; + + case CONST_VECTOR: + { + *special = SH_CONST_VECTOR; + return 1; + } + + case VEC_DUPLICATE: + /* Opportunity: If XEXP (op, 0) has the same mode as the result, + and XEXP (op, 1) is a PARALLEL with a single QImode const int, + it represents a vector splat for which we can do special + handling. */ + if (GET_CODE (XEXP (op, 0)) == CONST_INT) + return 1; + else if (REG_P (XEXP (op, 0)) + && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0))) + /* This catches V2DF and V2DI splat, at a minimum. */ + return 1; + else if (GET_CODE (XEXP (op, 0)) == TRUNCATE + && REG_P (XEXP (XEXP (op, 0), 0)) + && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0))) + /* This catches splat of a truncated value. */ + return 1; + else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT) + /* If the duplicated item is from a select, defer to the select + processing to see if we can change the lane for the splat. */ + return rtx_is_swappable_p (XEXP (op, 0), special); + else + return 0; + + case VEC_SELECT: + /* A vec_extract operation is ok if we change the lane. */ + if (GET_CODE (XEXP (op, 0)) == REG + && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op) + && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL + && XVECLEN (parallel, 0) == 1 + && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT) + { + *special = SH_EXTRACT; + return 1; + } + /* An XXPERMDI is ok if we adjust the lanes. Note that if the + XXPERMDI is a swap operation, it will be identified by + insn_is_swap_p and therefore we won't get here. */ + else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT + && (GET_MODE (XEXP (op, 0)) == V4DFmode + || GET_MODE (XEXP (op, 0)) == V4DImode) + && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL + && XVECLEN (parallel, 0) == 2 + && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT + && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT) + { + *special = SH_XXPERMDI; + return 1; + } + else if (v2df_reduction_p (op)) + return 1; + else + return 0; + + case UNSPEC: + { + /* Various operations are unsafe for this optimization, at least + without significant additional work. 
Permutes are obviously + problematic, as both the permute control vector and the ordering + of the target values are invalidated by doubleword swapping. + Vector pack and unpack modify the number of vector lanes. + Merge-high/low will not operate correctly on swapped operands. + Vector shifts across element boundaries are clearly uncool, + as are vector select and concatenate operations. Vector + sum-across instructions define one operand with a specific + order-dependent element, so additional fixup code would be + needed to make those work. Vector set and non-immediate-form + vector splat are element-order sensitive. A few of these + cases might be workable with special handling if required. + Adding cost modeling would be appropriate in some cases. */ + int val = XINT (op, 1); + switch (val) + { + default: + break; + case UNSPEC_VMRGH_DIRECT: + case UNSPEC_VMRGL_DIRECT: + case UNSPEC_VPACK_SIGN_SIGN_SAT: + case UNSPEC_VPACK_SIGN_UNS_SAT: + case UNSPEC_VPACK_UNS_UNS_MOD: + case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT: + case UNSPEC_VPACK_UNS_UNS_SAT: + case UNSPEC_VPERM: + case UNSPEC_VPERM_UNS: + case UNSPEC_VPERMHI: + case UNSPEC_VPERMSI: + case UNSPEC_VPKPX: + case UNSPEC_VSLDOI: + case UNSPEC_VSLO: + case UNSPEC_VSRO: + case UNSPEC_VSUM2SWS: + case UNSPEC_VSUM4S: + case UNSPEC_VSUM4UBS: + case UNSPEC_VSUMSWS: + case UNSPEC_VSUMSWS_DIRECT: + case UNSPEC_VSX_CONCAT: + case UNSPEC_VSX_SET: + case UNSPEC_VSX_SLDWI: + case UNSPEC_VUNPACK_HI_SIGN: + case UNSPEC_VUNPACK_HI_SIGN_DIRECT: + case UNSPEC_VUNPACK_LO_SIGN: + case UNSPEC_VUNPACK_LO_SIGN_DIRECT: + case UNSPEC_VUPKHPX: + case UNSPEC_VUPKHS_V4SF: + case UNSPEC_VUPKHU_V4SF: + case UNSPEC_VUPKLPX: + case UNSPEC_VUPKLS_V4SF: + case UNSPEC_VUPKLU_V4SF: + case UNSPEC_VSX_CVDPSPN: + case UNSPEC_VSX_CVSPDP: + case UNSPEC_VSX_CVSPDPN: + case UNSPEC_VSX_EXTRACT: + case UNSPEC_VSX_VSLO: + case UNSPEC_VSX_VEC_INIT: + return 0; + case UNSPEC_VSPLT_DIRECT: + case UNSPEC_VSX_XXSPLTD: + *special = SH_SPLAT; + return 1; + case UNSPEC_REDUC_PLUS: + case UNSPEC_REDUC: + return 1; + } + } + + default: + break; + } + + const char *fmt = GET_RTX_FORMAT (code); + int ok = 1; + + for (i = 0; i < GET_RTX_LENGTH (code); ++i) + if (fmt[i] == 'e' || fmt[i] == 'u') + { + unsigned int special_op = SH_NONE; + ok &= rtx_is_swappable_p (XEXP (op, i), &special_op); + if (special_op == SH_NONE) + continue; + /* Ensure we never have two kinds of special handling + for the same insn. */ + if (*special != SH_NONE && *special != special_op) + return 0; + *special = special_op; + } + else if (fmt[i] == 'E') + for (j = 0; j < XVECLEN (op, i); ++j) + { + unsigned int special_op = SH_NONE; + ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op); + if (special_op == SH_NONE) + continue; + /* Ensure we never have two kinds of special handling + for the same insn. */ + if (*special != SH_NONE && *special != special_op) + return 0; + *special = special_op; + } + + return ok; +} + +/* Return 1 iff INSN is an operand that will not be affected by + having vector doublewords swapped in memory (in which case + *SPECIAL is unchanged), or that can be modified to be correct + if vector doublewords are swapped in memory (in which case + *SPECIAL is changed to a value indicating how). */ +static unsigned int +insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn, + unsigned int *special) +{ + /* Calls are always bad. */ + if (GET_CODE (insn) == CALL_INSN) + return 0; + + /* Loads and stores seen here are not permuting, but we can still + fix them up by converting them to permuting ones. 
Exceptions: + UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL + body instead of a SET; and UNSPEC_STVE, which has an UNSPEC + for the SET source. Also we must now make an exception for lvx + and stvx when they are not in the UNSPEC_LVX/STVX form (with the + explicit "& -16") since this leads to unrecognizable insns. */ + rtx body = PATTERN (insn); + int i = INSN_UID (insn); + + if (insn_entry[i].is_load) + { + if (GET_CODE (body) == SET) + { + rtx rhs = SET_SRC (body); + /* Even without a swap, the RHS might be a vec_select for, say, + a byte-reversing load. */ + if (GET_CODE (rhs) != MEM) + return 0; + if (GET_CODE (XEXP (rhs, 0)) == AND) + return 0; + + *special = SH_NOSWAP_LD; + return 1; + } + else + return 0; + } + + if (insn_entry[i].is_store) + { + if (GET_CODE (body) == SET + && GET_CODE (SET_SRC (body)) != UNSPEC) + { + rtx lhs = SET_DEST (body); + /* Even without a swap, the LHS might be a vec_select for, say, + a byte-reversing store. */ + if (GET_CODE (lhs) != MEM) + return 0; + if (GET_CODE (XEXP (lhs, 0)) == AND) + return 0; + + *special = SH_NOSWAP_ST; + return 1; + } + else + return 0; + } + + /* A convert to single precision can be left as is provided that + all of its uses are in xxspltw instructions that splat BE element + zero. */ + if (GET_CODE (body) == SET + && GET_CODE (SET_SRC (body)) == UNSPEC + && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN) + { + df_ref def; + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + + FOR_EACH_INSN_INFO_DEF (def, insn_info) + { + struct df_link *link = DF_REF_CHAIN (def); + if (!link) + return 0; + + for (; link; link = link->next) { + rtx use_insn = DF_REF_INSN (link->ref); + rtx use_body = PATTERN (use_insn); + if (GET_CODE (use_body) != SET + || GET_CODE (SET_SRC (use_body)) != UNSPEC + || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW + || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx) + return 0; + } + } + + return 1; + } + + /* A concatenation of two doublewords is ok if we reverse the + order of the inputs. */ + if (GET_CODE (body) == SET + && GET_CODE (SET_SRC (body)) == VEC_CONCAT + && (GET_MODE (SET_SRC (body)) == V2DFmode + || GET_MODE (SET_SRC (body)) == V2DImode)) + { + *special = SH_CONCAT; + return 1; + } + + /* V2DF reductions are always swappable. */ + if (GET_CODE (body) == PARALLEL) + { + rtx expr = XVECEXP (body, 0, 0); + if (GET_CODE (expr) == SET + && v2df_reduction_p (SET_SRC (expr))) + return 1; + } + + /* An UNSPEC_VPERM is ok if the mask operand is loaded from the + constant pool. */ + if (GET_CODE (body) == SET + && GET_CODE (SET_SRC (body)) == UNSPEC + && XINT (SET_SRC (body), 1) == UNSPEC_VPERM + && XVECLEN (SET_SRC (body), 0) == 3 + && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG) + { + rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2); + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + df_ref use; + FOR_EACH_INSN_INFO_USE (use, insn_info) + if (rtx_equal_p (DF_REF_REG (use), mask_reg)) + { + struct df_link *def_link = DF_REF_CHAIN (use); + /* Punt if multiple definitions for this reg. */ + if (def_link && !def_link->next && + const_load_sequence_p (insn_entry, + DF_REF_INSN (def_link->ref))) + { + *special = SH_VPERM; + return 1; + } + } + } + + /* Otherwise check the operands for vector lane violations. */ + return rtx_is_swappable_p (body, special); +} + +enum chain_purpose { FOR_LOADS, FOR_STORES }; + +/* Return true if the UD or DU chain headed by LINK is non-empty, + and every entry on the chain references an insn that is a + register swap. 
Furthermore, if PURPOSE is FOR_LOADS, each such + register swap must have only permuting loads as reaching defs. + If PURPOSE is FOR_STORES, each such register swap must have only + register swaps or permuting stores as reached uses. */ +static bool +chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link, + enum chain_purpose purpose) +{ + if (!link) + return false; + + for (; link; link = link->next) + { + if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref)))) + continue; + + if (DF_REF_IS_ARTIFICIAL (link->ref)) + return false; + + rtx reached_insn = DF_REF_INSN (link->ref); + unsigned uid = INSN_UID (reached_insn); + struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn); + + if (!insn_entry[uid].is_swap || insn_entry[uid].is_load + || insn_entry[uid].is_store) + return false; + + if (purpose == FOR_LOADS) + { + df_ref use; + FOR_EACH_INSN_INFO_USE (use, insn_info) + { + struct df_link *swap_link = DF_REF_CHAIN (use); + + while (swap_link) + { + if (DF_REF_IS_ARTIFICIAL (link->ref)) + return false; + + rtx swap_def_insn = DF_REF_INSN (swap_link->ref); + unsigned uid2 = INSN_UID (swap_def_insn); + + /* Only permuting loads are allowed. */ + if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load) + return false; + + swap_link = swap_link->next; + } + } + } + else if (purpose == FOR_STORES) + { + df_ref def; + FOR_EACH_INSN_INFO_DEF (def, insn_info) + { + struct df_link *swap_link = DF_REF_CHAIN (def); + + while (swap_link) + { + if (DF_REF_IS_ARTIFICIAL (link->ref)) + return false; + + rtx swap_use_insn = DF_REF_INSN (swap_link->ref); + unsigned uid2 = INSN_UID (swap_use_insn); + + /* Permuting stores or register swaps are allowed. */ + if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load) + return false; + + swap_link = swap_link->next; + } + } + } + } + + return true; +} + +/* Mark the xxswapdi instructions associated with permuting loads and + stores for removal. Note that we only flag them for deletion here, + as there is a possibility of a swap being reached from multiple + loads, etc. */ +static void +mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i) +{ + rtx insn = insn_entry[i].insn; + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + + if (insn_entry[i].is_load) + { + df_ref def; + FOR_EACH_INSN_INFO_DEF (def, insn_info) + { + struct df_link *link = DF_REF_CHAIN (def); + + /* We know by now that these are swaps, so we can delete + them confidently. */ + while (link) + { + rtx use_insn = DF_REF_INSN (link->ref); + insn_entry[INSN_UID (use_insn)].will_delete = 1; + link = link->next; + } + } + } + else if (insn_entry[i].is_store) + { + df_ref use; + FOR_EACH_INSN_INFO_USE (use, insn_info) + { + /* Ignore uses for addressability. */ + machine_mode mode = GET_MODE (DF_REF_REG (use)); + if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode)) + continue; + + struct df_link *link = DF_REF_CHAIN (use); + + /* We know by now that these are swaps, so we can delete + them confidently. */ + while (link) + { + rtx def_insn = DF_REF_INSN (link->ref); + insn_entry[INSN_UID (def_insn)].will_delete = 1; + link = link->next; + } + } + } +} + +/* OP is either a CONST_VECTOR or an expression containing one. + Swap the first half of the vector with the second in the first + case. Recurse to find it in the second. 
*/ +static void +swap_const_vector_halves (rtx op) +{ + int i; + enum rtx_code code = GET_CODE (op); + if (GET_CODE (op) == CONST_VECTOR) + { + int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2; + for (i = 0; i < half_units; ++i) + { + rtx temp = CONST_VECTOR_ELT (op, i); + CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units); + CONST_VECTOR_ELT (op, i + half_units) = temp; + } + } + else + { + int j; + const char *fmt = GET_RTX_FORMAT (code); + for (i = 0; i < GET_RTX_LENGTH (code); ++i) + if (fmt[i] == 'e' || fmt[i] == 'u') + swap_const_vector_halves (XEXP (op, i)); + else if (fmt[i] == 'E') + for (j = 0; j < XVECLEN (op, i); ++j) + swap_const_vector_halves (XVECEXP (op, i, j)); + } +} + +/* Find all subregs of a vector expression that perform a narrowing, + and adjust the subreg index to account for doubleword swapping. */ +static void +adjust_subreg_index (rtx op) +{ + enum rtx_code code = GET_CODE (op); + if (code == SUBREG + && (GET_MODE_SIZE (GET_MODE (op)) + < GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))) + { + unsigned int index = SUBREG_BYTE (op); + if (index < 8) + index += 8; + else + index -= 8; + SUBREG_BYTE (op) = index; + } + + const char *fmt = GET_RTX_FORMAT (code); + int i,j; + for (i = 0; i < GET_RTX_LENGTH (code); ++i) + if (fmt[i] == 'e' || fmt[i] == 'u') + adjust_subreg_index (XEXP (op, i)); + else if (fmt[i] == 'E') + for (j = 0; j < XVECLEN (op, i); ++j) + adjust_subreg_index (XVECEXP (op, i, j)); +} + +/* Convert the non-permuting load INSN to a permuting one. */ +static void +permute_load (rtx_insn *insn) +{ + rtx body = PATTERN (insn); + rtx mem_op = SET_SRC (body); + rtx tgt_reg = SET_DEST (body); + machine_mode mode = GET_MODE (tgt_reg); + int n_elts = GET_MODE_NUNITS (mode); + int half_elts = n_elts / 2; + rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts)); + int i, j; + for (i = 0, j = half_elts; i < half_elts; ++i, ++j) + XVECEXP (par, 0, i) = GEN_INT (j); + for (i = half_elts, j = 0; j < half_elts; ++i, ++j) + XVECEXP (par, 0, i) = GEN_INT (j); + rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par); + SET_SRC (body) = sel; + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "Replacing load %d with permuted load\n", + INSN_UID (insn)); +} + +/* Convert the non-permuting store INSN to a permuting one. */ +static void +permute_store (rtx_insn *insn) +{ + rtx body = PATTERN (insn); + rtx src_reg = SET_SRC (body); + machine_mode mode = GET_MODE (src_reg); + int n_elts = GET_MODE_NUNITS (mode); + int half_elts = n_elts / 2; + rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts)); + int i, j; + for (i = 0, j = half_elts; i < half_elts; ++i, ++j) + XVECEXP (par, 0, i) = GEN_INT (j); + for (i = half_elts, j = 0; j < half_elts; ++i, ++j) + XVECEXP (par, 0, i) = GEN_INT (j); + rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par); + SET_SRC (body) = sel; + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "Replacing store %d with permuted store\n", + INSN_UID (insn)); +} + +/* Given OP that contains a vector extract operation, adjust the index + of the extracted lane to account for the doubleword swap. */ +static void +adjust_extract (rtx_insn *insn) +{ + rtx pattern = PATTERN (insn); + if (GET_CODE (pattern) == PARALLEL) + pattern = XVECEXP (pattern, 0, 0); + rtx src = SET_SRC (pattern); + /* The vec_select may be wrapped in a vec_duplicate for a splat, so + account for that. */ + rtx sel = GET_CODE (src) == VEC_DUPLICATE ? 
XEXP (src, 0) : src; + rtx par = XEXP (sel, 1); + int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1; + int lane = INTVAL (XVECEXP (par, 0, 0)); + lane = lane >= half_elts ? lane - half_elts : lane + half_elts; + XVECEXP (par, 0, 0) = GEN_INT (lane); + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn)); +} + +/* Given OP that contains a vector direct-splat operation, adjust the index + of the source lane to account for the doubleword swap. */ +static void +adjust_splat (rtx_insn *insn) +{ + rtx body = PATTERN (insn); + rtx unspec = XEXP (body, 1); + int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1; + int lane = INTVAL (XVECEXP (unspec, 0, 1)); + lane = lane >= half_elts ? lane - half_elts : lane + half_elts; + XVECEXP (unspec, 0, 1) = GEN_INT (lane); + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn)); +} + +/* Given OP that contains an XXPERMDI operation (that is not a doubleword + swap), reverse the order of the source operands and adjust the indices + of the source lanes to account for doubleword reversal. */ +static void +adjust_xxpermdi (rtx_insn *insn) +{ + rtx set = PATTERN (insn); + rtx select = XEXP (set, 1); + rtx concat = XEXP (select, 0); + rtx src0 = XEXP (concat, 0); + XEXP (concat, 0) = XEXP (concat, 1); + XEXP (concat, 1) = src0; + rtx parallel = XEXP (select, 1); + int lane0 = INTVAL (XVECEXP (parallel, 0, 0)); + int lane1 = INTVAL (XVECEXP (parallel, 0, 1)); + int new_lane0 = 3 - lane1; + int new_lane1 = 3 - lane0; + XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0); + XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1); + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn)); +} + +/* Given OP that contains a VEC_CONCAT operation of two doublewords, + reverse the order of those inputs. */ +static void +adjust_concat (rtx_insn *insn) +{ + rtx set = PATTERN (insn); + rtx concat = XEXP (set, 1); + rtx src0 = XEXP (concat, 0); + XEXP (concat, 0) = XEXP (concat, 1); + XEXP (concat, 1) = src0; + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn)); +} + +/* Given an UNSPEC_VPERM insn, modify the mask loaded from the + constant pool to reflect swapped doublewords. */ +static void +adjust_vperm (rtx_insn *insn) +{ + /* We previously determined that the UNSPEC_VPERM was fed by a + swap of a swapping load of a TOC-relative constant pool symbol. + Find the MEM in the swapping load and replace it with a MEM for + the adjusted mask constant. */ + rtx set = PATTERN (insn); + rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2); + + /* Find the swap. */ + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + df_ref use; + rtx_insn *swap_insn = 0; + FOR_EACH_INSN_INFO_USE (use, insn_info) + if (rtx_equal_p (DF_REF_REG (use), mask_reg)) + { + struct df_link *def_link = DF_REF_CHAIN (use); + gcc_assert (def_link && !def_link->next); + swap_insn = DF_REF_INSN (def_link->ref); + break; + } + gcc_assert (swap_insn); + + /* Find the load. 
*/ + insn_info = DF_INSN_INFO_GET (swap_insn); + rtx_insn *load_insn = 0; + FOR_EACH_INSN_INFO_USE (use, insn_info) + { + struct df_link *def_link = DF_REF_CHAIN (use); + gcc_assert (def_link && !def_link->next); + load_insn = DF_REF_INSN (def_link->ref); + break; + } + gcc_assert (load_insn); + + /* Find the TOC-relative symbol access. */ + insn_info = DF_INSN_INFO_GET (load_insn); + rtx_insn *tocrel_insn = 0; + FOR_EACH_INSN_INFO_USE (use, insn_info) + { + struct df_link *def_link = DF_REF_CHAIN (use); + gcc_assert (def_link && !def_link->next); + tocrel_insn = DF_REF_INSN (def_link->ref); + break; + } + gcc_assert (tocrel_insn); + + /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p + to set tocrel_base; otherwise it would be unnecessary as we've + already established it will return true. */ + rtx base, offset; + rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn)); + /* There is an extra level of indirection for small/large code models. */ + if (GET_CODE (tocrel_expr) == MEM) + tocrel_expr = XEXP (tocrel_expr, 0); + if (!toc_relative_expr_p (tocrel_expr, false)) + gcc_unreachable (); + split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset); + rtx const_vector = get_pool_constant (base); + /* With the extra indirection, get_pool_constant will produce the + real constant from the reg_equal expression, so get the real + constant. */ + if (GET_CODE (const_vector) == SYMBOL_REF) + const_vector = get_pool_constant (const_vector); + gcc_assert (GET_CODE (const_vector) == CONST_VECTOR); + + /* Create an adjusted mask from the initial mask. */ + unsigned int new_mask[16], i, val; + for (i = 0; i < 16; ++i) { + val = INTVAL (XVECEXP (const_vector, 0, i)); + if (val < 16) + new_mask[i] = (val + 8) % 16; + else + new_mask[i] = ((val + 8) % 16) + 16; + } + + /* Create a new CONST_VECTOR and a MEM that references it. */ + rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16)); + for (i = 0; i < 16; ++i) + XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]); + rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0)); + rtx new_mem = force_const_mem (V16QImode, new_const_vector); + /* This gives us a MEM whose base operand is a SYMBOL_REF, which we + can't recognize. Force the SYMBOL_REF into a register. */ + if (!REG_P (XEXP (new_mem, 0))) { + rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0)); + XEXP (new_mem, 0) = base_reg; + /* Move the newly created insn ahead of the load insn. */ + rtx_insn *force_insn = get_last_insn (); + remove_insn (force_insn); + rtx_insn *before_load_insn = PREV_INSN (load_insn); + add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn)); + df_insn_rescan (before_load_insn); + df_insn_rescan (force_insn); + } + + /* Replace the MEM in the load instruction and rescan it. */ + XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem; + INSN_CODE (load_insn) = -1; /* Force re-recognition. */ + df_insn_rescan (load_insn); + + if (dump_file) + fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn)); +} + +/* The insn described by INSN_ENTRY[I] can be swapped, but only + with special handling. Take care of that here. */ +static void +handle_special_swappables (swap_web_entry *insn_entry, unsigned i) +{ + rtx_insn *insn = insn_entry[i].insn; + rtx body = PATTERN (insn); + + switch (insn_entry[i].special_handling) + { + default: + gcc_unreachable (); + case SH_CONST_VECTOR: + { + /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. 
*/ + gcc_assert (GET_CODE (body) == SET); + rtx rhs = SET_SRC (body); + swap_const_vector_halves (rhs); + if (dump_file) + fprintf (dump_file, "Swapping constant halves in insn %d\n", i); + break; + } + case SH_SUBREG: + /* A subreg of the same size is already safe. For subregs that + select a smaller portion of a reg, adjust the index for + swapped doublewords. */ + adjust_subreg_index (body); + if (dump_file) + fprintf (dump_file, "Adjusting subreg in insn %d\n", i); + break; + case SH_NOSWAP_LD: + /* Convert a non-permuting load to a permuting one. */ + permute_load (insn); + break; + case SH_NOSWAP_ST: + /* Convert a non-permuting store to a permuting one. */ + permute_store (insn); + break; + case SH_EXTRACT: + /* Change the lane on an extract operation. */ + adjust_extract (insn); + break; + case SH_SPLAT: + /* Change the lane on a direct-splat operation. */ + adjust_splat (insn); + break; + case SH_XXPERMDI: + /* Change the lanes on an XXPERMDI operation. */ + adjust_xxpermdi (insn); + break; + case SH_CONCAT: + /* Reverse the order of a concatenation operation. */ + adjust_concat (insn); + break; + case SH_VPERM: + /* Change the mask loaded from the constant pool for a VPERM. */ + adjust_vperm (insn); + break; + } +} + +/* Find the insn from the Ith table entry, which is known to be a + register swap Y = SWAP(X). Replace it with a copy Y = X. */ +static void +replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i) +{ + rtx_insn *insn = insn_entry[i].insn; + rtx body = PATTERN (insn); + rtx src_reg = XEXP (SET_SRC (body), 0); + rtx copy = gen_rtx_SET (SET_DEST (body), src_reg); + rtx_insn *new_insn = emit_insn_before (copy, insn); + set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn)); + df_insn_rescan (new_insn); + + if (dump_file) + { + unsigned int new_uid = INSN_UID (new_insn); + fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid); + } + + df_insn_delete (insn); + remove_insn (insn); + insn->set_deleted (); +} + +/* Dump the swap table to DUMP_FILE. */ +static void +dump_swap_insn_table (swap_web_entry *insn_entry) +{ + int e = get_max_uid (); + fprintf (dump_file, "\nRelevant insns with their flag settings\n\n"); + + for (int i = 0; i < e; ++i) + if (insn_entry[i].is_relevant) + { + swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred (); + fprintf (dump_file, "%6d %6d ", i, + pred_entry && pred_entry->insn + ? 
INSN_UID (pred_entry->insn) : 0); + if (insn_entry[i].is_load) + fputs ("load ", dump_file); + if (insn_entry[i].is_store) + fputs ("store ", dump_file); + if (insn_entry[i].is_swap) + fputs ("swap ", dump_file); + if (insn_entry[i].is_live_in) + fputs ("live-in ", dump_file); + if (insn_entry[i].is_live_out) + fputs ("live-out ", dump_file); + if (insn_entry[i].contains_subreg) + fputs ("subreg ", dump_file); + if (insn_entry[i].is_128_int) + fputs ("int128 ", dump_file); + if (insn_entry[i].is_call) + fputs ("call ", dump_file); + if (insn_entry[i].is_swappable) + { + fputs ("swappable ", dump_file); + if (insn_entry[i].special_handling == SH_CONST_VECTOR) + fputs ("special:constvec ", dump_file); + else if (insn_entry[i].special_handling == SH_SUBREG) + fputs ("special:subreg ", dump_file); + else if (insn_entry[i].special_handling == SH_NOSWAP_LD) + fputs ("special:load ", dump_file); + else if (insn_entry[i].special_handling == SH_NOSWAP_ST) + fputs ("special:store ", dump_file); + else if (insn_entry[i].special_handling == SH_EXTRACT) + fputs ("special:extract ", dump_file); + else if (insn_entry[i].special_handling == SH_SPLAT) + fputs ("special:splat ", dump_file); + else if (insn_entry[i].special_handling == SH_XXPERMDI) + fputs ("special:xxpermdi ", dump_file); + else if (insn_entry[i].special_handling == SH_CONCAT) + fputs ("special:concat ", dump_file); + else if (insn_entry[i].special_handling == SH_VPERM) + fputs ("special:vperm ", dump_file); + } + if (insn_entry[i].web_not_optimizable) + fputs ("unoptimizable ", dump_file); + if (insn_entry[i].will_delete) + fputs ("delete ", dump_file); + fputs ("\n", dump_file); + } + fputs ("\n", dump_file); +} + +/* Return RTX with its address canonicalized to (reg) or (+ reg reg). + Here RTX is an (& addr (const_int -16)). Always return a new copy + to avoid problems with combine. */ +static rtx +alignment_with_canonical_addr (rtx align) +{ + rtx canon; + rtx addr = XEXP (align, 0); + + if (REG_P (addr)) + canon = addr; + + else if (GET_CODE (addr) == PLUS) + { + rtx addrop0 = XEXP (addr, 0); + rtx addrop1 = XEXP (addr, 1); + + if (!REG_P (addrop0)) + addrop0 = force_reg (GET_MODE (addrop0), addrop0); + + if (!REG_P (addrop1)) + addrop1 = force_reg (GET_MODE (addrop1), addrop1); + + canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1); + } + + else + canon = force_reg (GET_MODE (addr), addr); + + return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16)); +} + +/* Check whether an rtx is an alignment mask, and if so, return + a fully-expanded rtx for the masking operation. 
*/ +static rtx +alignment_mask (rtx_insn *insn) +{ + rtx body = PATTERN (insn); + + if (GET_CODE (body) != SET + || GET_CODE (SET_SRC (body)) != AND + || !REG_P (XEXP (SET_SRC (body), 0))) + return 0; + + rtx mask = XEXP (SET_SRC (body), 1); + + if (GET_CODE (mask) == CONST_INT) + { + if (INTVAL (mask) == -16) + return alignment_with_canonical_addr (SET_SRC (body)); + else + return 0; + } + + if (!REG_P (mask)) + return 0; + + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + df_ref use; + rtx real_mask = 0; + + FOR_EACH_INSN_INFO_USE (use, insn_info) + { + if (!rtx_equal_p (DF_REF_REG (use), mask)) + continue; + + struct df_link *def_link = DF_REF_CHAIN (use); + if (!def_link || def_link->next) + return 0; + + rtx_insn *const_insn = DF_REF_INSN (def_link->ref); + rtx const_body = PATTERN (const_insn); + if (GET_CODE (const_body) != SET) + return 0; + + real_mask = SET_SRC (const_body); + + if (GET_CODE (real_mask) != CONST_INT + || INTVAL (real_mask) != -16) + return 0; + } + + if (real_mask == 0) + return 0; + + return alignment_with_canonical_addr (SET_SRC (body)); +} + +/* Given INSN that's a load or store based at BASE_REG, look for a + feeding computation that aligns its address on a 16-byte boundary. */ +static rtx +find_alignment_op (rtx_insn *insn, rtx base_reg) +{ + df_ref base_use; + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + rtx and_operation = 0; + + FOR_EACH_INSN_INFO_USE (base_use, insn_info) + { + if (!rtx_equal_p (DF_REF_REG (base_use), base_reg)) + continue; + + struct df_link *base_def_link = DF_REF_CHAIN (base_use); + if (!base_def_link || base_def_link->next) + break; + + /* With stack-protector code enabled, and possibly in other + circumstances, there may not be an associated insn for + the def. */ + if (DF_REF_IS_ARTIFICIAL (base_def_link->ref)) + break; + + rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref); + and_operation = alignment_mask (and_insn); + if (and_operation != 0) + break; + } + + return and_operation; +} + +struct del_info { bool replace; rtx_insn *replace_insn; }; + +/* If INSN is the load for an lvx pattern, put it in canonical form. */ +static void +recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete) +{ + rtx body = PATTERN (insn); + gcc_assert (GET_CODE (body) == SET + && GET_CODE (SET_SRC (body)) == VEC_SELECT + && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM); + + rtx mem = XEXP (SET_SRC (body), 0); + rtx base_reg = XEXP (mem, 0); + + rtx and_operation = find_alignment_op (insn, base_reg); + + if (and_operation != 0) + { + df_ref def; + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + FOR_EACH_INSN_INFO_DEF (def, insn_info) + { + struct df_link *link = DF_REF_CHAIN (def); + if (!link || link->next) + break; + + rtx_insn *swap_insn = DF_REF_INSN (link->ref); + if (!insn_is_swap_p (swap_insn) + || insn_is_load_p (swap_insn) + || insn_is_store_p (swap_insn)) + break; + + /* Expected lvx pattern found. Change the swap to + a copy, and propagate the AND operation into the + load. */ + to_delete[INSN_UID (swap_insn)].replace = true; + to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn; + + XEXP (mem, 0) = and_operation; + SET_SRC (body) = mem; + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "lvx opportunity found at %d\n", + INSN_UID (insn)); + } + } +} + +/* If INSN is the store for an stvx pattern, put it in canonical form. 
*/ +static void +recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete) +{ + rtx body = PATTERN (insn); + gcc_assert (GET_CODE (body) == SET + && GET_CODE (SET_DEST (body)) == MEM + && GET_CODE (SET_SRC (body)) == VEC_SELECT); + rtx mem = SET_DEST (body); + rtx base_reg = XEXP (mem, 0); + + rtx and_operation = find_alignment_op (insn, base_reg); + + if (and_operation != 0) + { + rtx src_reg = XEXP (SET_SRC (body), 0); + df_ref src_use; + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + FOR_EACH_INSN_INFO_USE (src_use, insn_info) + { + if (!rtx_equal_p (DF_REF_REG (src_use), src_reg)) + continue; + + struct df_link *link = DF_REF_CHAIN (src_use); + if (!link || link->next) + break; + + rtx_insn *swap_insn = DF_REF_INSN (link->ref); + if (!insn_is_swap_p (swap_insn) + || insn_is_load_p (swap_insn) + || insn_is_store_p (swap_insn)) + break; + + /* Expected stvx pattern found. Change the swap to + a copy, and propagate the AND operation into the + store. */ + to_delete[INSN_UID (swap_insn)].replace = true; + to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn; + + XEXP (mem, 0) = and_operation; + SET_SRC (body) = src_reg; + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "stvx opportunity found at %d\n", + INSN_UID (insn)); + } + } +} + +/* Look for patterns created from builtin lvx and stvx calls, and + canonicalize them to be properly recognized as such. */ +static void +recombine_lvx_stvx_patterns (function *fun) +{ + int i; + basic_block bb; + rtx_insn *insn; + + int num_insns = get_max_uid (); + del_info *to_delete = XCNEWVEC (del_info, num_insns); + + FOR_ALL_BB_FN (bb, fun) + FOR_BB_INSNS (bb, insn) + { + if (!NONDEBUG_INSN_P (insn)) + continue; + + if (insn_is_load_p (insn) && insn_is_swap_p (insn)) + recombine_lvx_pattern (insn, to_delete); + else if (insn_is_store_p (insn) && insn_is_swap_p (insn)) + recombine_stvx_pattern (insn, to_delete); + } + + /* Turning swaps into copies is delayed until now, to avoid problems + with deleting instructions during the insn walk. */ + for (i = 0; i < num_insns; i++) + if (to_delete[i].replace) + { + rtx swap_body = PATTERN (to_delete[i].replace_insn); + rtx src_reg = XEXP (SET_SRC (swap_body), 0); + rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg); + rtx_insn *new_insn = emit_insn_before (copy, + to_delete[i].replace_insn); + set_block_for_insn (new_insn, + BLOCK_FOR_INSN (to_delete[i].replace_insn)); + df_insn_rescan (new_insn); + df_insn_delete (to_delete[i].replace_insn); + remove_insn (to_delete[i].replace_insn); + to_delete[i].replace_insn->set_deleted (); + } + + free (to_delete); +} + +/* Main entry point for this pass. */ +unsigned int +rs6000_analyze_swaps (function *fun) +{ + swap_web_entry *insn_entry; + basic_block bb; + rtx_insn *insn, *curr_insn = 0; + + /* Dataflow analysis for use-def chains. */ + df_set_flags (DF_RD_PRUNE_DEAD_DEFS); + df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN); + df_analyze (); + df_set_flags (DF_DEFER_INSN_RESCAN); + + /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */ + recombine_lvx_stvx_patterns (fun); + + /* Allocate structure to represent webs of insns. */ + insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ()); + + /* Walk the insns to gather basic data. 
*/ + FOR_ALL_BB_FN (bb, fun) + FOR_BB_INSNS_SAFE (bb, insn, curr_insn) + { + unsigned int uid = INSN_UID (insn); + if (NONDEBUG_INSN_P (insn)) + { + insn_entry[uid].insn = insn; + + if (GET_CODE (insn) == CALL_INSN) + insn_entry[uid].is_call = 1; + + /* Walk the uses and defs to see if we mention vector regs. + Record any constraints on optimization of such mentions. */ + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + df_ref mention; + FOR_EACH_INSN_INFO_USE (mention, insn_info) + { + /* We use DF_REF_REAL_REG here to get inside any subregs. */ + machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention)); + + /* If a use gets its value from a call insn, it will be + a hard register and will look like (reg:V4SI 3 3). + The df analysis creates two mentions for GPR3 and GPR4, + both DImode. We must recognize this and treat it as a + vector mention to ensure the call is unioned with this + use. */ + if (mode == DImode && DF_REF_INSN_INFO (mention)) + { + rtx feeder = DF_REF_INSN (mention); + /* FIXME: It is pretty hard to get from the df mention + to the mode of the use in the insn. We arbitrarily + pick a vector mode here, even though the use might + be a real DImode. We can be too conservative + (create a web larger than necessary) because of + this, so consider eventually fixing this. */ + if (GET_CODE (feeder) == CALL_INSN) + mode = V4SImode; + } + + if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode) + { + insn_entry[uid].is_relevant = 1; + if (mode == TImode || mode == V1TImode + || FLOAT128_VECTOR_P (mode)) + insn_entry[uid].is_128_int = 1; + if (DF_REF_INSN_INFO (mention)) + insn_entry[uid].contains_subreg + = !rtx_equal_p (DF_REF_REG (mention), + DF_REF_REAL_REG (mention)); + union_defs (insn_entry, insn, mention); + } + } + FOR_EACH_INSN_INFO_DEF (mention, insn_info) + { + /* We use DF_REF_REAL_REG here to get inside any subregs. */ + machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention)); + + /* If we're loading up a hard vector register for a call, + it looks like (set (reg:V4SI 9 9) (...)). The df + analysis creates two mentions for GPR9 and GPR10, both + DImode. So relying on the mode from the mentions + isn't sufficient to ensure we union the call into the + web with the parameter setup code. */ + if (mode == DImode && GET_CODE (insn) == SET + && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (insn)))) + mode = GET_MODE (SET_DEST (insn)); + + if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode) + { + insn_entry[uid].is_relevant = 1; + if (mode == TImode || mode == V1TImode + || FLOAT128_VECTOR_P (mode)) + insn_entry[uid].is_128_int = 1; + if (DF_REF_INSN_INFO (mention)) + insn_entry[uid].contains_subreg + = !rtx_equal_p (DF_REF_REG (mention), + DF_REF_REAL_REG (mention)); + /* REG_FUNCTION_VALUE_P is not valid for subregs. */ + else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention))) + insn_entry[uid].is_live_out = 1; + union_uses (insn_entry, insn, mention); + } + } + + if (insn_entry[uid].is_relevant) + { + /* Determine if this is a load or store. */ + insn_entry[uid].is_load = insn_is_load_p (insn); + insn_entry[uid].is_store = insn_is_store_p (insn); + + /* Determine if this is a doubleword swap. If not, + determine whether it can legally be swapped. 
*/ + if (insn_is_swap_p (insn)) + insn_entry[uid].is_swap = 1; + else + { + unsigned int special = SH_NONE; + insn_entry[uid].is_swappable + = insn_is_swappable_p (insn_entry, insn, &special); + if (special != SH_NONE && insn_entry[uid].contains_subreg) + insn_entry[uid].is_swappable = 0; + else if (special != SH_NONE) + insn_entry[uid].special_handling = special; + else if (insn_entry[uid].contains_subreg) + insn_entry[uid].special_handling = SH_SUBREG; + } + } + } + } + + if (dump_file) + { + fprintf (dump_file, "\nSwap insn entry table when first built\n"); + dump_swap_insn_table (insn_entry); + } + + /* Record unoptimizable webs. */ + unsigned e = get_max_uid (), i; + for (i = 0; i < e; ++i) + { + if (!insn_entry[i].is_relevant) + continue; + + swap_web_entry *root + = (swap_web_entry*)(&insn_entry[i])->unionfind_root (); + + if (insn_entry[i].is_live_in || insn_entry[i].is_live_out + || (insn_entry[i].contains_subreg + && insn_entry[i].special_handling != SH_SUBREG) + || insn_entry[i].is_128_int || insn_entry[i].is_call + || !(insn_entry[i].is_swappable || insn_entry[i].is_swap)) + root->web_not_optimizable = 1; + + /* If we have loads or stores that aren't permuting then the + optimization isn't appropriate. */ + else if ((insn_entry[i].is_load || insn_entry[i].is_store) + && !insn_entry[i].is_swap && !insn_entry[i].is_swappable) + root->web_not_optimizable = 1; + + /* If we have permuting loads or stores that are not accompanied + by a register swap, the optimization isn't appropriate. */ + else if (insn_entry[i].is_load && insn_entry[i].is_swap) + { + rtx insn = insn_entry[i].insn; + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + df_ref def; + + FOR_EACH_INSN_INFO_DEF (def, insn_info) + { + struct df_link *link = DF_REF_CHAIN (def); + + if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS)) + { + root->web_not_optimizable = 1; + break; + } + } + } + else if (insn_entry[i].is_store && insn_entry[i].is_swap) + { + rtx insn = insn_entry[i].insn; + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + df_ref use; + + FOR_EACH_INSN_INFO_USE (use, insn_info) + { + struct df_link *link = DF_REF_CHAIN (use); + + if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES)) + { + root->web_not_optimizable = 1; + break; + } + } + } + } + + if (dump_file) + { + fprintf (dump_file, "\nSwap insn entry table after web analysis\n"); + dump_swap_insn_table (insn_entry); + } + + /* For each load and store in an optimizable web (which implies + the loads and stores are permuting), find the associated + register swaps and mark them for removal. Due to various + optimizations we may mark the same swap more than once. Also + perform special handling for swappable insns that require it. */ + for (i = 0; i < e; ++i) + if ((insn_entry[i].is_load || insn_entry[i].is_store) + && insn_entry[i].is_swap) + { + swap_web_entry* root_entry + = (swap_web_entry*)((&insn_entry[i])->unionfind_root ()); + if (!root_entry->web_not_optimizable) + mark_swaps_for_removal (insn_entry, i); + } + else if (insn_entry[i].is_swappable && insn_entry[i].special_handling) + { + swap_web_entry* root_entry + = (swap_web_entry*)((&insn_entry[i])->unionfind_root ()); + if (!root_entry->web_not_optimizable) + handle_special_swappables (insn_entry, i); + } + + /* Now delete the swaps marked for removal. */ + for (i = 0; i < e; ++i) + if (insn_entry[i].will_delete) + replace_swap_with_copy (insn_entry, i); + + /* Clean up. 
*/ + free (insn_entry); + return 0; +} + +const pass_data pass_data_analyze_swaps = +{ + RTL_PASS, /* type */ + "swaps", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish, /* todo_flags_finish */ +}; + +class pass_analyze_swaps : public rtl_opt_pass +{ +public: + pass_analyze_swaps(gcc::context *ctxt) + : rtl_opt_pass(pass_data_analyze_swaps, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX + && !TARGET_P9_VECTOR && rs6000_optimize_swaps); + } + + virtual unsigned int execute (function *fun) + { + return rs6000_analyze_swaps (fun); + } + + opt_pass *clone () + { + return new pass_analyze_swaps (m_ctxt); + } + +}; // class pass_analyze_swaps + +rtl_opt_pass * +make_pass_analyze_swaps (gcc::context *ctxt) +{ + return new pass_analyze_swaps (ctxt); +} + +#ifdef RS6000_GLIBC_ATOMIC_FENV +/* Function declarations for rs6000_atomic_assign_expand_fenv. */ +static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl; +#endif + +/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */ + +static void +rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) +{ + if (!TARGET_HARD_FLOAT || !TARGET_FPRS) + { +#ifdef RS6000_GLIBC_ATOMIC_FENV + if (atomic_hold_decl == NULL_TREE) + { + atomic_hold_decl + = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, + get_identifier ("__atomic_feholdexcept"), + build_function_type_list (void_type_node, + double_ptr_type_node, + NULL_TREE)); + TREE_PUBLIC (atomic_hold_decl) = 1; + DECL_EXTERNAL (atomic_hold_decl) = 1; + } + + if (atomic_clear_decl == NULL_TREE) + { + atomic_clear_decl + = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, + get_identifier ("__atomic_feclearexcept"), + build_function_type_list (void_type_node, + NULL_TREE)); + TREE_PUBLIC (atomic_clear_decl) = 1; + DECL_EXTERNAL (atomic_clear_decl) = 1; + } + + tree const_double = build_qualified_type (double_type_node, + TYPE_QUAL_CONST); + tree const_double_ptr = build_pointer_type (const_double); + if (atomic_update_decl == NULL_TREE) + { + atomic_update_decl + = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, + get_identifier ("__atomic_feupdateenv"), + build_function_type_list (void_type_node, + const_double_ptr, + NULL_TREE)); + TREE_PUBLIC (atomic_update_decl) = 1; + DECL_EXTERNAL (atomic_update_decl) = 1; + } + + tree fenv_var = create_tmp_var_raw (double_type_node); + TREE_ADDRESSABLE (fenv_var) = 1; + tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var); + + *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr); + *clear = build_call_expr (atomic_clear_decl, 0); + *update = build_call_expr (atomic_update_decl, 1, + fold_convert (const_double_ptr, fenv_addr)); +#endif + return; + } + + tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS]; + tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF]; + tree call_mffs = build_call_expr (mffs, 0); + + /* Generates the equivalent of feholdexcept (&fenv_var) + + *fenv_var = __builtin_mffs (); + double fenv_hold; + *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL; + __builtin_mtfsf (0xff, fenv_hold); */ + + /* Mask to clear everything except for the rounding modes and non-IEEE + arithmetic flag. 
*/ + const unsigned HOST_WIDE_INT hold_exception_mask = + HOST_WIDE_INT_C (0xffffffff00000007); + + tree fenv_var = create_tmp_var_raw (double_type_node); + + tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs); + + tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var); + tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu, + build_int_cst (uint64_type_node, + hold_exception_mask)); + + tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node, + fenv_llu_and); + + tree hold_mtfsf = build_call_expr (mtfsf, 2, + build_int_cst (unsigned_type_node, 0xff), + fenv_hold_mtfsf); + + *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf); + + /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT): + + double fenv_clear = __builtin_mffs (); + *(uint64_t)&fenv_clear &= 0xffffffff00000000LL; + __builtin_mtfsf (0xff, fenv_clear); */ + + /* Mask to clear everything except for the rounding modes and non-IEEE + arithmetic flag. */ + const unsigned HOST_WIDE_INT clear_exception_mask = + HOST_WIDE_INT_C (0xffffffff00000000); + + tree fenv_clear = create_tmp_var_raw (double_type_node); + + tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs); + + tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear); + tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, + fenv_clean_llu, + build_int_cst (uint64_type_node, + clear_exception_mask)); + + tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node, + fenv_clear_llu_and); + + tree clear_mtfsf = build_call_expr (mtfsf, 2, + build_int_cst (unsigned_type_node, 0xff), + fenv_clear_mtfsf); + + *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf); + + /* Generates the equivalent of feupdateenv (&fenv_var) + + double old_fenv = __builtin_mffs (); + double fenv_update; + *(uint64_t*)&fenv_update = (*(uint64_t*)&old & 0xffffffff1fffff00LL) | + (*(uint64_t*)fenv_var 0x1ff80fff); + __builtin_mtfsf (0xff, fenv_update); */ + + const unsigned HOST_WIDE_INT update_exception_mask = + HOST_WIDE_INT_C (0xffffffff1fffff00); + const unsigned HOST_WIDE_INT new_exception_mask = + HOST_WIDE_INT_C (0x1ff80fff); + + tree old_fenv = create_tmp_var_raw (double_type_node); + tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs); + + tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv); + tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu, + build_int_cst (uint64_type_node, + update_exception_mask)); + + tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu, + build_int_cst (uint64_type_node, + new_exception_mask)); + + tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node, + old_llu_and, new_llu_and); + + tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node, + new_llu_mask); + + tree update_mtfsf = build_call_expr (mtfsf, 2, + build_int_cst (unsigned_type_node, 0xff), + fenv_update_mtfsf); + + *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf); +} + +/* Implement the TARGET_OPTAB_SUPPORTED_P hook. 
*/ + +static bool +rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode, + optimization_type opt_type) +{ + switch (op) + { + case rsqrt_optab: + return (opt_type == OPTIMIZE_FOR_SPEED + && RS6000_RECIP_AUTO_RSQRTE_P (mode1)); + + default: + return true; + } +} + +struct gcc_target targetm = TARGET_INITIALIZER; + +#include "gt-powerpcspe.h" diff --git a/gcc/config/powerpcspe/powerpcspe.h b/gcc/config/powerpcspe/powerpcspe.h new file mode 100644 index 000000000000..e3e417ab73e3 --- /dev/null +++ b/gcc/config/powerpcspe/powerpcspe.h @@ -0,0 +1,2928 @@ +/* Definitions of target machine for GNU compiler, for IBM RS/6000. + Copyright (C) 1992-2017 Free Software Foundation, Inc. + Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Note that some other tm.h files include this one and then override + many of the definitions. */ + +#ifndef RS6000_OPTS_H +#include "config/powerpcspe/powerpcspe-opts.h" +#endif + +/* Definitions for the object file format. These are set at + compile-time. */ + +#define OBJECT_XCOFF 1 +#define OBJECT_ELF 2 +#define OBJECT_PEF 3 +#define OBJECT_MACHO 4 + +#define TARGET_ELF (TARGET_OBJECT_FORMAT == OBJECT_ELF) +#define TARGET_XCOFF (TARGET_OBJECT_FORMAT == OBJECT_XCOFF) +#define TARGET_MACOS (TARGET_OBJECT_FORMAT == OBJECT_PEF) +#define TARGET_MACHO (TARGET_OBJECT_FORMAT == OBJECT_MACHO) + +#ifndef TARGET_AIX +#define TARGET_AIX 0 +#endif + +#ifndef TARGET_AIX_OS +#define TARGET_AIX_OS 0 +#endif + +/* Control whether function entry points use a "dot" symbol when + ABI_AIX. */ +#define DOT_SYMBOLS 1 + +/* Default string to use for cpu if not specified. */ +#ifndef TARGET_CPU_DEFAULT +#define TARGET_CPU_DEFAULT ((char *)0) +#endif + +/* If configured for PPC405, support PPC405CR Erratum77. 
*/ +#ifdef CONFIG_PPC405CR +#define PPC405_ERRATUM77 (rs6000_cpu == PROCESSOR_PPC405) +#else +#define PPC405_ERRATUM77 0 +#endif + +#ifndef TARGET_PAIRED_FLOAT +#define TARGET_PAIRED_FLOAT 0 +#endif + +#ifdef HAVE_AS_POPCNTB +#define ASM_CPU_POWER5_SPEC "-mpower5" +#else +#define ASM_CPU_POWER5_SPEC "-mpower4" +#endif + +#ifdef HAVE_AS_DFP +#define ASM_CPU_POWER6_SPEC "-mpower6 -maltivec" +#else +#define ASM_CPU_POWER6_SPEC "-mpower4 -maltivec" +#endif + +#ifdef HAVE_AS_POPCNTD +#define ASM_CPU_POWER7_SPEC "-mpower7" +#else +#define ASM_CPU_POWER7_SPEC "-mpower4 -maltivec" +#endif + +#ifdef HAVE_AS_POWER8 +#define ASM_CPU_POWER8_SPEC "-mpower8" +#else +#define ASM_CPU_POWER8_SPEC ASM_CPU_POWER7_SPEC +#endif + +#ifdef HAVE_AS_POWER9 +#define ASM_CPU_POWER9_SPEC "-mpower9" +#else +#define ASM_CPU_POWER9_SPEC ASM_CPU_POWER8_SPEC +#endif + +#ifdef HAVE_AS_DCI +#define ASM_CPU_476_SPEC "-m476" +#else +#define ASM_CPU_476_SPEC "-mpower4" +#endif + +/* Common ASM definitions used by ASM_SPEC among the various targets for + handling -mcpu=xxx switches. There is a parallel list in driver-powerpcspe.c to + provide the default assembler options if the user uses -mcpu=native, so if + you make changes here, make them also there. */ +#define ASM_CPU_SPEC \ +"%{!mcpu*: \ + %{mpowerpc64*: -mppc64} \ + %{!mpowerpc64*: %(asm_default)}} \ +%{mcpu=native: %(asm_cpu_native)} \ +%{mcpu=cell: -mcell} \ +%{mcpu=power3: -mppc64} \ +%{mcpu=power4: -mpower4} \ +%{mcpu=power5: %(asm_cpu_power5)} \ +%{mcpu=power5+: %(asm_cpu_power5)} \ +%{mcpu=power6: %(asm_cpu_power6) -maltivec} \ +%{mcpu=power6x: %(asm_cpu_power6) -maltivec} \ +%{mcpu=power7: %(asm_cpu_power7)} \ +%{mcpu=power8: %(asm_cpu_power8)} \ +%{mcpu=power9: %(asm_cpu_power9)} \ +%{mcpu=a2: -ma2} \ +%{mcpu=powerpc: -mppc} \ +%{mcpu=powerpc64le: %(asm_cpu_power8)} \ +%{mcpu=rs64a: -mppc64} \ +%{mcpu=401: -mppc} \ +%{mcpu=403: -m403} \ +%{mcpu=405: -m405} \ +%{mcpu=405fp: -m405} \ +%{mcpu=440: -m440} \ +%{mcpu=440fp: -m440} \ +%{mcpu=464: -m440} \ +%{mcpu=464fp: -m440} \ +%{mcpu=476: %(asm_cpu_476)} \ +%{mcpu=476fp: %(asm_cpu_476)} \ +%{mcpu=505: -mppc} \ +%{mcpu=601: -m601} \ +%{mcpu=602: -mppc} \ +%{mcpu=603: -mppc} \ +%{mcpu=603e: -mppc} \ +%{mcpu=ec603e: -mppc} \ +%{mcpu=604: -mppc} \ +%{mcpu=604e: -mppc} \ +%{mcpu=620: -mppc64} \ +%{mcpu=630: -mppc64} \ +%{mcpu=740: -mppc} \ +%{mcpu=750: -mppc} \ +%{mcpu=G3: -mppc} \ +%{mcpu=7400: -mppc -maltivec} \ +%{mcpu=7450: -mppc -maltivec} \ +%{mcpu=G4: -mppc -maltivec} \ +%{mcpu=801: -mppc} \ +%{mcpu=821: -mppc} \ +%{mcpu=823: -mppc} \ +%{mcpu=860: -mppc} \ +%{mcpu=970: -mpower4 -maltivec} \ +%{mcpu=G5: -mpower4 -maltivec} \ +%{mcpu=8540: -me500} \ +%{mcpu=8548: -me500} \ +%{mcpu=e300c2: -me300} \ +%{mcpu=e300c3: -me300} \ +%{mcpu=e500mc: -me500mc} \ +%{mcpu=e500mc64: -me500mc64} \ +%{mcpu=e5500: -me5500} \ +%{mcpu=e6500: -me6500} \ +%{maltivec: -maltivec} \ +%{mvsx: -mvsx %{!maltivec: -maltivec} %{!mcpu*: %(asm_cpu_power7)}} \ +%{mpower8-vector|mcrypto|mdirect-move|mhtm: %{!mcpu*: %(asm_cpu_power8)}} \ +-many" + +#define CPP_DEFAULT_SPEC "" + +#define ASM_DEFAULT_SPEC "" + +/* This macro defines names of additional specifications to put in the specs + that can be used in various specifications like CC1_SPEC. Its definition + is an initializer with a subgrouping for each command option. + + Each subgrouping contains a string constant, that defines the + specification name, and a string constant that used by the GCC driver + program. + + Do not define this macro if it does not need to do anything. 
*/ + +#define SUBTARGET_EXTRA_SPECS + +#define EXTRA_SPECS \ + { "cpp_default", CPP_DEFAULT_SPEC }, \ + { "asm_cpu", ASM_CPU_SPEC }, \ + { "asm_cpu_native", ASM_CPU_NATIVE_SPEC }, \ + { "asm_default", ASM_DEFAULT_SPEC }, \ + { "cc1_cpu", CC1_CPU_SPEC }, \ + { "asm_cpu_power5", ASM_CPU_POWER5_SPEC }, \ + { "asm_cpu_power6", ASM_CPU_POWER6_SPEC }, \ + { "asm_cpu_power7", ASM_CPU_POWER7_SPEC }, \ + { "asm_cpu_power8", ASM_CPU_POWER8_SPEC }, \ + { "asm_cpu_power9", ASM_CPU_POWER9_SPEC }, \ + { "asm_cpu_476", ASM_CPU_476_SPEC }, \ + SUBTARGET_EXTRA_SPECS + +/* -mcpu=native handling only makes sense with compiler running on + an PowerPC chip. If changing this condition, also change + the condition in driver-powerpcspe.c. */ +#if defined(__powerpc__) || defined(__POWERPC__) || defined(_AIX) +/* In driver-powerpcspe.c. */ +extern const char *host_detect_local_cpu (int argc, const char **argv); +#define EXTRA_SPEC_FUNCTIONS \ + { "local_cpu_detect", host_detect_local_cpu }, +#define HAVE_LOCAL_CPU_DETECT +#define ASM_CPU_NATIVE_SPEC "%:local_cpu_detect(asm)" + +#else +#define ASM_CPU_NATIVE_SPEC "%(asm_default)" +#endif + +#ifndef CC1_CPU_SPEC +#ifdef HAVE_LOCAL_CPU_DETECT +#define CC1_CPU_SPEC \ +"%{mcpu=native:%<mcpu=native %:local_cpu_detect(cpu)} \ + %{mtune=native:%<mtune=native %:local_cpu_detect(tune)}" +#else +#define CC1_CPU_SPEC "" +#endif +#endif + +/* Architecture type. */ + +/* Define TARGET_MFCRF if the target assembler does not support the + optional field operand for mfcr. */ + +#ifndef HAVE_AS_MFCRF +#undef TARGET_MFCRF +#define TARGET_MFCRF 0 +#endif + +/* Define TARGET_POPCNTB if the target assembler does not support the + popcount byte instruction. */ + +#ifndef HAVE_AS_POPCNTB +#undef TARGET_POPCNTB +#define TARGET_POPCNTB 0 +#endif + +/* Define TARGET_FPRND if the target assembler does not support the + fp rounding instructions. */ + +#ifndef HAVE_AS_FPRND +#undef TARGET_FPRND +#define TARGET_FPRND 0 +#endif + +/* Define TARGET_CMPB if the target assembler does not support the + cmpb instruction. */ + +#ifndef HAVE_AS_CMPB +#undef TARGET_CMPB +#define TARGET_CMPB 0 +#endif + +/* Define TARGET_MFPGPR if the target assembler does not support the + mffpr and mftgpr instructions. */ + +#ifndef HAVE_AS_MFPGPR +#undef TARGET_MFPGPR +#define TARGET_MFPGPR 0 +#endif + +/* Define TARGET_DFP if the target assembler does not support decimal + floating point instructions. */ +#ifndef HAVE_AS_DFP +#undef TARGET_DFP +#define TARGET_DFP 0 +#endif + +/* Define TARGET_POPCNTD if the target assembler does not support the + popcount word and double word instructions. */ + +#ifndef HAVE_AS_POPCNTD +#undef TARGET_POPCNTD +#define TARGET_POPCNTD 0 +#endif + +/* Define the ISA 2.07 flags as 0 if the target assembler does not support the + waitasecond instruction. Allow -mpower8-fusion, since it does not add new + instructions. */ + +#ifndef HAVE_AS_POWER8 +#undef TARGET_DIRECT_MOVE +#undef TARGET_CRYPTO +#undef TARGET_HTM +#undef TARGET_P8_VECTOR +#define TARGET_DIRECT_MOVE 0 +#define TARGET_CRYPTO 0 +#define TARGET_HTM 0 +#define TARGET_P8_VECTOR 0 +#endif + +/* Define the ISA 3.0 flags as 0 if the target assembler does not support + Power9 instructions. Allow -mpower9-fusion, since it does not add new + instructions. Allow -misel, since it predates ISA 3.0 and does + not require any Power9 features. 
*/ + +#ifndef HAVE_AS_POWER9 +#undef TARGET_FLOAT128_HW +#undef TARGET_MODULO +#undef TARGET_P9_VECTOR +#undef TARGET_P9_MINMAX +#undef TARGET_P9_DFORM_SCALAR +#undef TARGET_P9_DFORM_VECTOR +#undef TARGET_P9_MISC +#define TARGET_FLOAT128_HW 0 +#define TARGET_MODULO 0 +#define TARGET_P9_VECTOR 0 +#define TARGET_P9_MINMAX 0 +#define TARGET_P9_DFORM_SCALAR 0 +#define TARGET_P9_DFORM_VECTOR 0 +#define TARGET_P9_MISC 0 +#endif + +/* Define TARGET_LWSYNC_INSTRUCTION if the assembler knows about lwsync. If + not, generate the lwsync code as an integer constant. */ +#ifdef HAVE_AS_LWSYNC +#define TARGET_LWSYNC_INSTRUCTION 1 +#else +#define TARGET_LWSYNC_INSTRUCTION 0 +#endif + +/* Define TARGET_TLS_MARKERS if the target assembler does not support + arg markers for __tls_get_addr calls. */ +#ifndef HAVE_AS_TLS_MARKERS +#undef TARGET_TLS_MARKERS +#define TARGET_TLS_MARKERS 0 +#else +#define TARGET_TLS_MARKERS tls_markers +#endif + +#ifndef TARGET_SECURE_PLT +#define TARGET_SECURE_PLT 0 +#endif + +#ifndef TARGET_CMODEL +#define TARGET_CMODEL CMODEL_SMALL +#endif + +#define TARGET_32BIT (! TARGET_64BIT) + +#ifndef HAVE_AS_TLS +#define HAVE_AS_TLS 0 +#endif + +#ifndef TARGET_LINK_STACK +#define TARGET_LINK_STACK 0 +#endif + +#ifndef SET_TARGET_LINK_STACK +#define SET_TARGET_LINK_STACK(X) do { } while (0) +#endif + +#ifndef TARGET_FLOAT128_ENABLE_TYPE +#define TARGET_FLOAT128_ENABLE_TYPE 0 +#endif + +/* Return 1 for a symbol ref for a thread-local storage symbol. */ +#define RS6000_SYMBOL_REF_TLS_P(RTX) \ + (GET_CODE (RTX) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (RTX) != 0) + +#ifdef IN_LIBGCC2 +/* For libgcc2 we make sure this is a compile time constant */ +#if defined (__64BIT__) || defined (__powerpc64__) || defined (__ppc64__) +#undef TARGET_POWERPC64 +#define TARGET_POWERPC64 1 +#else +#undef TARGET_POWERPC64 +#define TARGET_POWERPC64 0 +#endif +#else + /* The option machinery will define this. */ +#endif + +#define TARGET_DEFAULT (MASK_MULTIPLE | MASK_STRING) + +/* FPU operations supported. + Each use of TARGET_SINGLE_FLOAT or TARGET_DOUBLE_FLOAT must + also test TARGET_HARD_FLOAT. */ +#define TARGET_SINGLE_FLOAT 1 +#define TARGET_DOUBLE_FLOAT 1 +#define TARGET_SINGLE_FPU 0 +#define TARGET_SIMPLE_FPU 0 +#define TARGET_XILINX_FPU 0 + +/* Recast the processor type to the cpu attribute. */ +#define rs6000_cpu_attr ((enum attr_cpu)rs6000_cpu) + +/* Define generic processor types based upon current deployment. */ +#define PROCESSOR_COMMON PROCESSOR_PPC601 +#define PROCESSOR_POWERPC PROCESSOR_PPC604 +#define PROCESSOR_POWERPC64 PROCESSOR_RS64A + +/* Define the default processor. This is overridden by other tm.h files. */ +#define PROCESSOR_DEFAULT PROCESSOR_PPC603 +#define PROCESSOR_DEFAULT64 PROCESSOR_RS64A + +/* Specify the dialect of assembler to use. Only new mnemonics are supported + starting with GCC 4.8, i.e. just one dialect, but for backwards + compatibility with older inline asm ASSEMBLER_DIALECT needs to be + defined. 
*/ +#define ASSEMBLER_DIALECT 1 + +/* Debug support */ +#define MASK_DEBUG_STACK 0x01 /* debug stack applications */ +#define MASK_DEBUG_ARG 0x02 /* debug argument handling */ +#define MASK_DEBUG_REG 0x04 /* debug register handling */ +#define MASK_DEBUG_ADDR 0x08 /* debug memory addressing */ +#define MASK_DEBUG_COST 0x10 /* debug rtx codes */ +#define MASK_DEBUG_TARGET 0x20 /* debug target attribute/pragma */ +#define MASK_DEBUG_BUILTIN 0x40 /* debug builtins */ +#define MASK_DEBUG_ALL (MASK_DEBUG_STACK \ + | MASK_DEBUG_ARG \ + | MASK_DEBUG_REG \ + | MASK_DEBUG_ADDR \ + | MASK_DEBUG_COST \ + | MASK_DEBUG_TARGET \ + | MASK_DEBUG_BUILTIN) + +#define TARGET_DEBUG_STACK (rs6000_debug & MASK_DEBUG_STACK) +#define TARGET_DEBUG_ARG (rs6000_debug & MASK_DEBUG_ARG) +#define TARGET_DEBUG_REG (rs6000_debug & MASK_DEBUG_REG) +#define TARGET_DEBUG_ADDR (rs6000_debug & MASK_DEBUG_ADDR) +#define TARGET_DEBUG_COST (rs6000_debug & MASK_DEBUG_COST) +#define TARGET_DEBUG_TARGET (rs6000_debug & MASK_DEBUG_TARGET) +#define TARGET_DEBUG_BUILTIN (rs6000_debug & MASK_DEBUG_BUILTIN) + +/* Helper macros for TFmode. Quad floating point (TFmode) can be either IBM + long double format that uses a pair of doubles, or IEEE 128-bit floating + point. KFmode was added as a way to represent IEEE 128-bit floating point, + even if the default for long double is the IBM long double format. + Similarly IFmode is the IBM long double format even if the default is IEEE + 128-bit. Don't allow IFmode if -msoft-float. */ +#define FLOAT128_IEEE_P(MODE) \ + ((TARGET_IEEEQUAD && ((MODE) == TFmode || (MODE) == TCmode)) \ + || ((MODE) == KFmode) || ((MODE) == KCmode)) + +#define FLOAT128_IBM_P(MODE) \ + ((!TARGET_IEEEQUAD && ((MODE) == TFmode || (MODE) == TCmode)) \ + || (TARGET_HARD_FLOAT && TARGET_FPRS \ + && ((MODE) == IFmode || (MODE) == ICmode))) + +/* Helper macros to say whether a 128-bit floating point type can go in a + single vector register, or whether it needs paired scalar values. */ +#define FLOAT128_VECTOR_P(MODE) (TARGET_FLOAT128_TYPE && FLOAT128_IEEE_P (MODE)) + +#define FLOAT128_2REG_P(MODE) \ + (FLOAT128_IBM_P (MODE) \ + || ((MODE) == TDmode) \ + || (!TARGET_FLOAT128_TYPE && FLOAT128_IEEE_P (MODE))) + +/* Return true for floating point that does not use a vector register. */ +#define SCALAR_FLOAT_MODE_NOT_VECTOR_P(MODE) \ + (SCALAR_FLOAT_MODE_P (MODE) && !FLOAT128_VECTOR_P (MODE)) + +/* Describe the vector unit used for arithmetic operations. */ +extern enum rs6000_vector rs6000_vector_unit[]; + +#define VECTOR_UNIT_NONE_P(MODE) \ + (rs6000_vector_unit[(MODE)] == VECTOR_NONE) + +#define VECTOR_UNIT_VSX_P(MODE) \ + (rs6000_vector_unit[(MODE)] == VECTOR_VSX) + +#define VECTOR_UNIT_P8_VECTOR_P(MODE) \ + (rs6000_vector_unit[(MODE)] == VECTOR_P8_VECTOR) + +#define VECTOR_UNIT_ALTIVEC_P(MODE) \ + (rs6000_vector_unit[(MODE)] == VECTOR_ALTIVEC) + +#define VECTOR_UNIT_VSX_OR_P8_VECTOR_P(MODE) \ + (IN_RANGE ((int)rs6000_vector_unit[(MODE)], \ + (int)VECTOR_VSX, \ + (int)VECTOR_P8_VECTOR)) + +/* VECTOR_UNIT_ALTIVEC_OR_VSX_P is used in places where we are using either + altivec (VMX) or VSX vector instructions. P8 vector support is upwards + compatible, so allow it as well, rather than changing all of the uses of the + macro. */ +#define VECTOR_UNIT_ALTIVEC_OR_VSX_P(MODE) \ + (IN_RANGE ((int)rs6000_vector_unit[(MODE)], \ + (int)VECTOR_ALTIVEC, \ + (int)VECTOR_P8_VECTOR)) + +/* Describe whether to use VSX loads or Altivec loads. 
For now, just use the + same unit as the vector unit we are using, but we may want to migrate to + using VSX style loads even for types handled by altivec. */ +extern enum rs6000_vector rs6000_vector_mem[]; + +#define VECTOR_MEM_NONE_P(MODE) \ + (rs6000_vector_mem[(MODE)] == VECTOR_NONE) + +#define VECTOR_MEM_VSX_P(MODE) \ + (rs6000_vector_mem[(MODE)] == VECTOR_VSX) + +#define VECTOR_MEM_P8_VECTOR_P(MODE) \ + (rs6000_vector_mem[(MODE)] == VECTOR_VSX) + +#define VECTOR_MEM_ALTIVEC_P(MODE) \ + (rs6000_vector_mem[(MODE)] == VECTOR_ALTIVEC) + +#define VECTOR_MEM_VSX_OR_P8_VECTOR_P(MODE) \ + (IN_RANGE ((int)rs6000_vector_mem[(MODE)], \ + (int)VECTOR_VSX, \ + (int)VECTOR_P8_VECTOR)) + +#define VECTOR_MEM_ALTIVEC_OR_VSX_P(MODE) \ + (IN_RANGE ((int)rs6000_vector_mem[(MODE)], \ + (int)VECTOR_ALTIVEC, \ + (int)VECTOR_P8_VECTOR)) + +/* Return the alignment of a given vector type, which is set based on the + vector unit use. VSX for instance can load 32 or 64 bit aligned words + without problems, while Altivec requires 128-bit aligned vectors. */ +extern int rs6000_vector_align[]; + +#define VECTOR_ALIGN(MODE) \ + ((rs6000_vector_align[(MODE)] != 0) \ + ? rs6000_vector_align[(MODE)] \ + : (int)GET_MODE_BITSIZE ((MODE))) + +/* Determine the element order to use for vector instructions. By + default we use big-endian element order when targeting big-endian, + and little-endian element order when targeting little-endian. For + programs being ported from BE Power to LE Power, it can sometimes + be useful to use big-endian element order when targeting little-endian. + This is set via -maltivec=be, for example. */ +#define VECTOR_ELT_ORDER_BIG \ + (BYTES_BIG_ENDIAN || (rs6000_altivec_element_order == 2)) + +/* Element number of the 64-bit value in a 128-bit vector that can be accessed + with scalar instructions. */ +#define VECTOR_ELEMENT_SCALAR_64BIT ((BYTES_BIG_ENDIAN) ? 0 : 1) + +/* Element number of the 64-bit value in a 128-bit vector that can be accessed + with the ISA 3.0 MFVSRLD instructions. */ +#define VECTOR_ELEMENT_MFVSRLD_64BIT ((BYTES_BIG_ENDIAN) ? 1 : 0) + +/* Alignment options for fields in structures for sub-targets following + AIX-like ABI. + ALIGN_POWER word-aligns FP doubles (default AIX ABI). + ALIGN_NATURAL doubleword-aligns FP doubles (align to object size). + + Override the macro definitions when compiling libobjc to avoid undefined + reference to rs6000_alignment_flags due to library's use of GCC alignment + macros which use the macros below. */ + +#ifndef IN_TARGET_LIBS +#define MASK_ALIGN_POWER 0x00000000 +#define MASK_ALIGN_NATURAL 0x00000001 +#define TARGET_ALIGN_NATURAL (rs6000_alignment_flags & MASK_ALIGN_NATURAL) +#else +#define TARGET_ALIGN_NATURAL 0 +#endif + +#define TARGET_LONG_DOUBLE_128 (rs6000_long_double_type_size == 128) +#define TARGET_IEEEQUAD rs6000_ieeequad +#define TARGET_ALTIVEC_ABI rs6000_altivec_abi +#define TARGET_LDBRX (TARGET_POPCNTD || rs6000_cpu == PROCESSOR_CELL) + +#define TARGET_SPE_ABI 0 +#define TARGET_SPE 0 +#define TARGET_ISEL64 (TARGET_ISEL && TARGET_POWERPC64) +#define TARGET_FPRS 1 +#define TARGET_E500_SINGLE 0 +#define TARGET_E500_DOUBLE 0 +#define CHECK_E500_OPTIONS do { } while (0) + +/* ISA 2.01 allowed FCFID to be done in 32-bit, previously it was 64-bit only. + Enable 32-bit fcfid's on any of the switches for newer ISA machines or + XILINX. 
*/ +#define TARGET_FCFID (TARGET_POWERPC64 \ + || TARGET_PPC_GPOPT /* 970/power4 */ \ + || TARGET_POPCNTB /* ISA 2.02 */ \ + || TARGET_CMPB /* ISA 2.05 */ \ + || TARGET_POPCNTD /* ISA 2.06 */ \ + || TARGET_XILINX_FPU) + +#define TARGET_FCTIDZ TARGET_FCFID +#define TARGET_STFIWX TARGET_PPC_GFXOPT +#define TARGET_LFIWAX TARGET_CMPB +#define TARGET_LFIWZX TARGET_POPCNTD +#define TARGET_FCFIDS TARGET_POPCNTD +#define TARGET_FCFIDU TARGET_POPCNTD +#define TARGET_FCFIDUS TARGET_POPCNTD +#define TARGET_FCTIDUZ TARGET_POPCNTD +#define TARGET_FCTIWUZ TARGET_POPCNTD +#define TARGET_CTZ TARGET_MODULO +#define TARGET_EXTSWSLI (TARGET_MODULO && TARGET_POWERPC64) +#define TARGET_MADDLD (TARGET_MODULO && TARGET_POWERPC64) + +#define TARGET_XSCVDPSPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR) +#define TARGET_XSCVSPDPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR) +#define TARGET_VADDUQM (TARGET_P8_VECTOR && TARGET_POWERPC64) +#define TARGET_DIRECT_MOVE_128 (TARGET_P9_VECTOR && TARGET_DIRECT_MOVE \ + && TARGET_POWERPC64) +#define TARGET_VEXTRACTUB (TARGET_P9_VECTOR && TARGET_DIRECT_MOVE \ + && TARGET_UPPER_REGS_DI && TARGET_POWERPC64) + + +/* Whether we should avoid (SUBREG:SI (REG:SF) and (SUBREG:SF (REG:SI). */ +#define TARGET_NO_SF_SUBREG TARGET_DIRECT_MOVE_64BIT +#define TARGET_ALLOW_SF_SUBREG (!TARGET_DIRECT_MOVE_64BIT) + +/* This wants to be set for p8 and newer. On p7, overlapping unaligned + loads are slow. */ +#define TARGET_EFFICIENT_OVERLAPPING_UNALIGNED TARGET_EFFICIENT_UNALIGNED_VSX + +/* Byte/char syncs were added as phased in for ISA 2.06B, but are not present + in power7, so conditionalize them on p8 features. TImode syncs need quad + memory support. */ +#define TARGET_SYNC_HI_QI (TARGET_QUAD_MEMORY \ + || TARGET_QUAD_MEMORY_ATOMIC \ + || TARGET_DIRECT_MOVE) + +#define TARGET_SYNC_TI TARGET_QUAD_MEMORY_ATOMIC + +/* Power7 has both 32-bit load and store integer for the FPRs, so we don't need + to allocate the SDmode stack slot to get the value into the proper location + in the register. */ +#define TARGET_NO_SDMODE_STACK (TARGET_LFIWZX && TARGET_STFIWX && TARGET_DFP) + +/* ISA 3.0 has new min/max functions that don't need fast math that are being + phased in. Min/max using FSEL or XSMAXDP/XSMINDP do not return the correct + answers if the arguments are not in the normal range. */ +#define TARGET_MINMAX_SF (TARGET_SF_FPR && TARGET_PPC_GFXOPT \ + && (TARGET_P9_MINMAX || !flag_trapping_math)) + +#define TARGET_MINMAX_DF (TARGET_DF_FPR && TARGET_PPC_GFXOPT \ + && (TARGET_P9_MINMAX || !flag_trapping_math)) + +/* In switching from using target_flags to using rs6000_isa_flags, the options + machinery creates OPTION_MASK_<xxx> instead of MASK_<xxx>. For now map + OPTION_MASK_<xxx> back into MASK_<xxx>. 
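For example, an existing test such as (rs6000_isa_flags & MASK_VSX) keeps + working unchanged, because MASK_VSX below simply expands to OPTION_MASK_VSX.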
*/ +#define MASK_ALTIVEC OPTION_MASK_ALTIVEC +#define MASK_CMPB OPTION_MASK_CMPB +#define MASK_CRYPTO OPTION_MASK_CRYPTO +#define MASK_DFP OPTION_MASK_DFP +#define MASK_DIRECT_MOVE OPTION_MASK_DIRECT_MOVE +#define MASK_DLMZB OPTION_MASK_DLMZB +#define MASK_EABI OPTION_MASK_EABI +#define MASK_FLOAT128_TYPE OPTION_MASK_FLOAT128_TYPE +#define MASK_FPRND OPTION_MASK_FPRND +#define MASK_P8_FUSION OPTION_MASK_P8_FUSION +#define MASK_HARD_FLOAT OPTION_MASK_HARD_FLOAT +#define MASK_HTM OPTION_MASK_HTM +#define MASK_ISEL OPTION_MASK_ISEL +#define MASK_MFCRF OPTION_MASK_MFCRF +#define MASK_MFPGPR OPTION_MASK_MFPGPR +#define MASK_MULHW OPTION_MASK_MULHW +#define MASK_MULTIPLE OPTION_MASK_MULTIPLE +#define MASK_NO_UPDATE OPTION_MASK_NO_UPDATE +#define MASK_P8_VECTOR OPTION_MASK_P8_VECTOR +#define MASK_P9_VECTOR OPTION_MASK_P9_VECTOR +#define MASK_P9_MISC OPTION_MASK_P9_MISC +#define MASK_POPCNTB OPTION_MASK_POPCNTB +#define MASK_POPCNTD OPTION_MASK_POPCNTD +#define MASK_PPC_GFXOPT OPTION_MASK_PPC_GFXOPT +#define MASK_PPC_GPOPT OPTION_MASK_PPC_GPOPT +#define MASK_RECIP_PRECISION OPTION_MASK_RECIP_PRECISION +#define MASK_SOFT_FLOAT OPTION_MASK_SOFT_FLOAT +#define MASK_STRICT_ALIGN OPTION_MASK_STRICT_ALIGN +#define MASK_STRING OPTION_MASK_STRING +#define MASK_UPDATE OPTION_MASK_UPDATE +#define MASK_VSX OPTION_MASK_VSX +#define MASK_VSX_TIMODE OPTION_MASK_VSX_TIMODE + +#ifndef IN_LIBGCC2 +#define MASK_POWERPC64 OPTION_MASK_POWERPC64 +#endif + +#ifdef TARGET_64BIT +#define MASK_64BIT OPTION_MASK_64BIT +#endif + +#ifdef TARGET_LITTLE_ENDIAN +#define MASK_LITTLE_ENDIAN OPTION_MASK_LITTLE_ENDIAN +#endif + +#ifdef TARGET_REGNAMES +#define MASK_REGNAMES OPTION_MASK_REGNAMES +#endif + +#ifdef TARGET_PROTOTYPE +#define MASK_PROTOTYPE OPTION_MASK_PROTOTYPE +#endif + +#ifdef TARGET_MODULO +#define RS6000_BTM_MODULO OPTION_MASK_MODULO +#endif + + +/* For power systems, we want to enable Altivec and VSX builtins even if the + user did not use -maltivec or -mvsx to allow the builtins to be used inside + of #pragma GCC target or the target attribute to change the code level for a + given system. The SPE and Paired builtins are only enabled if you configure + the compiler for those builtins, and those machines don't support altivec or + VSX. */ + +#define TARGET_EXTRA_BUILTINS (!TARGET_SPE && !TARGET_PAIRED_FLOAT \ + && ((TARGET_POWERPC64 \ + || TARGET_PPC_GPOPT /* 970/power4 */ \ + || TARGET_POPCNTB /* ISA 2.02 */ \ + || TARGET_CMPB /* ISA 2.05 */ \ + || TARGET_POPCNTD /* ISA 2.06 */ \ + || TARGET_ALTIVEC \ + || TARGET_VSX \ + || TARGET_HARD_FLOAT))) + +/* E500 cores only support plain "sync", not lwsync. */ +#define TARGET_NO_LWSYNC (rs6000_cpu == PROCESSOR_PPC8540 \ + || rs6000_cpu == PROCESSOR_PPC8548) + + +/* Whether SF/DF operations are supported on the E500. */ +#define TARGET_SF_SPE (TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT \ + && !TARGET_FPRS) + +#define TARGET_DF_SPE (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT \ + && !TARGET_FPRS && TARGET_E500_DOUBLE) + +/* Whether SF/DF operations are supported by the normal floating point unit + (or the vector/scalar unit). */ +#define TARGET_SF_FPR (TARGET_HARD_FLOAT && TARGET_FPRS \ + && TARGET_SINGLE_FLOAT) + +#define TARGET_DF_FPR (TARGET_HARD_FLOAT && TARGET_FPRS \ + && TARGET_DOUBLE_FLOAT) + +/* Whether SF/DF operations are supported by any hardware. */ +#define TARGET_SF_INSN (TARGET_SF_FPR || TARGET_SF_SPE) +#define TARGET_DF_INSN (TARGET_DF_FPR || TARGET_DF_SPE) + +/* Which machine supports the various reciprocal estimate instructions. 
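FRES and FRSQRTES are the single-precision estimates, FRE and FRSQRTE the + double-precision ones; each is only reported as available when the matching + float support and ISA level are enabled, as the conditions below spell out.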
*/ +#define TARGET_FRES (TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT \ + && TARGET_FPRS && TARGET_SINGLE_FLOAT) + +#define TARGET_FRE (TARGET_HARD_FLOAT && TARGET_FPRS \ + && TARGET_DOUBLE_FLOAT \ + && (TARGET_POPCNTB || VECTOR_UNIT_VSX_P (DFmode))) + +#define TARGET_FRSQRTES (TARGET_HARD_FLOAT && TARGET_POPCNTB \ + && TARGET_PPC_GFXOPT && TARGET_FPRS \ + && TARGET_SINGLE_FLOAT) + +#define TARGET_FRSQRTE (TARGET_HARD_FLOAT && TARGET_FPRS \ + && TARGET_DOUBLE_FLOAT \ + && (TARGET_PPC_GFXOPT || VECTOR_UNIT_VSX_P (DFmode))) + +/* Conditions to allow TOC fusion for loading/storing integers. */ +#define TARGET_TOC_FUSION_INT (TARGET_P8_FUSION \ + && TARGET_TOC_FUSION \ + && (TARGET_CMODEL != CMODEL_SMALL) \ + && TARGET_POWERPC64) + +/* Conditions to allow TOC fusion for loading/storing floating point. */ +#define TARGET_TOC_FUSION_FP (TARGET_P9_FUSION \ + && TARGET_TOC_FUSION \ + && (TARGET_CMODEL != CMODEL_SMALL) \ + && TARGET_POWERPC64 \ + && TARGET_HARD_FLOAT \ + && TARGET_FPRS \ + && TARGET_SINGLE_FLOAT \ + && TARGET_DOUBLE_FLOAT) + +/* Macro to say whether we can do optimizations where we need to do parts of + the calculation in 64-bit GPRs and then is transfered to the vector + registers. Do not allow -maltivec=be for these optimizations, because it + adds to the complexity of the code. */ +#define TARGET_DIRECT_MOVE_64BIT (TARGET_DIRECT_MOVE \ + && TARGET_P8_VECTOR \ + && TARGET_POWERPC64 \ + && TARGET_UPPER_REGS_DI \ + && (rs6000_altivec_element_order != 2)) + +/* Whether the various reciprocal divide/square root estimate instructions + exist, and whether we should automatically generate code for the instruction + by default. */ +#define RS6000_RECIP_MASK_HAVE_RE 0x1 /* have RE instruction. */ +#define RS6000_RECIP_MASK_AUTO_RE 0x2 /* generate RE by default. */ +#define RS6000_RECIP_MASK_HAVE_RSQRTE 0x4 /* have RSQRTE instruction. */ +#define RS6000_RECIP_MASK_AUTO_RSQRTE 0x8 /* gen. RSQRTE by default. */ + +extern unsigned char rs6000_recip_bits[]; + +#define RS6000_RECIP_HAVE_RE_P(MODE) \ + (rs6000_recip_bits[(int)(MODE)] & RS6000_RECIP_MASK_HAVE_RE) + +#define RS6000_RECIP_AUTO_RE_P(MODE) \ + (rs6000_recip_bits[(int)(MODE)] & RS6000_RECIP_MASK_AUTO_RE) + +#define RS6000_RECIP_HAVE_RSQRTE_P(MODE) \ + (rs6000_recip_bits[(int)(MODE)] & RS6000_RECIP_MASK_HAVE_RSQRTE) + +#define RS6000_RECIP_AUTO_RSQRTE_P(MODE) \ + (rs6000_recip_bits[(int)(MODE)] & RS6000_RECIP_MASK_AUTO_RSQRTE) + +/* The default CPU for TARGET_OPTION_OVERRIDE. */ +#define OPTION_TARGET_CPU_DEFAULT TARGET_CPU_DEFAULT + +/* Target pragma. */ +#define REGISTER_TARGET_PRAGMAS() do { \ + c_register_pragma (0, "longcall", rs6000_pragma_longcall); \ + targetm.target_option.pragma_parse = rs6000_pragma_target_parse; \ + targetm.resolve_overloaded_builtin = altivec_resolve_overloaded_builtin; \ + rs6000_target_modify_macros_ptr = rs6000_target_modify_macros; \ +} while (0) + +/* Target #defines. */ +#define TARGET_CPU_CPP_BUILTINS() \ + rs6000_cpu_cpp_builtins (pfile) + +/* This is used by rs6000_cpu_cpp_builtins to indicate the byte order + we're compiling for. Some configurations may need to override it. */ +#define RS6000_CPU_CPP_ENDIAN_BUILTINS() \ + do \ + { \ + if (BYTES_BIG_ENDIAN) \ + { \ + builtin_define ("__BIG_ENDIAN__"); \ + builtin_define ("_BIG_ENDIAN"); \ + builtin_assert ("machine=bigendian"); \ + } \ + else \ + { \ + builtin_define ("__LITTLE_ENDIAN__"); \ + builtin_define ("_LITTLE_ENDIAN"); \ + builtin_assert ("machine=littleendian"); \ + } \ + } \ + while (0) + +/* Target machine storage layout. 
*/ + +/* Define this macro if it is advisable to hold scalars in registers + in a wider mode than that declared by the program. In such cases, + the value is constrained to be within the bounds of the declared + type, but kept valid in the wider mode. The signedness of the + extension may differ from that of the type. */ + +#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < (TARGET_32BIT ? 4 : 8)) \ + (MODE) = TARGET_32BIT ? SImode : DImode; + +/* Define this if most significant bit is lowest numbered + in instructions that operate on numbered bit-fields. */ +/* That is true on RS/6000. */ +#define BITS_BIG_ENDIAN 1 + +/* Define this if most significant byte of a word is the lowest numbered. */ +/* That is true on RS/6000. */ +#define BYTES_BIG_ENDIAN 1 + +/* Define this if most significant word of a multiword number is lowest + numbered. + + For RS/6000 we can decide arbitrarily since there are no machine + instructions for them. Might as well be consistent with bits and bytes. */ +#define WORDS_BIG_ENDIAN 1 + +/* This says that for the IBM long double the larger magnitude double + comes first. It's really a two element double array, and arrays + don't index differently between little- and big-endian. */ +#define LONG_DOUBLE_LARGE_FIRST 1 + +#define MAX_BITS_PER_WORD 64 + +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD (! TARGET_POWERPC64 ? 4 : 8) +#ifdef IN_LIBGCC2 +#define MIN_UNITS_PER_WORD UNITS_PER_WORD +#else +#define MIN_UNITS_PER_WORD 4 +#endif +#define UNITS_PER_FP_WORD 8 +#define UNITS_PER_ALTIVEC_WORD 16 +#define UNITS_PER_VSX_WORD 16 +#define UNITS_PER_SPE_WORD 8 +#define UNITS_PER_PAIRED_WORD 8 + +/* Type used for ptrdiff_t, as a string used in a declaration. */ +#define PTRDIFF_TYPE "int" + +/* Type used for size_t, as a string used in a declaration. */ +#define SIZE_TYPE "long unsigned int" + +/* Type used for wchar_t, as a string used in a declaration. */ +#define WCHAR_TYPE "short unsigned int" + +/* Width of wchar_t in bits. */ +#define WCHAR_TYPE_SIZE 16 + +/* A C expression for the size in bits of the type `short' on the + target machine. If you don't define this, the default is half a + word. (If this would be less than one storage unit, it is + rounded up to one unit.) */ +#define SHORT_TYPE_SIZE 16 + +/* A C expression for the size in bits of the type `int' on the + target machine. If you don't define this, the default is one + word. */ +#define INT_TYPE_SIZE 32 + +/* A C expression for the size in bits of the type `long' on the + target machine. If you don't define this, the default is one + word. */ +#define LONG_TYPE_SIZE (TARGET_32BIT ? 32 : 64) + +/* A C expression for the size in bits of the type `long long' on the + target machine. If you don't define this, the default is two + words. */ +#define LONG_LONG_TYPE_SIZE 64 + +/* A C expression for the size in bits of the type `float' on the + target machine. If you don't define this, the default is one + word. */ +#define FLOAT_TYPE_SIZE 32 + +/* A C expression for the size in bits of the type `double' on the + target machine. If you don't define this, the default is two + words. */ +#define DOUBLE_TYPE_SIZE 64 + +/* A C expression for the size in bits of the type `long double' on + the target machine. If you don't define this, the default is two + words. */ +#define LONG_DOUBLE_TYPE_SIZE rs6000_long_double_type_size + +/* Work around rs6000_long_double_type_size dependency in ada/targtyps.c. 
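In other words, Ada is told that 64-bit double is the widest format the FP + hardware implements directly, even when long double is 128 bits wide.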
*/ +#define WIDEST_HARDWARE_FP_SIZE 64 + +/* Width in bits of a pointer. + See also the macro `Pmode' defined below. */ +extern unsigned rs6000_pointer_size; +#define POINTER_SIZE rs6000_pointer_size + +/* Allocation boundary (in *bits*) for storing arguments in argument list. */ +#define PARM_BOUNDARY (TARGET_32BIT ? 32 : 64) + +/* Boundary (in *bits*) on which stack pointer should be aligned. */ +#define STACK_BOUNDARY \ + ((TARGET_32BIT && !TARGET_ALTIVEC && !TARGET_ALTIVEC_ABI && !TARGET_VSX) \ + ? 64 : 128) + +/* Allocation boundary (in *bits*) for the code of a function. */ +#define FUNCTION_BOUNDARY 32 + +/* No data type wants to be aligned rounder than this. */ +#define BIGGEST_ALIGNMENT 128 + +/* Alignment of field after `int : 0' in a structure. */ +#define EMPTY_FIELD_BOUNDARY 32 + +/* Every structure's size must be a multiple of this. */ +#define STRUCTURE_SIZE_BOUNDARY 8 + +/* A bit-field declared as `int' forces `int' alignment for the struct. */ +#define PCC_BITFIELD_TYPE_MATTERS 1 + +enum data_align { align_abi, align_opt, align_both }; + +/* A C expression to compute the alignment for a variables in the + local store. TYPE is the data type, and ALIGN is the alignment + that the object would ordinarily have. */ +#define LOCAL_ALIGNMENT(TYPE, ALIGN) \ + rs6000_data_alignment (TYPE, ALIGN, align_both) + +/* Make strings word-aligned so strcpy from constants will be faster. */ +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + (TREE_CODE (EXP) == STRING_CST \ + && (STRICT_ALIGNMENT || !optimize_size) \ + && (ALIGN) < BITS_PER_WORD \ + ? BITS_PER_WORD \ + : (ALIGN)) + +/* Make arrays of chars word-aligned for the same reasons. */ +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + rs6000_data_alignment (TYPE, ALIGN, align_opt) + +/* Align vectors to 128 bits. Align SPE vectors and E500 v2 doubles to + 64 bits. */ +#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) \ + rs6000_data_alignment (TYPE, ALIGN, align_abi) + +/* Nonzero if move instructions will actually fail to work + when given unaligned data. */ +#define STRICT_ALIGNMENT 0 + +/* Define this macro to be the value 1 if unaligned accesses have a cost + many times greater than aligned accesses, for example if they are + emulated in a trap handler. */ +/* Altivec vector memory instructions simply ignore the low bits; SPE vector + memory instructions trap on unaligned accesses; VSX memory instructions are + aligned to 4 or 8 bytes. */ +#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) \ + (STRICT_ALIGNMENT \ + || (!TARGET_EFFICIENT_UNALIGNED_VSX \ + && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) && (ALIGN) < 32) \ + || ((VECTOR_MODE_P (MODE) || FLOAT128_VECTOR_P (MODE)) \ + && (int) (ALIGN) < VECTOR_ALIGN (MODE))))) + + +/* Standard register usage. */ + +/* Number of actual hardware registers. + The hardware registers are assigned numbers for the compiler + from 0 to just below FIRST_PSEUDO_REGISTER. + All registers that the compiler knows about must be given numbers, + even those that are not normally considered general registers. + + RS/6000 has 32 fixed-point registers, 32 floating-point registers, + a count register, a link register, and 8 condition register fields, + which we view here as separate registers. AltiVec adds 32 vector + registers and a VRsave register. + + In addition, the difference between the frame and argument pointers is + a function of the number of registers saved, so we need to have a + register for AP that will later be eliminated in favor of SP or FP. + This is a normal register, but it is fixed. 
+ + We also create a pseudo register for float/int conversions, that will + really represent the memory location used. It is represented here as + a register, in order to work around problems in allocating stack storage + in inline functions. + + Another pseudo (not included in DWARF_FRAME_REGISTERS) is soft frame + pointer, which is eventually eliminated in favor of SP or FP. + + The 3 HTM registers aren't also included in DWARF_FRAME_REGISTERS. */ + +#define FIRST_PSEUDO_REGISTER 149 + +/* This must be included for pre gcc 3.0 glibc compatibility. */ +#define PRE_GCC3_DWARF_FRAME_REGISTERS 77 + +/* True if register is an SPE High register. */ +#define SPE_HIGH_REGNO_P(N) \ + ((N) >= FIRST_SPE_HIGH_REGNO && (N) <= LAST_SPE_HIGH_REGNO) + +/* SPE high registers added as hard regs. + The sfp register and 3 HTM registers + aren't included in DWARF_FRAME_REGISTERS. */ +#define DWARF_FRAME_REGISTERS (FIRST_PSEUDO_REGISTER - 4) + +/* The SPE has an additional 32 synthetic registers, with DWARF debug + info numbering for these registers starting at 1200. While eh_frame + register numbering need not be the same as the debug info numbering, + we choose to number these regs for eh_frame at 1200 too. + + We must map them here to avoid huge unwinder tables mostly consisting + of unused space. */ +#define DWARF_REG_TO_UNWIND_COLUMN(r) \ + ((r) >= 1200 ? ((r) - 1200 + (DWARF_FRAME_REGISTERS - 32)) : (r)) + +/* Use standard DWARF numbering for DWARF debugging information. */ +#define DBX_REGISTER_NUMBER(REGNO) rs6000_dbx_register_number ((REGNO), 0) + +/* Use gcc hard register numbering for eh_frame. */ +#define DWARF_FRAME_REGNUM(REGNO) (REGNO) + +/* Map register numbers held in the call frame info that gcc has + collected using DWARF_FRAME_REGNUM to those that should be output in + .debug_frame and .eh_frame. */ +#define DWARF2_FRAME_REG_OUT(REGNO, FOR_EH) \ + rs6000_dbx_register_number ((REGNO), (FOR_EH)? 2 : 1) + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. + + On RS/6000, r1 is used for the stack. On Darwin, r2 is available + as a local register; for all other OS's r2 is the TOC pointer. + + On System V implementations, r13 is fixed and not available for use. */ + +#define FIXED_REGISTERS \ + {0, 1, FIXED_R2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, FIXED_R13, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, \ + /* AltiVec registers. */ \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1 \ + , 1, 1, 1, 1, 1, 1, \ + /* SPE High registers. */ \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 \ +} + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. + Aside from that, you can include as many other registers as you like. */ + +#define CALL_USED_REGISTERS \ + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, FIXED_R13, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, \ + /* AltiVec registers. 
*/ \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1 \ + , 1, 1, 1, 1, 1, 1, \ + /* SPE High registers. */ \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 \ +} + +/* Like `CALL_USED_REGISTERS' except this macro doesn't require that + the entire set of `FIXED_REGISTERS' be included. + (`CALL_USED_REGISTERS' must be a superset of `FIXED_REGISTERS'). + This macro is optional. If not specified, it defaults to the value + of `CALL_USED_REGISTERS'. */ + +#define CALL_REALLY_USED_REGISTERS \ + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, FIXED_R13, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, \ + /* AltiVec registers. */ \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0 \ + , 0, 0, 0, 0, 0, 0, \ + /* SPE High registers. */ \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 \ +} + +#define TOTAL_ALTIVEC_REGS (LAST_ALTIVEC_REGNO - FIRST_ALTIVEC_REGNO + 1) + +#define FIRST_SAVED_ALTIVEC_REGNO (FIRST_ALTIVEC_REGNO+20) +#define FIRST_SAVED_FP_REGNO (14+32) +#define FIRST_SAVED_GP_REGNO (FIXED_R13 ? 14 : 13) + +/* List the order in which to allocate registers. Each register must be + listed once, even those in FIXED_REGISTERS. + + We allocate in the following order: + fp0 (not saved or used for anything) + fp13 - fp2 (not saved; incoming fp arg registers) + fp1 (not saved; return value) + fp31 - fp14 (saved; order given to save least number) + cr7, cr5 (not saved or special) + cr6 (not saved, but used for vector operations) + cr1 (not saved, but used for FP operations) + cr0 (not saved, but used for arithmetic operations) + cr4, cr3, cr2 (saved) + r9 (not saved; best for TImode) + r10, r8-r4 (not saved; highest first for less conflict with params) + r3 (not saved; return value register) + r11 (not saved; later alloc to help shrink-wrap) + r0 (not saved; cannot be base reg) + r31 - r13 (saved; order given to save least number) + r12 (not saved; if used for DImode or DFmode would use r13) + ctr (not saved; when we have the choice ctr is better) + lr (saved) + r1, r2, ap, ca (fixed) + v0 - v1 (not saved or used for anything) + v13 - v3 (not saved; incoming vector arg registers) + v2 (not saved; incoming vector arg reg; return value) + v19 - v14 (not saved or used for anything) + v31 - v20 (saved; order given to save least number) + vrsave, vscr (fixed) + spe_acc, spefscr (fixed) + sfp (fixed) + tfhar (fixed) + tfiar (fixed) + texasr (fixed) +*/ + +#if FIXED_R2 == 1 +#define MAYBE_R2_AVAILABLE +#define MAYBE_R2_FIXED 2, +#else +#define MAYBE_R2_AVAILABLE 2, +#define MAYBE_R2_FIXED +#endif + +#if FIXED_R13 == 1 +#define EARLY_R12 12, +#define LATE_R12 +#else +#define EARLY_R12 +#define LATE_R12 12, +#endif + +#define REG_ALLOC_ORDER \ + {32, \ + /* move fr13 (ie 45) later, so if we need TFmode, it does */ \ + /* not use fr14 which is a saved register. 
*/ \ + 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 45, \ + 33, \ + 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \ + 50, 49, 48, 47, 46, \ + 75, 73, 74, 69, 68, 72, 71, 70, \ + MAYBE_R2_AVAILABLE \ + 9, 10, 8, 7, 6, 5, 4, \ + 3, EARLY_R12 11, 0, \ + 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, \ + 18, 17, 16, 15, 14, 13, LATE_R12 \ + 66, 65, \ + 1, MAYBE_R2_FIXED 67, 76, \ + /* AltiVec registers. */ \ + 77, 78, \ + 90, 89, 88, 87, 86, 85, 84, 83, 82, 81, 80, \ + 79, \ + 96, 95, 94, 93, 92, 91, \ + 108, 107, 106, 105, 104, 103, 102, 101, 100, 99, 98, 97, \ + 109, 110, \ + 111, 112, 113, 114, 115, 116, \ + 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, \ + 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, \ + 141, 142, 143, 144, 145, 146, 147, 148 \ +} + +/* True if register is floating-point. */ +#define FP_REGNO_P(N) ((N) >= 32 && (N) <= 63) + +/* True if register is a condition register. */ +#define CR_REGNO_P(N) ((N) >= CR0_REGNO && (N) <= CR7_REGNO) + +/* True if register is a condition register, but not cr0. */ +#define CR_REGNO_NOT_CR0_P(N) ((N) >= CR1_REGNO && (N) <= CR7_REGNO) + +/* True if register is an integer register. */ +#define INT_REGNO_P(N) \ + ((N) <= 31 || (N) == ARG_POINTER_REGNUM || (N) == FRAME_POINTER_REGNUM) + +/* SPE SIMD registers are just the GPRs. */ +#define SPE_SIMD_REGNO_P(N) ((N) <= 31) + +/* PAIRED SIMD registers are just the FPRs. */ +#define PAIRED_SIMD_REGNO_P(N) ((N) >= 32 && (N) <= 63) + +/* True if register is the CA register. */ +#define CA_REGNO_P(N) ((N) == CA_REGNO) + +/* True if register is an AltiVec register. */ +#define ALTIVEC_REGNO_P(N) ((N) >= FIRST_ALTIVEC_REGNO && (N) <= LAST_ALTIVEC_REGNO) + +/* True if register is a VSX register. */ +#define VSX_REGNO_P(N) (FP_REGNO_P (N) || ALTIVEC_REGNO_P (N)) + +/* Alternate name for any vector register supporting floating point, no matter + which instruction set(s) are available. */ +#define VFLOAT_REGNO_P(N) \ + (ALTIVEC_REGNO_P (N) || (TARGET_VSX && FP_REGNO_P (N))) + +/* Alternate name for any vector register supporting integer, no matter which + instruction set(s) are available. */ +#define VINT_REGNO_P(N) ALTIVEC_REGNO_P (N) + +/* Alternate name for any vector register supporting logical operations, no + matter which instruction set(s) are available. Allow GPRs as well as the + vector registers. */ +#define VLOGICAL_REGNO_P(N) \ + (INT_REGNO_P (N) || ALTIVEC_REGNO_P (N) \ + || (TARGET_VSX && FP_REGNO_P (N))) \ + +/* Return number of consecutive hard regs needed starting at reg REGNO + to hold something of mode MODE. */ + +#define HARD_REGNO_NREGS(REGNO, MODE) rs6000_hard_regno_nregs[(MODE)][(REGNO)] + +/* When setting up caller-save slots (MODE == VOIDmode) ensure we allocate + enough space to account for vectors in FP regs. However, TFmode/TDmode + should not use VSX instructions to do a caller save. */ +#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \ + ((NREGS) <= rs6000_hard_regno_nregs[MODE][REGNO] \ + ? (MODE) \ + : TARGET_VSX \ + && ((MODE) == VOIDmode || ALTIVEC_OR_VSX_VECTOR_MODE (MODE)) \ + && FP_REGNO_P (REGNO) \ + ? V2DFmode \ + : TARGET_E500_DOUBLE && (MODE) == SImode \ + ? SImode \ + : TARGET_E500_DOUBLE && ((MODE) == VOIDmode || (MODE) == DFmode) \ + ? DFmode \ + : !TARGET_E500_DOUBLE && FLOAT128_IBM_P (MODE) && FP_REGNO_P (REGNO) \ + ? DFmode \ + : !TARGET_E500_DOUBLE && (MODE) == TDmode && FP_REGNO_P (REGNO) \ + ? 
DImode \ + : choose_hard_reg_mode ((REGNO), (NREGS), false)) + +#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) \ + (((TARGET_32BIT && TARGET_POWERPC64 \ + && (GET_MODE_SIZE (MODE) > 4) \ + && INT_REGNO_P (REGNO)) ? 1 : 0) \ + || (TARGET_VSX && FP_REGNO_P (REGNO) \ + && GET_MODE_SIZE (MODE) > 8 && !FLOAT128_2REG_P (MODE))) + +#define VSX_VECTOR_MODE(MODE) \ + ((MODE) == V4SFmode \ + || (MODE) == V2DFmode) \ + +/* Note KFmode and possibly TFmode (i.e. IEEE 128-bit floating point) are not + really a vector, but we want to treat it as a vector for moves, and + such. */ + +#define ALTIVEC_VECTOR_MODE(MODE) \ + ((MODE) == V16QImode \ + || (MODE) == V8HImode \ + || (MODE) == V4SFmode \ + || (MODE) == V4SImode \ + || FLOAT128_VECTOR_P (MODE)) + +#define ALTIVEC_OR_VSX_VECTOR_MODE(MODE) \ + (ALTIVEC_VECTOR_MODE (MODE) || VSX_VECTOR_MODE (MODE) \ + || (MODE) == V2DImode || (MODE) == V1TImode) + +#define SPE_VECTOR_MODE(MODE) \ + ((MODE) == V4HImode \ + || (MODE) == V2SFmode \ + || (MODE) == V1DImode \ + || (MODE) == V2SImode) + +#define PAIRED_VECTOR_MODE(MODE) \ + ((MODE) == V2SFmode) + +/* Value is TRUE if hard register REGNO can hold a value of + machine-mode MODE. */ +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + rs6000_hard_regno_mode_ok_p[(int)(MODE)][REGNO] + +/* Value is 1 if it is a good idea to tie two pseudo registers + when one has mode MODE1 and one has mode MODE2. + If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2, + for any hard reg, then this must be 0 for correct output. + + PTImode cannot tie with other modes because PTImode is restricted to even + GPR registers, and TImode can go in any GPR as well as VSX registers (PR + 57744). + + Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE + 128-bit floating point on VSX systems ties with other vectors. */ +#define MODES_TIEABLE_P(MODE1, MODE2) \ + ((MODE1) == PTImode \ + ? (MODE2) == PTImode \ + : (MODE2) == PTImode \ + ? 0 \ + : ALTIVEC_OR_VSX_VECTOR_MODE (MODE1) \ + ? ALTIVEC_OR_VSX_VECTOR_MODE (MODE2) \ + : ALTIVEC_OR_VSX_VECTOR_MODE (MODE2) \ + ? 0 \ + : SCALAR_FLOAT_MODE_P (MODE1) \ + ? SCALAR_FLOAT_MODE_P (MODE2) \ + : SCALAR_FLOAT_MODE_P (MODE2) \ + ? 0 \ + : GET_MODE_CLASS (MODE1) == MODE_CC \ + ? GET_MODE_CLASS (MODE2) == MODE_CC \ + : GET_MODE_CLASS (MODE2) == MODE_CC \ + ? 0 \ + : SPE_VECTOR_MODE (MODE1) \ + ? SPE_VECTOR_MODE (MODE2) \ + : SPE_VECTOR_MODE (MODE2) \ + ? 0 \ + : 1) + +/* Post-reload, we can't use any new AltiVec registers, as we already + emitted the vrsave mask. */ + +#define HARD_REGNO_RENAME_OK(SRC, DST) \ + (! ALTIVEC_REGNO_P (DST) || df_regs_ever_live_p (DST)) + +/* Specify the cost of a branch insn; roughly the number of extra insns that + should be added to avoid a branch. + + Set this to 3 on the RS/6000 since that is roughly the average cost of an + unscheduled conditional branch. */ + +#define BRANCH_COST(speed_p, predictable_p) 3 + +/* Override BRANCH_COST heuristic which empirically produces worse + performance for removing short circuiting from the logical ops. */ + +#define LOGICAL_OP_NON_SHORT_CIRCUIT 0 + +/* A fixed register used at epilogue generation to address SPE registers + with negative offsets. The 64-bit load/store instructions on the SPE + only take positive offsets (and small ones at that), so we need to + reserve a register for consing up negative offsets. */ + +#define FIXED_SCRATCH 0 + +/* Specify the registers used for certain standard purposes. + The values of these macros are register numbers. 
*/ + +/* RS/6000 pc isn't overloaded on a register that the compiler knows about. */ +/* #define PC_REGNUM */ + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM 1 + +/* Base register for access to local variables of the function. */ +#define HARD_FRAME_POINTER_REGNUM 31 + +/* Base register for access to local variables of the function. */ +#define FRAME_POINTER_REGNUM 113 + +/* Base register for access to arguments of the function. */ +#define ARG_POINTER_REGNUM 67 + +/* Place to put static chain when calling a function that requires it. */ +#define STATIC_CHAIN_REGNUM 11 + +/* Base register for access to thread local storage variables. */ +#define TLS_REGNUM ((TARGET_64BIT) ? 13 : 2) + + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. + If there is more than one class, another class must be named NO_REGS + and contain no registers. + + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. */ + +/* The RS/6000 has three types of registers, fixed-point, floating-point, and + condition registers, plus three special registers, CTR, and the link + register. AltiVec adds a vector register class. VSX registers overlap the + FPR registers and the Altivec registers. + + However, r0 is special in that it cannot be used as a base register. + So make a class for registers valid as base registers. + + Also, cr0 is the only condition code register that can be used in + arithmetic insns, so make a separate class for it. */ + +enum reg_class +{ + NO_REGS, + BASE_REGS, + GENERAL_REGS, + FLOAT_REGS, + ALTIVEC_REGS, + VSX_REGS, + VRSAVE_REGS, + VSCR_REGS, + SPE_ACC_REGS, + SPEFSCR_REGS, + SPR_REGS, + NON_SPECIAL_REGS, + LINK_REGS, + CTR_REGS, + LINK_OR_CTR_REGS, + SPECIAL_REGS, + SPEC_OR_GEN_REGS, + CR0_REGS, + CR_REGS, + NON_FLOAT_REGS, + CA_REGS, + SPE_HIGH_REGS, + ALL_REGS, + LIM_REG_CLASSES +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +/* Give names of register classes as strings for dump file. */ + +#define REG_CLASS_NAMES \ +{ \ + "NO_REGS", \ + "BASE_REGS", \ + "GENERAL_REGS", \ + "FLOAT_REGS", \ + "ALTIVEC_REGS", \ + "VSX_REGS", \ + "VRSAVE_REGS", \ + "VSCR_REGS", \ + "SPE_ACC_REGS", \ + "SPEFSCR_REGS", \ + "SPR_REGS", \ + "NON_SPECIAL_REGS", \ + "LINK_REGS", \ + "CTR_REGS", \ + "LINK_OR_CTR_REGS", \ + "SPECIAL_REGS", \ + "SPEC_OR_GEN_REGS", \ + "CR0_REGS", \ + "CR_REGS", \ + "NON_FLOAT_REGS", \ + "CA_REGS", \ + "SPE_HIGH_REGS", \ + "ALL_REGS" \ +} + +/* Define which registers fit in which classes. + This is an initializer for a vector of HARD_REG_SET + of length N_REG_CLASSES. */ + +#define REG_CLASS_CONTENTS \ +{ \ + /* NO_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, \ + /* BASE_REGS. */ \ + { 0xfffffffe, 0x00000000, 0x00000008, 0x00020000, 0x00000000 }, \ + /* GENERAL_REGS. */ \ + { 0xffffffff, 0x00000000, 0x00000008, 0x00020000, 0x00000000 }, \ + /* FLOAT_REGS. 
*/ \ + { 0x00000000, 0xffffffff, 0x00000000, 0x00000000, 0x00000000 }, \ + /* ALTIVEC_REGS. */ \ + { 0x00000000, 0x00000000, 0xffffe000, 0x00001fff, 0x00000000 }, \ + /* VSX_REGS. */ \ + { 0x00000000, 0xffffffff, 0xffffe000, 0x00001fff, 0x00000000 }, \ + /* VRSAVE_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00002000, 0x00000000 }, \ + /* VSCR_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00004000, 0x00000000 }, \ + /* SPE_ACC_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00008000, 0x00000000 }, \ + /* SPEFSCR_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00010000, 0x00000000 }, \ + /* SPR_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00040000, 0x00000000 }, \ + /* NON_SPECIAL_REGS. */ \ + { 0xffffffff, 0xffffffff, 0x00000008, 0x00020000, 0x00000000 }, \ + /* LINK_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000000 }, \ + /* CTR_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000 }, \ + /* LINK_OR_CTR_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000006, 0x00000000, 0x00000000 }, \ + /* SPECIAL_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000006, 0x00002000, 0x00000000 }, \ + /* SPEC_OR_GEN_REGS. */ \ + { 0xffffffff, 0x00000000, 0x0000000e, 0x00022000, 0x00000000 }, \ + /* CR0_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000010, 0x00000000, 0x00000000 }, \ + /* CR_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000ff0, 0x00000000, 0x00000000 }, \ + /* NON_FLOAT_REGS. */ \ + { 0xffffffff, 0x00000000, 0x00000ffe, 0x00020000, 0x00000000 }, \ + /* CA_REGS. */ \ + { 0x00000000, 0x00000000, 0x00001000, 0x00000000, 0x00000000 }, \ + /* SPE_HIGH_REGS. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0xffe00000, 0x001fffff }, \ + /* ALL_REGS. */ \ + { 0xffffffff, 0xffffffff, 0xfffffffe, 0xffe7ffff, 0x001fffff } \ +} + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ + +extern enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER]; + +#define REGNO_REG_CLASS(REGNO) \ + (gcc_checking_assert (IN_RANGE ((REGNO), 0, FIRST_PSEUDO_REGISTER-1)),\ + rs6000_regno_regclass[(REGNO)]) + +/* Register classes for various constraints that are based on the target + switches. */ +enum r6000_reg_class_enum { + RS6000_CONSTRAINT_d, /* fpr registers for double values */ + RS6000_CONSTRAINT_f, /* fpr registers for single values */ + RS6000_CONSTRAINT_v, /* Altivec registers */ + RS6000_CONSTRAINT_wa, /* Any VSX register */ + RS6000_CONSTRAINT_wb, /* Altivec register if ISA 3.0 vector. */ + RS6000_CONSTRAINT_wd, /* VSX register for V2DF */ + RS6000_CONSTRAINT_we, /* VSX register if ISA 3.0 vector. */ + RS6000_CONSTRAINT_wf, /* VSX register for V4SF */ + RS6000_CONSTRAINT_wg, /* FPR register for -mmfpgpr */ + RS6000_CONSTRAINT_wh, /* FPR register for direct moves. */ + RS6000_CONSTRAINT_wi, /* FPR/VSX register to hold DImode */ + RS6000_CONSTRAINT_wj, /* FPR/VSX register for DImode direct moves. */ + RS6000_CONSTRAINT_wk, /* FPR/VSX register for DFmode direct moves. */ + RS6000_CONSTRAINT_wl, /* FPR register for LFIWAX */ + RS6000_CONSTRAINT_wm, /* VSX register for direct move */ + RS6000_CONSTRAINT_wo, /* VSX register for power9 vector. */ + RS6000_CONSTRAINT_wp, /* VSX reg for IEEE 128-bit fp TFmode. */ + RS6000_CONSTRAINT_wq, /* VSX reg for IEEE 128-bit fp KFmode. 
*/ + RS6000_CONSTRAINT_wr, /* GPR register if 64-bit */ + RS6000_CONSTRAINT_ws, /* VSX register for DF */ + RS6000_CONSTRAINT_wt, /* VSX register for TImode */ + RS6000_CONSTRAINT_wu, /* Altivec register for float load/stores. */ + RS6000_CONSTRAINT_wv, /* Altivec register for double load/stores. */ + RS6000_CONSTRAINT_ww, /* FP or VSX register for vsx float ops. */ + RS6000_CONSTRAINT_wx, /* FPR register for STFIWX */ + RS6000_CONSTRAINT_wy, /* VSX register for SF */ + RS6000_CONSTRAINT_wz, /* FPR register for LFIWZX */ + RS6000_CONSTRAINT_wA, /* BASE_REGS if 64-bit. */ + RS6000_CONSTRAINT_wH, /* Altivec register for 32-bit integers. */ + RS6000_CONSTRAINT_wI, /* VSX register for 32-bit integers. */ + RS6000_CONSTRAINT_wJ, /* VSX register for 8/16-bit integers. */ + RS6000_CONSTRAINT_wK, /* Altivec register for 16/32-bit integers. */ + RS6000_CONSTRAINT_MAX +}; + +extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX]; + +/* The class value for index registers, and the one for base regs. */ +#define INDEX_REG_CLASS GENERAL_REGS +#define BASE_REG_CLASS BASE_REGS + +/* Return whether a given register class can hold VSX objects. */ +#define VSX_REG_CLASS_P(CLASS) \ + ((CLASS) == VSX_REGS || (CLASS) == FLOAT_REGS || (CLASS) == ALTIVEC_REGS) + +/* Return whether a given register class targets general purpose registers. */ +#define GPR_REG_CLASS_P(CLASS) ((CLASS) == GENERAL_REGS || (CLASS) == BASE_REGS) + +/* Given an rtx X being reloaded into a reg required to be + in class CLASS, return the class of reg to actually use. + In general this is just CLASS; but on some machines + in some cases it is preferable to use a more restrictive class. + + On the RS/6000, we have to return NO_REGS when we want to reload a + floating-point CONST_DOUBLE to force it to be copied to memory. + + We also don't want to reload integer values into floating-point + registers if we can at all help it. In fact, this can + cause reload to die, if it tries to generate a reload of CTR + into a FP register and discovers it doesn't have the memory location + required. + + ??? Would it be a good idea to have reload do the converse, that is + try to reload floating modes into FP registers if possible? + */ + +#define PREFERRED_RELOAD_CLASS(X,CLASS) \ + rs6000_preferred_reload_class_ptr (X, CLASS) + +/* Return the register class of a scratch register needed to copy IN into + or out of a register in CLASS in MODE. If it can be done directly, + NO_REGS is returned. */ + +#define SECONDARY_RELOAD_CLASS(CLASS,MODE,IN) \ + rs6000_secondary_reload_class_ptr (CLASS, MODE, IN) + +/* If we are copying between FP or AltiVec registers and anything + else, we need a memory location. The exception is when we are + targeting ppc64 and the move to/from fpr to gpr instructions + are available.*/ + +#define SECONDARY_MEMORY_NEEDED(CLASS1,CLASS2,MODE) \ + rs6000_secondary_memory_needed_ptr (CLASS1, CLASS2, MODE) + +/* For cpus that cannot load/store SDmode values from the 64-bit + FP registers without using a full 64-bit load/store, we need + to allocate a full 64-bit stack slot for them. */ + +#define SECONDARY_MEMORY_NEEDED_RTX(MODE) \ + rs6000_secondary_memory_needed_rtx (MODE) + +/* Specify the mode to be used for memory when a secondary memory + location is needed. For cpus that cannot load/store SDmode values + from the 64-bit FP registers without using a full 64-bit + load/store, we need a wider mode. 
*/ +#define SECONDARY_MEMORY_NEEDED_MODE(MODE) \ + rs6000_secondary_memory_needed_mode (MODE) + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. + + On RS/6000, this is the size of MODE in words, except in the FP regs, where + a single reg is enough for two words, unless we have VSX, where the FP + registers can hold 128 bits. */ +#define CLASS_MAX_NREGS(CLASS, MODE) rs6000_class_max_nregs[(MODE)][(CLASS)] + +/* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */ + +#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ + rs6000_cannot_change_mode_class_ptr (FROM, TO, CLASS) + +/* Stack layout; function entry, exit and calling. */ + +/* Define this if pushing a word on the stack + makes the stack pointer a smaller address. */ +#define STACK_GROWS_DOWNWARD 1 + +/* Offsets recorded in opcodes are a multiple of this alignment factor. */ +#define DWARF_CIE_DATA_ALIGNMENT (-((int) (TARGET_32BIT ? 4 : 8))) + +/* Define this to nonzero if the nominal address of the stack frame + is at the high-address end of the local variables; + that is, each additional local variable allocated + goes at a more negative offset in the frame. + + On the RS/6000, we grow upwards, from the area after the outgoing + arguments. */ +#define FRAME_GROWS_DOWNWARD (flag_stack_protect != 0 \ + || (flag_sanitize & SANITIZE_ADDRESS) != 0) + +/* Size of the fixed area on the stack */ +#define RS6000_SAVE_AREA \ + ((DEFAULT_ABI == ABI_V4 ? 8 : DEFAULT_ABI == ABI_ELFv2 ? 16 : 24) \ + << (TARGET_64BIT ? 1 : 0)) + +/* Stack offset for toc save slot. */ +#define RS6000_TOC_SAVE_SLOT \ + ((DEFAULT_ABI == ABI_ELFv2 ? 12 : 20) << (TARGET_64BIT ? 1 : 0)) + +/* Align an address */ +#define RS6000_ALIGN(n,a) ROUND_UP ((n), (a)) + +/* Offset within stack frame to start allocating local variables at. + If FRAME_GROWS_DOWNWARD, this is the offset to the END of the + first local allocated. Otherwise, it is the offset to the BEGINNING + of the first local allocated. + + On the RS/6000, the frame pointer is the same as the stack pointer, + except for dynamic allocations. So we start after the fixed area and + outgoing parameter area. + + If the function uses dynamic stack space (CALLS_ALLOCA is set), that + space needs to be aligned to STACK_BOUNDARY, i.e. the sum of the + sizes of the fixed area and the parameter area must be a multiple of + STACK_BOUNDARY. */ + +#define STARTING_FRAME_OFFSET \ + (FRAME_GROWS_DOWNWARD \ + ? 0 \ + : (cfun->calls_alloca \ + ? (RS6000_ALIGN (crtl->outgoing_args_size + RS6000_SAVE_AREA, \ + (TARGET_ALTIVEC || TARGET_VSX) ? 16 : 8 )) \ + : (RS6000_ALIGN (crtl->outgoing_args_size, \ + (TARGET_ALTIVEC || TARGET_VSX) ? 16 : 8) \ + + RS6000_SAVE_AREA))) + +/* Offset from the stack pointer register to an item dynamically + allocated on the stack, e.g., by `alloca'. + + The default value for this macro is `STACK_POINTER_OFFSET' plus the + length of the outgoing arguments. The default is correct for most + machines. See `function.c' for details. + + This value must be a multiple of STACK_BOUNDARY (hard coded in + `emit-rtl.c'). */ +#define STACK_DYNAMIC_OFFSET(FUNDECL) \ + RS6000_ALIGN (crtl->outgoing_args_size + STACK_POINTER_OFFSET, \ + (TARGET_ALTIVEC || TARGET_VSX) ? 16 : 8) + +/* If we generate an insn to push BYTES bytes, + this says how many the stack pointer really advances by. + On RS/6000, don't define this because there are no push insns. 
*/ +/* #define PUSH_ROUNDING(BYTES) */ + +/* Offset of first parameter from the argument pointer register value. + On the RS/6000, we define the argument pointer to the start of the fixed + area. */ +#define FIRST_PARM_OFFSET(FNDECL) RS6000_SAVE_AREA + +/* Offset from the argument pointer register value to the top of + stack. This is different from FIRST_PARM_OFFSET because of the + register save area. */ +#define ARG_POINTER_CFA_OFFSET(FNDECL) 0 + +/* Define this if stack space is still allocated for a parameter passed + in a register. The value is the number of bytes allocated to this + area. */ +#define REG_PARM_STACK_SPACE(FNDECL) \ + rs6000_reg_parm_stack_space ((FNDECL), false) + +/* Define this macro if space guaranteed when compiling a function body + is different to space required when making a call, a situation that + can arise with K&R style function definitions. */ +#define INCOMING_REG_PARM_STACK_SPACE(FNDECL) \ + rs6000_reg_parm_stack_space ((FNDECL), true) + +/* Define this if the above stack space is to be considered part of the + space allocated by the caller. */ +#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1 + +/* This is the difference between the logical top of stack and the actual sp. + + For the RS/6000, sp points past the fixed area. */ +#define STACK_POINTER_OFFSET RS6000_SAVE_AREA + +/* Define this if the maximum size of all the outgoing args is to be + accumulated and pushed during the prologue. The amount can be + found in the variable crtl->outgoing_args_size. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/* Define how to find the value returned by a library function + assuming the value has mode MODE. */ + +#define LIBCALL_VALUE(MODE) rs6000_libcall_value ((MODE)) + +/* DRAFT_V4_STRUCT_RET defaults off. */ +#define DRAFT_V4_STRUCT_RET 0 + +/* Let TARGET_RETURN_IN_MEMORY control what happens. */ +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* Mode of stack savearea. + FUNCTION is VOIDmode because calling convention maintains SP. + BLOCK needs Pmode for SP. + NONLOCAL needs twice Pmode to maintain both backchain and SP. */ +#define STACK_SAVEAREA_MODE(LEVEL) \ + (LEVEL == SAVE_FUNCTION ? VOIDmode \ + : LEVEL == SAVE_NONLOCAL ? (TARGET_32BIT ? DImode : PTImode) : Pmode) + +/* Minimum and maximum general purpose registers used to hold arguments. */ +#define GP_ARG_MIN_REG 3 +#define GP_ARG_MAX_REG 10 +#define GP_ARG_NUM_REG (GP_ARG_MAX_REG - GP_ARG_MIN_REG + 1) + +/* Minimum and maximum floating point registers used to hold arguments. */ +#define FP_ARG_MIN_REG 33 +#define FP_ARG_AIX_MAX_REG 45 +#define FP_ARG_V4_MAX_REG 40 +#define FP_ARG_MAX_REG (DEFAULT_ABI == ABI_V4 \ + ? FP_ARG_V4_MAX_REG : FP_ARG_AIX_MAX_REG) +#define FP_ARG_NUM_REG (FP_ARG_MAX_REG - FP_ARG_MIN_REG + 1) + +/* Minimum and maximum AltiVec registers used to hold arguments. */ +#define ALTIVEC_ARG_MIN_REG (FIRST_ALTIVEC_REGNO + 2) +#define ALTIVEC_ARG_MAX_REG (ALTIVEC_ARG_MIN_REG + 11) +#define ALTIVEC_ARG_NUM_REG (ALTIVEC_ARG_MAX_REG - ALTIVEC_ARG_MIN_REG + 1) + +/* Maximum number of registers per ELFv2 homogeneous aggregate argument. */ +#define AGGR_ARG_NUM_REG 8 + +/* Return registers */ +#define GP_ARG_RETURN GP_ARG_MIN_REG +#define FP_ARG_RETURN FP_ARG_MIN_REG +#define ALTIVEC_ARG_RETURN (FIRST_ALTIVEC_REGNO + 2) +#define FP_ARG_MAX_RETURN (DEFAULT_ABI != ABI_ELFv2 ? FP_ARG_RETURN \ + : (FP_ARG_RETURN + AGGR_ARG_NUM_REG - 1)) +#define ALTIVEC_ARG_MAX_RETURN (DEFAULT_ABI != ABI_ELFv2 \ + ? (ALTIVEC_ARG_RETURN \ + + (TARGET_FLOAT128_TYPE ? 
1 : 0)) \ + : (ALTIVEC_ARG_RETURN + AGGR_ARG_NUM_REG - 1)) + +/* Flags for the call/call_value rtl operations set up by function_arg */ +#define CALL_NORMAL 0x00000000 /* no special processing */ +/* Bits in 0x00000001 are unused. */ +#define CALL_V4_CLEAR_FP_ARGS 0x00000002 /* V.4, no FP args passed */ +#define CALL_V4_SET_FP_ARGS 0x00000004 /* V.4, FP args were passed */ +#define CALL_LONG 0x00000008 /* always call indirect */ +#define CALL_LIBCALL 0x00000010 /* libcall */ + +/* We don't have prologue and epilogue functions to save/restore + everything for most ABIs. */ +#define WORLD_SAVE_P(INFO) 0 + +/* 1 if N is a possible register number for a function value + as seen by the caller. + + On RS/6000, this is r3, fp1, and v2 (for AltiVec). */ +#define FUNCTION_VALUE_REGNO_P(N) \ + ((N) == GP_ARG_RETURN \ + || (IN_RANGE ((N), FP_ARG_RETURN, FP_ARG_MAX_RETURN) \ + && TARGET_HARD_FLOAT && TARGET_FPRS) \ + || (IN_RANGE ((N), ALTIVEC_ARG_RETURN, ALTIVEC_ARG_MAX_RETURN) \ + && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)) + +/* 1 if N is a possible register number for function argument passing. + On RS/6000, these are r3-r10 and fp1-fp13. + On AltiVec, v2 - v13 are used for passing vectors. */ +#define FUNCTION_ARG_REGNO_P(N) \ + (IN_RANGE ((N), GP_ARG_MIN_REG, GP_ARG_MAX_REG) \ + || (IN_RANGE ((N), ALTIVEC_ARG_MIN_REG, ALTIVEC_ARG_MAX_REG) \ + && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI) \ + || (IN_RANGE ((N), FP_ARG_MIN_REG, FP_ARG_MAX_REG) \ + && TARGET_HARD_FLOAT && TARGET_FPRS)) + +/* Define a data type for recording info about an argument list + during the scan of that argument list. This data type should + hold all necessary information about the function itself + and about the args processed so far, enough to enable macros + such as FUNCTION_ARG to determine where the next arg should go. + + On the RS/6000, this is a structure. The first element is the number of + total argument words, the second is used to store the next + floating-point register number, and the third says how many more args we + have prototype types for. + + For ABI_V4, we treat these slightly differently -- `sysv_gregno' is + the next available GP register, `fregno' is the next available FP + register, and `words' is the number of words used on the stack. + + The varargs/stdarg support requires that this structure's size + be a multiple of sizeof(int). */ + +typedef struct rs6000_args +{ + int words; /* # words used for passing GP registers */ + int fregno; /* next available FP register */ + int vregno; /* next available AltiVec register */ + int nargs_prototype; /* # args left in the current prototype */ + int prototype; /* Whether a prototype was defined */ + int stdarg; /* Whether function is a stdarg function. */ + int call_cookie; /* Do special things for this call */ + int sysv_gregno; /* next available GP register */ + int intoffset; /* running offset in struct (darwin64) */ + int use_stack; /* any part of struct on stack (darwin64) */ + int floats_in_gpr; /* count of SFmode floats taking up + GPR space (darwin64) */ + int named; /* false for varargs params */ + int escapes; /* if function visible outside tu */ + int libcall; /* If this is a compiler generated call. */ +} CUMULATIVE_ARGS; + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. 
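Typical use in the middle end is roughly: declare a CUMULATIVE_ARGS cum, + call INIT_CUMULATIVE_ARGS on it, then query targetm.calls.function_arg and + targetm.calls.function_arg_advance for each argument in turn.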
*/ + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \ + init_cumulative_args (&CUM, FNTYPE, LIBNAME, FALSE, FALSE, \ + N_NAMED_ARGS, FNDECL, VOIDmode) + +/* Similar, but when scanning the definition of a procedure. We always + set NARGS_PROTOTYPE large so we never return an EXPR_LIST. */ + +#define INIT_CUMULATIVE_INCOMING_ARGS(CUM, FNTYPE, LIBNAME) \ + init_cumulative_args (&CUM, FNTYPE, LIBNAME, TRUE, FALSE, \ + 1000, current_function_decl, VOIDmode) + +/* Like INIT_CUMULATIVE_ARGS' but only used for outgoing libcalls. */ + +#define INIT_CUMULATIVE_LIBCALL_ARGS(CUM, MODE, LIBNAME) \ + init_cumulative_args (&CUM, NULL_TREE, LIBNAME, FALSE, TRUE, \ + 0, NULL_TREE, MODE) + +/* If defined, a C expression which determines whether, and in which + direction, to pad out an argument with extra space. The value + should be of type `enum direction': either `upward' to pad above + the argument, `downward' to pad below, or `none' to inhibit + padding. */ + +#define FUNCTION_ARG_PADDING(MODE, TYPE) function_arg_padding (MODE, TYPE) + +#define PAD_VARARGS_DOWN \ + (FUNCTION_ARG_PADDING (TYPE_MODE (type), type) == downward) + +/* Output assembler code to FILE to increment profiler label # LABELNO + for profiling a function entry. */ + +#define FUNCTION_PROFILER(FILE, LABELNO) \ + output_function_profiler ((FILE), (LABELNO)); + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. No definition is equivalent to + always zero. + + On the RS/6000, this is nonzero because we can restore the stack from + its backpointer, which we maintain. */ +#define EXIT_IGNORE_STACK 1 + +/* Define this macro as a C expression that is nonzero for registers + that are used by the epilogue or the return' pattern. The stack + and frame pointer registers are already be assumed to be used as + needed. */ + +#define EPILOGUE_USES(REGNO) \ + ((reload_completed && (REGNO) == LR_REGNO) \ + || (TARGET_ALTIVEC && (REGNO) == VRSAVE_REGNO) \ + || (crtl->calls_eh_return \ + && TARGET_AIX \ + && (REGNO) == 2)) + + +/* Length in units of the trampoline for entering a nested function. */ + +#define TRAMPOLINE_SIZE rs6000_trampoline_size () + +/* Definitions for __builtin_return_address and __builtin_frame_address. + __builtin_return_address (0) should give link register (LR_REGNO), enable + this. */ +/* This should be uncommented, so that the link register is used, but + currently this would result in unmatched insns and spilling fixed + registers so we'll leave it for another day. When these problems are + taken care of one additional fetch will be necessary in RETURN_ADDR_RTX. + (mrs) */ +/* #define RETURN_ADDR_IN_PREVIOUS_FRAME */ + +/* Number of bytes into the frame return addresses can be found. See + rs6000_stack_info in powerpcspe.c for more information on how the different + abi's store the return address. */ +#define RETURN_ADDRESS_OFFSET \ + ((DEFAULT_ABI == ABI_V4 ? 4 : 8) << (TARGET_64BIT ? 1 : 0)) + +/* The current return address is in link register (65). The return address + of anything farther back is accessed normally at an offset of 8 from the + frame pointer. */ +#define RETURN_ADDR_RTX(COUNT, FRAME) \ + (rs6000_return_addr (COUNT, FRAME)) + + +/* Definitions for register eliminations. + + We have two registers that can be eliminated on the RS/6000. First, the + frame pointer register can often be eliminated in favor of the stack + pointer register. 
Secondly, the argument pointer register can always be + eliminated; it is replaced with either the stack or frame pointer. + + In addition, we use the elimination mechanism to see if r30 is needed + Initially we assume that it isn't. If it is, we spill it. This is done + by making it an eliminable register. We replace it with itself so that + if it isn't needed, then existing uses won't be modified. */ + +/* This is an array of structures. Each structure initializes one pair + of eliminable registers. The "from" register number is given first, + followed by "to". Eliminations of the same "from" register are listed + in order of preference. */ +#define ELIMINABLE_REGS \ +{{ HARD_FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ + { RS6000_PIC_OFFSET_TABLE_REGNUM, RS6000_PIC_OFFSET_TABLE_REGNUM } } + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + ((OFFSET) = rs6000_initial_elimination_offset(FROM, TO)) + +/* Addressing modes, and classification of registers for them. */ + +#define HAVE_PRE_DECREMENT 1 +#define HAVE_PRE_INCREMENT 1 +#define HAVE_PRE_MODIFY_DISP 1 +#define HAVE_PRE_MODIFY_REG 1 + +/* Macros to check register numbers against specific register classes. */ + +/* These assume that REGNO is a hard or pseudo reg number. + They give nonzero only if REGNO is a hard reg of the suitable class + or a pseudo reg currently allocated to a suitable hard reg. + Since they use reg_renumber, they are safe only once reg_renumber + has been allocated, which happens in reginfo.c during register + allocation. */ + +#define REGNO_OK_FOR_INDEX_P(REGNO) \ +((REGNO) < FIRST_PSEUDO_REGISTER \ + ? (REGNO) <= 31 || (REGNO) == 67 \ + || (REGNO) == FRAME_POINTER_REGNUM \ + : (reg_renumber[REGNO] >= 0 \ + && (reg_renumber[REGNO] <= 31 || reg_renumber[REGNO] == 67 \ + || reg_renumber[REGNO] == FRAME_POINTER_REGNUM))) + +#define REGNO_OK_FOR_BASE_P(REGNO) \ +((REGNO) < FIRST_PSEUDO_REGISTER \ + ? ((REGNO) > 0 && (REGNO) <= 31) || (REGNO) == 67 \ + || (REGNO) == FRAME_POINTER_REGNUM \ + : (reg_renumber[REGNO] > 0 \ + && (reg_renumber[REGNO] <= 31 || reg_renumber[REGNO] == 67 \ + || reg_renumber[REGNO] == FRAME_POINTER_REGNUM))) + +/* Nonzero if X is a hard reg that can be used as an index + or if it is a pseudo reg in the non-strict case. */ +#define INT_REG_OK_FOR_INDEX_P(X, STRICT) \ + ((!(STRICT) && REGNO (X) >= FIRST_PSEUDO_REGISTER) \ + || REGNO_OK_FOR_INDEX_P (REGNO (X))) + +/* Nonzero if X is a hard reg that can be used as a base reg + or if it is a pseudo reg in the non-strict case. */ +#define INT_REG_OK_FOR_BASE_P(X, STRICT) \ + ((!(STRICT) && REGNO (X) >= FIRST_PSEUDO_REGISTER) \ + || REGNO_OK_FOR_BASE_P (REGNO (X))) + + +/* Maximum number of registers that can appear in a valid memory address. */ + +#define MAX_REGS_PER_ADDRESS 2 + +/* Recognize any constant value that is a valid address. 
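For instance, a bare (symbol_ref ("x")), a (const (plus (symbol_ref ("x")) + (const_int 4))), and a plain (const_int 1234) used as an absolute address + all satisfy this predicate.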
*/ + +#define CONSTANT_ADDRESS_P(X) \ + (GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF \ + || GET_CODE (X) == CONST_INT || GET_CODE (X) == CONST \ + || GET_CODE (X) == HIGH) + +#define EASY_VECTOR_15(n) ((n) >= -16 && (n) <= 15) +#define EASY_VECTOR_15_ADD_SELF(n) (!EASY_VECTOR_15((n)) \ + && EASY_VECTOR_15((n) >> 1) \ + && ((n) & 1) == 0) + +#define EASY_VECTOR_MSB(n,mode) \ + ((((unsigned HOST_WIDE_INT) (n)) & GET_MODE_MASK (mode)) == \ + ((((unsigned HOST_WIDE_INT)GET_MODE_MASK (mode)) + 1) >> 1)) + + +/* Try a machine-dependent way of reloading an illegitimate address + operand. If we find one, push the reload and jump to WIN. This + macro is used in only one place: `find_reloads_address' in reload.c. + + Implemented on rs6000 by rs6000_legitimize_reload_address. + Note that (X) is evaluated twice; this is safe in current usage. */ + +#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_LEVELS,WIN) \ +do { \ + int win; \ + (X) = rs6000_legitimize_reload_address_ptr ((X), (MODE), (OPNUM), \ + (int)(TYPE), (IND_LEVELS), &win); \ + if ( win ) \ + goto WIN; \ +} while (0) + +#define FIND_BASE_TERM rs6000_find_base_term + +/* The register number of the register used to address a table of + static data addresses in memory. In some cases this register is + defined by a processor's "application binary interface" (ABI). + When this macro is defined, RTL is generated for this register + once, as with the stack pointer and frame pointer registers. If + this macro is not defined, it is up to the machine-dependent files + to allocate such a register (if necessary). */ + +#define RS6000_PIC_OFFSET_TABLE_REGNUM 30 +#define PIC_OFFSET_TABLE_REGNUM \ + (TARGET_TOC ? TOC_REGISTER \ + : flag_pic ? RS6000_PIC_OFFSET_TABLE_REGNUM \ + : INVALID_REGNUM) + +#define TOC_REGISTER (TARGET_MINIMAL_TOC ? RS6000_PIC_OFFSET_TABLE_REGNUM : 2) + +/* Define this macro if the register defined by + `PIC_OFFSET_TABLE_REGNUM' is clobbered by calls. Do not define + this macro if `PIC_OFFSET_TABLE_REGNUM' is not defined. */ + +/* #define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED */ + +/* A C expression that is nonzero if X is a legitimate immediate + operand on the target machine when generating position independent + code. You can assume that X satisfies `CONSTANT_P', so you need + not check this. You can also assume FLAG_PIC is true, so you need + not check it either. You need not define this macro if all + constants (including `SYMBOL_REF') can be immediate operands when + generating position independent code. */ + +/* #define LEGITIMATE_PIC_OPERAND_P (X) */ + +/* Define this if some processing needs to be done immediately before + emitting code for an insn. */ + +#define FINAL_PRESCAN_INSN(INSN,OPERANDS,NOPERANDS) \ + rs6000_final_prescan_insn (INSN, OPERANDS, NOPERANDS) + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. */ +#define CASE_VECTOR_MODE SImode + +/* Define as C expression which evaluates to nonzero if the tablejump + instruction expects the table to contain offsets from the address of the + table. + Do not define this if the table should contain absolute addresses. */ +#define CASE_VECTOR_PC_RELATIVE 1 + +/* Define this as 1 if `char' should by default be signed; else as 0. */ +#define DEFAULT_SIGNED_CHAR 0 + +/* An integer expression for the size in bits of the largest integer machine + mode that should actually be used. */ + +/* Allow pairs of registers to be used, which is the intent of the default. 
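EASY_VECTOR_15 above is the signed 5-bit range of the AltiVec splat-immediate instructions, and EASY_VECTOR_15_ADD_SELF recognises even constants whose half is in that range, so the value can be built by splatting half of it and adding the register to itself. A standalone rendering of the two predicates with a few sample values (helper names are illustrative):

#include <stdbool.h>
#include <stdio.h>

/* Fits the 5-bit signed splat immediate, -16 .. 15.  */
bool easy_vector_15 (int n)
{
  return n >= -16 && n <= 15;
}

/* Even and out of range itself, but half of it is in range, so it can be
   synthesized as "splat n/2, then add the result to itself".  */
bool easy_vector_15_add_self (int n)
{
  return !easy_vector_15 (n) && easy_vector_15 (n >> 1) && (n & 1) == 0;
}

int main (void)
{
  int tests[] = { 12, 20, 17, 40 };
  for (unsigned i = 0; i < sizeof tests / sizeof tests[0]; i++)
    printf ("%3d: easy=%d add_self=%d\n", tests[i],
            easy_vector_15 (tests[i]), easy_vector_15_add_self (tests[i]));
  return 0;   /* 12: 1 0   20: 0 1   17: 0 0   40: 0 0 */
}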
*/ +#define MAX_FIXED_MODE_SIZE GET_MODE_BITSIZE (TARGET_POWERPC64 ? TImode : DImode) + +/* Max number of bytes we can move from memory to memory + in one reasonably fast instruction. */ +#define MOVE_MAX (! TARGET_POWERPC64 ? 4 : 8) +#define MAX_MOVE_MAX 8 + +/* Nonzero if access to memory by bytes is no faster than for words. + Also nonzero if doing byte operations (specifically shifts) in registers + is undesirable. */ +#define SLOW_BYTE_ACCESS 1 + +/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD + will either zero-extend or sign-extend. The value of this macro should + be the code that says which one of the two operations is implicitly + done, UNKNOWN if none. */ +#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND + +/* Define if loading short immediate values into registers sign extends. */ +#define SHORT_IMMEDIATES_SIGN_EXTEND 1 + +/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits + is done just by pretending it is already truncated. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* The cntlzw and cntlzd instructions return 32 and 64 for input of zero. */ +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = GET_MODE_BITSIZE (MODE), 2) + +/* The CTZ patterns that are implemented in terms of CLZ return -1 for input of + zero. The hardware instructions added in Power9 and the sequences using + popcount return 32 or 64. */ +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + (TARGET_CTZ || TARGET_POPCNTD \ + ? ((VALUE) = GET_MODE_BITSIZE (MODE), 2) \ + : ((VALUE) = -1, 2)) + +/* Specify the machine mode that pointers have. + After generation of rtl, the compiler makes no further distinction + between pointers and any other objects of this machine mode. */ +extern unsigned rs6000_pmode; +#define Pmode ((machine_mode)rs6000_pmode) + +/* Supply definition of STACK_SIZE_MODE for allocate_dynamic_stack_space. */ +#define STACK_SIZE_MODE (TARGET_32BIT ? SImode : DImode) + +/* Mode of a function address in a call instruction (for indexing purposes). + Doesn't matter on RS/6000. */ +#define FUNCTION_MODE SImode + +/* Define this if addresses of constant functions + shouldn't be put through pseudo regs where they can be cse'd. + Desirable on machines where ordinary constants are expensive + but a CALL with constant address is cheap. */ +#define NO_FUNCTION_CSE 1 + +/* Define this to be nonzero if shift instructions ignore all but the low-order + few bits. + + The sle and sre instructions which allow SHIFT_COUNT_TRUNCATED + have been dropped from the PowerPC architecture. */ +#define SHIFT_COUNT_TRUNCATED 0 + +/* Adjust the length of an INSN. LENGTH is the currently-computed length and + should be adjusted to reflect any required changes. This macro is used when + there is some systematic length adjustment required that would be difficult + to express in the length attribute. */ + +/* #define ADJUST_INSN_LENGTH(X,LENGTH) */ + +/* Given a comparison code (EQ, NE, etc.) and the first operand of a + COMPARE, return the mode to be used for the comparison. For + floating-point, CCFPmode should be used. CCUNSmode should be used + for unsigned comparisons. CCEQmode should be used when we are + doing an inequality comparison on the result of a + comparison. CCmode should be used in all other cases. */ + +#define SELECT_CC_MODE(OP,X,Y) \ + (SCALAR_FLOAT_MODE_P (GET_MODE (X)) ? CCFPmode \ + : (OP) == GTU || (OP) == LTU || (OP) == GEU || (OP) == LEU ? CCUNSmode \ + : (((OP) == EQ || (OP) == NE) && COMPARISON_P (X) \ + ? 
CCEQmode : CCmode)) + +/* Can the condition code MODE be safely reversed? This is safe in + all cases on this port, because at present it doesn't use the + trapping FP comparisons (fcmpo). */ +#define REVERSIBLE_CC_MODE(MODE) 1 + +/* Given a condition code and a mode, return the inverse condition. */ +#define REVERSE_CONDITION(CODE, MODE) rs6000_reverse_condition (MODE, CODE) + + +/* Control the assembler format that we output. */ + +/* A C string constant describing how to begin a comment in the target + assembler language. The compiler assumes that the comment will end at + the end of the line. */ +#define ASM_COMMENT_START " #" + +/* Flag to say the TOC is initialized */ +extern int toc_initialized; + +/* Macro to output a special constant pool entry. Go to WIN if we output + it. Otherwise, it is written the usual way. + + On the RS/6000, toc entries are handled this way. */ + +#define ASM_OUTPUT_SPECIAL_POOL_ENTRY(FILE, X, MODE, ALIGN, LABELNO, WIN) \ +{ if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (X, MODE)) \ + { \ + output_toc (FILE, X, LABELNO, MODE); \ + goto WIN; \ + } \ +} + +#ifdef HAVE_GAS_WEAK +#define RS6000_WEAK 1 +#else +#define RS6000_WEAK 0 +#endif + +#if RS6000_WEAK +/* Used in lieu of ASM_WEAKEN_LABEL. */ +#define ASM_WEAKEN_DECL(FILE, DECL, NAME, VAL) \ + rs6000_asm_weaken_decl ((FILE), (DECL), (NAME), (VAL)) +#endif + +#if HAVE_GAS_WEAKREF +#define ASM_OUTPUT_WEAKREF(FILE, DECL, NAME, VALUE) \ + do \ + { \ + fputs ("\t.weakref\t", (FILE)); \ + RS6000_OUTPUT_BASENAME ((FILE), (NAME)); \ + fputs (", ", (FILE)); \ + RS6000_OUTPUT_BASENAME ((FILE), (VALUE)); \ + if ((DECL) && TREE_CODE (DECL) == FUNCTION_DECL \ + && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS) \ + { \ + fputs ("\n\t.weakref\t.", (FILE)); \ + RS6000_OUTPUT_BASENAME ((FILE), (NAME)); \ + fputs (", .", (FILE)); \ + RS6000_OUTPUT_BASENAME ((FILE), (VALUE)); \ + } \ + fputc ('\n', (FILE)); \ + } while (0) +#endif + +/* This implements the `alias' attribute. */ +#undef ASM_OUTPUT_DEF_FROM_DECLS +#define ASM_OUTPUT_DEF_FROM_DECLS(FILE, DECL, TARGET) \ + do \ + { \ + const char *alias = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \ + const char *name = IDENTIFIER_POINTER (TARGET); \ + if (TREE_CODE (DECL) == FUNCTION_DECL \ + && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS) \ + { \ + if (TREE_PUBLIC (DECL)) \ + { \ + if (!RS6000_WEAK || !DECL_WEAK (DECL)) \ + { \ + fputs ("\t.globl\t.", FILE); \ + RS6000_OUTPUT_BASENAME (FILE, alias); \ + putc ('\n', FILE); \ + } \ + } \ + else if (TARGET_XCOFF) \ + { \ + if (!RS6000_WEAK || !DECL_WEAK (DECL)) \ + { \ + fputs ("\t.lglobl\t.", FILE); \ + RS6000_OUTPUT_BASENAME (FILE, alias); \ + putc ('\n', FILE); \ + fputs ("\t.lglobl\t", FILE); \ + RS6000_OUTPUT_BASENAME (FILE, alias); \ + putc ('\n', FILE); \ + } \ + } \ + fputs ("\t.set\t.", FILE); \ + RS6000_OUTPUT_BASENAME (FILE, alias); \ + fputs (",.", FILE); \ + RS6000_OUTPUT_BASENAME (FILE, name); \ + fputc ('\n', FILE); \ + } \ + ASM_OUTPUT_DEF (FILE, alias, name); \ + } \ + while (0) + +#define TARGET_ASM_FILE_START rs6000_file_start + +/* Output to assembler file text saying following lines + may contain character constants, extra white space, comments, etc. */ + +#define ASM_APP_ON "" + +/* Output to assembler file text saying following lines + no longer contain unusual constructs. */ + +#define ASM_APP_OFF "" + +/* How to refer to registers in assembler output. + This sequence is indexed by compiler's hard-register-number (see above). */ + +extern char rs6000_reg_names[][8]; /* register names (0 vs. %r0). 
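The ASM_OUTPUT_WEAKREF definition above boils down to printing one .weakref directive, plus a second one for the function-descriptor "dot" symbol when the AIX ABI with dot-symbols is in use. A small sketch of the text it ends up producing (the helper name, and collapsing the DECL/ABI tests into two flags, are illustrative):

#include <stdbool.h>
#include <stdio.h>

/* Print what ASM_OUTPUT_WEAKREF emits for NAME weak-referencing VALUE.  */
void emit_weakref (FILE *f, const char *name, const char *value,
                   bool is_function, bool aix_dot_symbols)
{
  fprintf (f, "\t.weakref\t%s, %s", name, value);
  if (is_function && aix_dot_symbols)
    fprintf (f, "\n\t.weakref\t.%s, .%s", name, value);
  fputc ('\n', f);
}

int main (void)
{
  /* For a function "foo" weak-referencing "bar" under the AIX ABI this
     prints both the descriptor and the code-entry weakref:
        .weakref   foo, bar
        .weakref   .foo, .bar  */
  emit_weakref (stdout, "foo", "bar", true, true);
  return 0;
}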
*/ + +#define REGISTER_NAMES \ +{ \ + &rs6000_reg_names[ 0][0], /* r0 */ \ + &rs6000_reg_names[ 1][0], /* r1 */ \ + &rs6000_reg_names[ 2][0], /* r2 */ \ + &rs6000_reg_names[ 3][0], /* r3 */ \ + &rs6000_reg_names[ 4][0], /* r4 */ \ + &rs6000_reg_names[ 5][0], /* r5 */ \ + &rs6000_reg_names[ 6][0], /* r6 */ \ + &rs6000_reg_names[ 7][0], /* r7 */ \ + &rs6000_reg_names[ 8][0], /* r8 */ \ + &rs6000_reg_names[ 9][0], /* r9 */ \ + &rs6000_reg_names[10][0], /* r10 */ \ + &rs6000_reg_names[11][0], /* r11 */ \ + &rs6000_reg_names[12][0], /* r12 */ \ + &rs6000_reg_names[13][0], /* r13 */ \ + &rs6000_reg_names[14][0], /* r14 */ \ + &rs6000_reg_names[15][0], /* r15 */ \ + &rs6000_reg_names[16][0], /* r16 */ \ + &rs6000_reg_names[17][0], /* r17 */ \ + &rs6000_reg_names[18][0], /* r18 */ \ + &rs6000_reg_names[19][0], /* r19 */ \ + &rs6000_reg_names[20][0], /* r20 */ \ + &rs6000_reg_names[21][0], /* r21 */ \ + &rs6000_reg_names[22][0], /* r22 */ \ + &rs6000_reg_names[23][0], /* r23 */ \ + &rs6000_reg_names[24][0], /* r24 */ \ + &rs6000_reg_names[25][0], /* r25 */ \ + &rs6000_reg_names[26][0], /* r26 */ \ + &rs6000_reg_names[27][0], /* r27 */ \ + &rs6000_reg_names[28][0], /* r28 */ \ + &rs6000_reg_names[29][0], /* r29 */ \ + &rs6000_reg_names[30][0], /* r30 */ \ + &rs6000_reg_names[31][0], /* r31 */ \ + \ + &rs6000_reg_names[32][0], /* fr0 */ \ + &rs6000_reg_names[33][0], /* fr1 */ \ + &rs6000_reg_names[34][0], /* fr2 */ \ + &rs6000_reg_names[35][0], /* fr3 */ \ + &rs6000_reg_names[36][0], /* fr4 */ \ + &rs6000_reg_names[37][0], /* fr5 */ \ + &rs6000_reg_names[38][0], /* fr6 */ \ + &rs6000_reg_names[39][0], /* fr7 */ \ + &rs6000_reg_names[40][0], /* fr8 */ \ + &rs6000_reg_names[41][0], /* fr9 */ \ + &rs6000_reg_names[42][0], /* fr10 */ \ + &rs6000_reg_names[43][0], /* fr11 */ \ + &rs6000_reg_names[44][0], /* fr12 */ \ + &rs6000_reg_names[45][0], /* fr13 */ \ + &rs6000_reg_names[46][0], /* fr14 */ \ + &rs6000_reg_names[47][0], /* fr15 */ \ + &rs6000_reg_names[48][0], /* fr16 */ \ + &rs6000_reg_names[49][0], /* fr17 */ \ + &rs6000_reg_names[50][0], /* fr18 */ \ + &rs6000_reg_names[51][0], /* fr19 */ \ + &rs6000_reg_names[52][0], /* fr20 */ \ + &rs6000_reg_names[53][0], /* fr21 */ \ + &rs6000_reg_names[54][0], /* fr22 */ \ + &rs6000_reg_names[55][0], /* fr23 */ \ + &rs6000_reg_names[56][0], /* fr24 */ \ + &rs6000_reg_names[57][0], /* fr25 */ \ + &rs6000_reg_names[58][0], /* fr26 */ \ + &rs6000_reg_names[59][0], /* fr27 */ \ + &rs6000_reg_names[60][0], /* fr28 */ \ + &rs6000_reg_names[61][0], /* fr29 */ \ + &rs6000_reg_names[62][0], /* fr30 */ \ + &rs6000_reg_names[63][0], /* fr31 */ \ + \ + &rs6000_reg_names[64][0], /* was mq */ \ + &rs6000_reg_names[65][0], /* lr */ \ + &rs6000_reg_names[66][0], /* ctr */ \ + &rs6000_reg_names[67][0], /* ap */ \ + \ + &rs6000_reg_names[68][0], /* cr0 */ \ + &rs6000_reg_names[69][0], /* cr1 */ \ + &rs6000_reg_names[70][0], /* cr2 */ \ + &rs6000_reg_names[71][0], /* cr3 */ \ + &rs6000_reg_names[72][0], /* cr4 */ \ + &rs6000_reg_names[73][0], /* cr5 */ \ + &rs6000_reg_names[74][0], /* cr6 */ \ + &rs6000_reg_names[75][0], /* cr7 */ \ + \ + &rs6000_reg_names[76][0], /* ca */ \ + \ + &rs6000_reg_names[77][0], /* v0 */ \ + &rs6000_reg_names[78][0], /* v1 */ \ + &rs6000_reg_names[79][0], /* v2 */ \ + &rs6000_reg_names[80][0], /* v3 */ \ + &rs6000_reg_names[81][0], /* v4 */ \ + &rs6000_reg_names[82][0], /* v5 */ \ + &rs6000_reg_names[83][0], /* v6 */ \ + &rs6000_reg_names[84][0], /* v7 */ \ + &rs6000_reg_names[85][0], /* v8 */ \ + &rs6000_reg_names[86][0], /* v9 */ \ + 
&rs6000_reg_names[87][0], /* v10 */ \ + &rs6000_reg_names[88][0], /* v11 */ \ + &rs6000_reg_names[89][0], /* v12 */ \ + &rs6000_reg_names[90][0], /* v13 */ \ + &rs6000_reg_names[91][0], /* v14 */ \ + &rs6000_reg_names[92][0], /* v15 */ \ + &rs6000_reg_names[93][0], /* v16 */ \ + &rs6000_reg_names[94][0], /* v17 */ \ + &rs6000_reg_names[95][0], /* v18 */ \ + &rs6000_reg_names[96][0], /* v19 */ \ + &rs6000_reg_names[97][0], /* v20 */ \ + &rs6000_reg_names[98][0], /* v21 */ \ + &rs6000_reg_names[99][0], /* v22 */ \ + &rs6000_reg_names[100][0], /* v23 */ \ + &rs6000_reg_names[101][0], /* v24 */ \ + &rs6000_reg_names[102][0], /* v25 */ \ + &rs6000_reg_names[103][0], /* v26 */ \ + &rs6000_reg_names[104][0], /* v27 */ \ + &rs6000_reg_names[105][0], /* v28 */ \ + &rs6000_reg_names[106][0], /* v29 */ \ + &rs6000_reg_names[107][0], /* v30 */ \ + &rs6000_reg_names[108][0], /* v31 */ \ + &rs6000_reg_names[109][0], /* vrsave */ \ + &rs6000_reg_names[110][0], /* vscr */ \ + &rs6000_reg_names[111][0], /* spe_acc */ \ + &rs6000_reg_names[112][0], /* spefscr */ \ + &rs6000_reg_names[113][0], /* sfp */ \ + &rs6000_reg_names[114][0], /* tfhar */ \ + &rs6000_reg_names[115][0], /* tfiar */ \ + &rs6000_reg_names[116][0], /* texasr */ \ + \ + &rs6000_reg_names[117][0], /* SPE rh0. */ \ + &rs6000_reg_names[118][0], /* SPE rh1. */ \ + &rs6000_reg_names[119][0], /* SPE rh2. */ \ + &rs6000_reg_names[120][0], /* SPE rh3. */ \ + &rs6000_reg_names[121][0], /* SPE rh4. */ \ + &rs6000_reg_names[122][0], /* SPE rh5. */ \ + &rs6000_reg_names[123][0], /* SPE rh6. */ \ + &rs6000_reg_names[124][0], /* SPE rh7. */ \ + &rs6000_reg_names[125][0], /* SPE rh8. */ \ + &rs6000_reg_names[126][0], /* SPE rh9. */ \ + &rs6000_reg_names[127][0], /* SPE rh10. */ \ + &rs6000_reg_names[128][0], /* SPE rh11. */ \ + &rs6000_reg_names[129][0], /* SPE rh12. */ \ + &rs6000_reg_names[130][0], /* SPE rh13. */ \ + &rs6000_reg_names[131][0], /* SPE rh14. */ \ + &rs6000_reg_names[132][0], /* SPE rh15. */ \ + &rs6000_reg_names[133][0], /* SPE rh16. */ \ + &rs6000_reg_names[134][0], /* SPE rh17. */ \ + &rs6000_reg_names[135][0], /* SPE rh18. */ \ + &rs6000_reg_names[136][0], /* SPE rh19. */ \ + &rs6000_reg_names[137][0], /* SPE rh20. */ \ + &rs6000_reg_names[138][0], /* SPE rh21. */ \ + &rs6000_reg_names[139][0], /* SPE rh22. */ \ + &rs6000_reg_names[140][0], /* SPE rh22. */ \ + &rs6000_reg_names[141][0], /* SPE rh24. */ \ + &rs6000_reg_names[142][0], /* SPE rh25. */ \ + &rs6000_reg_names[143][0], /* SPE rh26. */ \ + &rs6000_reg_names[144][0], /* SPE rh27. */ \ + &rs6000_reg_names[145][0], /* SPE rh28. */ \ + &rs6000_reg_names[146][0], /* SPE rh29. */ \ + &rs6000_reg_names[147][0], /* SPE rh30. */ \ + &rs6000_reg_names[148][0], /* SPE rh31. */ \ +} + +/* Table of additional register names to use in user input. 
*/ + +#define ADDITIONAL_REGISTER_NAMES \ + {{"r0", 0}, {"r1", 1}, {"r2", 2}, {"r3", 3}, \ + {"r4", 4}, {"r5", 5}, {"r6", 6}, {"r7", 7}, \ + {"r8", 8}, {"r9", 9}, {"r10", 10}, {"r11", 11}, \ + {"r12", 12}, {"r13", 13}, {"r14", 14}, {"r15", 15}, \ + {"r16", 16}, {"r17", 17}, {"r18", 18}, {"r19", 19}, \ + {"r20", 20}, {"r21", 21}, {"r22", 22}, {"r23", 23}, \ + {"r24", 24}, {"r25", 25}, {"r26", 26}, {"r27", 27}, \ + {"r28", 28}, {"r29", 29}, {"r30", 30}, {"r31", 31}, \ + {"fr0", 32}, {"fr1", 33}, {"fr2", 34}, {"fr3", 35}, \ + {"fr4", 36}, {"fr5", 37}, {"fr6", 38}, {"fr7", 39}, \ + {"fr8", 40}, {"fr9", 41}, {"fr10", 42}, {"fr11", 43}, \ + {"fr12", 44}, {"fr13", 45}, {"fr14", 46}, {"fr15", 47}, \ + {"fr16", 48}, {"fr17", 49}, {"fr18", 50}, {"fr19", 51}, \ + {"fr20", 52}, {"fr21", 53}, {"fr22", 54}, {"fr23", 55}, \ + {"fr24", 56}, {"fr25", 57}, {"fr26", 58}, {"fr27", 59}, \ + {"fr28", 60}, {"fr29", 61}, {"fr30", 62}, {"fr31", 63}, \ + {"v0", 77}, {"v1", 78}, {"v2", 79}, {"v3", 80}, \ + {"v4", 81}, {"v5", 82}, {"v6", 83}, {"v7", 84}, \ + {"v8", 85}, {"v9", 86}, {"v10", 87}, {"v11", 88}, \ + {"v12", 89}, {"v13", 90}, {"v14", 91}, {"v15", 92}, \ + {"v16", 93}, {"v17", 94}, {"v18", 95}, {"v19", 96}, \ + {"v20", 97}, {"v21", 98}, {"v22", 99}, {"v23", 100}, \ + {"v24", 101},{"v25", 102},{"v26", 103},{"v27", 104}, \ + {"v28", 105},{"v29", 106},{"v30", 107},{"v31", 108}, \ + {"vrsave", 109}, {"vscr", 110}, \ + {"spe_acc", 111}, {"spefscr", 112}, \ + /* no additional names for: lr, ctr, ap */ \ + {"cr0", 68}, {"cr1", 69}, {"cr2", 70}, {"cr3", 71}, \ + {"cr4", 72}, {"cr5", 73}, {"cr6", 74}, {"cr7", 75}, \ + {"cc", 68}, {"sp", 1}, {"toc", 2}, \ + /* CA is only part of XER, but we do not model the other parts (yet). */ \ + {"xer", 76}, \ + /* VSX registers overlaid on top of FR, Altivec registers */ \ + {"vs0", 32}, {"vs1", 33}, {"vs2", 34}, {"vs3", 35}, \ + {"vs4", 36}, {"vs5", 37}, {"vs6", 38}, {"vs7", 39}, \ + {"vs8", 40}, {"vs9", 41}, {"vs10", 42}, {"vs11", 43}, \ + {"vs12", 44}, {"vs13", 45}, {"vs14", 46}, {"vs15", 47}, \ + {"vs16", 48}, {"vs17", 49}, {"vs18", 50}, {"vs19", 51}, \ + {"vs20", 52}, {"vs21", 53}, {"vs22", 54}, {"vs23", 55}, \ + {"vs24", 56}, {"vs25", 57}, {"vs26", 58}, {"vs27", 59}, \ + {"vs28", 60}, {"vs29", 61}, {"vs30", 62}, {"vs31", 63}, \ + {"vs32", 77}, {"vs33", 78}, {"vs34", 79}, {"vs35", 80}, \ + {"vs36", 81}, {"vs37", 82}, {"vs38", 83}, {"vs39", 84}, \ + {"vs40", 85}, {"vs41", 86}, {"vs42", 87}, {"vs43", 88}, \ + {"vs44", 89}, {"vs45", 90}, {"vs46", 91}, {"vs47", 92}, \ + {"vs48", 93}, {"vs49", 94}, {"vs50", 95}, {"vs51", 96}, \ + {"vs52", 97}, {"vs53", 98}, {"vs54", 99}, {"vs55", 100}, \ + {"vs56", 101},{"vs57", 102},{"vs58", 103},{"vs59", 104}, \ + {"vs60", 105},{"vs61", 106},{"vs62", 107},{"vs63", 108}, \ + /* Transactional Memory Facility (HTM) Registers. */ \ + {"tfhar", 114}, {"tfiar", 115}, {"texasr", 116}, \ + /* SPE high registers. */ \ + {"rh0", 117}, {"rh1", 118}, {"rh2", 119}, {"rh3", 120}, \ + {"rh4", 121}, {"rh5", 122}, {"rh6", 123}, {"rh7", 124}, \ + {"rh8", 125}, {"rh9", 126}, {"rh10", 127}, {"rh11", 128}, \ + {"rh12", 129}, {"rh13", 130}, {"rh14", 131}, {"rh15", 132}, \ + {"rh16", 133}, {"rh17", 134}, {"rh18", 135}, {"rh19", 136}, \ + {"rh20", 137}, {"rh21", 138}, {"rh22", 139}, {"rh23", 140}, \ + {"rh24", 141}, {"rh25", 142}, {"rh26", 143}, {"rh27", 144}, \ + {"rh28", 145}, {"rh29", 146}, {"rh30", 147}, {"rh31", 148}, \ +} + +/* This is how to output an element of a case-vector that is relative. 
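The vs0..vs63 entries in ADDITIONAL_REGISTER_NAMES above are not extra registers: the low 32 VSX registers overlay the FPRs (hard registers 32..63) and the high 32 overlay the AltiVec registers (hard registers 77..108). A small sketch of that mapping using the numbers from the table (the function name is illustrative):

/* Map a user-visible VSX register number 0..63 to the hard register
   number used in this file: vs0..vs31 are FPRs 32..63, vs32..vs63 are
   AltiVec registers 77..108.  Returns -1 for anything else.  */
int vsx_hard_regno (int vs)
{
  if (vs < 0 || vs > 63)
    return -1;
  return vs < 32 ? 32 + vs : 77 + (vs - 32);
}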
*/ + +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ + do { char buf[100]; \ + fputs ("\t.long ", FILE); \ + ASM_GENERATE_INTERNAL_LABEL (buf, "L", VALUE); \ + assemble_name (FILE, buf); \ + putc ('-', FILE); \ + ASM_GENERATE_INTERNAL_LABEL (buf, "L", REL); \ + assemble_name (FILE, buf); \ + putc ('\n', FILE); \ + } while (0) + +/* This is how to output an assembler line + that says to advance the location counter + to a multiple of 2**LOG bytes. */ + +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + if ((LOG) != 0) \ + fprintf (FILE, "\t.align %d\n", (LOG)) + +/* How to align the given loop. */ +#define LOOP_ALIGN(LABEL) rs6000_loop_align(LABEL) + +/* Alignment guaranteed by __builtin_malloc. */ +/* FIXME: 128-bit alignment is guaranteed by glibc for TARGET_64BIT. + However, specifying the stronger guarantee currently leads to + a regression in SPEC CPU2006 437.leslie3d. The stronger + guarantee should be implemented here once that's fixed. */ +#define MALLOC_ABI_ALIGNMENT (64) + +/* Pick up the return address upon entry to a procedure. Used for + dwarf2 unwind information. This also enables the table driven + mechanism. */ + +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LR_REGNO) +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (LR_REGNO) + +/* Describe how we implement __builtin_eh_return. */ +#define EH_RETURN_DATA_REGNO(N) ((N) < 4 ? (N) + 3 : INVALID_REGNUM) +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, 10) + +/* Print operand X (an rtx) in assembler syntax to file FILE. + CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. + For `%' followed by punctuation, CODE is the punctuation and X is null. */ + +#define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE) + +/* Define which CODE values are valid. */ + +#define PRINT_OPERAND_PUNCT_VALID_P(CODE) ((CODE) == '&') + +/* Print a memory address as an operand to reference that memory location. */ + +#define PRINT_OPERAND_ADDRESS(FILE, ADDR) print_operand_address (FILE, ADDR) + +/* For switching between functions with different target attributes. */ +#define SWITCHABLE_TARGET 1 + +/* uncomment for disabling the corresponding default options */ +/* #define MACHINE_no_sched_interblock */ +/* #define MACHINE_no_sched_speculative */ +/* #define MACHINE_no_sched_speculative_load */ + +/* General flags. */ +extern int frame_pointer_needed; + +/* Classification of the builtin functions as to which switches enable the + builtin, and what attributes it should have. We used to use the target + flags macros, but we've run out of bits, so we now map the options into new + settings used here. */ + +/* Builtin attributes. */ +#define RS6000_BTC_SPECIAL 0x00000000 /* Special function. */ +#define RS6000_BTC_UNARY 0x00000001 /* normal unary function. */ +#define RS6000_BTC_BINARY 0x00000002 /* normal binary function. */ +#define RS6000_BTC_TERNARY 0x00000003 /* normal ternary function. */ +#define RS6000_BTC_PREDICATE 0x00000004 /* predicate function. */ +#define RS6000_BTC_ABS 0x00000005 /* Altivec/VSX ABS function. */ +#define RS6000_BTC_EVSEL 0x00000006 /* SPE EVSEL function. */ +#define RS6000_BTC_DST 0x00000007 /* Altivec DST function. */ +#define RS6000_BTC_TYPE_MASK 0x0000000f /* Mask to isolate types */ + +#define RS6000_BTC_MISC 0x00000000 /* No special attributes. */ +#define RS6000_BTC_CONST 0x00000100 /* Neither uses, nor + modifies global state. */ +#define RS6000_BTC_PURE 0x00000200 /* reads global + state/mem and does + not modify global state. 
*/ +#define RS6000_BTC_FP 0x00000400 /* depends on rounding mode. */ +#define RS6000_BTC_ATTR_MASK 0x00000700 /* Mask of the attributes. */ + +/* Miscellaneous information. */ +#define RS6000_BTC_SPR 0x01000000 /* function references SPRs. */ +#define RS6000_BTC_VOID 0x02000000 /* function has no return value. */ +#define RS6000_BTC_CR 0x04000000 /* function references a CR. */ +#define RS6000_BTC_OVERLOADED 0x08000000 /* function is overloaded. */ +#define RS6000_BTC_MISC_MASK 0x1f000000 /* Mask of the misc info. */ + +/* Convenience macros to document the instruction type. */ +#define RS6000_BTC_MEM RS6000_BTC_MISC /* load/store touches mem. */ +#define RS6000_BTC_SAT RS6000_BTC_MISC /* saturate sets VSCR. */ + +/* Builtin targets. For now, we reuse the masks for those options that are in + target flags, and pick three random bits for SPE, paired and ldbl128 which + aren't in target_flags. */ +#define RS6000_BTM_ALWAYS 0 /* Always enabled. */ +#define RS6000_BTM_ALTIVEC MASK_ALTIVEC /* VMX/altivec vectors. */ +#define RS6000_BTM_CMPB MASK_CMPB /* ISA 2.05: compare bytes. */ +#define RS6000_BTM_VSX MASK_VSX /* VSX (vector/scalar). */ +#define RS6000_BTM_P8_VECTOR MASK_P8_VECTOR /* ISA 2.07 vector. */ +#define RS6000_BTM_P9_VECTOR MASK_P9_VECTOR /* ISA 3.0 vector. */ +#define RS6000_BTM_P9_MISC MASK_P9_MISC /* ISA 3.0 misc. non-vector */ +#define RS6000_BTM_CRYPTO MASK_CRYPTO /* crypto funcs. */ +#define RS6000_BTM_HTM MASK_HTM /* hardware TM funcs. */ +#define RS6000_BTM_SPE MASK_STRING /* E500 */ +#define RS6000_BTM_PAIRED MASK_MULHW /* 750CL paired insns. */ +#define RS6000_BTM_FRE MASK_POPCNTB /* FRE instruction. */ +#define RS6000_BTM_FRES MASK_PPC_GFXOPT /* FRES instruction. */ +#define RS6000_BTM_FRSQRTE MASK_PPC_GFXOPT /* FRSQRTE instruction. */ +#define RS6000_BTM_FRSQRTES MASK_POPCNTB /* FRSQRTES instruction. */ +#define RS6000_BTM_POPCNTD MASK_POPCNTD /* Target supports ISA 2.06. */ +#define RS6000_BTM_CELL MASK_FPRND /* Target is cell powerpc. */ +#define RS6000_BTM_DFP MASK_DFP /* Decimal floating point. */ +#define RS6000_BTM_HARD_FLOAT MASK_SOFT_FLOAT /* Hardware floating point. */ +#define RS6000_BTM_LDBL128 MASK_MULTIPLE /* 128-bit long double. */ +#define RS6000_BTM_64BIT MASK_64BIT /* 64-bit addressing. */ +#define RS6000_BTM_FLOAT128 MASK_FLOAT128_TYPE /* IEEE 128-bit float. */ + +#define RS6000_BTM_COMMON (RS6000_BTM_ALTIVEC \ + | RS6000_BTM_VSX \ + | RS6000_BTM_P8_VECTOR \ + | RS6000_BTM_P9_VECTOR \ + | RS6000_BTM_P9_MISC \ + | RS6000_BTM_MODULO \ + | RS6000_BTM_CRYPTO \ + | RS6000_BTM_FRE \ + | RS6000_BTM_FRES \ + | RS6000_BTM_FRSQRTE \ + | RS6000_BTM_FRSQRTES \ + | RS6000_BTM_HTM \ + | RS6000_BTM_POPCNTD \ + | RS6000_BTM_CELL \ + | RS6000_BTM_DFP \ + | RS6000_BTM_HARD_FLOAT \ + | RS6000_BTM_LDBL128 \ + | RS6000_BTM_FLOAT128) + +/* Define builtin enum index. 
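The RS6000_BTM_* values above are requirement masks: a builtin is usable only when every feature bit it asks for is present in the mask describing the current target, and RS6000_BTM_COMMON collects the bits the common overloaded builtins can ask for. The actual test lives in powerpcspe.c rather than in this header; the sketch below only illustrates the all-bits-present idea, with made-up mask values:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for a few RS6000_BTM_* requirement bits.  */
#define BTM_ALTIVEC    0x1u
#define BTM_VSX        0x2u
#define BTM_P8_VECTOR  0x4u

/* A builtin whose requirement mask is REQUIRED is usable only when the
   target provides every one of those bits.  */
bool builtin_available (unsigned required, unsigned target_mask)
{
  return (required & target_mask) == required;
}

int main (void)
{
  unsigned needs = BTM_ALTIVEC | BTM_VSX;
  printf ("power8-like target: %d\n",
          builtin_available (needs, BTM_ALTIVEC | BTM_VSX | BTM_P8_VECTOR)); /* 1 */
  printf ("altivec-only target: %d\n",
          builtin_available (needs, BTM_ALTIVEC));                           /* 0 */
  return 0;
}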
*/ + +#undef RS6000_BUILTIN_0 +#undef RS6000_BUILTIN_1 +#undef RS6000_BUILTIN_2 +#undef RS6000_BUILTIN_3 +#undef RS6000_BUILTIN_A +#undef RS6000_BUILTIN_D +#undef RS6000_BUILTIN_E +#undef RS6000_BUILTIN_H +#undef RS6000_BUILTIN_P +#undef RS6000_BUILTIN_Q +#undef RS6000_BUILTIN_S +#undef RS6000_BUILTIN_X + +#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) ENUM, +#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) ENUM, +#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) ENUM, +#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) ENUM, +#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) ENUM, +#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) ENUM, +#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) ENUM, +#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) ENUM, +#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) ENUM, +#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) ENUM, +#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) ENUM, +#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) ENUM, + +enum rs6000_builtins +{ +#include "powerpcspe-builtin.def" + + RS6000_BUILTIN_COUNT +}; + +#undef RS6000_BUILTIN_0 +#undef RS6000_BUILTIN_1 +#undef RS6000_BUILTIN_2 +#undef RS6000_BUILTIN_3 +#undef RS6000_BUILTIN_A +#undef RS6000_BUILTIN_D +#undef RS6000_BUILTIN_E +#undef RS6000_BUILTIN_H +#undef RS6000_BUILTIN_P +#undef RS6000_BUILTIN_Q +#undef RS6000_BUILTIN_S +#undef RS6000_BUILTIN_X + +enum rs6000_builtin_type_index +{ + RS6000_BTI_NOT_OPAQUE, + RS6000_BTI_opaque_V2SI, + RS6000_BTI_opaque_V2SF, + RS6000_BTI_opaque_p_V2SI, + RS6000_BTI_opaque_V4SI, + RS6000_BTI_V16QI, + RS6000_BTI_V1TI, + RS6000_BTI_V2SI, + RS6000_BTI_V2SF, + RS6000_BTI_V2DI, + RS6000_BTI_V2DF, + RS6000_BTI_V4HI, + RS6000_BTI_V4SI, + RS6000_BTI_V4SF, + RS6000_BTI_V8HI, + RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_char, /* __bool char */ + RS6000_BTI_bool_short, /* __bool short */ + RS6000_BTI_bool_int, /* __bool int */ + RS6000_BTI_bool_long, /* __bool long */ + RS6000_BTI_pixel, /* __pixel */ + RS6000_BTI_bool_V16QI, /* __vector __bool char */ + RS6000_BTI_bool_V8HI, /* __vector __bool short */ + RS6000_BTI_bool_V4SI, /* __vector __bool int */ + RS6000_BTI_bool_V2DI, /* __vector __bool long */ + RS6000_BTI_pixel_V8HI, /* __vector __pixel */ + RS6000_BTI_long, /* long_integer_type_node */ + RS6000_BTI_unsigned_long, /* long_unsigned_type_node */ + RS6000_BTI_long_long, /* long_long_integer_type_node */ + RS6000_BTI_unsigned_long_long, /* long_long_unsigned_type_node */ + RS6000_BTI_INTQI, /* intQI_type_node */ + RS6000_BTI_UINTQI, /* unsigned_intQI_type_node */ + RS6000_BTI_INTHI, /* intHI_type_node */ + RS6000_BTI_UINTHI, /* unsigned_intHI_type_node */ + RS6000_BTI_INTSI, /* intSI_type_node */ + RS6000_BTI_UINTSI, /* unsigned_intSI_type_node */ + RS6000_BTI_INTDI, /* intDI_type_node */ + RS6000_BTI_UINTDI, /* unsigned_intDI_type_node */ + RS6000_BTI_INTTI, /* intTI_type_node */ + RS6000_BTI_UINTTI, /* unsigned_intTI_type_node */ + RS6000_BTI_float, /* float_type_node */ + RS6000_BTI_double, /* double_type_node */ + RS6000_BTI_long_double, /* long_double_type_node */ + RS6000_BTI_dfloat64, /* dfloat64_type_node */ + RS6000_BTI_dfloat128, /* dfloat128_type_node */ + RS6000_BTI_void, /* void_type_node */ + RS6000_BTI_ieee128_float, /* ieee 128-bit floating point */ + RS6000_BTI_ibm128_float, /* IBM 128-bit floating point */ + RS6000_BTI_const_str, /* pointer to 
const char * */ + RS6000_BTI_MAX +}; + + +#define opaque_V2SI_type_node (rs6000_builtin_types[RS6000_BTI_opaque_V2SI]) +#define opaque_V2SF_type_node (rs6000_builtin_types[RS6000_BTI_opaque_V2SF]) +#define opaque_p_V2SI_type_node (rs6000_builtin_types[RS6000_BTI_opaque_p_V2SI]) +#define opaque_V4SI_type_node (rs6000_builtin_types[RS6000_BTI_opaque_V4SI]) +#define V16QI_type_node (rs6000_builtin_types[RS6000_BTI_V16QI]) +#define V1TI_type_node (rs6000_builtin_types[RS6000_BTI_V1TI]) +#define V2DI_type_node (rs6000_builtin_types[RS6000_BTI_V2DI]) +#define V2DF_type_node (rs6000_builtin_types[RS6000_BTI_V2DF]) +#define V2SI_type_node (rs6000_builtin_types[RS6000_BTI_V2SI]) +#define V2SF_type_node (rs6000_builtin_types[RS6000_BTI_V2SF]) +#define V4HI_type_node (rs6000_builtin_types[RS6000_BTI_V4HI]) +#define V4SI_type_node (rs6000_builtin_types[RS6000_BTI_V4SI]) +#define V4SF_type_node (rs6000_builtin_types[RS6000_BTI_V4SF]) +#define V8HI_type_node (rs6000_builtin_types[RS6000_BTI_V8HI]) +#define unsigned_V16QI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V16QI]) +#define unsigned_V1TI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V1TI]) +#define unsigned_V8HI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V8HI]) +#define unsigned_V4SI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V4SI]) +#define unsigned_V2DI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V2DI]) +#define bool_char_type_node (rs6000_builtin_types[RS6000_BTI_bool_char]) +#define bool_short_type_node (rs6000_builtin_types[RS6000_BTI_bool_short]) +#define bool_int_type_node (rs6000_builtin_types[RS6000_BTI_bool_int]) +#define bool_long_type_node (rs6000_builtin_types[RS6000_BTI_bool_long]) +#define pixel_type_node (rs6000_builtin_types[RS6000_BTI_pixel]) +#define bool_V16QI_type_node (rs6000_builtin_types[RS6000_BTI_bool_V16QI]) +#define bool_V8HI_type_node (rs6000_builtin_types[RS6000_BTI_bool_V8HI]) +#define bool_V4SI_type_node (rs6000_builtin_types[RS6000_BTI_bool_V4SI]) +#define bool_V2DI_type_node (rs6000_builtin_types[RS6000_BTI_bool_V2DI]) +#define pixel_V8HI_type_node (rs6000_builtin_types[RS6000_BTI_pixel_V8HI]) + +#define long_long_integer_type_internal_node (rs6000_builtin_types[RS6000_BTI_long_long]) +#define long_long_unsigned_type_internal_node (rs6000_builtin_types[RS6000_BTI_unsigned_long_long]) +#define long_integer_type_internal_node (rs6000_builtin_types[RS6000_BTI_long]) +#define long_unsigned_type_internal_node (rs6000_builtin_types[RS6000_BTI_unsigned_long]) +#define intQI_type_internal_node (rs6000_builtin_types[RS6000_BTI_INTQI]) +#define uintQI_type_internal_node (rs6000_builtin_types[RS6000_BTI_UINTQI]) +#define intHI_type_internal_node (rs6000_builtin_types[RS6000_BTI_INTHI]) +#define uintHI_type_internal_node (rs6000_builtin_types[RS6000_BTI_UINTHI]) +#define intSI_type_internal_node (rs6000_builtin_types[RS6000_BTI_INTSI]) +#define uintSI_type_internal_node (rs6000_builtin_types[RS6000_BTI_UINTSI]) +#define intDI_type_internal_node (rs6000_builtin_types[RS6000_BTI_INTDI]) +#define uintDI_type_internal_node (rs6000_builtin_types[RS6000_BTI_UINTDI]) +#define intTI_type_internal_node (rs6000_builtin_types[RS6000_BTI_INTTI]) +#define uintTI_type_internal_node (rs6000_builtin_types[RS6000_BTI_UINTTI]) +#define float_type_internal_node (rs6000_builtin_types[RS6000_BTI_float]) +#define double_type_internal_node (rs6000_builtin_types[RS6000_BTI_double]) +#define long_double_type_internal_node (rs6000_builtin_types[RS6000_BTI_long_double]) +#define dfloat64_type_internal_node 
(rs6000_builtin_types[RS6000_BTI_dfloat64]) +#define dfloat128_type_internal_node (rs6000_builtin_types[RS6000_BTI_dfloat128]) +#define void_type_internal_node (rs6000_builtin_types[RS6000_BTI_void]) +#define ieee128_float_type_node (rs6000_builtin_types[RS6000_BTI_ieee128_float]) +#define ibm128_float_type_node (rs6000_builtin_types[RS6000_BTI_ibm128_float]) +#define const_str_type_node (rs6000_builtin_types[RS6000_BTI_const_str]) + +extern GTY(()) tree rs6000_builtin_types[RS6000_BTI_MAX]; +extern GTY(()) tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT]; + +#define TARGET_SUPPORTS_WIDE_INT 1 + +#if (GCC_VERSION >= 3000) +#pragma GCC poison TARGET_FLOAT128 OPTION_MASK_FLOAT128 MASK_FLOAT128 +#endif diff --git a/gcc/config/powerpcspe/powerpcspe.md b/gcc/config/powerpcspe/powerpcspe.md new file mode 100644 index 000000000000..799d786edfed --- /dev/null +++ b/gcc/config/powerpcspe/powerpcspe.md @@ -0,0 +1,14770 @@ +;; Machine description for IBM RISC System 6000 (POWER) for GNU C compiler +;; Copyright (C) 1990-2017 Free Software Foundation, Inc. +;; Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;;- See file "rtl.def" for documentation on define_insn, match_*, et. al. 
+ +;; +;; REGNOS +;; + +(define_constants + [(FIRST_GPR_REGNO 0) + (STACK_POINTER_REGNUM 1) + (TOC_REGNUM 2) + (STATIC_CHAIN_REGNUM 11) + (HARD_FRAME_POINTER_REGNUM 31) + (LAST_GPR_REGNO 31) + (FIRST_FPR_REGNO 32) + (LAST_FPR_REGNO 63) + (LR_REGNO 65) + (CTR_REGNO 66) + (ARG_POINTER_REGNUM 67) + (CR0_REGNO 68) + (CR1_REGNO 69) + (CR2_REGNO 70) + (CR3_REGNO 71) + (CR4_REGNO 72) + (CR5_REGNO 73) + (CR6_REGNO 74) + (CR7_REGNO 75) + (MAX_CR_REGNO 75) + (CA_REGNO 76) + (FIRST_ALTIVEC_REGNO 77) + (LAST_ALTIVEC_REGNO 108) + (VRSAVE_REGNO 109) + (VSCR_REGNO 110) + (SPE_ACC_REGNO 111) + (SPEFSCR_REGNO 112) + (FRAME_POINTER_REGNUM 113) + (TFHAR_REGNO 114) + (TFIAR_REGNO 115) + (TEXASR_REGNO 116) + (FIRST_SPE_HIGH_REGNO 117) + (LAST_SPE_HIGH_REGNO 148) + ]) + +;; +;; UNSPEC usage +;; + +(define_c_enum "unspec" + [UNSPEC_FRSP ; frsp for POWER machines + UNSPEC_PROBE_STACK ; probe stack memory reference + UNSPEC_TOCPTR ; address of a word pointing to the TOC + UNSPEC_TOC ; address of the TOC (more-or-less) + UNSPEC_TOCSLOT ; offset from r1 of toc pointer save slot + UNSPEC_MOVSI_GOT + UNSPEC_MV_CR_OV ; move_from_CR_ov_bit + UNSPEC_FCTIWZ + UNSPEC_FRIM + UNSPEC_FRIN + UNSPEC_FRIP + UNSPEC_FRIZ + UNSPEC_XSRDPI + UNSPEC_LD_MPIC ; load_macho_picbase + UNSPEC_RELD_MPIC ; re-load_macho_picbase + UNSPEC_MPIC_CORRECT ; macho_correct_pic + UNSPEC_TLSGD + UNSPEC_TLSLD + UNSPEC_MOVESI_FROM_CR + UNSPEC_MOVESI_TO_CR + UNSPEC_TLSDTPREL + UNSPEC_TLSDTPRELHA + UNSPEC_TLSDTPRELLO + UNSPEC_TLSGOTDTPREL + UNSPEC_TLSTPREL + UNSPEC_TLSTPRELHA + UNSPEC_TLSTPRELLO + UNSPEC_TLSGOTTPREL + UNSPEC_TLSTLS + UNSPEC_FIX_TRUNC_TF ; fadd, rounding towards zero + UNSPEC_MV_CR_GT ; move_from_CR_gt_bit + UNSPEC_STFIWX + UNSPEC_POPCNTB + UNSPEC_FRES + UNSPEC_SP_SET + UNSPEC_SP_TEST + UNSPEC_SYNC + UNSPEC_LWSYNC + UNSPEC_SYNC_OP + UNSPEC_ATOMIC + UNSPEC_CMPXCHG + UNSPEC_XCHG + UNSPEC_AND + UNSPEC_DLMZB + UNSPEC_DLMZB_CR + UNSPEC_DLMZB_STRLEN + UNSPEC_RSQRT + UNSPEC_TOCREL + UNSPEC_MACHOPIC_OFFSET + UNSPEC_BPERM + UNSPEC_COPYSIGN + UNSPEC_PARITY + UNSPEC_CMPB + UNSPEC_FCTIW + UNSPEC_FCTID + UNSPEC_LFIWAX + UNSPEC_LFIWZX + UNSPEC_FCTIWUZ + UNSPEC_NOP + UNSPEC_GRP_END_NOP + UNSPEC_P8V_FMRGOW + UNSPEC_P8V_MTVSRWZ + UNSPEC_P8V_RELOAD_FROM_GPR + UNSPEC_P8V_MTVSRD + UNSPEC_P8V_XXPERMDI + UNSPEC_P8V_RELOAD_FROM_VSX + UNSPEC_ADDG6S + UNSPEC_CDTBCD + UNSPEC_CBCDTD + UNSPEC_DIVE + UNSPEC_DIVEO + UNSPEC_DIVEU + UNSPEC_DIVEUO + UNSPEC_UNPACK_128BIT + UNSPEC_PACK_128BIT + UNSPEC_LSQ + UNSPEC_FUSION_GPR + UNSPEC_STACK_CHECK + UNSPEC_FUSION_P9 + UNSPEC_FUSION_ADDIS + UNSPEC_ROUND_TO_ODD + UNSPEC_SIGNBIT + UNSPEC_SF_FROM_SI + UNSPEC_SI_FROM_SF + ]) + +;; +;; UNSPEC_VOLATILE usage +;; + +(define_c_enum "unspecv" + [UNSPECV_BLOCK + UNSPECV_LL ; load-locked + UNSPECV_SC ; store-conditional + UNSPECV_PROBE_STACK_RANGE ; probe range of stack addresses + UNSPECV_EH_RR ; eh_reg_restore + UNSPECV_ISYNC ; isync instruction + UNSPECV_MFTB ; move from time base + UNSPECV_NLGR ; non-local goto receiver + UNSPECV_MFFS ; Move from FPSCR + UNSPECV_MTFSF ; Move to FPSCR Fields + UNSPECV_SPLIT_STACK_RETURN ; A camouflaged return + ]) + + +;; Define an insn type attribute. This is used in function unit delay +;; computations. 
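The REGNOS block above pins down the hard register layout the rest of this file relies on, including the 32 SPE "high" registers (117..148), which model the upper halves of the e500's 64-bit GPRs (inferred here from the rh0..rh31 naming). A short sketch of how a register number decodes under that layout (the function name is illustrative):

#include <stdio.h>

/* Decode a hard register number according to the REGNOS block above.  */
const char *hard_regno_class (int regno)
{
  if (regno >= 0 && regno <= 31)    return "GPR";
  if (regno >= 32 && regno <= 63)   return "FPR";
  if (regno == 65)                  return "LR";
  if (regno == 66)                  return "CTR";
  if (regno >= 68 && regno <= 75)   return "CR field";
  if (regno == 76)                  return "CA (carry)";
  if (regno >= 77 && regno <= 108)  return "AltiVec";
  if (regno >= 117 && regno <= 148) return "SPE high half of a GPR";
  return "other special register";
}

int main (void)
{
  /* rh5, the upper half of r5 on e500, is FIRST_SPE_HIGH_REGNO + 5.  */
  printf ("%d -> %s\n", 117 + 5, hard_regno_class (117 + 5));
  return 0;
}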
+(define_attr "type" + "integer,two,three, + add,logical,shift,insert, + mul,halfmul,div, + exts,cntlz,popcnt,isel, + load,store,fpload,fpstore,vecload,vecstore, + cmp, + branch,jmpreg,mfjmpr,mtjmpr,trap,isync,sync,load_l,store_c, + cr_logical,delayed_cr,mfcr,mfcrf,mtcr, + fpcompare,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt, + brinc, + vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm, + vecfloat,vecfdiv,vecdouble,mffgpr,mftgpr,crypto, + veclogical,veccmpfx,vecexts,vecmove, + htm,htmsimple,dfp" + (const_string "integer")) + +;; What data size does this instruction work on? +;; This is used for insert, mul and others as necessary. +(define_attr "size" "8,16,32,64,128" (const_string "32")) + +;; Is this instruction record form ("dot", signed compare to 0, writing CR0)? +;; This is used for add, logical, shift, exts, mul. +(define_attr "dot" "no,yes" (const_string "no")) + +;; Does this instruction sign-extend its result? +;; This is used for load insns. +(define_attr "sign_extend" "no,yes" (const_string "no")) + +;; Does this instruction use indexed (that is, reg+reg) addressing? +;; This is used for load and store insns. If operand 0 or 1 is a MEM +;; it is automatically set based on that. If a load or store instruction +;; has fewer than two operands it needs to set this attribute manually +;; or the compiler will crash. +(define_attr "indexed" "no,yes" + (if_then_else (ior (match_operand 0 "indexed_address_mem") + (match_operand 1 "indexed_address_mem")) + (const_string "yes") + (const_string "no"))) + +;; Does this instruction use update addressing? +;; This is used for load and store insns. See the comments for "indexed". +(define_attr "update" "no,yes" + (if_then_else (ior (match_operand 0 "update_address_mem") + (match_operand 1 "update_address_mem")) + (const_string "yes") + (const_string "no"))) + +;; Is this instruction using operands[2] as shift amount, and can that be a +;; register? +;; This is used for shift insns. +(define_attr "maybe_var_shift" "no,yes" (const_string "no")) + +;; Is this instruction using a shift amount from a register? +;; This is used for shift insns. +(define_attr "var_shift" "no,yes" + (if_then_else (and (eq_attr "type" "shift") + (eq_attr "maybe_var_shift" "yes")) + (if_then_else (match_operand 2 "gpc_reg_operand") + (const_string "yes") + (const_string "no")) + (const_string "no"))) + +;; Is copying of this instruction disallowed? +(define_attr "cannot_copy" "no,yes" (const_string "no")) + +;; Define floating point instruction sub-types for use with Xfpu.md +(define_attr "fp_type" "fp_default,fp_addsub_s,fp_addsub_d,fp_mul_s,fp_mul_d,fp_div_s,fp_div_d,fp_maddsub_s,fp_maddsub_d,fp_sqrt_s,fp_sqrt_d" (const_string "fp_default")) + +;; Length (in bytes). +; '(pc)' in the following doesn't include the instruction itself; it is +; calculated as if the instruction had zero size. +(define_attr "length" "" + (if_then_else (eq_attr "type" "branch") + (if_then_else (and (ge (minus (match_dup 0) (pc)) + (const_int -32768)) + (lt (minus (match_dup 0) (pc)) + (const_int 32764))) + (const_int 4) + (const_int 8)) + (const_int 4))) + +;; Processor type -- this attribute must exactly match the processor_type +;; enumeration in rs6000-opts.h. 
+(define_attr "cpu" + "ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630, + ppc750,ppc7400,ppc7450, + ppc403,ppc405,ppc440,ppc476, + ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500, + power4,power5,power6,power7,power8,power9, + rs64a,mpccore,cell,ppca2,titan" + (const (symbol_ref "rs6000_cpu_attr"))) + + +;; If this instruction is microcoded on the CELL processor +; The default for load extended, the recorded instructions and rotate/shifts by a variable is always microcoded +(define_attr "cell_micro" "not,conditional,always" + (if_then_else (ior (and (eq_attr "type" "shift,exts,mul") + (eq_attr "dot" "yes")) + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "yes")) + (and (eq_attr "type" "shift") + (eq_attr "var_shift" "yes"))) + (const_string "always") + (const_string "not"))) + +(automata_option "ndfa") + +(include "rs64.md") +(include "mpc.md") +(include "40x.md") +(include "440.md") +(include "476.md") +(include "601.md") +(include "603.md") +(include "6xx.md") +(include "7xx.md") +(include "7450.md") +(include "8540.md") +(include "e300c2c3.md") +(include "e500mc.md") +(include "e500mc64.md") +(include "e5500.md") +(include "e6500.md") +(include "power4.md") +(include "power5.md") +(include "power6.md") +(include "power7.md") +(include "power8.md") +(include "power9.md") +(include "cell.md") +(include "xfpu.md") +(include "a2.md") +(include "titan.md") + +(include "predicates.md") +(include "constraints.md") + +(include "darwin.md") + + +;; Mode iterators + +; This mode iterator allows :GPR to be used to indicate the allowable size +; of whole values in GPRs. +(define_mode_iterator GPR [SI (DI "TARGET_POWERPC64")]) + +; Any supported integer mode. +(define_mode_iterator INT [QI HI SI DI TI PTI]) + +; Any supported integer mode that fits in one register. +(define_mode_iterator INT1 [QI HI SI (DI "TARGET_POWERPC64")]) + +; Integer modes supported in VSX registers with ISA 3.0 instructions +(define_mode_iterator INT_ISA3 [QI HI SI DI]) + +; Everything we can extend QImode to. +(define_mode_iterator EXTQI [SI (DI "TARGET_POWERPC64")]) + +; Everything we can extend HImode to. +(define_mode_iterator EXTHI [SI (DI "TARGET_POWERPC64")]) + +; Everything we can extend SImode to. +(define_mode_iterator EXTSI [(DI "TARGET_POWERPC64")]) + +; QImode or HImode for small integer moves and small atomic ops +(define_mode_iterator QHI [QI HI]) + +; QImode, HImode, SImode for fused ops only for GPR loads +(define_mode_iterator QHSI [QI HI SI]) + +; HImode or SImode for sign extended fusion ops +(define_mode_iterator HSI [HI SI]) + +; SImode or DImode, even if DImode doesn't fit in GPRs. +(define_mode_iterator SDI [SI DI]) + +; Types that can be fused with an ADDIS instruction to load or store a GPR +; register that has reg+offset addressing. +(define_mode_iterator GPR_FUSION [QI + HI + SI + (DI "TARGET_POWERPC64") + SF + (DF "TARGET_POWERPC64")]) + +; Types that can be fused with an ADDIS instruction to load or store a FPR +; register that has reg+offset addressing. +(define_mode_iterator FPR_FUSION [DI SF DF]) + +; The size of a pointer. Also, the size of the value that a record-condition +; (one with a '.') will compare; and the size used for arithmetic carries. 
+(define_mode_iterator P [(SI "TARGET_32BIT") (DI "TARGET_64BIT")]) + +; Iterator to add PTImode along with TImode (TImode can go in VSX registers, +; PTImode is GPR only) +(define_mode_iterator TI2 [TI PTI]) + +; Any hardware-supported floating-point mode +(define_mode_iterator FP [ + (SF "TARGET_HARD_FLOAT + && ((TARGET_FPRS && TARGET_SINGLE_FLOAT) || TARGET_E500_SINGLE)") + (DF "TARGET_HARD_FLOAT + && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)") + (TF "TARGET_HARD_FLOAT + && (TARGET_FPRS || TARGET_E500_DOUBLE) + && TARGET_LONG_DOUBLE_128") + (IF "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128") + (KF "TARGET_FLOAT128_TYPE") + (DD "TARGET_DFP") + (TD "TARGET_DFP")]) + +; Any fma capable floating-point mode. +(define_mode_iterator FMA_F [ + (SF "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT") + (DF "(TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) + || VECTOR_UNIT_VSX_P (DFmode)") + (V2SF "TARGET_PAIRED_FLOAT") + (V4SF "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)") + (V2DF "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V2DFmode)") + (KF "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (KFmode)") + (TF "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (TFmode)") + ]) + +; Floating point move iterators to combine binary and decimal moves +(define_mode_iterator FMOVE32 [SF SD]) +(define_mode_iterator FMOVE64 [DF DD]) +(define_mode_iterator FMOVE64X [DI DF DD]) +(define_mode_iterator FMOVE128 [(TF "TARGET_LONG_DOUBLE_128") + (IF "FLOAT128_IBM_P (IFmode)") + (TD "TARGET_HARD_FLOAT && TARGET_FPRS")]) + +(define_mode_iterator FMOVE128_FPR [(TF "FLOAT128_2REG_P (TFmode)") + (IF "FLOAT128_2REG_P (IFmode)") + (TD "TARGET_HARD_FLOAT && TARGET_FPRS")]) + +; Iterators for 128 bit types for direct move +(define_mode_iterator FMOVE128_GPR [(TI "TARGET_VSX_TIMODE") + (V16QI "") + (V8HI "") + (V4SI "") + (V4SF "") + (V2DI "") + (V2DF "") + (V1TI "") + (KF "FLOAT128_VECTOR_P (KFmode)") + (TF "FLOAT128_VECTOR_P (TFmode)")]) + +; Iterator for 128-bit VSX types for pack/unpack +(define_mode_iterator FMOVE128_VSX [V1TI KF]) + +; Whether a floating point move is ok, don't allow SD without hardware FP +(define_mode_attr fmove_ok [(SF "") + (DF "") + (SD "TARGET_HARD_FLOAT && TARGET_FPRS") + (DD "")]) + +; Convert REAL_VALUE to the appropriate bits +(define_mode_attr real_value_to_target [(SF "REAL_VALUE_TO_TARGET_SINGLE") + (DF "REAL_VALUE_TO_TARGET_DOUBLE") + (SD "REAL_VALUE_TO_TARGET_DECIMAL32") + (DD "REAL_VALUE_TO_TARGET_DECIMAL64")]) + +; Whether 0.0 has an all-zero bit pattern +(define_mode_attr zero_fp [(SF "j") + (DF "j") + (TF "j") + (IF "j") + (KF "j") + (SD "wn") + (DD "wn") + (TD "wn")]) + +; Definitions for 64-bit VSX +(define_mode_attr f64_vsx [(DF "ws") (DD "wn")]) + +; Definitions for 64-bit direct move +(define_mode_attr f64_dm [(DF "wk") (DD "wh")]) + +; Definitions for 64-bit use of altivec registers +(define_mode_attr f64_av [(DF "wv") (DD "wn")]) + +; Definitions for 64-bit access to ISA 3.0 (power9) vector +(define_mode_attr f64_p9 [(DF "wb") (DD "wn")]) + +; These modes do not fit in integer registers in 32-bit mode. +; but on e500v2, the gpr are 64 bit registers +(define_mode_iterator DIFD [DI (DF "!TARGET_E500_DOUBLE") DD]) + +; Iterator for reciprocal estimate instructions +(define_mode_iterator RECIPF [SF DF V4SF V2DF]) + +; Iterator for just SF/DF +(define_mode_iterator SFDF [SF DF]) + +; Like SFDF, but a different name to match conditional move where the +; comparison operands may be a different mode than the input operands. 
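The mode conditions in the FP iterator above are dense; the SF entry, for example, says that SFmode floating-point patterns are enabled either by ordinary hard float in FPRs with single-precision support or by the e500 SPE single-precision path. The same condition written out as a standalone predicate (the parameter names are illustrative stand-ins for the target flags):

#include <stdbool.h>

/* The SF arm of the FP iterator: TARGET_HARD_FLOAT
   && ((TARGET_FPRS && TARGET_SINGLE_FLOAT) || TARGET_E500_SINGLE).  */
bool sf_fp_patterns_enabled (bool hard_float, bool fprs,
                             bool single_float, bool e500_single)
{
  return hard_float && ((fprs && single_float) || e500_single);
}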
+(define_mode_iterator SFDF2 [SF DF]) + +; Iterator for 128-bit floating point that uses the IBM double-double format +(define_mode_iterator IBM128 [(IF "FLOAT128_IBM_P (IFmode)") + (TF "FLOAT128_IBM_P (TFmode)")]) + +; Iterator for 128-bit floating point that uses IEEE 128-bit float +(define_mode_iterator IEEE128 [(KF "FLOAT128_IEEE_P (KFmode)") + (TF "FLOAT128_IEEE_P (TFmode)")]) + +; Iterator for 128-bit floating point +(define_mode_iterator FLOAT128 [(KF "TARGET_FLOAT128_TYPE") + (IF "TARGET_FLOAT128_TYPE") + (TF "TARGET_LONG_DOUBLE_128")]) + +; Iterator for signbit on 64-bit machines with direct move +(define_mode_iterator SIGNBIT [(KF "FLOAT128_VECTOR_P (KFmode)") + (TF "FLOAT128_VECTOR_P (TFmode)")]) + +; Iterator for ISA 3.0 supported floating point types +(define_mode_iterator FP_ISA3 [SF DF]) + +; SF/DF suffix for traditional floating instructions +(define_mode_attr Ftrad [(SF "s") (DF "")]) + +; SF/DF suffix for VSX instructions +(define_mode_attr Fvsx [(SF "sp") (DF "dp")]) + +; SF/DF constraint for arithmetic on traditional floating point registers +(define_mode_attr Ff [(SF "f") (DF "d") (DI "d")]) + +; SF/DF constraint for arithmetic on VSX registers using instructions added in +; ISA 2.06 (power7). This includes instructions that normally target DF mode, +; but are used on SFmode, since internally SFmode values are kept in the DFmode +; format. +(define_mode_attr Fv [(SF "ww") (DF "ws") (DI "wi")]) + +; SF/DF constraint for arithmetic on VSX registers. This is intended to be +; used for DFmode instructions added in ISA 2.06 (power7) and SFmode +; instructions added in ISA 2.07 (power8) +(define_mode_attr Fv2 [(SF "wy") (DF "ws") (DI "wi")]) + +; SF/DF constraint for arithmetic on altivec registers +(define_mode_attr Fa [(SF "wu") (DF "wv")]) + +; s/d suffix for things like fp_addsub_s/fp_addsub_d +(define_mode_attr Fs [(SF "s") (DF "d")]) + +; FRE/FRES support +(define_mode_attr Ffre [(SF "fres") (DF "fre")]) +(define_mode_attr FFRE [(SF "FRES") (DF "FRE")]) + +; Conditional returns. +(define_code_iterator any_return [return simple_return]) +(define_code_attr return_pred [(return "direct_return ()") + (simple_return "1")]) +(define_code_attr return_str [(return "") (simple_return "simple_")]) + +; Logical operators. +(define_code_iterator iorxor [ior xor]) +(define_code_iterator and_ior_xor [and ior xor]) + +; Signed/unsigned variants of ops. +(define_code_iterator any_extend [sign_extend zero_extend]) +(define_code_iterator any_fix [fix unsigned_fix]) +(define_code_iterator any_float [float unsigned_float]) + +(define_code_attr u [(sign_extend "") + (zero_extend "u") + (fix "") + (unsigned_fix "u")]) + +(define_code_attr su [(sign_extend "s") + (zero_extend "u") + (fix "s") + (unsigned_fix "s") + (float "s") + (unsigned_float "u")]) + +(define_code_attr az [(sign_extend "a") + (zero_extend "z") + (fix "a") + (unsigned_fix "z") + (float "a") + (unsigned_float "z")]) + +(define_code_attr uns [(fix "") + (unsigned_fix "uns") + (float "") + (unsigned_float "uns")]) + +; Various instructions that come in SI and DI forms. +; A generic w/d attribute, for things like cmpw/cmpd. +(define_mode_attr wd [(QI "b") + (HI "h") + (SI "w") + (DI "d") + (V16QI "b") + (V8HI "h") + (V4SI "w") + (V2DI "d") + (V1TI "q") + (TI "q")]) + +;; How many bits in this mode? 
+(define_mode_attr bits [(QI "8") (HI "16") (SI "32") (DI "64")]) + +; DImode bits +(define_mode_attr dbits [(QI "56") (HI "48") (SI "32")]) + +;; ISEL/ISEL64 target selection +(define_mode_attr sel [(SI "") (DI "64")]) + +;; Bitmask for shift instructions +(define_mode_attr hH [(SI "h") (DI "H")]) + +;; A mode twice the size of the given mode +(define_mode_attr dmode [(SI "di") (DI "ti")]) +(define_mode_attr DMODE [(SI "DI") (DI "TI")]) + +;; Suffix for reload patterns +(define_mode_attr ptrsize [(SI "32bit") + (DI "64bit")]) + +(define_mode_attr tptrsize [(SI "TARGET_32BIT") + (DI "TARGET_64BIT")]) + +(define_mode_attr mptrsize [(SI "si") + (DI "di")]) + +(define_mode_attr ptrload [(SI "lwz") + (DI "ld")]) + +(define_mode_attr ptrm [(SI "m") + (DI "Y")]) + +(define_mode_attr rreg [(SF "f") + (DF "ws") + (TF "f") + (TD "f") + (V4SF "wf") + (V2DF "wd")]) + +(define_mode_attr rreg2 [(SF "f") + (DF "d")]) + +(define_mode_attr SI_CONVERT_FP [(SF "TARGET_FCFIDS") + (DF "TARGET_FCFID")]) + +(define_mode_attr E500_CONVERT [(SF "!TARGET_FPRS") + (DF "TARGET_E500_DOUBLE")]) + +(define_mode_attr TARGET_FLOAT [(SF "TARGET_SINGLE_FLOAT") + (DF "TARGET_DOUBLE_FLOAT")]) + +;; Mode iterator for logical operations on 128-bit types +(define_mode_iterator BOOL_128 [TI + PTI + (V16QI "TARGET_ALTIVEC") + (V8HI "TARGET_ALTIVEC") + (V4SI "TARGET_ALTIVEC") + (V4SF "TARGET_ALTIVEC") + (V2DI "TARGET_ALTIVEC") + (V2DF "TARGET_ALTIVEC") + (V1TI "TARGET_ALTIVEC")]) + +;; For the GPRs we use 3 constraints for register outputs, two that are the +;; same as the output register, and a third where the output register is an +;; early clobber, so we don't have to deal with register overlaps. For the +;; vector types, we prefer to use the vector registers. For TI mode, allow +;; either. + +;; Mode attribute for boolean operation register constraints for output +(define_mode_attr BOOL_REGS_OUTPUT [(TI "&r,r,r,wt,v") + (PTI "&r,r,r") + (V16QI "wa,v,&?r,?r,?r") + (V8HI "wa,v,&?r,?r,?r") + (V4SI "wa,v,&?r,?r,?r") + (V4SF "wa,v,&?r,?r,?r") + (V2DI "wa,v,&?r,?r,?r") + (V2DF "wa,v,&?r,?r,?r") + (V1TI "wa,v,&?r,?r,?r")]) + +;; Mode attribute for boolean operation register constraints for operand1 +(define_mode_attr BOOL_REGS_OP1 [(TI "r,0,r,wt,v") + (PTI "r,0,r") + (V16QI "wa,v,r,0,r") + (V8HI "wa,v,r,0,r") + (V4SI "wa,v,r,0,r") + (V4SF "wa,v,r,0,r") + (V2DI "wa,v,r,0,r") + (V2DF "wa,v,r,0,r") + (V1TI "wa,v,r,0,r")]) + +;; Mode attribute for boolean operation register constraints for operand2 +(define_mode_attr BOOL_REGS_OP2 [(TI "r,r,0,wt,v") + (PTI "r,r,0") + (V16QI "wa,v,r,r,0") + (V8HI "wa,v,r,r,0") + (V4SI "wa,v,r,r,0") + (V4SF "wa,v,r,r,0") + (V2DI "wa,v,r,r,0") + (V2DF "wa,v,r,r,0") + (V1TI "wa,v,r,r,0")]) + +;; Mode attribute for boolean operation register constraints for operand1 +;; for one_cmpl. To simplify things, we repeat the constraint where 0 +;; is used for operand1 or operand2 +(define_mode_attr BOOL_REGS_UNARY [(TI "r,0,0,wt,v") + (PTI "r,0,0") + (V16QI "wa,v,r,0,0") + (V8HI "wa,v,r,0,0") + (V4SI "wa,v,r,0,0") + (V4SF "wa,v,r,0,0") + (V2DI "wa,v,r,0,0") + (V2DF "wa,v,r,0,0") + (V1TI "wa,v,r,0,0")]) + +;; Reload iterator for creating the function to allocate a base register to +;; supplement addressing modes. 
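The dbits attribute above is 64 minus the mode's width (56, 48, 32), which is the mask-begin operand an rldicl needs in order to zero-extend that mode to 64 bits; compare the "rldicl %0,%1,0,32" alternative of zero_extendsidi2 further down. The equivalent bit arithmetic in plain C (names are illustrative):

#include <stdint.h>
#include <stdio.h>

/* rldicl dst,src,0,mb clears the mb high-order bits; with mb = 64 - width
   that is a zero extension from WIDTH bits to 64 bits.  */
uint64_t zext_to_64 (uint64_t src, int width)
{
  int dbits = 64 - width;             /* 56 for QI, 48 for HI, 32 for SI */
  return (src << dbits) >> dbits;     /* same as src & ((1ULL << width) - 1) */
}

int main (void)
{
  printf ("0x%llx\n",
          (unsigned long long) zext_to_64 (0xfffffffff00dULL, 16));  /* 0xf00d */
  return 0;
}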
+(define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI + SF SD SI DF DD DI TI PTI KF IF TF]) + +;; Iterate over smin, smax +(define_code_iterator fp_minmax [smin smax]) + +(define_code_attr minmax [(smin "min") + (smax "max")]) + +(define_code_attr SMINMAX [(smin "SMIN") + (smax "SMAX")]) + +;; Iterator to optimize the following cases: +;; D-form load to FPR register & move to Altivec register +;; Move Altivec register to FPR register and store +(define_mode_iterator ALTIVEC_DFORM [DI DF SF]) + + +;; Start with fixed-point load and store insns. Here we put only the more +;; complex forms. Basic data transfer is done later. + +(define_insn "zero_extendqi<mode>2" + [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,r,^wJwK,^wK") + (zero_extend:EXTQI (match_operand:QI 1 "reg_or_mem_operand" "m,r,Z,wK")))] + "" + "@ + lbz%U1%X1 %0,%1 + rlwinm %0,%1,0,0xff + lxsibzx %x0,%y1 + vextractub %0,%1,7" + [(set_attr "type" "load,shift,fpload,vecperm")]) + +(define_insn_and_split "*zero_extendqi<mode>2_dot" + [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y") + (compare:CC (zero_extend:EXTQI (match_operand:QI 1 "gpc_reg_operand" "r,r")) + (const_int 0))) + (clobber (match_scratch:EXTQI 0 "=r,r"))] + "rs6000_gen_cell_microcode" + "@ + andi. %0,%1,0xff + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[2], CCmode)" + [(set (match_dup 0) + (zero_extend:EXTQI (match_dup 1))) + (set (match_dup 2) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "logical") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*zero_extendqi<mode>2_dot2" + [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y") + (compare:CC (zero_extend:EXTQI (match_operand:QI 1 "gpc_reg_operand" "r,r")) + (const_int 0))) + (set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,r") + (zero_extend:EXTQI (match_dup 1)))] + "rs6000_gen_cell_microcode" + "@ + andi. %0,%1,0xff + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[2], CCmode)" + [(set (match_dup 0) + (zero_extend:EXTQI (match_dup 1))) + (set (match_dup 2) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "logical") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + + +(define_insn "zero_extendhi<mode>2" + [(set (match_operand:EXTHI 0 "gpc_reg_operand" "=r,r,^wJwK,^wK") + (zero_extend:EXTHI (match_operand:HI 1 "reg_or_mem_operand" "m,r,Z,wK")))] + "" + "@ + lhz%U1%X1 %0,%1 + rlwinm %0,%1,0,0xffff + lxsihzx %x0,%y1 + vextractuh %0,%1,6" + [(set_attr "type" "load,shift,fpload,vecperm")]) + +(define_insn_and_split "*zero_extendhi<mode>2_dot" + [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y") + (compare:CC (zero_extend:EXTHI (match_operand:HI 1 "gpc_reg_operand" "r,r")) + (const_int 0))) + (clobber (match_scratch:EXTHI 0 "=r,r"))] + "rs6000_gen_cell_microcode" + "@ + andi. %0,%1,0xffff + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[2], CCmode)" + [(set (match_dup 0) + (zero_extend:EXTHI (match_dup 1))) + (set (match_dup 2) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "logical") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*zero_extendhi<mode>2_dot2" + [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y") + (compare:CC (zero_extend:EXTHI (match_operand:HI 1 "gpc_reg_operand" "r,r")) + (const_int 0))) + (set (match_operand:EXTHI 0 "gpc_reg_operand" "=r,r") + (zero_extend:EXTHI (match_dup 1)))] + "rs6000_gen_cell_microcode" + "@ + andi. 
%0,%1,0xffff + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[2], CCmode)" + [(set (match_dup 0) + (zero_extend:EXTHI (match_dup 1))) + (set (match_dup 2) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "logical") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + + +(define_insn "zero_extendsi<mode>2" + [(set (match_operand:EXTSI 0 "gpc_reg_operand" "=r,r,wz,wu,wj,r,wJwK") + (zero_extend:EXTSI (match_operand:SI 1 "reg_or_mem_operand" "m,r,Z,Z,r,wIwH,wJwK")))] + "" + "@ + lwz%U1%X1 %0,%1 + rldicl %0,%1,0,32 + lfiwzx %0,%y1 + lxsiwzx %x0,%y1 + mtvsrwz %x0,%1 + mfvsrwz %0,%x1 + xxextractuw %x0,%x1,4" + [(set_attr "type" "load,shift,fpload,fpload,mffgpr,mftgpr,vecexts")]) + +(define_insn_and_split "*zero_extendsi<mode>2_dot" + [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y") + (compare:CC (zero_extend:EXTSI (match_operand:SI 1 "gpc_reg_operand" "r,r")) + (const_int 0))) + (clobber (match_scratch:EXTSI 0 "=r,r"))] + "rs6000_gen_cell_microcode" + "@ + rldicl. %0,%1,0,32 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[2], CCmode)" + [(set (match_dup 0) + (zero_extend:DI (match_dup 1))) + (set (match_dup 2) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "shift") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*zero_extendsi<mode>2_dot2" + [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y") + (compare:CC (zero_extend:EXTSI (match_operand:SI 1 "gpc_reg_operand" "r,r")) + (const_int 0))) + (set (match_operand:EXTSI 0 "gpc_reg_operand" "=r,r") + (zero_extend:EXTSI (match_dup 1)))] + "rs6000_gen_cell_microcode" + "@ + rldicl. %0,%1,0,32 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[2], CCmode)" + [(set (match_dup 0) + (zero_extend:EXTSI (match_dup 1))) + (set (match_dup 2) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "shift") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + + +(define_insn "extendqi<mode>2" + [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*wK") + (sign_extend:EXTQI (match_operand:QI 1 "gpc_reg_operand" "r,?*wK")))] + "" + "@ + extsb %0,%1 + vextsb2d %0,%1" + [(set_attr "type" "exts,vecperm")]) + +(define_insn_and_split "*extendqi<mode>2_dot" + [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y") + (compare:CC (sign_extend:EXTQI (match_operand:QI 1 "gpc_reg_operand" "r,r")) + (const_int 0))) + (clobber (match_scratch:EXTQI 0 "=r,r"))] + "rs6000_gen_cell_microcode" + "@ + extsb. %0,%1 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[2], CCmode)" + [(set (match_dup 0) + (sign_extend:EXTQI (match_dup 1))) + (set (match_dup 2) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "exts") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*extendqi<mode>2_dot2" + [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y") + (compare:CC (sign_extend:EXTQI (match_operand:QI 1 "gpc_reg_operand" "r,r")) + (const_int 0))) + (set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,r") + (sign_extend:EXTQI (match_dup 1)))] + "rs6000_gen_cell_microcode" + "@ + extsb. 
%0,%1 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[2], CCmode)" + [(set (match_dup 0) + (sign_extend:EXTQI (match_dup 1))) + (set (match_dup 2) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "exts") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + + +(define_expand "extendhi<mode>2" + [(set (match_operand:EXTHI 0 "gpc_reg_operand") + (sign_extend:EXTHI (match_operand:HI 1 "gpc_reg_operand")))] + "" + "") + +(define_insn "*extendhi<mode>2" + [(set (match_operand:EXTHI 0 "gpc_reg_operand" "=r,r,?*wK,?*wK") + (sign_extend:EXTHI (match_operand:HI 1 "reg_or_mem_operand" "m,r,Z,wK")))] + "rs6000_gen_cell_microcode || TARGET_VSX_SMALL_INTEGER" + "@ + lha%U1%X1 %0,%1 + extsh %0,%1 + # + vextsh2d %0,%1" + [(set_attr "type" "load,exts,fpload,vecperm") + (set_attr "sign_extend" "yes") + (set_attr "length" "4,4,8,4")]) + +(define_split + [(set (match_operand:EXTHI 0 "altivec_register_operand") + (sign_extend:EXTHI + (match_operand:HI 1 "indexed_or_indirect_operand")))] + "TARGET_P9_VECTOR && reload_completed" + [(set (match_dup 2) + (match_dup 1)) + (set (match_dup 0) + (sign_extend:EXTHI (match_dup 2)))] +{ + operands[2] = gen_rtx_REG (HImode, REGNO (operands[1])); +}) + +(define_insn "*extendhi<mode>2_noload" + [(set (match_operand:EXTHI 0 "gpc_reg_operand" "=r") + (sign_extend:EXTHI (match_operand:HI 1 "gpc_reg_operand" "r")))] + "!rs6000_gen_cell_microcode" + "extsh %0,%1" + [(set_attr "type" "exts")]) + +(define_insn_and_split "*extendhi<mode>2_dot" + [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y") + (compare:CC (sign_extend:EXTHI (match_operand:HI 1 "gpc_reg_operand" "r,r")) + (const_int 0))) + (clobber (match_scratch:EXTHI 0 "=r,r"))] + "rs6000_gen_cell_microcode" + "@ + extsh. %0,%1 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[2], CCmode)" + [(set (match_dup 0) + (sign_extend:EXTHI (match_dup 1))) + (set (match_dup 2) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "exts") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*extendhi<mode>2_dot2" + [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y") + (compare:CC (sign_extend:EXTHI (match_operand:HI 1 "gpc_reg_operand" "r,r")) + (const_int 0))) + (set (match_operand:EXTHI 0 "gpc_reg_operand" "=r,r") + (sign_extend:EXTHI (match_dup 1)))] + "rs6000_gen_cell_microcode" + "@ + extsh. 
%0,%1 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[2], CCmode)" + [(set (match_dup 0) + (sign_extend:EXTHI (match_dup 1))) + (set (match_dup 2) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "exts") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + + +(define_insn "extendsi<mode>2" + [(set (match_operand:EXTSI 0 "gpc_reg_operand" "=r,r,wl,wu,wj,wK,wH") + (sign_extend:EXTSI (match_operand:SI 1 "lwa_operand" "Y,r,Z,Z,r,wK,wH")))] + "" + "@ + lwa%U1%X1 %0,%1 + extsw %0,%1 + lfiwax %0,%y1 + lxsiwax %x0,%y1 + mtvsrwa %x0,%1 + vextsw2d %0,%1 + #" + [(set_attr "type" "load,exts,fpload,fpload,mffgpr,vecexts,vecperm") + (set_attr "sign_extend" "yes") + (set_attr "length" "4,4,4,4,4,4,8")]) + +(define_split + [(set (match_operand:DI 0 "altivec_register_operand") + (sign_extend:DI (match_operand:SI 1 "altivec_register_operand")))] + "TARGET_VSX_SMALL_INTEGER && TARGET_P8_VECTOR && !TARGET_P9_VECTOR + && reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + int dest_regno = REGNO (dest); + int src_regno = REGNO (src); + rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno); + rtx src_v4si = gen_rtx_REG (V4SImode, src_regno); + + if (VECTOR_ELT_ORDER_BIG) + { + emit_insn (gen_altivec_vupkhsw (dest_v2di, src_v4si)); + emit_insn (gen_vsx_xxspltd_v2di (dest_v2di, dest_v2di, const1_rtx)); + } + else + { + emit_insn (gen_altivec_vupklsw (dest_v2di, src_v4si)); + emit_insn (gen_vsx_xxspltd_v2di (dest_v2di, dest_v2di, const0_rtx)); + } + DONE; +}) + +(define_insn_and_split "*extendsi<mode>2_dot" + [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y") + (compare:CC (sign_extend:EXTSI (match_operand:SI 1 "gpc_reg_operand" "r,r")) + (const_int 0))) + (clobber (match_scratch:EXTSI 0 "=r,r"))] + "rs6000_gen_cell_microcode" + "@ + extsw. %0,%1 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[2], CCmode)" + [(set (match_dup 0) + (sign_extend:EXTSI (match_dup 1))) + (set (match_dup 2) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "exts") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*extendsi<mode>2_dot2" + [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y") + (compare:CC (sign_extend:EXTSI (match_operand:SI 1 "gpc_reg_operand" "r,r")) + (const_int 0))) + (set (match_operand:EXTSI 0 "gpc_reg_operand" "=r,r") + (sign_extend:EXTSI (match_dup 1)))] + "rs6000_gen_cell_microcode" + "@ + extsw. %0,%1 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[2], CCmode)" + [(set (match_dup 0) + (sign_extend:EXTSI (match_dup 1))) + (set (match_dup 2) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "exts") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +;; IBM 405, 440, 464 and 476 half-word multiplication operations. + +(define_insn "*macchwc" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (compare:CC (plus:SI (mult:SI (ashiftrt:SI + (match_operand:SI 2 "gpc_reg_operand" "r") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 1 "gpc_reg_operand" "r"))) + (match_operand:SI 4 "gpc_reg_operand" "0")) + (const_int 0))) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (plus:SI (mult:SI (ashiftrt:SI + (match_dup 2) + (const_int 16)) + (sign_extend:SI + (match_dup 1))) + (match_dup 4)))] + "TARGET_MULHW" + "macchw. 
%0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*macchw" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (plus:SI (mult:SI (ashiftrt:SI + (match_operand:SI 2 "gpc_reg_operand" "r") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 1 "gpc_reg_operand" "r"))) + (match_operand:SI 3 "gpc_reg_operand" "0")))] + "TARGET_MULHW" + "macchw %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*macchwuc" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (compare:CC (plus:SI (mult:SI (lshiftrt:SI + (match_operand:SI 2 "gpc_reg_operand" "r") + (const_int 16)) + (zero_extend:SI + (match_operand:HI 1 "gpc_reg_operand" "r"))) + (match_operand:SI 4 "gpc_reg_operand" "0")) + (const_int 0))) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (plus:SI (mult:SI (lshiftrt:SI + (match_dup 2) + (const_int 16)) + (zero_extend:SI + (match_dup 1))) + (match_dup 4)))] + "TARGET_MULHW" + "macchwu. %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*macchwu" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (plus:SI (mult:SI (lshiftrt:SI + (match_operand:SI 2 "gpc_reg_operand" "r") + (const_int 16)) + (zero_extend:SI + (match_operand:HI 1 "gpc_reg_operand" "r"))) + (match_operand:SI 3 "gpc_reg_operand" "0")))] + "TARGET_MULHW" + "macchwu %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*machhwc" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (compare:CC (plus:SI (mult:SI (ashiftrt:SI + (match_operand:SI 1 "gpc_reg_operand" "%r") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "gpc_reg_operand" "r") + (const_int 16))) + (match_operand:SI 4 "gpc_reg_operand" "0")) + (const_int 0))) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (plus:SI (mult:SI (ashiftrt:SI + (match_dup 1) + (const_int 16)) + (ashiftrt:SI + (match_dup 2) + (const_int 16))) + (match_dup 4)))] + "TARGET_MULHW" + "machhw. %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*machhw" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (plus:SI (mult:SI (ashiftrt:SI + (match_operand:SI 1 "gpc_reg_operand" "%r") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "gpc_reg_operand" "r") + (const_int 16))) + (match_operand:SI 3 "gpc_reg_operand" "0")))] + "TARGET_MULHW" + "machhw %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*machhwuc" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (compare:CC (plus:SI (mult:SI (lshiftrt:SI + (match_operand:SI 1 "gpc_reg_operand" "%r") + (const_int 16)) + (lshiftrt:SI + (match_operand:SI 2 "gpc_reg_operand" "r") + (const_int 16))) + (match_operand:SI 4 "gpc_reg_operand" "0")) + (const_int 0))) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (plus:SI (mult:SI (lshiftrt:SI + (match_dup 1) + (const_int 16)) + (lshiftrt:SI + (match_dup 2) + (const_int 16))) + (match_dup 4)))] + "TARGET_MULHW" + "machhwu. 
%0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*machhwu" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (plus:SI (mult:SI (lshiftrt:SI + (match_operand:SI 1 "gpc_reg_operand" "%r") + (const_int 16)) + (lshiftrt:SI + (match_operand:SI 2 "gpc_reg_operand" "r") + (const_int 16))) + (match_operand:SI 3 "gpc_reg_operand" "0")))] + "TARGET_MULHW" + "machhwu %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*maclhwc" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (compare:CC (plus:SI (mult:SI (sign_extend:SI + (match_operand:HI 1 "gpc_reg_operand" "%r")) + (sign_extend:SI + (match_operand:HI 2 "gpc_reg_operand" "r"))) + (match_operand:SI 4 "gpc_reg_operand" "0")) + (const_int 0))) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (plus:SI (mult:SI (sign_extend:SI + (match_dup 1)) + (sign_extend:SI + (match_dup 2))) + (match_dup 4)))] + "TARGET_MULHW" + "maclhw. %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*maclhw" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (plus:SI (mult:SI (sign_extend:SI + (match_operand:HI 1 "gpc_reg_operand" "%r")) + (sign_extend:SI + (match_operand:HI 2 "gpc_reg_operand" "r"))) + (match_operand:SI 3 "gpc_reg_operand" "0")))] + "TARGET_MULHW" + "maclhw %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*maclhwuc" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (compare:CC (plus:SI (mult:SI (zero_extend:SI + (match_operand:HI 1 "gpc_reg_operand" "%r")) + (zero_extend:SI + (match_operand:HI 2 "gpc_reg_operand" "r"))) + (match_operand:SI 4 "gpc_reg_operand" "0")) + (const_int 0))) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (plus:SI (mult:SI (zero_extend:SI + (match_dup 1)) + (zero_extend:SI + (match_dup 2))) + (match_dup 4)))] + "TARGET_MULHW" + "maclhwu. %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*maclhwu" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (plus:SI (mult:SI (zero_extend:SI + (match_operand:HI 1 "gpc_reg_operand" "%r")) + (zero_extend:SI + (match_operand:HI 2 "gpc_reg_operand" "r"))) + (match_operand:SI 3 "gpc_reg_operand" "0")))] + "TARGET_MULHW" + "maclhwu %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*nmacchwc" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (compare:CC (minus:SI (match_operand:SI 4 "gpc_reg_operand" "0") + (mult:SI (ashiftrt:SI + (match_operand:SI 2 "gpc_reg_operand" "r") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 1 "gpc_reg_operand" "r")))) + (const_int 0))) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (minus:SI (match_dup 4) + (mult:SI (ashiftrt:SI + (match_dup 2) + (const_int 16)) + (sign_extend:SI + (match_dup 1)))))] + "TARGET_MULHW" + "nmacchw. 
%0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*nmacchw" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (minus:SI (match_operand:SI 3 "gpc_reg_operand" "0") + (mult:SI (ashiftrt:SI + (match_operand:SI 2 "gpc_reg_operand" "r") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 1 "gpc_reg_operand" "r")))))] + "TARGET_MULHW" + "nmacchw %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*nmachhwc" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (compare:CC (minus:SI (match_operand:SI 4 "gpc_reg_operand" "0") + (mult:SI (ashiftrt:SI + (match_operand:SI 1 "gpc_reg_operand" "%r") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "gpc_reg_operand" "r") + (const_int 16)))) + (const_int 0))) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (minus:SI (match_dup 4) + (mult:SI (ashiftrt:SI + (match_dup 1) + (const_int 16)) + (ashiftrt:SI + (match_dup 2) + (const_int 16)))))] + "TARGET_MULHW" + "nmachhw. %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*nmachhw" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (minus:SI (match_operand:SI 3 "gpc_reg_operand" "0") + (mult:SI (ashiftrt:SI + (match_operand:SI 1 "gpc_reg_operand" "%r") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "gpc_reg_operand" "r") + (const_int 16)))))] + "TARGET_MULHW" + "nmachhw %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*nmaclhwc" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (compare:CC (minus:SI (match_operand:SI 4 "gpc_reg_operand" "0") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "gpc_reg_operand" "%r")) + (sign_extend:SI + (match_operand:HI 2 "gpc_reg_operand" "r")))) + (const_int 0))) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (minus:SI (match_dup 4) + (mult:SI (sign_extend:SI + (match_dup 1)) + (sign_extend:SI + (match_dup 2)))))] + "TARGET_MULHW" + "nmaclhw. %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*nmaclhw" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (minus:SI (match_operand:SI 3 "gpc_reg_operand" "0") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "gpc_reg_operand" "%r")) + (sign_extend:SI + (match_operand:HI 2 "gpc_reg_operand" "r")))))] + "TARGET_MULHW" + "nmaclhw %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*mulchwc" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (compare:CC (mult:SI (ashiftrt:SI + (match_operand:SI 2 "gpc_reg_operand" "r") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 1 "gpc_reg_operand" "r"))) + (const_int 0))) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (mult:SI (ashiftrt:SI + (match_dup 2) + (const_int 16)) + (sign_extend:SI + (match_dup 1))))] + "TARGET_MULHW" + "mulchw. %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*mulchw" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (mult:SI (ashiftrt:SI + (match_operand:SI 2 "gpc_reg_operand" "r") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 1 "gpc_reg_operand" "r"))))] + "TARGET_MULHW" + "mulchw %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*mulchwuc" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (compare:CC (mult:SI (lshiftrt:SI + (match_operand:SI 2 "gpc_reg_operand" "r") + (const_int 16)) + (zero_extend:SI + (match_operand:HI 1 "gpc_reg_operand" "r"))) + (const_int 0))) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (mult:SI (lshiftrt:SI + (match_dup 2) + (const_int 16)) + (zero_extend:SI + (match_dup 1))))] + "TARGET_MULHW" + "mulchwu. 
%0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*mulchwu" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (mult:SI (lshiftrt:SI + (match_operand:SI 2 "gpc_reg_operand" "r") + (const_int 16)) + (zero_extend:SI + (match_operand:HI 1 "gpc_reg_operand" "r"))))] + "TARGET_MULHW" + "mulchwu %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*mulhhwc" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (compare:CC (mult:SI (ashiftrt:SI + (match_operand:SI 1 "gpc_reg_operand" "%r") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "gpc_reg_operand" "r") + (const_int 16))) + (const_int 0))) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (mult:SI (ashiftrt:SI + (match_dup 1) + (const_int 16)) + (ashiftrt:SI + (match_dup 2) + (const_int 16))))] + "TARGET_MULHW" + "mulhhw. %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*mulhhw" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (mult:SI (ashiftrt:SI + (match_operand:SI 1 "gpc_reg_operand" "%r") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "gpc_reg_operand" "r") + (const_int 16))))] + "TARGET_MULHW" + "mulhhw %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*mulhhwuc" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (compare:CC (mult:SI (lshiftrt:SI + (match_operand:SI 1 "gpc_reg_operand" "%r") + (const_int 16)) + (lshiftrt:SI + (match_operand:SI 2 "gpc_reg_operand" "r") + (const_int 16))) + (const_int 0))) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (mult:SI (lshiftrt:SI + (match_dup 1) + (const_int 16)) + (lshiftrt:SI + (match_dup 2) + (const_int 16))))] + "TARGET_MULHW" + "mulhhwu. %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*mulhhwu" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "gpc_reg_operand" "%r") + (const_int 16)) + (lshiftrt:SI + (match_operand:SI 2 "gpc_reg_operand" "r") + (const_int 16))))] + "TARGET_MULHW" + "mulhhwu %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*mullhwc" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (compare:CC (mult:SI (sign_extend:SI + (match_operand:HI 1 "gpc_reg_operand" "%r")) + (sign_extend:SI + (match_operand:HI 2 "gpc_reg_operand" "r"))) + (const_int 0))) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (mult:SI (sign_extend:SI + (match_dup 1)) + (sign_extend:SI + (match_dup 2))))] + "TARGET_MULHW" + "mullhw. %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*mullhw" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "gpc_reg_operand" "%r")) + (sign_extend:SI + (match_operand:HI 2 "gpc_reg_operand" "r"))))] + "TARGET_MULHW" + "mullhw %0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*mullhwuc" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (compare:CC (mult:SI (zero_extend:SI + (match_operand:HI 1 "gpc_reg_operand" "%r")) + (zero_extend:SI + (match_operand:HI 2 "gpc_reg_operand" "r"))) + (const_int 0))) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (mult:SI (zero_extend:SI + (match_dup 1)) + (zero_extend:SI + (match_dup 2))))] + "TARGET_MULHW" + "mullhwu. 
%0,%1,%2" + [(set_attr "type" "halfmul")]) + +(define_insn "*mullhwu" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "gpc_reg_operand" "%r")) + (zero_extend:SI + (match_operand:HI 2 "gpc_reg_operand" "r"))))] + "TARGET_MULHW" + "mullhwu %0,%1,%2" + [(set_attr "type" "halfmul")]) + +;; IBM 405, 440, 464 and 476 string-search dlmzb instruction support. +(define_insn "dlmzb" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "gpc_reg_operand" "r")] + UNSPEC_DLMZB_CR)) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (unspec:SI [(match_dup 1) + (match_dup 2)] + UNSPEC_DLMZB))] + "TARGET_DLMZB" + "dlmzb. %0,%1,%2") + +(define_expand "strlensi" + [(set (match_operand:SI 0 "gpc_reg_operand" "") + (unspec:SI [(match_operand:BLK 1 "general_operand" "") + (match_operand:QI 2 "const_int_operand" "") + (match_operand 3 "const_int_operand" "")] + UNSPEC_DLMZB_STRLEN)) + (clobber (match_scratch:CC 4 "=x"))] + "TARGET_DLMZB && WORDS_BIG_ENDIAN && !optimize_size" +{ + rtx result = operands[0]; + rtx src = operands[1]; + rtx search_char = operands[2]; + rtx align = operands[3]; + rtx addr, scratch_string, word1, word2, scratch_dlmzb; + rtx loop_label, end_label, mem, cr0, cond; + if (search_char != const0_rtx + || GET_CODE (align) != CONST_INT + || INTVAL (align) < 8) + FAIL; + word1 = gen_reg_rtx (SImode); + word2 = gen_reg_rtx (SImode); + scratch_dlmzb = gen_reg_rtx (SImode); + scratch_string = gen_reg_rtx (Pmode); + loop_label = gen_label_rtx (); + end_label = gen_label_rtx (); + addr = force_reg (Pmode, XEXP (src, 0)); + emit_move_insn (scratch_string, addr); + emit_label (loop_label); + mem = change_address (src, SImode, scratch_string); + emit_move_insn (word1, mem); + emit_move_insn (word2, adjust_address (mem, SImode, 4)); + cr0 = gen_rtx_REG (CCmode, CR0_REGNO); + emit_insn (gen_dlmzb (scratch_dlmzb, word1, word2, cr0)); + cond = gen_rtx_NE (VOIDmode, cr0, const0_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, + cond, + gen_rtx_LABEL_REF + (VOIDmode, + end_label), + pc_rtx))); + emit_insn (gen_addsi3 (scratch_string, scratch_string, GEN_INT (8))); + emit_jump_insn (gen_rtx_SET (pc_rtx, + gen_rtx_LABEL_REF (VOIDmode, loop_label))); + emit_barrier (); + emit_label (end_label); + emit_insn (gen_addsi3 (scratch_string, scratch_string, scratch_dlmzb)); + emit_insn (gen_subsi3 (result, scratch_string, addr)); + emit_insn (gen_addsi3 (result, result, constm1_rtx)); + DONE; +}) + +;; Fixed-point arithmetic insns. 
+ +(define_expand "add<mode>3" + [(set (match_operand:SDI 0 "gpc_reg_operand" "") + (plus:SDI (match_operand:SDI 1 "gpc_reg_operand" "") + (match_operand:SDI 2 "reg_or_add_cint_operand" "")))] + "" +{ + if (<MODE>mode == DImode && !TARGET_POWERPC64) + { + rtx lo0 = gen_lowpart (SImode, operands[0]); + rtx lo1 = gen_lowpart (SImode, operands[1]); + rtx lo2 = gen_lowpart (SImode, operands[2]); + rtx hi0 = gen_highpart (SImode, operands[0]); + rtx hi1 = gen_highpart (SImode, operands[1]); + rtx hi2 = gen_highpart_mode (SImode, DImode, operands[2]); + + if (!reg_or_short_operand (lo2, SImode)) + lo2 = force_reg (SImode, lo2); + if (!adde_operand (hi2, SImode)) + hi2 = force_reg (SImode, hi2); + + emit_insn (gen_addsi3_carry (lo0, lo1, lo2)); + emit_insn (gen_addsi3_carry_in (hi0, hi1, hi2)); + DONE; + } + + if (CONST_INT_P (operands[2]) && !add_operand (operands[2], <MODE>mode)) + { + rtx tmp = ((!can_create_pseudo_p () + || rtx_equal_p (operands[0], operands[1])) + ? operands[0] : gen_reg_rtx (<MODE>mode)); + + HOST_WIDE_INT val = INTVAL (operands[2]); + HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT rest = trunc_int_for_mode (val - low, <MODE>mode); + + if (<MODE>mode == DImode && !satisfies_constraint_L (GEN_INT (rest))) + FAIL; + + /* The ordering here is important for the prolog expander. + When space is allocated from the stack, adding 'low' first may + produce a temporary deallocation (which would be bad). */ + emit_insn (gen_add<mode>3 (tmp, operands[1], GEN_INT (rest))); + emit_insn (gen_add<mode>3 (operands[0], tmp, GEN_INT (low))); + DONE; + } +}) + +(define_insn "*add<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r,r") + (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,b,b") + (match_operand:GPR 2 "add_operand" "r,I,L")))] + "" + "@ + add %0,%1,%2 + addi %0,%1,%2 + addis %0,%1,%v2" + [(set_attr "type" "add")]) + +(define_insn "addsi3_high" + [(set (match_operand:SI 0 "gpc_reg_operand" "=b") + (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (high:SI (match_operand 2 "" ""))))] + "TARGET_MACHO && !TARGET_64BIT" + "addis %0,%1,ha16(%2)" + [(set_attr "type" "add")]) + +(define_insn_and_split "*add<mode>3_dot" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r") + (match_operand:GPR 2 "gpc_reg_operand" "r,r")) + (const_int 0))) + (clobber (match_scratch:GPR 0 "=r,r"))] + "<MODE>mode == Pmode" + "@ + add. %0,%1,%2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(set (match_dup 0) + (plus:GPR (match_dup 1) + (match_dup 2))) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "add") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*add<mode>3_dot2" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r") + (match_operand:GPR 2 "gpc_reg_operand" "r,r")) + (const_int 0))) + (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") + (plus:GPR (match_dup 1) + (match_dup 2)))] + "<MODE>mode == Pmode" + "@ + add. 
%0,%1,%2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(set (match_dup 0) + (plus:GPR (match_dup 1) + (match_dup 2))) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "add") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*add<mode>3_imm_dot" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,b") + (match_operand:GPR 2 "short_cint_operand" "I,I")) + (const_int 0))) + (clobber (match_scratch:GPR 0 "=r,r")) + (clobber (reg:GPR CA_REGNO))] + "<MODE>mode == Pmode" + "@ + addic. %0,%1,%2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(set (match_dup 0) + (plus:GPR (match_dup 1) + (match_dup 2))) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "add") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*add<mode>3_imm_dot2" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,b") + (match_operand:GPR 2 "short_cint_operand" "I,I")) + (const_int 0))) + (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") + (plus:GPR (match_dup 1) + (match_dup 2))) + (clobber (reg:GPR CA_REGNO))] + "<MODE>mode == Pmode" + "@ + addic. %0,%1,%2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(set (match_dup 0) + (plus:GPR (match_dup 1) + (match_dup 2))) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "add") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +;; Split an add that we can't do in one insn into two insns, each of which +;; does one 16-bit part. This is used by combine. Note that the low-order +;; add should be last in case the result gets used in an address. 
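+;; For example, adding the constant 0x1234abcd does not fit a single addi or
+;; addis, so it is rewritten as 0x12350000 + (-0x5433): an addis of 0x1235
+;; adds the high part, and an addi of -21555 (the sign-extended low 16 bits)
+;; adds the rest, since 0x12350000 - 0x5433 == 0x1234abcd.  The low-order
+;; addi comes last so that, if the sum is used as an address, it can still
+;; be folded into the displacement of the memory access.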
+ +(define_split + [(set (match_operand:GPR 0 "gpc_reg_operand" "") + (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "") + (match_operand:GPR 2 "non_add_cint_operand" "")))] + "" + [(set (match_dup 0) (plus:GPR (match_dup 1) (match_dup 3))) + (set (match_dup 0) (plus:GPR (match_dup 0) (match_dup 4)))] +{ + HOST_WIDE_INT val = INTVAL (operands[2]); + HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT rest = trunc_int_for_mode (val - low, <MODE>mode); + + operands[4] = GEN_INT (low); + if (<MODE>mode == SImode || satisfies_constraint_L (GEN_INT (rest))) + operands[3] = GEN_INT (rest); + else if (can_create_pseudo_p ()) + { + operands[3] = gen_reg_rtx (DImode); + emit_move_insn (operands[3], operands[2]); + emit_insn (gen_adddi3 (operands[0], operands[1], operands[3])); + DONE; + } + else + FAIL; +}) + + +(define_insn "add<mode>3_carry" + [(set (match_operand:P 0 "gpc_reg_operand" "=r") + (plus:P (match_operand:P 1 "gpc_reg_operand" "r") + (match_operand:P 2 "reg_or_short_operand" "rI"))) + (set (reg:P CA_REGNO) + (ltu:P (plus:P (match_dup 1) + (match_dup 2)) + (match_dup 1)))] + "" + "add%I2c %0,%1,%2" + [(set_attr "type" "add")]) + +(define_insn "*add<mode>3_imm_carry_pos" + [(set (match_operand:P 0 "gpc_reg_operand" "=r") + (plus:P (match_operand:P 1 "gpc_reg_operand" "r") + (match_operand:P 2 "short_cint_operand" "n"))) + (set (reg:P CA_REGNO) + (geu:P (match_dup 1) + (match_operand:P 3 "const_int_operand" "n")))] + "INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) + INTVAL (operands[3]) == 0" + "addic %0,%1,%2" + [(set_attr "type" "add")]) + +(define_insn "*add<mode>3_imm_carry_0" + [(set (match_operand:P 0 "gpc_reg_operand" "=r") + (match_operand:P 1 "gpc_reg_operand" "r")) + (set (reg:P CA_REGNO) + (const_int 0))] + "" + "addic %0,%1,0" + [(set_attr "type" "add")]) + +(define_insn "*add<mode>3_imm_carry_m1" + [(set (match_operand:P 0 "gpc_reg_operand" "=r") + (plus:P (match_operand:P 1 "gpc_reg_operand" "r") + (const_int -1))) + (set (reg:P CA_REGNO) + (ne:P (match_dup 1) + (const_int 0)))] + "" + "addic %0,%1,-1" + [(set_attr "type" "add")]) + +(define_insn "*add<mode>3_imm_carry_neg" + [(set (match_operand:P 0 "gpc_reg_operand" "=r") + (plus:P (match_operand:P 1 "gpc_reg_operand" "r") + (match_operand:P 2 "short_cint_operand" "n"))) + (set (reg:P CA_REGNO) + (gtu:P (match_dup 1) + (match_operand:P 3 "const_int_operand" "n")))] + "INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) + INTVAL (operands[3]) == -1" + "addic %0,%1,%2" + [(set_attr "type" "add")]) + + +(define_expand "add<mode>3_carry_in" + [(parallel [ + (set (match_operand:GPR 0 "gpc_reg_operand") + (plus:GPR (plus:GPR (match_operand:GPR 1 "gpc_reg_operand") + (match_operand:GPR 2 "adde_operand")) + (reg:GPR CA_REGNO))) + (clobber (reg:GPR CA_REGNO))])] + "" +{ + if (operands[2] == const0_rtx) + { + emit_insn (gen_add<mode>3_carry_in_0 (operands[0], operands[1])); + DONE; + } + if (operands[2] == constm1_rtx) + { + emit_insn (gen_add<mode>3_carry_in_m1 (operands[0], operands[1])); + DONE; + } +}) + +(define_insn "*add<mode>3_carry_in_internal" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (plus:GPR (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "gpc_reg_operand" "r")) + (reg:GPR CA_REGNO))) + (clobber (reg:GPR CA_REGNO))] + "" + "adde %0,%1,%2" + [(set_attr "type" "add")]) + +(define_insn "add<mode>3_carry_in_0" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (reg:GPR CA_REGNO))) + (clobber 
(reg:GPR CA_REGNO))] + "" + "addze %0,%1" + [(set_attr "type" "add")]) + +(define_insn "add<mode>3_carry_in_m1" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (plus:GPR (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (reg:GPR CA_REGNO)) + (const_int -1))) + (clobber (reg:GPR CA_REGNO))] + "" + "addme %0,%1" + [(set_attr "type" "add")]) + + +(define_expand "one_cmpl<mode>2" + [(set (match_operand:SDI 0 "gpc_reg_operand" "") + (not:SDI (match_operand:SDI 1 "gpc_reg_operand" "")))] + "" +{ + if (<MODE>mode == DImode && !TARGET_POWERPC64) + { + rs6000_split_logical (operands, NOT, false, false, false); + DONE; + } +}) + +(define_insn "*one_cmpl<mode>2" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")))] + "" + "not %0,%1") + +(define_insn_and_split "*one_cmpl<mode>2_dot" + [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y") + (compare:CC (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")) + (const_int 0))) + (clobber (match_scratch:GPR 0 "=r,r"))] + "<MODE>mode == Pmode && rs6000_gen_cell_microcode" + "@ + not. %0,%1 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[2], CCmode)" + [(set (match_dup 0) + (not:GPR (match_dup 1))) + (set (match_dup 2) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "logical") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*one_cmpl<mode>2_dot2" + [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y") + (compare:CC (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")) + (const_int 0))) + (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") + (not:GPR (match_dup 1)))] + "<MODE>mode == Pmode && rs6000_gen_cell_microcode" + "@ + not. %0,%1 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[2], CCmode)" + [(set (match_dup 0) + (not:GPR (match_dup 1))) + (set (match_dup 2) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "logical") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + + +(define_expand "sub<mode>3" + [(set (match_operand:SDI 0 "gpc_reg_operand" "") + (minus:SDI (match_operand:SDI 1 "reg_or_short_operand" "") + (match_operand:SDI 2 "gpc_reg_operand" "")))] + "" +{ + if (<MODE>mode == DImode && !TARGET_POWERPC64) + { + rtx lo0 = gen_lowpart (SImode, operands[0]); + rtx lo1 = gen_lowpart (SImode, operands[1]); + rtx lo2 = gen_lowpart (SImode, operands[2]); + rtx hi0 = gen_highpart (SImode, operands[0]); + rtx hi1 = gen_highpart_mode (SImode, DImode, operands[1]); + rtx hi2 = gen_highpart (SImode, operands[2]); + + if (!reg_or_short_operand (lo1, SImode)) + lo1 = force_reg (SImode, lo1); + if (!adde_operand (hi1, SImode)) + hi1 = force_reg (SImode, hi1); + + emit_insn (gen_subfsi3_carry (lo0, lo2, lo1)); + emit_insn (gen_subfsi3_carry_in (hi0, hi2, hi1)); + DONE; + } + + if (short_cint_operand (operands[1], <MODE>mode)) + { + emit_insn (gen_subf<mode>3_imm (operands[0], operands[2], operands[1])); + DONE; + } +}) + +(define_insn "*subf<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (minus:GPR (match_operand:GPR 2 "gpc_reg_operand" "r") + (match_operand:GPR 1 "gpc_reg_operand" "r")))] + "" + "subf %0,%1,%2" + [(set_attr "type" "add")]) + +(define_insn_and_split "*subf<mode>3_dot" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (minus:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r") + (match_operand:GPR 1 "gpc_reg_operand" "r,r")) + (const_int 0))) + (clobber (match_scratch:GPR 0 "=r,r"))] + "<MODE>mode == Pmode" + "@ + subf. 
%0,%1,%2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(set (match_dup 0) + (minus:GPR (match_dup 2) + (match_dup 1))) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "add") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*subf<mode>3_dot2" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (minus:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r") + (match_operand:GPR 1 "gpc_reg_operand" "r,r")) + (const_int 0))) + (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") + (minus:GPR (match_dup 2) + (match_dup 1)))] + "<MODE>mode == Pmode" + "@ + subf. %0,%1,%2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(set (match_dup 0) + (minus:GPR (match_dup 2) + (match_dup 1))) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "add") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn "subf<mode>3_imm" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (minus:GPR (match_operand:GPR 2 "short_cint_operand" "I") + (match_operand:GPR 1 "gpc_reg_operand" "r"))) + (clobber (reg:GPR CA_REGNO))] + "" + "subfic %0,%1,%2" + [(set_attr "type" "add")]) + +(define_insn_and_split "subf<mode>3_carry_dot2" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (minus:P (match_operand:P 2 "gpc_reg_operand" "r,r") + (match_operand:P 1 "gpc_reg_operand" "r,r")) + (const_int 0))) + (set (match_operand:P 0 "gpc_reg_operand" "=r,r") + (minus:P (match_dup 2) + (match_dup 1))) + (set (reg:P CA_REGNO) + (leu:P (match_dup 1) + (match_dup 2)))] + "<MODE>mode == Pmode" + "@ + subfc. %0,%1,%2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(parallel [(set (match_dup 0) + (minus:P (match_dup 2) + (match_dup 1))) + (set (reg:P CA_REGNO) + (leu:P (match_dup 1) + (match_dup 2)))]) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "add") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn "subf<mode>3_carry" + [(set (match_operand:P 0 "gpc_reg_operand" "=r") + (minus:P (match_operand:P 2 "reg_or_short_operand" "rI") + (match_operand:P 1 "gpc_reg_operand" "r"))) + (set (reg:P CA_REGNO) + (leu:P (match_dup 1) + (match_dup 2)))] + "" + "subf%I2c %0,%1,%2" + [(set_attr "type" "add")]) + +(define_insn "*subf<mode>3_imm_carry_0" + [(set (match_operand:P 0 "gpc_reg_operand" "=r") + (neg:P (match_operand:P 1 "gpc_reg_operand" "r"))) + (set (reg:P CA_REGNO) + (eq:P (match_dup 1) + (const_int 0)))] + "" + "subfic %0,%1,0" + [(set_attr "type" "add")]) + +(define_insn "*subf<mode>3_imm_carry_m1" + [(set (match_operand:P 0 "gpc_reg_operand" "=r") + (not:P (match_operand:P 1 "gpc_reg_operand" "r"))) + (set (reg:P CA_REGNO) + (const_int 1))] + "" + "subfic %0,%1,-1" + [(set_attr "type" "add")]) + + +(define_expand "subf<mode>3_carry_in" + [(parallel [ + (set (match_operand:GPR 0 "gpc_reg_operand") + (plus:GPR (plus:GPR (not:GPR (match_operand:GPR 1 "gpc_reg_operand")) + (reg:GPR CA_REGNO)) + (match_operand:GPR 2 "adde_operand"))) + (clobber (reg:GPR CA_REGNO))])] + "" +{ + if (operands[2] == const0_rtx) + { + emit_insn (gen_subf<mode>3_carry_in_0 (operands[0], operands[1])); + DONE; + } + if (operands[2] == constm1_rtx) + { + emit_insn (gen_subf<mode>3_carry_in_m1 (operands[0], operands[1])); + DONE; + } +}) + +(define_insn "*subf<mode>3_carry_in_internal" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (plus:GPR 
(plus:GPR (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")) + (reg:GPR CA_REGNO)) + (match_operand:GPR 2 "gpc_reg_operand" "r"))) + (clobber (reg:GPR CA_REGNO))] + "" + "subfe %0,%1,%2" + [(set_attr "type" "add")]) + +(define_insn "subf<mode>3_carry_in_0" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (plus:GPR (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")) + (reg:GPR CA_REGNO))) + (clobber (reg:GPR CA_REGNO))] + "" + "subfze %0,%1" + [(set_attr "type" "add")]) + +(define_insn "subf<mode>3_carry_in_m1" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (plus:GPR (minus:GPR (reg:GPR CA_REGNO) + (match_operand:GPR 1 "gpc_reg_operand" "r")) + (const_int -2))) + (clobber (reg:GPR CA_REGNO))] + "" + "subfme %0,%1" + [(set_attr "type" "add")]) + +(define_insn "subf<mode>3_carry_in_xx" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (plus:GPR (reg:GPR CA_REGNO) + (const_int -1))) + (clobber (reg:GPR CA_REGNO))] + "" + "subfe %0,%0,%0" + [(set_attr "type" "add")]) + + +(define_insn "neg<mode>2" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (neg:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")))] + "" + "neg %0,%1" + [(set_attr "type" "add")]) + +(define_insn_and_split "*neg<mode>2_dot" + [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y") + (compare:CC (neg:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")) + (const_int 0))) + (clobber (match_scratch:GPR 0 "=r,r"))] + "<MODE>mode == Pmode" + "@ + neg. %0,%1 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[2], CCmode)" + [(set (match_dup 0) + (neg:GPR (match_dup 1))) + (set (match_dup 2) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "add") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*neg<mode>2_dot2" + [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y") + (compare:CC (neg:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")) + (const_int 0))) + (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") + (neg:GPR (match_dup 1)))] + "<MODE>mode == Pmode" + "@ + neg. 
%0,%1 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[2], CCmode)" + [(set (match_dup 0) + (neg:GPR (match_dup 1))) + (set (match_dup 2) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "add") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + + +(define_insn "clz<mode>2" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (clz:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")))] + "" + "cntlz<wd> %0,%1" + [(set_attr "type" "cntlz")]) + +(define_expand "ctz<mode>2" + [(set (match_operand:GPR 0 "gpc_reg_operand") + (ctz:GPR (match_operand:GPR 1 "gpc_reg_operand")))] + "" +{ + if (TARGET_CTZ) + { + emit_insn (gen_ctz<mode>2_hw (operands[0], operands[1])); + DONE; + } + + rtx tmp1 = gen_reg_rtx (<MODE>mode); + rtx tmp2 = gen_reg_rtx (<MODE>mode); + rtx tmp3 = gen_reg_rtx (<MODE>mode); + + if (TARGET_POPCNTD) + { + emit_insn (gen_add<mode>3 (tmp1, operands[1], constm1_rtx)); + emit_insn (gen_one_cmpl<mode>2 (tmp2, operands[1])); + emit_insn (gen_and<mode>3 (tmp3, tmp1, tmp2)); + emit_insn (gen_popcntd<mode>2 (operands[0], tmp3)); + } + else + { + emit_insn (gen_neg<mode>2 (tmp1, operands[1])); + emit_insn (gen_and<mode>3 (tmp2, operands[1], tmp1)); + emit_insn (gen_clz<mode>2 (tmp3, tmp2)); + emit_insn (gen_sub<mode>3 (operands[0], GEN_INT (<bits> - 1), tmp3)); + } + + DONE; +}) + +(define_insn "ctz<mode>2_hw" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (ctz:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")))] + "TARGET_CTZ" + "cnttz<wd> %0,%1" + [(set_attr "type" "cntlz")]) + +(define_expand "ffs<mode>2" + [(set (match_operand:GPR 0 "gpc_reg_operand") + (ffs:GPR (match_operand:GPR 1 "gpc_reg_operand")))] + "" +{ + rtx tmp1 = gen_reg_rtx (<MODE>mode); + rtx tmp2 = gen_reg_rtx (<MODE>mode); + rtx tmp3 = gen_reg_rtx (<MODE>mode); + emit_insn (gen_neg<mode>2 (tmp1, operands[1])); + emit_insn (gen_and<mode>3 (tmp2, operands[1], tmp1)); + emit_insn (gen_clz<mode>2 (tmp3, tmp2)); + emit_insn (gen_sub<mode>3 (operands[0], GEN_INT (<bits>), tmp3)); + DONE; +}) + + +(define_expand "popcount<mode>2" + [(set (match_operand:GPR 0 "gpc_reg_operand" "") + (popcount:GPR (match_operand:GPR 1 "gpc_reg_operand" "")))] + "TARGET_POPCNTB || TARGET_POPCNTD" +{ + rs6000_emit_popcount (operands[0], operands[1]); + DONE; +}) + +(define_insn "popcntb<mode>2" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] + UNSPEC_POPCNTB))] + "TARGET_POPCNTB" + "popcntb %0,%1" + [(set_attr "type" "popcnt")]) + +(define_insn "popcntd<mode>2" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (popcount:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")))] + "TARGET_POPCNTD" + "popcnt<wd> %0,%1" + [(set_attr "type" "popcnt")]) + + +(define_expand "parity<mode>2" + [(set (match_operand:GPR 0 "gpc_reg_operand" "") + (parity:GPR (match_operand:GPR 1 "gpc_reg_operand" "")))] + "TARGET_POPCNTB" +{ + rs6000_emit_parity (operands[0], operands[1]); + DONE; +}) + +(define_insn "parity<mode>2_cmpb" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] UNSPEC_PARITY))] + "TARGET_CMPB && TARGET_POPCNTB" + "prty<wd> %0,%1" + [(set_attr "type" "popcnt")]) + +(define_insn "cmpb<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "gpc_reg_operand" "r")] UNSPEC_CMPB))] + "TARGET_CMPB" + "cmpb %0,%1,%2" + [(set_attr "type" "cmp")]) + +;; Since the hardware zeros the upper part of the 
register, save generating the +;; AND immediate if we are converting to unsigned +(define_insn "*bswap<mode>2_extenddi" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (zero_extend:DI + (bswap:HSI (match_operand:HSI 1 "memory_operand" "Z"))))] + "TARGET_POWERPC64" + "l<wd>brx %0,%y1" + [(set_attr "length" "4") + (set_attr "type" "load")]) + +(define_insn "*bswaphi2_extendsi" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (zero_extend:SI + (bswap:HI (match_operand:HI 1 "memory_operand" "Z"))))] + "" + "lhbrx %0,%y1" + [(set_attr "length" "4") + (set_attr "type" "load")]) + +;; Separate the bswap patterns into load, store, and gpr<-gpr. This prevents +;; the register allocator from converting a gpr<-gpr swap into a store and then +;; load with byte swap, which can be slower than doing it in the registers. It +;; also prevents certain failures with the RELOAD register allocator. + +(define_expand "bswap<mode>2" + [(use (match_operand:HSI 0 "reg_or_mem_operand")) + (use (match_operand:HSI 1 "reg_or_mem_operand"))] + "" +{ + rtx dest = operands[0]; + rtx src = operands[1]; + + if (!REG_P (dest) && !REG_P (src)) + src = force_reg (<MODE>mode, src); + + if (MEM_P (src)) + emit_insn (gen_bswap<mode>2_load (dest, src)); + else if (MEM_P (dest)) + emit_insn (gen_bswap<mode>2_store (dest, src)); + else + emit_insn (gen_bswap<mode>2_reg (dest, src)); + DONE; +}) + +(define_insn "bswap<mode>2_load" + [(set (match_operand:HSI 0 "gpc_reg_operand" "=r") + (bswap:HSI (match_operand:HSI 1 "memory_operand" "Z")))] + "" + "l<wd>brx %0,%y1" + [(set_attr "type" "load")]) + +(define_insn "bswap<mode>2_store" + [(set (match_operand:HSI 0 "memory_operand" "=Z") + (bswap:HSI (match_operand:HSI 1 "gpc_reg_operand" "r")))] + "" + "st<wd>brx %1,%y0" + [(set_attr "type" "store")]) + +(define_insn_and_split "bswaphi2_reg" + [(set (match_operand:HI 0 "gpc_reg_operand" "=&r") + (bswap:HI + (match_operand:HI 1 "gpc_reg_operand" "r"))) + (clobber (match_scratch:SI 2 "=&r"))] + "" + "#" + "reload_completed" + [(set (match_dup 3) + (and:SI (lshiftrt:SI (match_dup 4) + (const_int 8)) + (const_int 255))) + (set (match_dup 2) + (and:SI (ashift:SI (match_dup 4) + (const_int 8)) + (const_int 65280))) ;; 0xff00 + (set (match_dup 3) + (ior:SI (match_dup 3) + (match_dup 2)))] +{ + operands[3] = simplify_gen_subreg (SImode, operands[0], HImode, 0); + operands[4] = simplify_gen_subreg (SImode, operands[1], HImode, 0); +} + [(set_attr "length" "12") + (set_attr "type" "*")]) + +;; We are always BITS_BIG_ENDIAN, so the bit positions below in +;; zero_extract insns do not change for -mlittle. +(define_insn_and_split "bswapsi2_reg" + [(set (match_operand:SI 0 "gpc_reg_operand" "=&r") + (bswap:SI + (match_operand:SI 1 "gpc_reg_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 0) ; DABC + (rotate:SI (match_dup 1) + (const_int 24))) + (set (match_dup 0) ; DCBC + (ior:SI (and:SI (ashift:SI (match_dup 1) + (const_int 8)) + (const_int 16711680)) + (and:SI (match_dup 0) + (const_int -16711681)))) + (set (match_dup 0) ; DCBA + (ior:SI (and:SI (lshiftrt:SI (match_dup 1) + (const_int 24)) + (const_int 255)) + (and:SI (match_dup 0) + (const_int -256))))] + "") + +;; On systems with LDBRX/STDBRX generate the loads/stores directly, just like +;; we do for L{H,W}BRX and ST{H,W}BRX above. If not, we have to generate more +;; complex code. 
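+;; Without ldbrx/stdbrx the 64-bit byte swap is done 32 bits at a time: the
+;; splits below byte-reverse each 32-bit half with lwbrx/stwbrx (or with the
+;; register bswapsi2 sequence) and, for the register-to-register case,
+;; recombine the two halves with 32-bit shifts and an or.  On 32-bit targets
+;; the two result words of bswapdi2_32bit simply trade places, each one
+;; byte-reversed.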
+ +(define_expand "bswapdi2" + [(parallel [(set (match_operand:DI 0 "reg_or_mem_operand" "") + (bswap:DI + (match_operand:DI 1 "reg_or_mem_operand" ""))) + (clobber (match_scratch:DI 2 "")) + (clobber (match_scratch:DI 3 ""))])] + "" +{ + rtx dest = operands[0]; + rtx src = operands[1]; + + if (!REG_P (dest) && !REG_P (src)) + operands[1] = src = force_reg (DImode, src); + + if (TARGET_POWERPC64 && TARGET_LDBRX) + { + if (MEM_P (src)) + emit_insn (gen_bswapdi2_load (dest, src)); + else if (MEM_P (dest)) + emit_insn (gen_bswapdi2_store (dest, src)); + else + emit_insn (gen_bswapdi2_reg (dest, src)); + DONE; + } + + if (!TARGET_POWERPC64) + { + /* 32-bit mode needs fewer scratch registers, but 32-bit addressing mode + that uses 64-bit registers needs the same scratch registers as 64-bit + mode. */ + emit_insn (gen_bswapdi2_32bit (dest, src)); + DONE; + } +}) + +;; Power7/cell has ldbrx/stdbrx, so use it directly +(define_insn "bswapdi2_load" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (bswap:DI (match_operand:DI 1 "memory_operand" "Z")))] + "TARGET_POWERPC64 && TARGET_LDBRX" + "ldbrx %0,%y1" + [(set_attr "type" "load")]) + +(define_insn "bswapdi2_store" + [(set (match_operand:DI 0 "memory_operand" "=Z") + (bswap:DI (match_operand:DI 1 "gpc_reg_operand" "r")))] + "TARGET_POWERPC64 && TARGET_LDBRX" + "stdbrx %1,%y0" + [(set_attr "type" "store")]) + +(define_insn "bswapdi2_reg" + [(set (match_operand:DI 0 "gpc_reg_operand" "=&r") + (bswap:DI (match_operand:DI 1 "gpc_reg_operand" "r"))) + (clobber (match_scratch:DI 2 "=&r")) + (clobber (match_scratch:DI 3 "=&r"))] + "TARGET_POWERPC64 && TARGET_LDBRX" + "#" + [(set_attr "length" "36")]) + +;; Non-power7/cell, fall back to use lwbrx/stwbrx +(define_insn "*bswapdi2_64bit" + [(set (match_operand:DI 0 "reg_or_mem_operand" "=r,Z,&r") + (bswap:DI (match_operand:DI 1 "reg_or_mem_operand" "Z,r,r"))) + (clobber (match_scratch:DI 2 "=&b,&b,&r")) + (clobber (match_scratch:DI 3 "=&r,&r,&r"))] + "TARGET_POWERPC64 && !TARGET_LDBRX + && (REG_P (operands[0]) || REG_P (operands[1])) + && !(MEM_P (operands[0]) && MEM_VOLATILE_P (operands[0])) + && !(MEM_P (operands[1]) && MEM_VOLATILE_P (operands[1]))" + "#" + [(set_attr "length" "16,12,36")]) + +(define_split + [(set (match_operand:DI 0 "gpc_reg_operand" "") + (bswap:DI (match_operand:DI 1 "indexed_or_indirect_operand" ""))) + (clobber (match_operand:DI 2 "gpc_reg_operand" "")) + (clobber (match_operand:DI 3 "gpc_reg_operand" ""))] + "TARGET_POWERPC64 && !TARGET_LDBRX && reload_completed" + [(const_int 0)] + " +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx op2 = operands[2]; + rtx op3 = operands[3]; + rtx op3_32 = simplify_gen_subreg (SImode, op3, DImode, + BYTES_BIG_ENDIAN ? 4 : 0); + rtx dest_32 = simplify_gen_subreg (SImode, dest, DImode, + BYTES_BIG_ENDIAN ? 
4 : 0); + rtx addr1; + rtx addr2; + rtx word1; + rtx word2; + + addr1 = XEXP (src, 0); + if (GET_CODE (addr1) == PLUS) + { + emit_insn (gen_add3_insn (op2, XEXP (addr1, 0), GEN_INT (4))); + if (TARGET_AVOID_XFORM) + { + emit_insn (gen_add3_insn (op2, XEXP (addr1, 1), op2)); + addr2 = op2; + } + else + addr2 = gen_rtx_PLUS (Pmode, op2, XEXP (addr1, 1)); + } + else if (TARGET_AVOID_XFORM) + { + emit_insn (gen_add3_insn (op2, addr1, GEN_INT (4))); + addr2 = op2; + } + else + { + emit_move_insn (op2, GEN_INT (4)); + addr2 = gen_rtx_PLUS (Pmode, op2, addr1); + } + + word1 = change_address (src, SImode, addr1); + word2 = change_address (src, SImode, addr2); + + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_bswapsi2 (op3_32, word2)); + emit_insn (gen_bswapsi2 (dest_32, word1)); + } + else + { + emit_insn (gen_bswapsi2 (op3_32, word1)); + emit_insn (gen_bswapsi2 (dest_32, word2)); + } + + emit_insn (gen_ashldi3 (op3, op3, GEN_INT (32))); + emit_insn (gen_iordi3 (dest, dest, op3)); + DONE; +}") + +(define_split + [(set (match_operand:DI 0 "indexed_or_indirect_operand" "") + (bswap:DI (match_operand:DI 1 "gpc_reg_operand" ""))) + (clobber (match_operand:DI 2 "gpc_reg_operand" "")) + (clobber (match_operand:DI 3 "gpc_reg_operand" ""))] + "TARGET_POWERPC64 && !TARGET_LDBRX && reload_completed" + [(const_int 0)] + " +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx op2 = operands[2]; + rtx op3 = operands[3]; + rtx src_si = simplify_gen_subreg (SImode, src, DImode, + BYTES_BIG_ENDIAN ? 4 : 0); + rtx op3_si = simplify_gen_subreg (SImode, op3, DImode, + BYTES_BIG_ENDIAN ? 4 : 0); + rtx addr1; + rtx addr2; + rtx word1; + rtx word2; + + addr1 = XEXP (dest, 0); + if (GET_CODE (addr1) == PLUS) + { + emit_insn (gen_add3_insn (op2, XEXP (addr1, 0), GEN_INT (4))); + if (TARGET_AVOID_XFORM) + { + emit_insn (gen_add3_insn (op2, XEXP (addr1, 1), op2)); + addr2 = op2; + } + else + addr2 = gen_rtx_PLUS (Pmode, op2, XEXP (addr1, 1)); + } + else if (TARGET_AVOID_XFORM) + { + emit_insn (gen_add3_insn (op2, addr1, GEN_INT (4))); + addr2 = op2; + } + else + { + emit_move_insn (op2, GEN_INT (4)); + addr2 = gen_rtx_PLUS (Pmode, op2, addr1); + } + + word1 = change_address (dest, SImode, addr1); + word2 = change_address (dest, SImode, addr2); + + emit_insn (gen_lshrdi3 (op3, src, GEN_INT (32))); + + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_bswapsi2 (word1, src_si)); + emit_insn (gen_bswapsi2 (word2, op3_si)); + } + else + { + emit_insn (gen_bswapsi2 (word2, src_si)); + emit_insn (gen_bswapsi2 (word1, op3_si)); + } + DONE; +}") + +(define_split + [(set (match_operand:DI 0 "gpc_reg_operand" "") + (bswap:DI (match_operand:DI 1 "gpc_reg_operand" ""))) + (clobber (match_operand:DI 2 "gpc_reg_operand" "")) + (clobber (match_operand:DI 3 "gpc_reg_operand" ""))] + "TARGET_POWERPC64 && reload_completed" + [(const_int 0)] + " +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx op2 = operands[2]; + rtx op3 = operands[3]; + int lo_off = BYTES_BIG_ENDIAN ? 
4 : 0; + rtx dest_si = simplify_gen_subreg (SImode, dest, DImode, lo_off); + rtx src_si = simplify_gen_subreg (SImode, src, DImode, lo_off); + rtx op2_si = simplify_gen_subreg (SImode, op2, DImode, lo_off); + rtx op3_si = simplify_gen_subreg (SImode, op3, DImode, lo_off); + + emit_insn (gen_lshrdi3 (op2, src, GEN_INT (32))); + emit_insn (gen_bswapsi2 (dest_si, src_si)); + emit_insn (gen_bswapsi2 (op3_si, op2_si)); + emit_insn (gen_ashldi3 (dest, dest, GEN_INT (32))); + emit_insn (gen_iordi3 (dest, dest, op3)); + DONE; +}") + +(define_insn "bswapdi2_32bit" + [(set (match_operand:DI 0 "reg_or_mem_operand" "=r,Z,?&r") + (bswap:DI (match_operand:DI 1 "reg_or_mem_operand" "Z,r,r"))) + (clobber (match_scratch:SI 2 "=&b,&b,X"))] + "!TARGET_POWERPC64 && (REG_P (operands[0]) || REG_P (operands[1]))" + "#" + [(set_attr "length" "16,12,36")]) + +(define_split + [(set (match_operand:DI 0 "gpc_reg_operand" "") + (bswap:DI (match_operand:DI 1 "indexed_or_indirect_operand" ""))) + (clobber (match_operand:SI 2 "gpc_reg_operand" ""))] + "!TARGET_POWERPC64 && reload_completed" + [(const_int 0)] + " +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx op2 = operands[2]; + rtx dest1 = simplify_gen_subreg (SImode, dest, DImode, 0); + rtx dest2 = simplify_gen_subreg (SImode, dest, DImode, 4); + rtx addr1; + rtx addr2; + rtx word1; + rtx word2; + + addr1 = XEXP (src, 0); + if (GET_CODE (addr1) == PLUS) + { + emit_insn (gen_add3_insn (op2, XEXP (addr1, 0), GEN_INT (4))); + if (TARGET_AVOID_XFORM + || REGNO (XEXP (addr1, 1)) == REGNO (dest2)) + { + emit_insn (gen_add3_insn (op2, XEXP (addr1, 1), op2)); + addr2 = op2; + } + else + addr2 = gen_rtx_PLUS (SImode, op2, XEXP (addr1, 1)); + } + else if (TARGET_AVOID_XFORM + || REGNO (addr1) == REGNO (dest2)) + { + emit_insn (gen_add3_insn (op2, addr1, GEN_INT (4))); + addr2 = op2; + } + else + { + emit_move_insn (op2, GEN_INT (4)); + addr2 = gen_rtx_PLUS (SImode, op2, addr1); + } + + word1 = change_address (src, SImode, addr1); + word2 = change_address (src, SImode, addr2); + + emit_insn (gen_bswapsi2 (dest2, word1)); + /* The REGNO (dest2) tests above ensure that addr2 has not been trashed, + thus allowing us to omit an early clobber on the output. 
*/ + emit_insn (gen_bswapsi2 (dest1, word2)); + DONE; +}") + +(define_split + [(set (match_operand:DI 0 "indexed_or_indirect_operand" "") + (bswap:DI (match_operand:DI 1 "gpc_reg_operand" ""))) + (clobber (match_operand:SI 2 "gpc_reg_operand" ""))] + "!TARGET_POWERPC64 && reload_completed" + [(const_int 0)] + " +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx op2 = operands[2]; + rtx src1 = simplify_gen_subreg (SImode, src, DImode, 0); + rtx src2 = simplify_gen_subreg (SImode, src, DImode, 4); + rtx addr1; + rtx addr2; + rtx word1; + rtx word2; + + addr1 = XEXP (dest, 0); + if (GET_CODE (addr1) == PLUS) + { + emit_insn (gen_add3_insn (op2, XEXP (addr1, 0), GEN_INT (4))); + if (TARGET_AVOID_XFORM) + { + emit_insn (gen_add3_insn (op2, XEXP (addr1, 1), op2)); + addr2 = op2; + } + else + addr2 = gen_rtx_PLUS (SImode, op2, XEXP (addr1, 1)); + } + else if (TARGET_AVOID_XFORM) + { + emit_insn (gen_add3_insn (op2, addr1, GEN_INT (4))); + addr2 = op2; + } + else + { + emit_move_insn (op2, GEN_INT (4)); + addr2 = gen_rtx_PLUS (SImode, op2, addr1); + } + + word1 = change_address (dest, SImode, addr1); + word2 = change_address (dest, SImode, addr2); + + emit_insn (gen_bswapsi2 (word2, src1)); + emit_insn (gen_bswapsi2 (word1, src2)); + DONE; +}") + +(define_split + [(set (match_operand:DI 0 "gpc_reg_operand" "") + (bswap:DI (match_operand:DI 1 "gpc_reg_operand" ""))) + (clobber (match_operand:SI 2 "" ""))] + "!TARGET_POWERPC64 && reload_completed" + [(const_int 0)] + " +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx src1 = simplify_gen_subreg (SImode, src, DImode, 0); + rtx src2 = simplify_gen_subreg (SImode, src, DImode, 4); + rtx dest1 = simplify_gen_subreg (SImode, dest, DImode, 0); + rtx dest2 = simplify_gen_subreg (SImode, dest, DImode, 4); + + emit_insn (gen_bswapsi2 (dest1, src2)); + emit_insn (gen_bswapsi2 (dest2, src1)); + DONE; +}") + + +(define_insn "mul<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") + (mult:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r") + (match_operand:GPR 2 "reg_or_short_operand" "r,I")))] + "" + "@ + mull<wd> %0,%1,%2 + mulli %0,%1,%2" + [(set_attr "type" "mul") + (set (attr "size") + (cond [(match_operand:GPR 2 "s8bit_cint_operand" "") + (const_string "8") + (match_operand:GPR 2 "short_cint_operand" "") + (const_string "16")] + (const_string "<bits>")))]) + +(define_insn_and_split "*mul<mode>3_dot" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (mult:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r") + (match_operand:GPR 2 "gpc_reg_operand" "r,r")) + (const_int 0))) + (clobber (match_scratch:GPR 0 "=r,r"))] + "<MODE>mode == Pmode && rs6000_gen_cell_microcode" + "@ + mull<wd>. %0,%1,%2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(set (match_dup 0) + (mult:GPR (match_dup 1) + (match_dup 2))) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "mul") + (set_attr "size" "<bits>") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*mul<mode>3_dot2" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (mult:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r") + (match_operand:GPR 2 "gpc_reg_operand" "r,r")) + (const_int 0))) + (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") + (mult:GPR (match_dup 1) + (match_dup 2)))] + "<MODE>mode == Pmode && rs6000_gen_cell_microcode" + "@ + mull<wd>. 
%0,%1,%2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(set (match_dup 0) + (mult:GPR (match_dup 1) + (match_dup 2))) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "mul") + (set_attr "size" "<bits>") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + + +(define_expand "<su>mul<mode>3_highpart" + [(set (match_operand:GPR 0 "gpc_reg_operand") + (subreg:GPR + (mult:<DMODE> (any_extend:<DMODE> + (match_operand:GPR 1 "gpc_reg_operand")) + (any_extend:<DMODE> + (match_operand:GPR 2 "gpc_reg_operand"))) + 0))] + "" +{ + if (<MODE>mode == SImode && TARGET_POWERPC64) + { + emit_insn (gen_<su>mulsi3_highpart_64 (operands[0], operands[1], + operands[2])); + DONE; + } + + if (!WORDS_BIG_ENDIAN) + { + emit_insn (gen_<su>mul<mode>3_highpart_le (operands[0], operands[1], + operands[2])); + DONE; + } +}) + +(define_insn "*<su>mul<mode>3_highpart" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (subreg:GPR + (mult:<DMODE> (any_extend:<DMODE> + (match_operand:GPR 1 "gpc_reg_operand" "r")) + (any_extend:<DMODE> + (match_operand:GPR 2 "gpc_reg_operand" "r"))) + 0))] + "WORDS_BIG_ENDIAN && !(<MODE>mode == SImode && TARGET_POWERPC64)" + "mulh<wd><u> %0,%1,%2" + [(set_attr "type" "mul") + (set_attr "size" "<bits>")]) + +(define_insn "<su>mulsi3_highpart_le" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (subreg:SI + (mult:DI (any_extend:DI + (match_operand:SI 1 "gpc_reg_operand" "r")) + (any_extend:DI + (match_operand:SI 2 "gpc_reg_operand" "r"))) + 4))] + "!WORDS_BIG_ENDIAN && !TARGET_POWERPC64" + "mulhw<u> %0,%1,%2" + [(set_attr "type" "mul")]) + +(define_insn "<su>muldi3_highpart_le" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (subreg:DI + (mult:TI (any_extend:TI + (match_operand:DI 1 "gpc_reg_operand" "r")) + (any_extend:TI + (match_operand:DI 2 "gpc_reg_operand" "r"))) + 8))] + "!WORDS_BIG_ENDIAN && TARGET_POWERPC64" + "mulhd<u> %0,%1,%2" + [(set_attr "type" "mul") + (set_attr "size" "64")]) + +(define_insn "<su>mulsi3_highpart_64" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (truncate:SI + (lshiftrt:DI + (mult:DI (any_extend:DI + (match_operand:SI 1 "gpc_reg_operand" "r")) + (any_extend:DI + (match_operand:SI 2 "gpc_reg_operand" "r"))) + (const_int 32))))] + "TARGET_POWERPC64" + "mulhw<u> %0,%1,%2" + [(set_attr "type" "mul")]) + +(define_expand "<u>mul<mode><dmode>3" + [(set (match_operand:<DMODE> 0 "gpc_reg_operand") + (mult:<DMODE> (any_extend:<DMODE> + (match_operand:GPR 1 "gpc_reg_operand")) + (any_extend:<DMODE> + (match_operand:GPR 2 "gpc_reg_operand"))))] + "!(<MODE>mode == SImode && TARGET_POWERPC64)" +{ + rtx l = gen_reg_rtx (<MODE>mode); + rtx h = gen_reg_rtx (<MODE>mode); + emit_insn (gen_mul<mode>3 (l, operands[1], operands[2])); + emit_insn (gen_<su>mul<mode>3_highpart (h, operands[1], operands[2])); + emit_move_insn (gen_lowpart (<MODE>mode, operands[0]), l); + emit_move_insn (gen_highpart (<MODE>mode, operands[0]), h); + DONE; +}) + +(define_insn "*maddld4" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (plus:DI (mult:DI (match_operand:DI 1 "gpc_reg_operand" "r") + (match_operand:DI 2 "gpc_reg_operand" "r")) + (match_operand:DI 3 "gpc_reg_operand" "r")))] + "TARGET_MADDLD" + "maddld %0,%1,%2,%3" + [(set_attr "type" "mul")]) + +(define_insn "udiv<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "gpc_reg_operand" "r")))] + "" + "div<wd>u %0,%1,%2" + [(set_attr "type" "div") + 
(set_attr "size" "<bits>")]) + + +;; For powers of two we can do sra[wd]i/addze for divide and then adjust for +;; modulus. If it isn't a power of two, force operands into register and do +;; a normal divide. +(define_expand "div<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "") + (div:GPR (match_operand:GPR 1 "gpc_reg_operand" "") + (match_operand:GPR 2 "reg_or_cint_operand" "")))] + "" +{ + if (CONST_INT_P (operands[2]) + && INTVAL (operands[2]) > 0 + && exact_log2 (INTVAL (operands[2])) >= 0) + { + emit_insn (gen_div<mode>3_sra (operands[0], operands[1], operands[2])); + DONE; + } + + operands[2] = force_reg (<MODE>mode, operands[2]); +}) + +(define_insn "*div<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (div:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "gpc_reg_operand" "r")))] + "" + "div<wd> %0,%1,%2" + [(set_attr "type" "div") + (set_attr "size" "<bits>")]) + +(define_insn "div<mode>3_sra" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (div:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "exact_log2_cint_operand" "N"))) + (clobber (reg:GPR CA_REGNO))] + "" + "sra<wd>i %0,%1,%p2\;addze %0,%0" + [(set_attr "type" "two") + (set_attr "length" "8")]) + +(define_insn_and_split "*div<mode>3_sra_dot" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (div:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r") + (match_operand:GPR 2 "exact_log2_cint_operand" "N,N")) + (const_int 0))) + (clobber (match_scratch:GPR 0 "=r,r")) + (clobber (reg:GPR CA_REGNO))] + "<MODE>mode == Pmode" + "@ + sra<wd>i %0,%1,%p2\;addze. %0,%0 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(parallel [(set (match_dup 0) + (div:GPR (match_dup 1) + (match_dup 2))) + (clobber (reg:GPR CA_REGNO))]) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "two") + (set_attr "length" "8,12") + (set_attr "cell_micro" "not")]) + +(define_insn_and_split "*div<mode>3_sra_dot2" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (div:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r") + (match_operand:GPR 2 "exact_log2_cint_operand" "N,N")) + (const_int 0))) + (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") + (div:GPR (match_dup 1) + (match_dup 2))) + (clobber (reg:GPR CA_REGNO))] + "<MODE>mode == Pmode" + "@ + sra<wd>i %0,%1,%p2\;addze. 
%0,%0 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(parallel [(set (match_dup 0) + (div:GPR (match_dup 1) + (match_dup 2))) + (clobber (reg:GPR CA_REGNO))]) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "two") + (set_attr "length" "8,12") + (set_attr "cell_micro" "not")]) + +(define_expand "mod<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand") + (mod:GPR (match_operand:GPR 1 "gpc_reg_operand") + (match_operand:GPR 2 "reg_or_cint_operand")))] + "" +{ + int i; + rtx temp1; + rtx temp2; + + if (GET_CODE (operands[2]) != CONST_INT + || INTVAL (operands[2]) <= 0 + || (i = exact_log2 (INTVAL (operands[2]))) < 0) + { + if (!TARGET_MODULO) + FAIL; + + operands[2] = force_reg (<MODE>mode, operands[2]); + } + else + { + temp1 = gen_reg_rtx (<MODE>mode); + temp2 = gen_reg_rtx (<MODE>mode); + + emit_insn (gen_div<mode>3 (temp1, operands[1], operands[2])); + emit_insn (gen_ashl<mode>3 (temp2, temp1, GEN_INT (i))); + emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2)); + DONE; + } +}) + +;; In order to enable using a peephole2 for combining div/mod to eliminate the +;; mod, prefer putting the result of mod into a different register +(define_insn "*mod<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r") + (mod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "gpc_reg_operand" "r")))] + "TARGET_MODULO" + "mods<wd> %0,%1,%2" + [(set_attr "type" "div") + (set_attr "size" "<bits>")]) + + +(define_insn "umod<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r") + (umod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "gpc_reg_operand" "r")))] + "TARGET_MODULO" + "modu<wd> %0,%1,%2" + [(set_attr "type" "div") + (set_attr "size" "<bits>")]) + +;; On machines with modulo support, do a combined div/mod the old fashioned +;; method, since the multiply/subtract is faster than doing the mod instruction +;; after a divide. + +(define_peephole2 + [(set (match_operand:GPR 0 "gpc_reg_operand" "") + (div:GPR (match_operand:GPR 1 "gpc_reg_operand" "") + (match_operand:GPR 2 "gpc_reg_operand" ""))) + (set (match_operand:GPR 3 "gpc_reg_operand" "") + (mod:GPR (match_dup 1) + (match_dup 2)))] + "TARGET_MODULO + && ! reg_mentioned_p (operands[0], operands[1]) + && ! reg_mentioned_p (operands[0], operands[2]) + && ! reg_mentioned_p (operands[3], operands[1]) + && ! reg_mentioned_p (operands[3], operands[2])" + [(set (match_dup 0) + (div:GPR (match_dup 1) + (match_dup 2))) + (set (match_dup 3) + (mult:GPR (match_dup 0) + (match_dup 2))) + (set (match_dup 3) + (minus:GPR (match_dup 1) + (match_dup 3)))]) + +(define_peephole2 + [(set (match_operand:GPR 0 "gpc_reg_operand" "") + (udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "") + (match_operand:GPR 2 "gpc_reg_operand" ""))) + (set (match_operand:GPR 3 "gpc_reg_operand" "") + (umod:GPR (match_dup 1) + (match_dup 2)))] + "TARGET_MODULO + && ! reg_mentioned_p (operands[0], operands[1]) + && ! reg_mentioned_p (operands[0], operands[2]) + && ! reg_mentioned_p (operands[3], operands[1]) + && ! 
reg_mentioned_p (operands[3], operands[2])" + [(set (match_dup 0) + (udiv:GPR (match_dup 1) + (match_dup 2))) + (set (match_dup 3) + (mult:GPR (match_dup 0) + (match_dup 2))) + (set (match_dup 3) + (minus:GPR (match_dup 1) + (match_dup 3)))]) + + +;; Logical instructions +;; The logical instructions are mostly combined by using match_operator, +;; but the plain AND insns are somewhat different because there is no +;; plain 'andi' (only 'andi.'), no plain 'andis', and there are all +;; those rotate-and-mask operations. Thus, the AND insns come first. + +(define_expand "and<mode>3" + [(set (match_operand:SDI 0 "gpc_reg_operand" "") + (and:SDI (match_operand:SDI 1 "gpc_reg_operand" "") + (match_operand:SDI 2 "reg_or_cint_operand" "")))] + "" +{ + if (<MODE>mode == DImode && !TARGET_POWERPC64) + { + rs6000_split_logical (operands, AND, false, false, false); + DONE; + } + + if (CONST_INT_P (operands[2])) + { + if (rs6000_is_valid_and_mask (operands[2], <MODE>mode)) + { + emit_insn (gen_and<mode>3_mask (operands[0], operands[1], operands[2])); + DONE; + } + + if (logical_const_operand (operands[2], <MODE>mode) + && rs6000_gen_cell_microcode) + { + emit_insn (gen_and<mode>3_imm (operands[0], operands[1], operands[2])); + DONE; + } + + if (rs6000_is_valid_2insn_and (operands[2], <MODE>mode)) + { + rs6000_emit_2insn_and (<MODE>mode, operands, true, 0); + DONE; + } + + operands[2] = force_reg (<MODE>mode, operands[2]); + } +}) + + +(define_insn "and<mode>3_imm" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (and:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r") + (match_operand:GPR 2 "logical_const_operand" "n"))) + (clobber (match_scratch:CC 3 "=x"))] + "rs6000_gen_cell_microcode + && !rs6000_is_valid_and_mask (operands[2], <MODE>mode)" + "andi%e2. %0,%1,%u2" + [(set_attr "type" "logical") + (set_attr "dot" "yes")]) + +(define_insn_and_split "*and<mode>3_imm_dot" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,??y") + (compare:CC (and:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r") + (match_operand:GPR 2 "logical_const_operand" "n,n")) + (const_int 0))) + (clobber (match_scratch:GPR 0 "=r,r")) + (clobber (match_scratch:CC 4 "=X,x"))] + "(<MODE>mode == Pmode || UINTVAL (operands[2]) <= 0x7fffffff) + && rs6000_gen_cell_microcode + && !rs6000_is_valid_and_mask (operands[2], <MODE>mode)" + "@ + andi%e2. %0,%1,%u2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(parallel [(set (match_dup 0) + (and:GPR (match_dup 1) + (match_dup 2))) + (clobber (match_dup 4))]) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "logical") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*and<mode>3_imm_dot2" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,??y") + (compare:CC (and:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r") + (match_operand:GPR 2 "logical_const_operand" "n,n")) + (const_int 0))) + (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") + (and:GPR (match_dup 1) + (match_dup 2))) + (clobber (match_scratch:CC 4 "=X,x"))] + "(<MODE>mode == Pmode || UINTVAL (operands[2]) <= 0x7fffffff) + && rs6000_gen_cell_microcode + && !rs6000_is_valid_and_mask (operands[2], <MODE>mode)" + "@ + andi%e2. 
%0,%1,%u2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(parallel [(set (match_dup 0) + (and:GPR (match_dup 1) + (match_dup 2))) + (clobber (match_dup 4))]) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "logical") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*and<mode>3_imm_mask_dot" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,??y") + (compare:CC (and:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r") + (match_operand:GPR 2 "logical_const_operand" "n,n")) + (const_int 0))) + (clobber (match_scratch:GPR 0 "=r,r"))] + "(<MODE>mode == Pmode || UINTVAL (operands[2]) <= 0x7fffffff) + && rs6000_gen_cell_microcode + && rs6000_is_valid_and_mask (operands[2], <MODE>mode)" + "@ + andi%e2. %0,%1,%u2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(set (match_dup 0) + (and:GPR (match_dup 1) + (match_dup 2))) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "logical") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*and<mode>3_imm_mask_dot2" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,??y") + (compare:CC (and:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r") + (match_operand:GPR 2 "logical_const_operand" "n,n")) + (const_int 0))) + (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") + (and:GPR (match_dup 1) + (match_dup 2)))] + "(<MODE>mode == Pmode || UINTVAL (operands[2]) <= 0x7fffffff) + && rs6000_gen_cell_microcode + && rs6000_is_valid_and_mask (operands[2], <MODE>mode)" + "@ + andi%e2. %0,%1,%u2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(set (match_dup 0) + (and:GPR (match_dup 1) + (match_dup 2))) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "logical") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn "*and<mode>3_imm_dot_shifted" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (compare:CC + (and:GPR + (lshiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r") + (match_operand:SI 4 "const_int_operand" "n")) + (match_operand:GPR 2 "const_int_operand" "n")) + (const_int 0))) + (clobber (match_scratch:GPR 0 "=r"))] + "logical_const_operand (GEN_INT (UINTVAL (operands[2]) + << INTVAL (operands[4])), + DImode) + && (<MODE>mode == Pmode + || (UINTVAL (operands[2]) << INTVAL (operands[4])) <= 0x7fffffff) + && rs6000_gen_cell_microcode" +{ + operands[2] = GEN_INT (UINTVAL (operands[2]) << INTVAL (operands[4])); + return "andi%e2. 
%0,%1,%u2"; +} + [(set_attr "type" "logical") + (set_attr "dot" "yes")]) + + +(define_insn "and<mode>3_mask" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (and:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r") + (match_operand:GPR 2 "const_int_operand" "n")))] + "rs6000_is_valid_and_mask (operands[2], <MODE>mode)" +{ + return rs6000_insn_for_and_mask (<MODE>mode, operands, false); +} + [(set_attr "type" "shift")]) + +(define_insn_and_split "*and<mode>3_mask_dot" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (and:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r") + (match_operand:GPR 2 "const_int_operand" "n,n")) + (const_int 0))) + (clobber (match_scratch:GPR 0 "=r,r"))] + "(<MODE>mode == Pmode || UINTVAL (operands[2]) <= 0x7fffffff) + && rs6000_gen_cell_microcode + && !logical_const_operand (operands[2], <MODE>mode) + && rs6000_is_valid_and_mask (operands[2], <MODE>mode)" +{ + if (which_alternative == 0) + return rs6000_insn_for_and_mask (<MODE>mode, operands, true); + else + return "#"; +} + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(set (match_dup 0) + (and:GPR (match_dup 1) + (match_dup 2))) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "shift") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*and<mode>3_mask_dot2" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (and:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r") + (match_operand:GPR 2 "const_int_operand" "n,n")) + (const_int 0))) + (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") + (and:GPR (match_dup 1) + (match_dup 2)))] + "(<MODE>mode == Pmode || UINTVAL (operands[2]) <= 0x7fffffff) + && rs6000_gen_cell_microcode + && !logical_const_operand (operands[2], <MODE>mode) + && rs6000_is_valid_and_mask (operands[2], <MODE>mode)" +{ + if (which_alternative == 0) + return rs6000_insn_for_and_mask (<MODE>mode, operands, true); + else + return "#"; +} + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(set (match_dup 0) + (and:GPR (match_dup 1) + (match_dup 2))) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "shift") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + + +(define_insn_and_split "*and<mode>3_2insn" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (and:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r") + (match_operand:GPR 2 "const_int_operand" "n")))] + "rs6000_is_valid_2insn_and (operands[2], <MODE>mode) + && !(rs6000_is_valid_and_mask (operands[2], <MODE>mode) + || (logical_const_operand (operands[2], <MODE>mode) + && rs6000_gen_cell_microcode))" + "#" + "&& 1" + [(pc)] +{ + rs6000_emit_2insn_and (<MODE>mode, operands, false, 0); + DONE; +} + [(set_attr "type" "shift") + (set_attr "length" "8")]) + +(define_insn_and_split "*and<mode>3_2insn_dot" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (and:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r") + (match_operand:GPR 2 "const_int_operand" "n,n")) + (const_int 0))) + (clobber (match_scratch:GPR 0 "=r,r"))] + "(<MODE>mode == Pmode || UINTVAL (operands[2]) <= 0x7fffffff) + && rs6000_gen_cell_microcode + && rs6000_is_valid_2insn_and (operands[2], <MODE>mode) + && !(rs6000_is_valid_and_mask (operands[2], <MODE>mode) + || (logical_const_operand (operands[2], <MODE>mode) + && rs6000_gen_cell_microcode))" + "#" + "&& reload_completed" + [(pc)] +{ + rs6000_emit_2insn_and (<MODE>mode, operands, false, 1); + DONE; +} + 
[(set_attr "type" "shift") + (set_attr "dot" "yes") + (set_attr "length" "8,12")]) + +(define_insn_and_split "*and<mode>3_2insn_dot2" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (and:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r") + (match_operand:GPR 2 "const_int_operand" "n,n")) + (const_int 0))) + (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") + (and:GPR (match_dup 1) + (match_dup 2)))] + "(<MODE>mode == Pmode || UINTVAL (operands[2]) <= 0x7fffffff) + && rs6000_gen_cell_microcode + && rs6000_is_valid_2insn_and (operands[2], <MODE>mode) + && !(rs6000_is_valid_and_mask (operands[2], <MODE>mode) + || (logical_const_operand (operands[2], <MODE>mode) + && rs6000_gen_cell_microcode))" + "#" + "&& reload_completed" + [(pc)] +{ + rs6000_emit_2insn_and (<MODE>mode, operands, false, 2); + DONE; +} + [(set_attr "type" "shift") + (set_attr "dot" "yes") + (set_attr "length" "8,12")]) + + +(define_expand "<code><mode>3" + [(set (match_operand:SDI 0 "gpc_reg_operand" "") + (iorxor:SDI (match_operand:SDI 1 "gpc_reg_operand" "") + (match_operand:SDI 2 "reg_or_cint_operand" "")))] + "" +{ + if (<MODE>mode == DImode && !TARGET_POWERPC64) + { + rs6000_split_logical (operands, <CODE>, false, false, false); + DONE; + } + + if (non_logical_cint_operand (operands[2], <MODE>mode)) + { + rtx tmp = ((!can_create_pseudo_p () + || rtx_equal_p (operands[0], operands[1])) + ? operands[0] : gen_reg_rtx (<MODE>mode)); + + HOST_WIDE_INT value = INTVAL (operands[2]); + HOST_WIDE_INT lo = value & 0xffff; + HOST_WIDE_INT hi = value - lo; + + emit_insn (gen_<code><mode>3 (tmp, operands[1], GEN_INT (hi))); + emit_insn (gen_<code><mode>3 (operands[0], tmp, GEN_INT (lo))); + DONE; + } + + if (!reg_or_logical_cint_operand (operands[2], <MODE>mode)) + operands[2] = force_reg (<MODE>mode, operands[2]); +}) + +(define_split + [(set (match_operand:GPR 0 "gpc_reg_operand" "") + (iorxor:GPR (match_operand:GPR 1 "gpc_reg_operand" "") + (match_operand:GPR 2 "non_logical_cint_operand" "")))] + "" + [(set (match_dup 3) + (iorxor:GPR (match_dup 1) + (match_dup 4))) + (set (match_dup 0) + (iorxor:GPR (match_dup 3) + (match_dup 5)))] +{ + operands[3] = ((!can_create_pseudo_p () + || rtx_equal_p (operands[0], operands[1])) + ? operands[0] : gen_reg_rtx (<MODE>mode)); + + HOST_WIDE_INT value = INTVAL (operands[2]); + HOST_WIDE_INT lo = value & 0xffff; + HOST_WIDE_INT hi = value - lo; + + operands[4] = GEN_INT (hi); + operands[5] = GEN_INT (lo); +}) + +(define_insn "*bool<mode>3_imm" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (match_operator:GPR 3 "boolean_or_operator" + [(match_operand:GPR 1 "gpc_reg_operand" "%r") + (match_operand:GPR 2 "logical_const_operand" "n")]))] + "" + "%q3i%e2 %0,%1,%u2" + [(set_attr "type" "logical")]) + +(define_insn "*bool<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (match_operator:GPR 3 "boolean_operator" + [(match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "gpc_reg_operand" "r")]))] + "" + "%q3 %0,%1,%2" + [(set_attr "type" "logical")]) + +(define_insn_and_split "*bool<mode>3_dot" + [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y") + (compare:CC (match_operator:GPR 3 "boolean_operator" + [(match_operand:GPR 1 "gpc_reg_operand" "r,r") + (match_operand:GPR 2 "gpc_reg_operand" "r,r")]) + (const_int 0))) + (clobber (match_scratch:GPR 0 "=r,r"))] + "<MODE>mode == Pmode && rs6000_gen_cell_microcode" + "@ + %q3. 
%0,%1,%2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[4], CCmode)" + [(set (match_dup 0) + (match_dup 3)) + (set (match_dup 4) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "logical") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*bool<mode>3_dot2" + [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y") + (compare:CC (match_operator:GPR 3 "boolean_operator" + [(match_operand:GPR 1 "gpc_reg_operand" "r,r") + (match_operand:GPR 2 "gpc_reg_operand" "r,r")]) + (const_int 0))) + (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") + (match_dup 3))] + "<MODE>mode == Pmode && rs6000_gen_cell_microcode" + "@ + %q3. %0,%1,%2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[4], CCmode)" + [(set (match_dup 0) + (match_dup 3)) + (set (match_dup 4) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "logical") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + + +(define_insn "*boolc<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (match_operator:GPR 3 "boolean_operator" + [(not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r")) + (match_operand:GPR 1 "gpc_reg_operand" "r")]))] + "" + "%q3 %0,%1,%2" + [(set_attr "type" "logical")]) + +(define_insn_and_split "*boolc<mode>3_dot" + [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y") + (compare:CC (match_operator:GPR 3 "boolean_operator" + [(not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r")) + (match_operand:GPR 1 "gpc_reg_operand" "r,r")]) + (const_int 0))) + (clobber (match_scratch:GPR 0 "=r,r"))] + "<MODE>mode == Pmode && rs6000_gen_cell_microcode" + "@ + %q3. %0,%1,%2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[4], CCmode)" + [(set (match_dup 0) + (match_dup 3)) + (set (match_dup 4) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "logical") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*boolc<mode>3_dot2" + [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y") + (compare:CC (match_operator:GPR 3 "boolean_operator" + [(not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r")) + (match_operand:GPR 1 "gpc_reg_operand" "r,r")]) + (const_int 0))) + (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") + (match_dup 3))] + "<MODE>mode == Pmode && rs6000_gen_cell_microcode" + "@ + %q3. %0,%1,%2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[4], CCmode)" + [(set (match_dup 0) + (match_dup 3)) + (set (match_dup 4) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "logical") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + + +(define_insn "*boolcc<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (match_operator:GPR 3 "boolean_operator" + [(not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")) + (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r"))]))] + "" + "%q3 %0,%1,%2" + [(set_attr "type" "logical")]) + +(define_insn_and_split "*boolcc<mode>3_dot" + [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y") + (compare:CC (match_operator:GPR 3 "boolean_operator" + [(not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")) + (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r"))]) + (const_int 0))) + (clobber (match_scratch:GPR 0 "=r,r"))] + "<MODE>mode == Pmode && rs6000_gen_cell_microcode" + "@ + %q3. 
%0,%1,%2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[4], CCmode)" + [(set (match_dup 0) + (match_dup 3)) + (set (match_dup 4) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "logical") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*boolcc<mode>3_dot2" + [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y") + (compare:CC (match_operator:GPR 3 "boolean_operator" + [(not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")) + (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r"))]) + (const_int 0))) + (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") + (match_dup 3))] + "<MODE>mode == Pmode && rs6000_gen_cell_microcode" + "@ + %q3. %0,%1,%2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[4], CCmode)" + [(set (match_dup 0) + (match_dup 3)) + (set (match_dup 4) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "logical") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + + +;; TODO: Should have dots of this as well. +(define_insn "*eqv<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (not:GPR (xor:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "gpc_reg_operand" "r"))))] + "" + "eqv %0,%1,%2" + [(set_attr "type" "logical")]) + +;; Rotate-and-mask and insert. + +(define_insn "*rotl<mode>3_mask" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (and:GPR (match_operator:GPR 4 "rotate_mask_operator" + [(match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "reg_or_cint_operand" "rn")]) + (match_operand:GPR 3 "const_int_operand" "n")))] + "rs6000_is_valid_shift_mask (operands[3], operands[4], <MODE>mode)" +{ + return rs6000_insn_for_shift_mask (<MODE>mode, operands, false); +} + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "yes")]) + +(define_insn_and_split "*rotl<mode>3_mask_dot" + [(set (match_operand:CC 5 "cc_reg_operand" "=x,?y") + (compare:CC + (and:GPR (match_operator:GPR 4 "rotate_mask_operator" + [(match_operand:GPR 1 "gpc_reg_operand" "r,r") + (match_operand:SI 2 "reg_or_cint_operand" "rn,rn")]) + (match_operand:GPR 3 "const_int_operand" "n,n")) + (const_int 0))) + (clobber (match_scratch:GPR 0 "=r,r"))] + "(<MODE>mode == Pmode || UINTVAL (operands[3]) <= 0x7fffffff) + && rs6000_gen_cell_microcode + && rs6000_is_valid_shift_mask (operands[3], operands[4], <MODE>mode)" +{ + if (which_alternative == 0) + return rs6000_insn_for_shift_mask (<MODE>mode, operands, true); + else + return "#"; +} + "&& reload_completed && cc_reg_not_cr0_operand (operands[5], CCmode)" + [(set (match_dup 0) + (and:GPR (match_dup 4) + (match_dup 3))) + (set (match_dup 5) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "yes") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*rotl<mode>3_mask_dot2" + [(set (match_operand:CC 5 "cc_reg_operand" "=x,?y") + (compare:CC + (and:GPR (match_operator:GPR 4 "rotate_mask_operator" + [(match_operand:GPR 1 "gpc_reg_operand" "r,r") + (match_operand:SI 2 "reg_or_cint_operand" "rn,rn")]) + (match_operand:GPR 3 "const_int_operand" "n,n")) + (const_int 0))) + (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") + (and:GPR (match_dup 4) + (match_dup 3)))] + "(<MODE>mode == Pmode || UINTVAL (operands[3]) <= 0x7fffffff) + && rs6000_gen_cell_microcode + && rs6000_is_valid_shift_mask (operands[3], operands[4], <MODE>mode)" +{ + if (which_alternative == 0) + return rs6000_insn_for_shift_mask (<MODE>mode, operands, 
true); + else + return "#"; +} + "&& reload_completed && cc_reg_not_cr0_operand (operands[5], CCmode)" + [(set (match_dup 0) + (and:GPR (match_dup 4) + (match_dup 3))) + (set (match_dup 5) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "yes") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +; Special case for less-than-0. We can do it with just one machine +; instruction, but the generic optimizers do not realise it is cheap. +(define_insn "*lt0_disi" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (lt:DI (match_operand:SI 1 "gpc_reg_operand" "r") + (const_int 0)))] + "TARGET_POWERPC64" + "rlwinm %0,%1,1,31,31" + [(set_attr "type" "shift")]) + + + +; Two forms for insert (the two arms of the IOR are not canonicalized, +; both are an AND so are the same precedence). +(define_insn "*rotl<mode>3_insert" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (ior:GPR (and:GPR (match_operator:GPR 4 "rotate_mask_operator" + [(match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "const_int_operand" "n")]) + (match_operand:GPR 3 "const_int_operand" "n")) + (and:GPR (match_operand:GPR 5 "gpc_reg_operand" "0") + (match_operand:GPR 6 "const_int_operand" "n"))))] + "rs6000_is_valid_insert_mask (operands[3], operands[4], <MODE>mode) + && UINTVAL (operands[3]) + UINTVAL (operands[6]) + 1 == 0" +{ + return rs6000_insn_for_insert_mask (<MODE>mode, operands, false); +} + [(set_attr "type" "insert")]) +; FIXME: this needs an attr "size", so that the scheduler can see the +; difference between rlwimi and rldimi. We also might want dot forms, +; but not for rlwimi on POWER4 and similar processors. + +(define_insn "*rotl<mode>3_insert_2" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (ior:GPR (and:GPR (match_operand:GPR 5 "gpc_reg_operand" "0") + (match_operand:GPR 6 "const_int_operand" "n")) + (and:GPR (match_operator:GPR 4 "rotate_mask_operator" + [(match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "const_int_operand" "n")]) + (match_operand:GPR 3 "const_int_operand" "n"))))] + "rs6000_is_valid_insert_mask (operands[3], operands[4], <MODE>mode) + && UINTVAL (operands[3]) + UINTVAL (operands[6]) + 1 == 0" +{ + return rs6000_insn_for_insert_mask (<MODE>mode, operands, false); +} + [(set_attr "type" "insert")]) + +; There are also some forms without one of the ANDs. 
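In rough C terms, the rotate-and-insert operation described by the patterns above (and by the AND-less variants that follow) keeps the destination bits outside a mask and deposits the rotated source under the mask, which rlwimi/rldimi do in a single instruction. The helper name and the fixed 32-bit width in this sketch are illustrative only, not anything defined by the port:

    #include <stdint.h>

    /* Sketch of a 32-bit rotate-and-insert (rlwimi-like): rotate SRC left by
       SH, then merge it into DEST under MASK.  MASK and its complement play
       the roles of operands 3 and 6 above, whose unsigned sum plus 1 is 0.  */
    static inline uint32_t
    rotl_insert32 (uint32_t dest, uint32_t src, unsigned sh, uint32_t mask)
    {
      uint32_t rot = (src << sh) | (src >> ((32 - sh) & 31));
      return (rot & mask) | (dest & ~mask);
    }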
+(define_insn "*rotl<mode>3_insert_3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (ior:GPR (and:GPR (match_operand:GPR 3 "gpc_reg_operand" "0") + (match_operand:GPR 4 "const_int_operand" "n")) + (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "const_int_operand" "n"))))] + "INTVAL (operands[2]) == exact_log2 (UINTVAL (operands[4]) + 1)" +{ + if (<MODE>mode == SImode) + return "rlwimi %0,%1,%h2,0,31-%h2"; + else + return "rldimi %0,%1,%H2,0"; +} + [(set_attr "type" "insert")]) + +(define_insn "*rotl<mode>3_insert_4" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (ior:GPR (and:GPR (match_operand:GPR 3 "gpc_reg_operand" "0") + (match_operand:GPR 4 "const_int_operand" "n")) + (lshiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "const_int_operand" "n"))))] + "<MODE>mode == SImode && + GET_MODE_PRECISION (<MODE>mode) + == INTVAL (operands[2]) + exact_log2 (-UINTVAL (operands[4]))" +{ + operands[2] = GEN_INT (GET_MODE_PRECISION (<MODE>mode) + - INTVAL (operands[2])); + if (<MODE>mode == SImode) + return "rlwimi %0,%1,%h2,32-%h2,31"; + else + return "rldimi %0,%1,%H2,64-%H2"; +} + [(set_attr "type" "insert")]) + +(define_insn "*rotlsi3_insert_5" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r") + (ior:SI (and:SI (match_operand:SI 1 "gpc_reg_operand" "0,r") + (match_operand:SI 2 "const_int_operand" "n,n")) + (and:SI (match_operand:SI 3 "gpc_reg_operand" "r,0") + (match_operand:SI 4 "const_int_operand" "n,n"))))] + "rs6000_is_valid_mask (operands[2], NULL, NULL, SImode) + && UINTVAL (operands[2]) != 0 && UINTVAL (operands[4]) != 0 + && UINTVAL (operands[2]) + UINTVAL (operands[4]) + 1 == 0" + "@ + rlwimi %0,%3,0,%4 + rlwimi %0,%1,0,%2" + [(set_attr "type" "insert")]) + +(define_insn "*rotldi3_insert_6" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (ior:DI (and:DI (match_operand:DI 1 "gpc_reg_operand" "0") + (match_operand:DI 2 "const_int_operand" "n")) + (and:DI (match_operand:DI 3 "gpc_reg_operand" "r") + (match_operand:DI 4 "const_int_operand" "n"))))] + "exact_log2 (-UINTVAL (operands[2])) > 0 + && UINTVAL (operands[2]) + UINTVAL (operands[4]) + 1 == 0" +{ + operands[5] = GEN_INT (64 - exact_log2 (-UINTVAL (operands[2]))); + return "rldimi %0,%3,0,%5"; +} + [(set_attr "type" "insert") + (set_attr "size" "64")]) + +(define_insn "*rotldi3_insert_7" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (ior:DI (and:DI (match_operand:DI 3 "gpc_reg_operand" "r") + (match_operand:DI 4 "const_int_operand" "n")) + (and:DI (match_operand:DI 1 "gpc_reg_operand" "0") + (match_operand:DI 2 "const_int_operand" "n"))))] + "exact_log2 (-UINTVAL (operands[2])) > 0 + && UINTVAL (operands[2]) + UINTVAL (operands[4]) + 1 == 0" +{ + operands[5] = GEN_INT (64 - exact_log2 (-UINTVAL (operands[2]))); + return "rldimi %0,%3,0,%5"; +} + [(set_attr "type" "insert") + (set_attr "size" "64")]) + + +; This handles the important case of multiple-precision shifts. There is +; no canonicalization rule for ASHIFT vs. LSHIFTRT, so two patterns. 
+(define_split + [(set (match_operand:GPR 0 "gpc_reg_operand") + (ior:GPR (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand") + (match_operand:SI 3 "const_int_operand")) + (lshiftrt:GPR (match_operand:GPR 2 "gpc_reg_operand") + (match_operand:SI 4 "const_int_operand"))))] + "can_create_pseudo_p () + && INTVAL (operands[3]) + INTVAL (operands[4]) + >= GET_MODE_PRECISION (<MODE>mode)" + [(set (match_dup 5) + (lshiftrt:GPR (match_dup 2) + (match_dup 4))) + (set (match_dup 0) + (ior:GPR (and:GPR (match_dup 5) + (match_dup 6)) + (ashift:GPR (match_dup 1) + (match_dup 3))))] +{ + unsigned HOST_WIDE_INT mask = 1; + mask = (mask << INTVAL (operands[3])) - 1; + operands[5] = gen_reg_rtx (<MODE>mode); + operands[6] = GEN_INT (mask); +}) + +(define_split + [(set (match_operand:GPR 0 "gpc_reg_operand") + (ior:GPR (lshiftrt:GPR (match_operand:GPR 2 "gpc_reg_operand") + (match_operand:SI 4 "const_int_operand")) + (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand") + (match_operand:SI 3 "const_int_operand"))))] + "can_create_pseudo_p () + && INTVAL (operands[3]) + INTVAL (operands[4]) + >= GET_MODE_PRECISION (<MODE>mode)" + [(set (match_dup 5) + (lshiftrt:GPR (match_dup 2) + (match_dup 4))) + (set (match_dup 0) + (ior:GPR (and:GPR (match_dup 5) + (match_dup 6)) + (ashift:GPR (match_dup 1) + (match_dup 3))))] +{ + unsigned HOST_WIDE_INT mask = 1; + mask = (mask << INTVAL (operands[3])) - 1; + operands[5] = gen_reg_rtx (<MODE>mode); + operands[6] = GEN_INT (mask); +}) + + +; Another important case is setting some bits to 1; we can do that with +; an insert instruction, in many cases. +(define_insn_and_split "*ior<mode>_mask" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (ior:GPR (match_operand:GPR 1 "gpc_reg_operand" "0") + (match_operand:GPR 2 "const_int_operand" "n"))) + (clobber (match_scratch:GPR 3 "=r"))] + "!logical_const_operand (operands[2], <MODE>mode) + && rs6000_is_valid_mask (operands[2], NULL, NULL, <MODE>mode)" + "#" + "&& 1" + [(set (match_dup 3) + (const_int -1)) + (set (match_dup 0) + (ior:GPR (and:GPR (rotate:GPR (match_dup 3) + (match_dup 4)) + (match_dup 2)) + (and:GPR (match_dup 1) + (match_dup 5))))] +{ + int nb, ne; + rs6000_is_valid_mask (operands[2], &nb, &ne, <MODE>mode); + if (GET_CODE (operands[3]) == SCRATCH) + operands[3] = gen_reg_rtx (<MODE>mode); + operands[4] = GEN_INT (ne); + operands[5] = GEN_INT (~UINTVAL (operands[2])); +} + [(set_attr "type" "two") + (set_attr "length" "8")]) + + +;; Now the simple shifts. + +(define_insn "rotl<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (rotate:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "reg_or_cint_operand" "rn")))] + "" + "rotl<wd>%I2 %0,%1,%<hH>2" + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "yes")]) + +(define_insn "*rotlsi3_64" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (zero_extend:DI + (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "reg_or_cint_operand" "rn"))))] + "TARGET_POWERPC64" + "rotlw%I2 %0,%1,%h2" + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "yes")]) + +(define_insn_and_split "*rotl<mode>3_dot" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (rotate:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r") + (match_operand:SI 2 "reg_or_cint_operand" "rn,rn")) + (const_int 0))) + (clobber (match_scratch:GPR 0 "=r,r"))] + "<MODE>mode == Pmode && rs6000_gen_cell_microcode" + "@ + rotl<wd>%I2. 
%0,%1,%<hH>2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(set (match_dup 0) + (rotate:GPR (match_dup 1) + (match_dup 2))) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "yes") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*rotl<mode>3_dot2" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (rotate:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r") + (match_operand:SI 2 "reg_or_cint_operand" "rn,rn")) + (const_int 0))) + (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") + (rotate:GPR (match_dup 1) + (match_dup 2)))] + "<MODE>mode == Pmode && rs6000_gen_cell_microcode" + "@ + rotl<wd>%I2. %0,%1,%<hH>2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(set (match_dup 0) + (rotate:GPR (match_dup 1) + (match_dup 2))) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "yes") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + + +(define_insn "ashl<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "reg_or_cint_operand" "rn")))] + "" + "sl<wd>%I2 %0,%1,%<hH>2" + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "yes")]) + +(define_insn "*ashlsi3_64" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (zero_extend:DI + (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "reg_or_cint_operand" "rn"))))] + "TARGET_POWERPC64" + "slw%I2 %0,%1,%h2" + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "yes")]) + +(define_insn_and_split "*ashl<mode>3_dot" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r") + (match_operand:SI 2 "reg_or_cint_operand" "rn,rn")) + (const_int 0))) + (clobber (match_scratch:GPR 0 "=r,r"))] + "<MODE>mode == Pmode && rs6000_gen_cell_microcode" + "@ + sl<wd>%I2. %0,%1,%<hH>2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(set (match_dup 0) + (ashift:GPR (match_dup 1) + (match_dup 2))) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "yes") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*ashl<mode>3_dot2" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r") + (match_operand:SI 2 "reg_or_cint_operand" "rn,rn")) + (const_int 0))) + (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") + (ashift:GPR (match_dup 1) + (match_dup 2)))] + "<MODE>mode == Pmode && rs6000_gen_cell_microcode" + "@ + sl<wd>%I2. %0,%1,%<hH>2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(set (match_dup 0) + (ashift:GPR (match_dup 1) + (match_dup 2))) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "yes") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +;; Pretend we have a memory form of extswsli until register allocation is done +;; so that we use LWZ to load the value from memory, instead of LWA. 
+(define_insn_and_split "ashdi3_extswsli" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") + (ashift:DI + (sign_extend:DI (match_operand:SI 1 "reg_or_mem_operand" "r,m")) + (match_operand:DI 2 "u6bit_cint_operand" "n,n")))] + "TARGET_EXTSWSLI" + "@ + extswsli %0,%1,%2 + #" + "&& reload_completed && MEM_P (operands[1])" + [(set (match_dup 3) + (match_dup 1)) + (set (match_dup 0) + (ashift:DI (sign_extend:DI (match_dup 3)) + (match_dup 2)))] +{ + operands[3] = gen_lowpart (SImode, operands[0]); +} + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "no")]) + + +(define_insn_and_split "ashdi3_extswsli_dot" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y,?x,??y") + (compare:CC + (ashift:DI + (sign_extend:DI (match_operand:SI 1 "reg_or_mem_operand" "r,r,m,m")) + (match_operand:DI 2 "u6bit_cint_operand" "n,n,n,n")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r,r,r,r"))] + "TARGET_EXTSWSLI" + "@ + extswsli. %0,%1,%2 + # + # + #" + "&& reload_completed + && (cc_reg_not_cr0_operand (operands[3], CCmode) + || memory_operand (operands[1], SImode))" + [(pc)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx shift = operands[2]; + rtx cr = operands[3]; + rtx src2; + + if (!MEM_P (src)) + src2 = src; + else + { + src2 = gen_lowpart (SImode, dest); + emit_move_insn (src2, src); + } + + if (REGNO (cr) == CR0_REGNO) + { + emit_insn (gen_ashdi3_extswsli_dot2 (dest, src2, shift, cr)); + DONE; + } + + emit_insn (gen_ashdi3_extswsli (dest, src2, shift)); + emit_insn (gen_rtx_SET (cr, gen_rtx_COMPARE (CCmode, dest, const0_rtx))); + DONE; +} + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "no") + (set_attr "dot" "yes") + (set_attr "length" "4,8,8,12")]) + +(define_insn_and_split "ashdi3_extswsli_dot2" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y,?x,??y") + (compare:CC + (ashift:DI + (sign_extend:DI (match_operand:SI 1 "reg_or_mem_operand" "r,r,m,m")) + (match_operand:DI 2 "u6bit_cint_operand" "n,n,n,n")) + (const_int 0))) + (set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r") + (ashift:DI (sign_extend:DI (match_dup 1)) + (match_dup 2)))] + "TARGET_EXTSWSLI" + "@ + extswsli. 
%0,%1,%2 + # + # + #" + "&& reload_completed + && (cc_reg_not_cr0_operand (operands[3], CCmode) + || memory_operand (operands[1], SImode))" + [(pc)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx shift = operands[2]; + rtx cr = operands[3]; + rtx src2; + + if (!MEM_P (src)) + src2 = src; + else + { + src2 = gen_lowpart (SImode, dest); + emit_move_insn (src2, src); + } + + if (REGNO (cr) == CR0_REGNO) + { + emit_insn (gen_ashdi3_extswsli_dot2 (dest, src2, shift, cr)); + DONE; + } + + emit_insn (gen_ashdi3_extswsli (dest, src2, shift)); + emit_insn (gen_rtx_SET (cr, gen_rtx_COMPARE (CCmode, dest, const0_rtx))); + DONE; +} + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "no") + (set_attr "dot" "yes") + (set_attr "length" "4,8,8,12")]) + +(define_insn "lshr<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (lshiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "reg_or_cint_operand" "rn")))] + "" + "sr<wd>%I2 %0,%1,%<hH>2" + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "yes")]) + +(define_insn "*lshrsi3_64" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (zero_extend:DI + (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "reg_or_cint_operand" "rn"))))] + "TARGET_POWERPC64" + "srw%I2 %0,%1,%h2" + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "yes")]) + +(define_insn_and_split "*lshr<mode>3_dot" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (lshiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r") + (match_operand:SI 2 "reg_or_cint_operand" "rn,rn")) + (const_int 0))) + (clobber (match_scratch:GPR 0 "=r,r"))] + "<MODE>mode == Pmode && rs6000_gen_cell_microcode" + "@ + sr<wd>%I2. %0,%1,%<hH>2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(set (match_dup 0) + (lshiftrt:GPR (match_dup 1) + (match_dup 2))) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "yes") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*lshr<mode>3_dot2" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (lshiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r") + (match_operand:SI 2 "reg_or_cint_operand" "rn,rn")) + (const_int 0))) + (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") + (lshiftrt:GPR (match_dup 1) + (match_dup 2)))] + "<MODE>mode == Pmode && rs6000_gen_cell_microcode" + "@ + sr<wd>%I2. 
%0,%1,%<hH>2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(set (match_dup 0) + (lshiftrt:GPR (match_dup 1) + (match_dup 2))) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "yes") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + + +(define_insn "ashr<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (ashiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "reg_or_cint_operand" "rn"))) + (clobber (reg:GPR CA_REGNO))] + "" + "sra<wd>%I2 %0,%1,%<hH>2" + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "yes")]) + +(define_insn "*ashrsi3_64" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (sign_extend:DI + (ashiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "reg_or_cint_operand" "rn")))) + (clobber (reg:SI CA_REGNO))] + "TARGET_POWERPC64" + "sraw%I2 %0,%1,%h2" + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "yes")]) + +(define_insn_and_split "*ashr<mode>3_dot" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (ashiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r") + (match_operand:SI 2 "reg_or_cint_operand" "rn,rn")) + (const_int 0))) + (clobber (match_scratch:GPR 0 "=r,r")) + (clobber (reg:GPR CA_REGNO))] + "<MODE>mode == Pmode && rs6000_gen_cell_microcode" + "@ + sra<wd>%I2. %0,%1,%<hH>2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(parallel [(set (match_dup 0) + (ashiftrt:GPR (match_dup 1) + (match_dup 2))) + (clobber (reg:GPR CA_REGNO))]) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "yes") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +(define_insn_and_split "*ashr<mode>3_dot2" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (ashiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r") + (match_operand:SI 2 "reg_or_cint_operand" "rn,rn")) + (const_int 0))) + (set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") + (ashiftrt:GPR (match_dup 1) + (match_dup 2))) + (clobber (reg:GPR CA_REGNO))] + "<MODE>mode == Pmode && rs6000_gen_cell_microcode" + "@ + sra<wd>%I2. %0,%1,%<hH>2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(parallel [(set (match_dup 0) + (ashiftrt:GPR (match_dup 1) + (match_dup 2))) + (clobber (reg:GPR CA_REGNO))]) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "yes") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) + +;; Builtins to replace a division to generate FRE reciprocal estimate +;; instructions and the necessary fixup instructions +(define_expand "recip<mode>3" + [(match_operand:RECIPF 0 "gpc_reg_operand" "") + (match_operand:RECIPF 1 "gpc_reg_operand" "") + (match_operand:RECIPF 2 "gpc_reg_operand" "")] + "RS6000_RECIP_HAVE_RE_P (<MODE>mode)" +{ + rs6000_emit_swdiv (operands[0], operands[1], operands[2], false); + DONE; +}) + +;; Split to create division from FRE/FRES/etc. and fixup instead of the normal +;; hardware division. This is only done before register allocation and with +;; -ffast-math. This must appear before the divsf3/divdf3 insns. +;; We used to also check optimize_insn_for_speed_p () but problems with guessed +;; frequencies (pr68212/pr77536) yields that unreliable so it was removed. 
+(define_split + [(set (match_operand:RECIPF 0 "gpc_reg_operand" "") + (div:RECIPF (match_operand 1 "gpc_reg_operand" "") + (match_operand 2 "gpc_reg_operand" "")))] + "RS6000_RECIP_AUTO_RE_P (<MODE>mode) + && can_create_pseudo_p () && flag_finite_math_only + && !flag_trapping_math && flag_reciprocal_math" + [(const_int 0)] +{ + rs6000_emit_swdiv (operands[0], operands[1], operands[2], true); + DONE; +}) + +;; Builtins to replace 1/sqrt(x) with instructions using RSQRTE and the +;; appropriate fixup. +(define_expand "rsqrt<mode>2" + [(match_operand:RECIPF 0 "gpc_reg_operand" "") + (match_operand:RECIPF 1 "gpc_reg_operand" "")] + "RS6000_RECIP_HAVE_RSQRTE_P (<MODE>mode)" +{ + rs6000_emit_swsqrt (operands[0], operands[1], 1); + DONE; +}) + +;; Floating-point insns, excluding normal data motion. We combine the SF/DF +;; modes here, and also add in conditional vsx/power8-vector support to access +;; values in the traditional Altivec registers if the appropriate +;; -mupper-regs-{df,sf} option is enabled. + +(define_expand "abs<mode>2" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "") + (abs:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")))] + "TARGET_<MODE>_INSN" + "") + +(define_insn "*abs<mode>2_fpr" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>") + (abs:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")))] + "TARGET_<MODE>_FPR" + "@ + fabs %0,%1 + xsabsdp %x0,%x1" + [(set_attr "type" "fpsimple") + (set_attr "fp_type" "fp_addsub_<Fs>")]) + +(define_insn "*nabs<mode>2_fpr" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>") + (neg:SFDF + (abs:SFDF + (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>"))))] + "TARGET_<MODE>_FPR" + "@ + fnabs %0,%1 + xsnabsdp %x0,%x1" + [(set_attr "type" "fpsimple") + (set_attr "fp_type" "fp_addsub_<Fs>")]) + +(define_expand "neg<mode>2" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "") + (neg:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")))] + "TARGET_<MODE>_INSN" + "") + +(define_insn "*neg<mode>2_fpr" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>") + (neg:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")))] + "TARGET_<MODE>_FPR" + "@ + fneg %0,%1 + xsnegdp %x0,%x1" + [(set_attr "type" "fpsimple") + (set_attr "fp_type" "fp_addsub_<Fs>")]) + +(define_expand "add<mode>3" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "") + (plus:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "") + (match_operand:SFDF 2 "gpc_reg_operand" "")))] + "TARGET_<MODE>_INSN" + "") + +(define_insn "*add<mode>3_fpr" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv2>") + (plus:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "%<Ff>,<Fv2>") + (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv2>")))] + "TARGET_<MODE>_FPR" + "@ + fadd<Ftrad> %0,%1,%2 + xsadd<Fvsx> %x0,%x1,%x2" + [(set_attr "type" "fp") + (set_attr "fp_type" "fp_addsub_<Fs>")]) + +(define_expand "sub<mode>3" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "") + (minus:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "") + (match_operand:SFDF 2 "gpc_reg_operand" "")))] + "TARGET_<MODE>_INSN" + "") + +(define_insn "*sub<mode>3_fpr" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv2>") + (minus:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv2>") + (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv2>")))] + "TARGET_<MODE>_FPR" + "@ + fsub<Ftrad> %0,%1,%2 + xssub<Fvsx> %x0,%x1,%x2" + [(set_attr "type" "fp") + (set_attr "fp_type" "fp_addsub_<Fs>")]) + +(define_expand "mul<mode>3" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "") + (mult:SFDF 
(match_operand:SFDF 1 "gpc_reg_operand" "") + (match_operand:SFDF 2 "gpc_reg_operand" "")))] + "TARGET_<MODE>_INSN" + "") + +(define_insn "*mul<mode>3_fpr" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv2>") + (mult:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "%<Ff>,<Fv2>") + (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv2>")))] + "TARGET_<MODE>_FPR" + "@ + fmul<Ftrad> %0,%1,%2 + xsmul<Fvsx> %x0,%x1,%x2" + [(set_attr "type" "dmul") + (set_attr "fp_type" "fp_mul_<Fs>")]) + +(define_expand "div<mode>3" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "") + (div:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "") + (match_operand:SFDF 2 "gpc_reg_operand" "")))] + "TARGET_<MODE>_INSN && !TARGET_SIMPLE_FPU" +{ + if (RS6000_RECIP_AUTO_RE_P (<MODE>mode) + && can_create_pseudo_p () && flag_finite_math_only + && !flag_trapping_math && flag_reciprocal_math) + { + rs6000_emit_swdiv (operands[0], operands[1], operands[2], true); + DONE; + } +}) + +(define_insn "*div<mode>3_fpr" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv2>") + (div:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv2>") + (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv2>")))] + "TARGET_<MODE>_FPR && !TARGET_SIMPLE_FPU" + "@ + fdiv<Ftrad> %0,%1,%2 + xsdiv<Fvsx> %x0,%x1,%x2" + [(set_attr "type" "<Fs>div") + (set_attr "fp_type" "fp_div_<Fs>")]) + +(define_insn "*sqrt<mode>2_internal" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv2>") + (sqrt:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv2>")))] + "TARGET_<MODE>_FPR && !TARGET_SIMPLE_FPU + && (TARGET_PPC_GPOPT || (<MODE>mode == SFmode && TARGET_XILINX_FPU))" + "@ + fsqrt<Ftrad> %0,%1 + xssqrt<Fvsx> %x0,%x1" + [(set_attr "type" "<Fs>sqrt") + (set_attr "fp_type" "fp_sqrt_<Fs>")]) + +(define_expand "sqrt<mode>2" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "") + (sqrt:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")))] + "TARGET_<MODE>_FPR && !TARGET_SIMPLE_FPU + && (TARGET_PPC_GPOPT || (<MODE>mode == SFmode && TARGET_XILINX_FPU))" +{ + if (<MODE>mode == SFmode + && TARGET_RECIP_PRECISION + && RS6000_RECIP_HAVE_RSQRTE_P (<MODE>mode) + && !optimize_function_for_size_p (cfun) + && flag_finite_math_only && !flag_trapping_math + && flag_unsafe_math_optimizations) + { + rs6000_emit_swsqrt (operands[0], operands[1], 0); + DONE; + } +}) + +;; Floating point reciprocal approximation +(define_insn "fre<Fs>" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv2>") + (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv2>")] + UNSPEC_FRES))] + "TARGET_<FFRE>" + "@ + fre<Ftrad> %0,%1 + xsre<Fvsx> %x0,%x1" + [(set_attr "type" "fp")]) + +(define_insn "*rsqrt<mode>2" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv2>") + (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv2>")] + UNSPEC_RSQRT))] + "RS6000_RECIP_HAVE_RSQRTE_P (<MODE>mode)" + "@ + frsqrte<Ftrad> %0,%1 + xsrsqrte<Fvsx> %x0,%x1" + [(set_attr "type" "fp")]) + +;; Floating point comparisons +(define_insn "*cmp<mode>_fpr" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y,y") + (compare:CCFP (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv2>") + (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv2>")))] + "TARGET_<MODE>_FPR" + "@ + fcmpu %0,%1,%2 + xscmpudp %0,%x1,%x2" + [(set_attr "type" "fpcompare")]) + +;; Floating point conversions +(define_expand "extendsfdf2" + [(set (match_operand:DF 0 "gpc_reg_operand") + (float_extend:DF (match_operand:SF 1 "reg_or_none500mem_operand")))] + "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || 
TARGET_E500_DOUBLE)" +{ + if (HONOR_SNANS (SFmode)) + operands[1] = force_reg (SFmode, operands[1]); +}) + +(define_insn_and_split "*extendsfdf2_fpr" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d,d,ws,?ws,wu,wb") + (float_extend:DF (match_operand:SF 1 "reg_or_mem_operand" "0,f,m,0,wy,Z,wY")))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && !HONOR_SNANS (SFmode)" + "@ + # + fmr %0,%1 + lfs%U1%X1 %0,%1 + # + xscpsgndp %x0,%x1,%x1 + lxsspx %x0,%y1 + lxssp %0,%1" + "&& reload_completed && REG_P (operands[1]) && REGNO (operands[0]) == REGNO (operands[1])" + [(const_int 0)] +{ + emit_note (NOTE_INSN_DELETED); + DONE; +} + [(set_attr "type" "fp,fpsimple,fpload,fp,fpsimple,fpload,fpload")]) + +(define_insn "*extendsfdf2_snan" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d,ws") + (float_extend:DF (match_operand:SF 1 "gpc_reg_operand" "f,wy")))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && HONOR_SNANS (SFmode)" + "@ + frsp %0,%1 + xsrsp %x0,%x1" + [(set_attr "type" "fp")]) + +(define_expand "truncdfsf2" + [(set (match_operand:SF 0 "gpc_reg_operand" "") + (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "")))] + "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)" + "") + +(define_insn "*truncdfsf2_fpr" + [(set (match_operand:SF 0 "gpc_reg_operand" "=f,wy") + (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "d,ws")))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" + "@ + frsp %0,%1 + xsrsp %x0,%x1" + [(set_attr "type" "fp")]) + +;; This expander is here to avoid FLOAT_WORDS_BIGENDIAN tests in +;; builtins.c and optabs.c that are not correct for IBM long double +;; when little-endian. +(define_expand "signbit<mode>2" + [(set (match_dup 2) + (float_truncate:DF (match_operand:FLOAT128 1 "gpc_reg_operand" ""))) + (set (match_dup 3) + (subreg:DI (match_dup 2) 0)) + (set (match_dup 4) + (match_dup 5)) + (set (match_operand:SI 0 "gpc_reg_operand" "") + (match_dup 6))] + "TARGET_HARD_FLOAT + && (TARGET_FPRS || TARGET_E500_DOUBLE) + && (!FLOAT128_IEEE_P (<MODE>mode) + || (TARGET_POWERPC64 && TARGET_DIRECT_MOVE))" +{ + if (FLOAT128_IEEE_P (<MODE>mode)) + { + if (<MODE>mode == KFmode) + emit_insn (gen_signbitkf2_dm (operands[0], operands[1])); + else if (<MODE>mode == TFmode) + emit_insn (gen_signbittf2_dm (operands[0], operands[1])); + else + gcc_unreachable (); + DONE; + } + operands[2] = gen_reg_rtx (DFmode); + operands[3] = gen_reg_rtx (DImode); + if (TARGET_POWERPC64) + { + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_rtx_LSHIFTRT (DImode, operands[3], GEN_INT (63)); + operands[6] = gen_rtx_SUBREG (SImode, operands[4], + WORDS_BIG_ENDIAN ? 4 : 0); + } + else + { + operands[4] = gen_reg_rtx (SImode); + operands[5] = gen_rtx_SUBREG (SImode, operands[3], + WORDS_BIG_ENDIAN ? 
0 : 4); + operands[6] = gen_rtx_LSHIFTRT (SImode, operands[4], GEN_INT (31)); + } +}) + +(define_expand "copysign<mode>3" + [(set (match_dup 3) + (abs:SFDF (match_operand:SFDF 1 "gpc_reg_operand" ""))) + (set (match_dup 4) + (neg:SFDF (abs:SFDF (match_dup 1)))) + (set (match_operand:SFDF 0 "gpc_reg_operand" "") + (if_then_else:SFDF (ge (match_operand:SFDF 2 "gpc_reg_operand" "") + (match_dup 5)) + (match_dup 3) + (match_dup 4)))] + "TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT> + && ((TARGET_PPC_GFXOPT + && !HONOR_NANS (<MODE>mode) + && !HONOR_SIGNED_ZEROS (<MODE>mode)) + || TARGET_CMPB + || VECTOR_UNIT_VSX_P (<MODE>mode))" +{ + if (TARGET_CMPB || VECTOR_UNIT_VSX_P (<MODE>mode)) + { + emit_insn (gen_copysign<mode>3_fcpsgn (operands[0], operands[1], + operands[2])); + DONE; + } + + operands[3] = gen_reg_rtx (<MODE>mode); + operands[4] = gen_reg_rtx (<MODE>mode); + operands[5] = CONST0_RTX (<MODE>mode); + }) + +;; Optimize signbit on 64-bit systems with direct move to avoid doing the store +;; and load. +(define_insn_and_split "signbit<mode>2_dm" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r") + (unspec:SI + [(match_operand:SIGNBIT 1 "input_operand" "wa,m,r")] + UNSPEC_SIGNBIT))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rs6000_split_signbit (operands[0], operands[1]); + DONE; +} + [(set_attr "length" "8,8,4") + (set_attr "type" "mftgpr,load,integer")]) + +(define_insn_and_split "*signbit<mode>2_dm_<su>ext" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r") + (any_extend:DI + (unspec:SI + [(match_operand:SIGNBIT 1 "input_operand" "wa,m,r")] + UNSPEC_SIGNBIT)))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rs6000_split_signbit (operands[0], operands[1]); + DONE; +} + [(set_attr "length" "8,8,4") + (set_attr "type" "mftgpr,load,integer")]) + +;; MODES_TIEABLE_P doesn't allow DImode to be tied with the various floating +;; point types, which makes normal SUBREG's problematical. Instead use a +;; special pattern to avoid using a normal movdi. +(define_insn "signbit<mode>2_dm2" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (unspec:DI [(match_operand:SIGNBIT 1 "gpc_reg_operand" "wa") + (const_int 0)] + UNSPEC_SIGNBIT))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mfvsrd %0,%x1" + [(set_attr "type" "mftgpr")]) + + +;; Use an unspec rather providing an if-then-else in RTL, to prevent the +;; compiler from optimizing -0.0 +(define_insn "copysign<mode>3_fcpsgn" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>") + (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>") + (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")] + UNSPEC_COPYSIGN))] + "TARGET_<MODE>_FPR && (TARGET_CMPB || VECTOR_UNIT_VSX_P (<MODE>mode))" + "@ + fcpsgn %0,%2,%1 + xscpsgndp %x0,%x2,%x1" + [(set_attr "type" "fpsimple")]) + +;; For MIN, MAX, and conditional move, we use DEFINE_EXPAND's that involve a +;; fsel instruction and some auxiliary computations. Then we just have a +;; single DEFINE_INSN for fsel and the define_splits to make them if made by +;; combine. +;; For MIN, MAX on non-VSX machines, and conditional move all of the time, we +;; use DEFINE_EXPAND's that involve a fsel instruction and some auxiliary +;; computations. Then we just have a single DEFINE_INSN for fsel and the +;; define_splits to make them if made by combine. On VSX machines we have the +;; min/max instructions. 
+;; +;; On VSX, we only check for TARGET_VSX instead of checking for a vsx/p8 vector +;; to allow either DF/SF to use only traditional registers. + +(define_expand "s<minmax><mode>3" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "") + (fp_minmax:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "") + (match_operand:SFDF 2 "gpc_reg_operand" "")))] + "TARGET_MINMAX_<MODE>" +{ + rs6000_emit_minmax (operands[0], <SMINMAX>, operands[1], operands[2]); + DONE; +}) + +(define_insn "*s<minmax><mode>3_vsx" + [(set (match_operand:SFDF 0 "vsx_register_operand" "=<Fv>") + (fp_minmax:SFDF (match_operand:SFDF 1 "vsx_register_operand" "<Fv>") + (match_operand:SFDF 2 "vsx_register_operand" "<Fv>")))] + "TARGET_VSX && TARGET_<MODE>_FPR" +{ + return (TARGET_P9_MINMAX + ? "xs<minmax>cdp %x0,%x1,%x2" + : "xs<minmax>dp %x0,%x1,%x2"); +} + [(set_attr "type" "fp")]) + +;; The conditional move instructions allow us to perform max and min operations +;; even when we don't have the appropriate max/min instruction using the FSEL +;; instruction. + +(define_insn_and_split "*s<minmax><mode>3_fpr" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "") + (fp_minmax:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "") + (match_operand:SFDF 2 "gpc_reg_operand" "")))] + "!TARGET_VSX && TARGET_MINMAX_<MODE>" + "#" + "&& 1" + [(const_int 0)] +{ + rs6000_emit_minmax (operands[0], <SMINMAX>, operands[1], operands[2]); + DONE; +}) + +(define_expand "mov<mode>cc" + [(set (match_operand:GPR 0 "gpc_reg_operand" "") + (if_then_else:GPR (match_operand 1 "comparison_operator" "") + (match_operand:GPR 2 "gpc_reg_operand" "") + (match_operand:GPR 3 "gpc_reg_operand" "")))] + "TARGET_ISEL<sel>" + " +{ + if (rs6000_emit_cmove (operands[0], operands[1], operands[2], operands[3])) + DONE; + else + FAIL; +}") + +;; We use the BASE_REGS for the isel input operands because, if rA is +;; 0, the value of 0 is placed in rD upon truth. Similarly for rB +;; because we may switch the operands and rB may end up being rA. +;; +;; We need 2 patterns: an unsigned and a signed pattern. We could +;; leave out the mode in operand 4 and use one pattern, but reload can +;; change the mode underneath our feet and then gets confused trying +;; to reload the value. +(define_insn "isel_signed_<mode>" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") + (if_then_else:GPR + (match_operator 1 "scc_comparison_operator" + [(match_operand:CC 4 "cc_reg_operand" "y,y") + (const_int 0)]) + (match_operand:GPR 2 "reg_or_cint_operand" "O,b") + (match_operand:GPR 3 "gpc_reg_operand" "r,r")))] + "TARGET_ISEL<sel>" + "* +{ return output_isel (operands); }" + [(set_attr "type" "isel") + (set_attr "length" "4")]) + +(define_insn "isel_unsigned_<mode>" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") + (if_then_else:GPR + (match_operator 1 "scc_comparison_operator" + [(match_operand:CCUNS 4 "cc_reg_operand" "y,y") + (const_int 0)]) + (match_operand:GPR 2 "reg_or_cint_operand" "O,b") + (match_operand:GPR 3 "gpc_reg_operand" "r,r")))] + "TARGET_ISEL<sel>" + "* +{ return output_isel (operands); }" + [(set_attr "type" "isel") + (set_attr "length" "4")]) + +;; These patterns can be useful for combine; they let combine know that +;; isel can handle reversed comparisons so long as the operands are +;; registers. 
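To make the selection concrete: the isel patterns above (and the reversed forms below) compute the following, shown here as an illustrative C sketch rather than anything taken from this patch. "cond" stands for the CR-bit test in operand 1; when rA names register 0 the hardware substitutes the value 0, which is why the true arm allows the "O" constraint.

/* Sketch only: rough semantics of "isel rD,rA,rB,crbit".  */
static long long isel (int cond, long long ra_or_zero, long long rb)
{
  return cond ? ra_or_zero : rb;   /* the reversed patterns simply negate cond */
}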
+ +(define_insn "*isel_reversed_signed_<mode>" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (if_then_else:GPR + (match_operator 1 "scc_rev_comparison_operator" + [(match_operand:CC 4 "cc_reg_operand" "y") + (const_int 0)]) + (match_operand:GPR 2 "gpc_reg_operand" "b") + (match_operand:GPR 3 "gpc_reg_operand" "b")))] + "TARGET_ISEL<sel>" + "* +{ return output_isel (operands); }" + [(set_attr "type" "isel") + (set_attr "length" "4")]) + +(define_insn "*isel_reversed_unsigned_<mode>" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (if_then_else:GPR + (match_operator 1 "scc_rev_comparison_operator" + [(match_operand:CCUNS 4 "cc_reg_operand" "y") + (const_int 0)]) + (match_operand:GPR 2 "gpc_reg_operand" "b") + (match_operand:GPR 3 "gpc_reg_operand" "b")))] + "TARGET_ISEL<sel>" + "* +{ return output_isel (operands); }" + [(set_attr "type" "isel") + (set_attr "length" "4")]) + +;; Floating point conditional move +(define_expand "mov<mode>cc" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "") + (if_then_else:SFDF (match_operand 1 "comparison_operator" "") + (match_operand:SFDF 2 "gpc_reg_operand" "") + (match_operand:SFDF 3 "gpc_reg_operand" "")))] + "TARGET_<MODE>_FPR && TARGET_PPC_GFXOPT" + " +{ + if (rs6000_emit_cmove (operands[0], operands[1], operands[2], operands[3])) + DONE; + else + FAIL; +}") + +(define_insn "*fsel<SFDF:mode><SFDF2:mode>4" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=&<SFDF:rreg2>") + (if_then_else:SFDF + (ge (match_operand:SFDF2 1 "gpc_reg_operand" "<SFDF2:rreg2>") + (match_operand:SFDF2 4 "zero_fp_constant" "F")) + (match_operand:SFDF 2 "gpc_reg_operand" "<SFDF:rreg2>") + (match_operand:SFDF 3 "gpc_reg_operand" "<SFDF:rreg2>")))] + "TARGET_<MODE>_FPR && TARGET_PPC_GFXOPT" + "fsel %0,%1,%2,%3" + [(set_attr "type" "fp")]) + +(define_insn_and_split "*mov<SFDF:mode><SFDF2:mode>cc_p9" + [(set (match_operand:SFDF 0 "vsx_register_operand" "=&<SFDF:Fv>,<SFDF:Fv>") + (if_then_else:SFDF + (match_operator:CCFP 1 "fpmask_comparison_operator" + [(match_operand:SFDF2 2 "vsx_register_operand" "<SFDF2:Fv>,<SFDF2:Fv>") + (match_operand:SFDF2 3 "vsx_register_operand" "<SFDF2:Fv>,<SFDF2:Fv>")]) + (match_operand:SFDF 4 "vsx_register_operand" "<SFDF:Fv>,<SFDF:Fv>") + (match_operand:SFDF 5 "vsx_register_operand" "<SFDF:Fv>,<SFDF:Fv>"))) + (clobber (match_scratch:V2DI 6 "=0,&wa"))] + "TARGET_P9_MINMAX" + "#" + "" + [(set (match_dup 6) + (if_then_else:V2DI (match_dup 1) + (match_dup 7) + (match_dup 8))) + (set (match_dup 0) + (if_then_else:SFDF (ne (match_dup 6) + (match_dup 8)) + (match_dup 4) + (match_dup 5)))] +{ + if (GET_CODE (operands[6]) == SCRATCH) + operands[6] = gen_reg_rtx (V2DImode); + + operands[7] = CONSTM1_RTX (V2DImode); + operands[8] = CONST0_RTX (V2DImode); +} + [(set_attr "length" "8") + (set_attr "type" "vecperm")]) + +;; Handle inverting the fpmask comparisons. 
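The fpmask scheme used by the *mov<SFDF:mode><SFDF2:mode>cc_p9 splitter above and the inverted form below boils down to a compare that yields an all-ones or all-zeros mask, followed by a bitwise select. A hedged C sketch of the idea (not part of the patch; the helper name is made up):

#include <stdint.h>

/* xscmp*dp writes ~0 or 0 into a 64-bit lane; xxsel then picks bits.  */
static uint64_t fp_cmove_bits (double a, double b, uint64_t t_bits, uint64_t f_bits)
{
  uint64_t mask = (a > b) ? ~UINT64_C (0) : 0;   /* *fpmask<mode>  */
  return (t_bits & mask) | (f_bits & ~mask);     /* *xxsel<mode>   */
}

The inverted variant only reverses the comparison code (reverse_condition_maybe_unordered) and swaps the two value operands, so no extra instruction is needed.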
+(define_insn_and_split "*mov<SFDF:mode><SFDF2:mode>cc_invert_p9" + [(set (match_operand:SFDF 0 "vsx_register_operand" "=&<SFDF:Fv>,<SFDF:Fv>") + (if_then_else:SFDF + (match_operator:CCFP 1 "invert_fpmask_comparison_operator" + [(match_operand:SFDF2 2 "vsx_register_operand" "<SFDF2:Fv>,<SFDF2:Fv>") + (match_operand:SFDF2 3 "vsx_register_operand" "<SFDF2:Fv>,<SFDF2:Fv>")]) + (match_operand:SFDF 4 "vsx_register_operand" "<SFDF:Fv>,<SFDF:Fv>") + (match_operand:SFDF 5 "vsx_register_operand" "<SFDF:Fv>,<SFDF:Fv>"))) + (clobber (match_scratch:V2DI 6 "=0,&wa"))] + "TARGET_P9_MINMAX" + "#" + "&& 1" + [(set (match_dup 6) + (if_then_else:V2DI (match_dup 9) + (match_dup 7) + (match_dup 8))) + (set (match_dup 0) + (if_then_else:SFDF (ne (match_dup 6) + (match_dup 8)) + (match_dup 5) + (match_dup 4)))] +{ + rtx op1 = operands[1]; + enum rtx_code cond = reverse_condition_maybe_unordered (GET_CODE (op1)); + + if (GET_CODE (operands[6]) == SCRATCH) + operands[6] = gen_reg_rtx (V2DImode); + + operands[7] = CONSTM1_RTX (V2DImode); + operands[8] = CONST0_RTX (V2DImode); + + operands[9] = gen_rtx_fmt_ee (cond, CCFPmode, operands[2], operands[3]); +} + [(set_attr "length" "8") + (set_attr "type" "vecperm")]) + +(define_insn "*fpmask<mode>" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") + (if_then_else:V2DI + (match_operator:CCFP 1 "fpmask_comparison_operator" + [(match_operand:SFDF 2 "vsx_register_operand" "<Fv>") + (match_operand:SFDF 3 "vsx_register_operand" "<Fv>")]) + (match_operand:V2DI 4 "all_ones_constant" "") + (match_operand:V2DI 5 "zero_constant" "")))] + "TARGET_P9_MINMAX" + "xscmp%V1dp %x0,%x2,%x3" + [(set_attr "type" "fpcompare")]) + +(define_insn "*xxsel<mode>" + [(set (match_operand:SFDF 0 "vsx_register_operand" "=<Fv>") + (if_then_else:SFDF (ne (match_operand:V2DI 1 "vsx_register_operand" "wa") + (match_operand:V2DI 2 "zero_constant" "")) + (match_operand:SFDF 3 "vsx_register_operand" "<Fv>") + (match_operand:SFDF 4 "vsx_register_operand" "<Fv>")))] + "TARGET_P9_MINMAX" + "xxsel %x0,%x4,%x3,%x1" + [(set_attr "type" "vecmove")]) + + +;; Conversions to and from floating-point. + +; We don't define lfiwax/lfiwzx with the normal definition, because we +; don't want to support putting SImode in FPR registers. +(define_insn "lfiwax" + [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wj,wj,wK") + (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r,wK")] + UNSPEC_LFIWAX))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX" + "@ + lfiwax %0,%y1 + lxsiwax %x0,%y1 + mtvsrwa %x0,%1 + vextsw2d %0,%1" + [(set_attr "type" "fpload,fpload,mffgpr,vecexts")]) + +; This split must be run before register allocation because it allocates the +; memory slot that is needed to move values to/from the FPR. We don't allocate +; it earlier to allow for the combiner to merge insns together where it might +; not be needed and also in case the insns are deleted as dead code. 
+ +(define_insn_and_split "floatsi<mode>2_lfiwax" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Fv>") + (float:SFDF (match_operand:SI 1 "nonimmediate_operand" "r"))) + (clobber (match_scratch:DI 2 "=wi"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX + && <SI_CONVERT_FP> && can_create_pseudo_p ()" + "#" + "" + [(pc)] + " +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp; + + if (!MEM_P (src) && TARGET_POWERPC64 + && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) + tmp = convert_to_mode (DImode, src, false); + else + { + tmp = operands[2]; + if (GET_CODE (tmp) == SCRATCH) + tmp = gen_reg_rtx (DImode); + if (MEM_P (src)) + { + src = rs6000_address_for_fpconvert (src); + emit_insn (gen_lfiwax (tmp, src)); + } + else + { + rtx stack = rs6000_allocate_stack_temp (SImode, false, true); + emit_move_insn (stack, src); + emit_insn (gen_lfiwax (tmp, stack)); + } + } + emit_insn (gen_floatdi<mode>2 (dest, tmp)); + DONE; +}" + [(set_attr "length" "12") + (set_attr "type" "fpload")]) + +(define_insn_and_split "floatsi<mode>2_lfiwax_mem" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Fv>") + (float:SFDF + (sign_extend:DI + (match_operand:SI 1 "indexed_or_indirect_operand" "Z")))) + (clobber (match_scratch:DI 2 "=wi"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX + && <SI_CONVERT_FP>" + "#" + "" + [(pc)] + " +{ + operands[1] = rs6000_address_for_fpconvert (operands[1]); + if (GET_CODE (operands[2]) == SCRATCH) + operands[2] = gen_reg_rtx (DImode); + if (TARGET_VSX_SMALL_INTEGER) + emit_insn (gen_extendsidi2 (operands[2], operands[1])); + else + emit_insn (gen_lfiwax (operands[2], operands[1])); + emit_insn (gen_floatdi<mode>2 (operands[0], operands[2])); + DONE; +}" + [(set_attr "length" "8") + (set_attr "type" "fpload")]) + +(define_insn "lfiwzx" + [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wj,wj,wJwK") + (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r,wJwK")] + UNSPEC_LFIWZX))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX" + "@ + lfiwzx %0,%y1 + lxsiwzx %x0,%y1 + mtvsrwz %x0,%1 + xxextractuw %x0,%x1,4" + [(set_attr "type" "fpload,fpload,mftgpr,vecexts")]) + +(define_insn_and_split "floatunssi<mode>2_lfiwzx" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Fv>") + (unsigned_float:SFDF (match_operand:SI 1 "nonimmediate_operand" "r"))) + (clobber (match_scratch:DI 2 "=wi"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX + && <SI_CONVERT_FP>" + "#" + "" + [(pc)] + " +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp; + + if (!MEM_P (src) && TARGET_POWERPC64 + && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) + tmp = convert_to_mode (DImode, src, true); + else + { + tmp = operands[2]; + if (GET_CODE (tmp) == SCRATCH) + tmp = gen_reg_rtx (DImode); + if (MEM_P (src)) + { + src = rs6000_address_for_fpconvert (src); + emit_insn (gen_lfiwzx (tmp, src)); + } + else + { + rtx stack = rs6000_allocate_stack_temp (SImode, false, true); + emit_move_insn (stack, src); + emit_insn (gen_lfiwzx (tmp, stack)); + } + } + emit_insn (gen_floatdi<mode>2 (dest, tmp)); + DONE; +}" + [(set_attr "length" "12") + (set_attr "type" "fpload")]) + +(define_insn_and_split "floatunssi<mode>2_lfiwzx_mem" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Fv>") + (unsigned_float:SFDF + (zero_extend:DI + (match_operand:SI 1 "indexed_or_indirect_operand" "Z")))) + (clobber (match_scratch:DI 2 "=wi"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX + 
&& <SI_CONVERT_FP>" + "#" + "" + [(pc)] + " +{ + operands[1] = rs6000_address_for_fpconvert (operands[1]); + if (GET_CODE (operands[2]) == SCRATCH) + operands[2] = gen_reg_rtx (DImode); + if (TARGET_VSX_SMALL_INTEGER) + emit_insn (gen_zero_extendsidi2 (operands[2], operands[1])); + else + emit_insn (gen_lfiwzx (operands[2], operands[1])); + emit_insn (gen_floatdi<mode>2 (operands[0], operands[2])); + DONE; +}" + [(set_attr "length" "8") + (set_attr "type" "fpload")]) + +; For each of these conversions, there is a define_expand, a define_insn +; with a '#' template, and a define_split (with C code). The idea is +; to allow constant folding with the template of the define_insn, +; then to have the insns split later (between sched1 and final). + +(define_expand "floatsidf2" + [(parallel [(set (match_operand:DF 0 "gpc_reg_operand" "") + (float:DF (match_operand:SI 1 "nonimmediate_operand" ""))) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 4)) + (clobber (match_dup 5)) + (clobber (match_dup 6))])] + "TARGET_HARD_FLOAT + && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)" + " +{ + if (TARGET_E500_DOUBLE) + { + if (!REG_P (operands[1])) + operands[1] = force_reg (SImode, operands[1]); + emit_insn (gen_spe_floatsidf2 (operands[0], operands[1])); + DONE; + } + else if (TARGET_LFIWAX && TARGET_FCFID) + { + emit_insn (gen_floatsidf2_lfiwax (operands[0], operands[1])); + DONE; + } + else if (TARGET_FCFID) + { + rtx dreg = operands[1]; + if (!REG_P (dreg)) + dreg = force_reg (SImode, dreg); + dreg = convert_to_mode (DImode, dreg, false); + emit_insn (gen_floatdidf2 (operands[0], dreg)); + DONE; + } + + if (!REG_P (operands[1])) + operands[1] = force_reg (SImode, operands[1]); + operands[2] = force_reg (SImode, GEN_INT (0x43300000)); + operands[3] = force_reg (DFmode, CONST_DOUBLE_ATOF (\"4503601774854144\", DFmode)); + operands[4] = rs6000_allocate_stack_temp (DFmode, true, false); + operands[5] = gen_reg_rtx (DFmode); + operands[6] = gen_reg_rtx (SImode); +}") + +(define_insn_and_split "*floatsidf2_internal" + [(set (match_operand:DF 0 "gpc_reg_operand" "=&d") + (float:DF (match_operand:SI 1 "gpc_reg_operand" "r"))) + (use (match_operand:SI 2 "gpc_reg_operand" "r")) + (use (match_operand:DF 3 "gpc_reg_operand" "d")) + (clobber (match_operand:DF 4 "offsettable_mem_operand" "=o")) + (clobber (match_operand:DF 5 "gpc_reg_operand" "=&d")) + (clobber (match_operand:SI 6 "gpc_reg_operand" "=&r"))] + "! TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" + "#" + "" + [(pc)] + " +{ + rtx lowword, highword; + gcc_assert (MEM_P (operands[4])); + highword = adjust_address (operands[4], SImode, 0); + lowword = adjust_address (operands[4], SImode, 4); + if (! WORDS_BIG_ENDIAN) + std::swap (lowword, highword); + + emit_insn (gen_xorsi3 (operands[6], operands[1], + GEN_INT (~ (HOST_WIDE_INT) 0x7fffffff))); + emit_move_insn (lowword, operands[6]); + emit_move_insn (highword, operands[2]); + emit_move_insn (operands[5], operands[4]); + emit_insn (gen_subdf3 (operands[0], operands[5], operands[3])); + DONE; +}" + [(set_attr "length" "24") + (set_attr "type" "fp")]) + +;; If we don't have a direct conversion to single precision, don't enable this +;; conversion for 32-bit without fast math, because we don't have the insn to +;; generate the fixup swizzle to avoid double rounding problems. 
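The constants in the *floatsidf2_internal split just above implement the classic integer-to-double bit trick: 0x43300000 is the high word of the double 2^52, and 4503601774854144 is 2^52 + 2^31. Flipping the sign bit of the 32-bit input and storing it as the low word builds the double 2^52 + 2^31 + x exactly, so a single subtraction of the magic constant leaves (double) x. A hedged C sketch, not part of the patch (the function name is hypothetical):

#include <string.h>

static double float_si_no_fcfid (int x)
{
  unsigned long long bits = 0x4330000000000000ULL
                            | (unsigned int) (x ^ 0x80000000);
  double tmp;
  memcpy (&tmp, &bits, sizeof tmp);    /* the stores/loads through the stack slot */
  return tmp - 4503601774854144.0;     /* fsub against operands[3]                */
}

The unsigned split further down works the same way, except the input word is stored unmodified and the constant is plain 2^52 (4503599627370496).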
+(define_expand "floatunssisf2" + [(set (match_operand:SF 0 "gpc_reg_operand" "") + (unsigned_float:SF (match_operand:SI 1 "nonimmediate_operand" "")))] + "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT + && (!TARGET_FPRS + || (TARGET_FPRS + && ((TARGET_FCFIDUS && TARGET_LFIWZX) + || (TARGET_DOUBLE_FLOAT && TARGET_FCFID + && (TARGET_POWERPC64 || flag_unsafe_math_optimizations)))))" + " +{ + if (!TARGET_FPRS) + { + if (!REG_P (operands[1])) + operands[1] = force_reg (SImode, operands[1]); + } + else if (TARGET_LFIWZX && TARGET_FCFIDUS) + { + emit_insn (gen_floatunssisf2_lfiwzx (operands[0], operands[1])); + DONE; + } + else + { + rtx dreg = operands[1]; + if (!REG_P (dreg)) + dreg = force_reg (SImode, dreg); + dreg = convert_to_mode (DImode, dreg, true); + emit_insn (gen_floatdisf2 (operands[0], dreg)); + DONE; + } +}") + +(define_expand "floatunssidf2" + [(parallel [(set (match_operand:DF 0 "gpc_reg_operand" "") + (unsigned_float:DF (match_operand:SI 1 "nonimmediate_operand" ""))) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 4)) + (clobber (match_dup 5))])] + "TARGET_HARD_FLOAT + && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)" + " +{ + if (TARGET_E500_DOUBLE) + { + if (!REG_P (operands[1])) + operands[1] = force_reg (SImode, operands[1]); + emit_insn (gen_spe_floatunssidf2 (operands[0], operands[1])); + DONE; + } + else if (TARGET_LFIWZX && TARGET_FCFID) + { + emit_insn (gen_floatunssidf2_lfiwzx (operands[0], operands[1])); + DONE; + } + else if (TARGET_FCFID) + { + rtx dreg = operands[1]; + if (!REG_P (dreg)) + dreg = force_reg (SImode, dreg); + dreg = convert_to_mode (DImode, dreg, true); + emit_insn (gen_floatdidf2 (operands[0], dreg)); + DONE; + } + + if (!REG_P (operands[1])) + operands[1] = force_reg (SImode, operands[1]); + operands[2] = force_reg (SImode, GEN_INT (0x43300000)); + operands[3] = force_reg (DFmode, CONST_DOUBLE_ATOF (\"4503599627370496\", DFmode)); + operands[4] = rs6000_allocate_stack_temp (DFmode, true, false); + operands[5] = gen_reg_rtx (DFmode); +}") + +(define_insn_and_split "*floatunssidf2_internal" + [(set (match_operand:DF 0 "gpc_reg_operand" "=&d") + (unsigned_float:DF (match_operand:SI 1 "gpc_reg_operand" "r"))) + (use (match_operand:SI 2 "gpc_reg_operand" "r")) + (use (match_operand:DF 3 "gpc_reg_operand" "d")) + (clobber (match_operand:DF 4 "offsettable_mem_operand" "=o")) + (clobber (match_operand:DF 5 "gpc_reg_operand" "=&d"))] + "! TARGET_FCFIDU && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && !(TARGET_FCFID && TARGET_POWERPC64)" + "#" + "" + [(pc)] + " +{ + rtx lowword, highword; + gcc_assert (MEM_P (operands[4])); + highword = adjust_address (operands[4], SImode, 0); + lowword = adjust_address (operands[4], SImode, 4); + if (! WORDS_BIG_ENDIAN) + std::swap (lowword, highword); + + emit_move_insn (lowword, operands[1]); + emit_move_insn (highword, operands[2]); + emit_move_insn (operands[5], operands[4]); + emit_insn (gen_subdf3 (operands[0], operands[5], operands[3])); + DONE; +}" + [(set_attr "length" "20") + (set_attr "type" "fp")]) + +;; ISA 3.0 adds instructions lxsi[bh]zx to directly load QImode and HImode to +;; vector registers. These insns favor doing the sign/zero extension in +;; the vector registers, rather then loading up a GPR, doing a sign/zero +;; extension and then a direct move. 
+ +(define_expand "float<QHI:mode><FP_ISA3:mode>2" + [(parallel [(set (match_operand:FP_ISA3 0 "vsx_register_operand") + (float:FP_ISA3 + (match_operand:QHI 1 "input_operand"))) + (clobber (match_scratch:DI 2)) + (clobber (match_scratch:DI 3)) + (clobber (match_scratch:<QHI:MODE> 4))])] + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64 + && TARGET_VSX_SMALL_INTEGER" +{ + if (MEM_P (operands[1])) + operands[1] = rs6000_address_for_fpconvert (operands[1]); +}) + +(define_insn_and_split "*float<QHI:mode><FP_ISA3:mode>2_internal" + [(set (match_operand:FP_ISA3 0 "vsx_register_operand" "=<Fv>,<Fv>,<Fv>") + (float:FP_ISA3 + (match_operand:QHI 1 "reg_or_indexed_operand" "wK,r,Z"))) + (clobber (match_scratch:DI 2 "=wK,wi,wK")) + (clobber (match_scratch:DI 3 "=X,r,X")) + (clobber (match_scratch:<QHI:MODE> 4 "=X,X,wK"))] + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64 + && TARGET_UPPER_REGS_DI && TARGET_VSX_SMALL_INTEGER" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx result = operands[0]; + rtx input = operands[1]; + rtx di = operands[2]; + + if (!MEM_P (input)) + { + rtx tmp = operands[3]; + if (altivec_register_operand (input, <QHI:MODE>mode)) + emit_insn (gen_extend<QHI:mode>di2 (di, input)); + else if (GET_CODE (tmp) == SCRATCH) + emit_insn (gen_extend<QHI:mode>di2 (di, input)); + else + { + emit_insn (gen_extend<QHI:mode>di2 (tmp, input)); + emit_move_insn (di, tmp); + } + } + else + { + rtx tmp = operands[4]; + emit_move_insn (tmp, input); + emit_insn (gen_extend<QHI:mode>di2 (di, tmp)); + } + + emit_insn (gen_floatdi<FP_ISA3:mode>2 (result, di)); + DONE; +}) + +(define_expand "floatuns<QHI:mode><FP_ISA3:mode>2" + [(parallel [(set (match_operand:FP_ISA3 0 "vsx_register_operand") + (unsigned_float:FP_ISA3 + (match_operand:QHI 1 "input_operand" ""))) + (clobber (match_scratch:DI 2 "")) + (clobber (match_scratch:DI 3 ""))])] + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64 + && TARGET_VSX_SMALL_INTEGER" +{ + if (MEM_P (operands[1])) + operands[1] = rs6000_address_for_fpconvert (operands[1]); +}) + +(define_insn_and_split "*floatuns<QHI:mode><FP_ISA3:mode>2_internal" + [(set (match_operand:FP_ISA3 0 "vsx_register_operand" "=<Fv>,<Fv>,<Fv>") + (unsigned_float:FP_ISA3 + (match_operand:QHI 1 "reg_or_indexed_operand" "wK,r,Z"))) + (clobber (match_scratch:DI 2 "=wK,wi,wJwK")) + (clobber (match_scratch:DI 3 "=X,r,X"))] + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64 + && TARGET_VSX_SMALL_INTEGER" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx result = operands[0]; + rtx input = operands[1]; + rtx di = operands[2]; + + if (MEM_P (input) || altivec_register_operand (input, <QHI:MODE>mode)) + emit_insn (gen_zero_extend<QHI:mode>di2 (di, input)); + else + { + rtx tmp = operands[3]; + if (GET_CODE (tmp) == SCRATCH) + emit_insn (gen_extend<QHI:mode>di2 (di, input)); + else + { + emit_insn (gen_zero_extend<QHI:mode>di2 (tmp, input)); + emit_move_insn (di, tmp); + } + } + + emit_insn (gen_floatdi<FP_ISA3:mode>2 (result, di)); + DONE; +}) + +(define_expand "fix_trunc<mode>si2" + [(set (match_operand:SI 0 "gpc_reg_operand" "") + (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "")))] + "TARGET_HARD_FLOAT && ((TARGET_FPRS && <TARGET_FLOAT>) || <E500_CONVERT>)" + " +{ + if (!<E500_CONVERT> && !TARGET_VSX_SMALL_INTEGER) + { + rtx src = force_reg (<MODE>mode, operands[1]); + + if (TARGET_STFIWX) + emit_insn (gen_fix_trunc<mode>si2_stfiwx (operands[0], src)); + else + { + rtx tmp = gen_reg_rtx (DImode); + rtx stack = rs6000_allocate_stack_temp 
(DImode, true, false); + emit_insn (gen_fix_trunc<mode>si2_internal (operands[0], src, + tmp, stack)); + } + DONE; + } +}") + +; Like the convert to float patterns, this insn must be split before +; register allocation so that it can allocate the memory slot if it +; needed +(define_insn_and_split "fix_trunc<mode>si2_stfiwx" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d"))) + (clobber (match_scratch:DI 2 "=d"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && (<MODE>mode != SFmode || TARGET_SINGLE_FLOAT) + && TARGET_STFIWX && can_create_pseudo_p () + && !TARGET_VSX_SMALL_INTEGER" + "#" + "" + [(pc)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = operands[2]; + + if (GET_CODE (tmp) == SCRATCH) + tmp = gen_reg_rtx (DImode); + + emit_insn (gen_fctiwz_<mode> (tmp, src)); + if (MEM_P (dest)) + { + dest = rs6000_address_for_fpconvert (dest); + emit_insn (gen_stfiwx (dest, tmp)); + DONE; + } + else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) + { + dest = gen_lowpart (DImode, dest); + emit_move_insn (dest, tmp); + DONE; + } + else + { + rtx stack = rs6000_allocate_stack_temp (SImode, false, true); + emit_insn (gen_stfiwx (stack, tmp)); + emit_move_insn (dest, stack); + DONE; + } +} + [(set_attr "length" "12") + (set_attr "type" "fp")]) + +(define_insn_and_split "fix_trunc<mode>si2_internal" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r,?r") + (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d,<rreg>"))) + (clobber (match_operand:DI 2 "gpc_reg_operand" "=1,d")) + (clobber (match_operand:DI 3 "offsettable_mem_operand" "=o,o"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && !TARGET_VSX_SMALL_INTEGER" + "#" + "" + [(pc)] + " +{ + rtx lowword; + gcc_assert (MEM_P (operands[3])); + lowword = adjust_address (operands[3], SImode, WORDS_BIG_ENDIAN ? 
4 : 0); + + emit_insn (gen_fctiwz_<mode> (operands[2], operands[1])); + emit_move_insn (operands[3], operands[2]); + emit_move_insn (operands[0], lowword); + DONE; +}" + [(set_attr "length" "16") + (set_attr "type" "fp")]) + +(define_expand "fix_trunc<mode>di2" + [(set (match_operand:DI 0 "gpc_reg_operand" "") + (fix:DI (match_operand:SFDF 1 "gpc_reg_operand" "")))] + "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS + && TARGET_FCFID" + "") + +(define_insn "*fix_trunc<mode>di2_fctidz" + [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wi") + (fix:DI (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")))] + "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS + && TARGET_FCFID" + "@ + fctidz %0,%1 + xscvdpsxds %x0,%x1" + [(set_attr "type" "fp")]) + +(define_expand "fix_trunc<SFDF:mode><QHI:mode>2" + [(parallel [(set (match_operand:<QHI:MODE> 0 "nonimmediate_operand") + (fix:QHI (match_operand:SFDF 1 "gpc_reg_operand"))) + (clobber (match_scratch:DI 2))])] + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT + && TARGET_VSX_SMALL_INTEGER" +{ + if (MEM_P (operands[0])) + operands[0] = rs6000_address_for_fpconvert (operands[0]); +}) + +(define_insn_and_split "*fix_trunc<SFDF:mode><QHI:mode>2_internal" + [(set (match_operand:<QHI:MODE> 0 "reg_or_indexed_operand" "=wIwJ,rZ") + (fix:QHI + (match_operand:SFDF 1 "gpc_reg_operand" "<SFDF:Fv>,<SFDF:Fv>"))) + (clobber (match_scratch:DI 2 "=X,wi"))] + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT + && TARGET_VSX_SMALL_INTEGER" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + + if (vsx_register_operand (dest, <QHI:MODE>mode)) + { + rtx di_dest = gen_rtx_REG (DImode, REGNO (dest)); + emit_insn (gen_fix_trunc<SFDF:mode>di2 (di_dest, src)); + } + else + { + rtx tmp = operands[2]; + rtx tmp2 = gen_rtx_REG (<QHI:MODE>mode, REGNO (tmp)); + + emit_insn (gen_fix_trunc<SFDF:mode>di2 (tmp, src)); + emit_move_insn (dest, tmp2); + } + DONE; +}) + +(define_expand "fixuns_trunc<mode>si2" + [(set (match_operand:SI 0 "gpc_reg_operand" "") + (unsigned_fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "")))] + "TARGET_HARD_FLOAT + && ((TARGET_FPRS && <TARGET_FLOAT> && TARGET_FCTIWUZ && TARGET_STFIWX) + || <E500_CONVERT>)" + " +{ + if (!<E500_CONVERT> && !TARGET_VSX_SMALL_INTEGER) + { + emit_insn (gen_fixuns_trunc<mode>si2_stfiwx (operands[0], operands[1])); + DONE; + } +}") + +(define_insn_and_split "fixuns_trunc<mode>si2_stfiwx" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (unsigned_fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d"))) + (clobber (match_scratch:DI 2 "=d"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT> && TARGET_FCTIWUZ + && TARGET_STFIWX && can_create_pseudo_p () + && !TARGET_VSX_SMALL_INTEGER" + "#" + "" + [(pc)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = operands[2]; + + if (GET_CODE (tmp) == SCRATCH) + tmp = gen_reg_rtx (DImode); + + emit_insn (gen_fctiwuz_<mode> (tmp, src)); + if (MEM_P (dest)) + { + dest = rs6000_address_for_fpconvert (dest); + emit_insn (gen_stfiwx (dest, tmp)); + DONE; + } + else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) + { + dest = gen_lowpart (DImode, dest); + emit_move_insn (dest, tmp); + DONE; + } + else + { + rtx stack = rs6000_allocate_stack_temp (SImode, false, true); + emit_insn (gen_stfiwx (stack, tmp)); + emit_move_insn (dest, stack); + DONE; + } +} + [(set_attr "length" "12") + (set_attr "type" "fp")]) + +(define_insn "fixuns_trunc<mode>di2" + [(set (match_operand:DI 0 "gpc_reg_operand" 
"=d,wi") + (unsigned_fix:DI (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")))] + "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS && TARGET_FCTIDUZ" + "@ + fctiduz %0,%1 + xscvdpuxds %x0,%x1" + [(set_attr "type" "fp")]) + +(define_expand "fixuns_trunc<SFDF:mode><QHI:mode>2" + [(parallel [(set (match_operand:<QHI:MODE> 0 "nonimmediate_operand") + (unsigned_fix:QHI (match_operand:SFDF 1 "gpc_reg_operand"))) + (clobber (match_scratch:DI 2))])] + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT + && TARGET_VSX_SMALL_INTEGER" +{ + if (MEM_P (operands[0])) + operands[0] = rs6000_address_for_fpconvert (operands[0]); +}) + +(define_insn_and_split "*fixuns_trunc<SFDF:mode><QHI:mode>2_internal" + [(set (match_operand:<QHI:MODE> 0 "reg_or_indexed_operand" "=wIwJ,rZ") + (unsigned_fix:QHI + (match_operand:SFDF 1 "gpc_reg_operand" "<SFDF:Fv>,<SFDF:Fv>"))) + (clobber (match_scratch:DI 2 "=X,wi"))] + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT + && TARGET_VSX_SMALL_INTEGER" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + + if (vsx_register_operand (dest, <QHI:MODE>mode)) + { + rtx di_dest = gen_rtx_REG (DImode, REGNO (dest)); + emit_insn (gen_fixuns_trunc<SFDF:mode>di2 (di_dest, src)); + } + else + { + rtx tmp = operands[2]; + rtx tmp2 = gen_rtx_REG (<QHI:MODE>mode, REGNO (tmp)); + + emit_insn (gen_fixuns_trunc<SFDF:mode>di2 (tmp, src)); + emit_move_insn (dest, tmp2); + } + DONE; +}) + +;; If -mvsx-small-integer, we can represent the FIX operation directly. On +;; older machines, we have to use an UNSPEC to produce a SImode and move it +;; to another location, since SImode is not allowed in vector registers. +(define_insn "*fctiw<u>z_<mode>_smallint" + [(set (match_operand:SI 0 "vsx_register_operand" "=d,wi") + (any_fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && TARGET_VSX_SMALL_INTEGER" + "@ + fctiw<u>z %0,%1 + xscvdp<su>xws %x0,%x1" + [(set_attr "type" "fp")]) + +;; Combiner pattern to prevent moving the result of converting a floating point +;; value to 32-bit integer to GPR in order to save it. +(define_insn_and_split "*fctiw<u>z_<mode>_mem" + [(set (match_operand:SI 0 "memory_operand" "=Z") + (any_fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "wa"))) + (clobber (match_scratch:SI 2 "=wa"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && TARGET_VSX_SMALL_INTEGER" + "#" + "&& reload_completed" + [(set (match_dup 2) + (any_fix:SI (match_dup 1))) + (set (match_dup 0) + (match_dup 2))]) + +;; Here, we use (set (reg) (unspec:DI [(fix:SI ...)] UNSPEC_FCTIWZ)) +;; rather than (set (subreg:SI (reg)) (fix:SI ...)) +;; because the first makes it clear that operand 0 is not live +;; before the instruction. 
+(define_insn "fctiwz_<mode>" + [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wi") + (unspec:DI [(fix:SI + (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>"))] + UNSPEC_FCTIWZ))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" + "@ + fctiwz %0,%1 + xscvdpsxws %x0,%x1" + [(set_attr "type" "fp")]) + +(define_insn "fctiwuz_<mode>" + [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wi") + (unspec:DI [(unsigned_fix:SI + (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>"))] + UNSPEC_FCTIWUZ))] + "TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT> && TARGET_FCTIWUZ" + "@ + fctiwuz %0,%1 + xscvdpuxws %x0,%x1" + [(set_attr "type" "fp")]) + +;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since +;; since the friz instruction does not truncate the value if the floating +;; point value is < LONG_MIN or > LONG_MAX. +(define_insn "*friz" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d,ws") + (float:DF (fix:DI (match_operand:DF 1 "gpc_reg_operand" "d,ws"))))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_FPRND + && flag_unsafe_math_optimizations && !flag_trapping_math && TARGET_FRIZ" + "@ + friz %0,%1 + xsrdpiz %x0,%x1" + [(set_attr "type" "fp")]) + +;; Opitmize converting SF/DFmode to signed SImode and back to SF/DFmode. This +;; optimization prevents on ISA 2.06 systems and earlier having to store the +;; value from the FPR/vector unit to the stack, load the value into a GPR, sign +;; extend it, store it back on the stack from the GPR, load it back into the +;; FP/vector unit to do the rounding. If we have direct move (ISA 2.07), +;; disable using store and load to sign/zero extend the value. +(define_insn_and_split "*round32<mode>2_fprs" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d") + (float:SFDF + (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d")))) + (clobber (match_scratch:DI 2 "=d")) + (clobber (match_scratch:DI 3 "=d"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && <SI_CONVERT_FP> && TARGET_LFIWAX && TARGET_STFIWX && TARGET_FCFID + && !TARGET_DIRECT_MOVE && can_create_pseudo_p ()" + "#" + "" + [(pc)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp1 = operands[2]; + rtx tmp2 = operands[3]; + rtx stack = rs6000_allocate_stack_temp (SImode, false, true); + + if (GET_CODE (tmp1) == SCRATCH) + tmp1 = gen_reg_rtx (DImode); + if (GET_CODE (tmp2) == SCRATCH) + tmp2 = gen_reg_rtx (DImode); + + emit_insn (gen_fctiwz_<mode> (tmp1, src)); + emit_insn (gen_stfiwx (stack, tmp1)); + emit_insn (gen_lfiwax (tmp2, stack)); + emit_insn (gen_floatdi<mode>2 (dest, tmp2)); + DONE; +} + [(set_attr "type" "fpload") + (set_attr "length" "16")]) + +(define_insn_and_split "*roundu32<mode>2_fprs" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d") + (unsigned_float:SFDF + (unsigned_fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d")))) + (clobber (match_scratch:DI 2 "=d")) + (clobber (match_scratch:DI 3 "=d"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && TARGET_LFIWZX && TARGET_STFIWX && TARGET_FCFIDU && !TARGET_DIRECT_MOVE + && can_create_pseudo_p ()" + "#" + "" + [(pc)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp1 = operands[2]; + rtx tmp2 = operands[3]; + rtx stack = rs6000_allocate_stack_temp (SImode, false, true); + + if (GET_CODE (tmp1) == SCRATCH) + tmp1 = gen_reg_rtx (DImode); + if (GET_CODE (tmp2) == SCRATCH) + tmp2 = gen_reg_rtx (DImode); + + emit_insn (gen_fctiwuz_<mode> (tmp1, src)); + emit_insn (gen_stfiwx (stack, tmp1)); + emit_insn (gen_lfiwzx (tmp2, stack)); + 
emit_insn (gen_floatdi<mode>2 (dest, tmp2)); + DONE; +} + [(set_attr "type" "fpload") + (set_attr "length" "16")]) + +;; No VSX equivalent to fctid +(define_insn "lrint<mode>di2" + [(set (match_operand:DI 0 "gpc_reg_operand" "=d") + (unspec:DI [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")] + UNSPEC_FCTID))] + "TARGET_<MODE>_FPR && TARGET_FPRND" + "fctid %0,%1" + [(set_attr "type" "fp")]) + +(define_insn "btrunc<mode>2" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>") + (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")] + UNSPEC_FRIZ))] + "TARGET_<MODE>_FPR && TARGET_FPRND" + "@ + friz %0,%1 + xsrdpiz %x0,%x1" + [(set_attr "type" "fp") + (set_attr "fp_type" "fp_addsub_<Fs>")]) + +(define_insn "ceil<mode>2" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>") + (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")] + UNSPEC_FRIP))] + "TARGET_<MODE>_FPR && TARGET_FPRND" + "@ + frip %0,%1 + xsrdpip %x0,%x1" + [(set_attr "type" "fp") + (set_attr "fp_type" "fp_addsub_<Fs>")]) + +(define_insn "floor<mode>2" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>") + (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")] + UNSPEC_FRIM))] + "TARGET_<MODE>_FPR && TARGET_FPRND" + "@ + frim %0,%1 + xsrdpim %x0,%x1" + [(set_attr "type" "fp") + (set_attr "fp_type" "fp_addsub_<Fs>")]) + +;; No VSX equivalent to frin +(define_insn "round<mode>2" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>") + (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")] + UNSPEC_FRIN))] + "TARGET_<MODE>_FPR && TARGET_FPRND" + "frin %0,%1" + [(set_attr "type" "fp") + (set_attr "fp_type" "fp_addsub_<Fs>")]) + +(define_insn "*xsrdpi<mode>2" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Fv>") + (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Fv>")] + UNSPEC_XSRDPI))] + "TARGET_<MODE>_FPR && TARGET_VSX" + "xsrdpi %x0,%x1" + [(set_attr "type" "fp") + (set_attr "fp_type" "fp_addsub_<Fs>")]) + +(define_expand "lround<mode>di2" + [(set (match_dup 2) + (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "")] + UNSPEC_XSRDPI)) + (set (match_operand:DI 0 "gpc_reg_operand" "") + (unspec:DI [(match_dup 2)] + UNSPEC_FCTID))] + "TARGET_<MODE>_FPR && TARGET_VSX" +{ + operands[2] = gen_reg_rtx (<MODE>mode); +}) + +; An UNSPEC is used so we don't have to support SImode in FP registers. +; The 'wu' constraint is used for the 2nd alternative to ensure stxsiwx +; is only generated for Power8 or later. +(define_insn "stfiwx" + [(set (match_operand:SI 0 "memory_operand" "=Z,Z") + (unspec:SI [(match_operand:DI 1 "gpc_reg_operand" "d,wu")] + UNSPEC_STFIWX))] + "TARGET_PPC_GFXOPT" + "@ + stfiwx %1,%y0 + stxsiwx %x1,%y0" + [(set_attr "type" "fpstore")]) + +;; If we don't have a direct conversion to single precision, don't enable this +;; conversion for 32-bit without fast math, because we don't have the insn to +;; generate the fixup swizzle to avoid double rounding problems. 
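The double rounding the comment above warns about comes from taking the DImode -> DFmode -> SFmode detour and rounding twice. One worked instance (an illustration, not from the patch):

long long x = (1LL << 54) + (1LL << 30) + 1;
float direct     = (float) x;           /* one rounding: 2^54 + 2^31                    */
float via_double = (float) (double) x;  /* the first rounding drops the +1, leaving an  */
                                        /* exact halfway case that then rounds to 2^54  */

This is why, without fcfids, the expander below requires either a 64-bit target (where the fixup swizzle further down applies) or flag_unsafe_math_optimizations.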
+(define_expand "floatsisf2" + [(set (match_operand:SF 0 "gpc_reg_operand" "") + (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))] + "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT + && (!TARGET_FPRS + || (TARGET_FPRS + && ((TARGET_FCFIDS && TARGET_LFIWAX) + || (TARGET_DOUBLE_FLOAT && TARGET_FCFID + && (TARGET_POWERPC64 || flag_unsafe_math_optimizations)))))" + " +{ + if (!TARGET_FPRS) + { + if (!REG_P (operands[1])) + operands[1] = force_reg (SImode, operands[1]); + } + else if (TARGET_FCFIDS && TARGET_LFIWAX) + { + emit_insn (gen_floatsisf2_lfiwax (operands[0], operands[1])); + DONE; + } + else if (TARGET_FCFID && TARGET_LFIWAX) + { + rtx dfreg = gen_reg_rtx (DFmode); + emit_insn (gen_floatsidf2_lfiwax (dfreg, operands[1])); + emit_insn (gen_truncdfsf2 (operands[0], dfreg)); + DONE; + } + else + { + rtx dreg = operands[1]; + if (!REG_P (dreg)) + dreg = force_reg (SImode, dreg); + dreg = convert_to_mode (DImode, dreg, false); + emit_insn (gen_floatdisf2 (operands[0], dreg)); + DONE; + } +}") + +(define_expand "floatdidf2" + [(set (match_operand:DF 0 "gpc_reg_operand" "") + (float:DF (match_operand:DI 1 "gpc_reg_operand" "")))] + "TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS" + "") + +(define_insn "*floatdidf2_fpr" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d,ws") + (float:DF (match_operand:DI 1 "gpc_reg_operand" "d,wi")))] + "TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS" + "@ + fcfid %0,%1 + xscvsxddp %x0,%x1" + [(set_attr "type" "fp")]) + +; Allow the combiner to merge source memory operands to the conversion so that +; the optimizer/register allocator doesn't try to load the value too early in a +; GPR and then use store/load to move it to a FPR and suffer from a store-load +; hit. We will split after reload to avoid the trip through the GPRs + +(define_insn_and_split "*floatdidf2_mem" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d,ws") + (float:DF (match_operand:DI 1 "memory_operand" "m,Z"))) + (clobber (match_scratch:DI 2 "=d,wi"))] + "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS && TARGET_FCFID" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float:DF (match_dup 2)))] + "" + [(set_attr "length" "8") + (set_attr "type" "fpload")]) + +(define_expand "floatunsdidf2" + [(set (match_operand:DF 0 "gpc_reg_operand" "") + (unsigned_float:DF + (match_operand:DI 1 "gpc_reg_operand" "")))] + "TARGET_HARD_FLOAT && TARGET_FCFIDU" + "") + +(define_insn "*floatunsdidf2_fcfidu" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d,ws") + (unsigned_float:DF (match_operand:DI 1 "gpc_reg_operand" "d,wi")))] + "TARGET_HARD_FLOAT && TARGET_FCFIDU" + "@ + fcfidu %0,%1 + xscvuxddp %x0,%x1" + [(set_attr "type" "fp") + (set_attr "length" "4")]) + +(define_insn_and_split "*floatunsdidf2_mem" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d,ws") + (unsigned_float:DF (match_operand:DI 1 "memory_operand" "m,Z"))) + (clobber (match_scratch:DI 2 "=d,wi"))] + "TARGET_HARD_FLOAT && (TARGET_FCFIDU || VECTOR_UNIT_VSX_P (DFmode))" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (unsigned_float:DF (match_dup 2)))] + "" + [(set_attr "length" "8") + (set_attr "type" "fpload")]) + +(define_expand "floatdisf2" + [(set (match_operand:SF 0 "gpc_reg_operand" "") + (float:SF (match_operand:DI 1 "gpc_reg_operand" "")))] + "TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT + && (TARGET_FCFIDS || TARGET_POWERPC64 || flag_unsafe_math_optimizations)" + " +{ + 
if (!TARGET_FCFIDS) + { + rtx val = operands[1]; + if (!flag_unsafe_math_optimizations) + { + rtx label = gen_label_rtx (); + val = gen_reg_rtx (DImode); + emit_insn (gen_floatdisf2_internal2 (val, operands[1], label)); + emit_label (label); + } + emit_insn (gen_floatdisf2_internal1 (operands[0], val)); + DONE; + } +}") + +(define_insn "floatdisf2_fcfids" + [(set (match_operand:SF 0 "gpc_reg_operand" "=f,wy") + (float:SF (match_operand:DI 1 "gpc_reg_operand" "d,wi")))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT + && TARGET_DOUBLE_FLOAT && TARGET_FCFIDS" + "@ + fcfids %0,%1 + xscvsxdsp %x0,%x1" + [(set_attr "type" "fp")]) + +(define_insn_and_split "*floatdisf2_mem" + [(set (match_operand:SF 0 "gpc_reg_operand" "=f,wy,wy") + (float:SF (match_operand:DI 1 "memory_operand" "m,m,Z"))) + (clobber (match_scratch:DI 2 "=d,d,wi"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT + && TARGET_DOUBLE_FLOAT && TARGET_FCFIDS" + "#" + "&& reload_completed" + [(pc)] + " +{ + emit_move_insn (operands[2], operands[1]); + emit_insn (gen_floatdisf2_fcfids (operands[0], operands[2])); + DONE; +}" + [(set_attr "length" "8")]) + +;; This is not IEEE compliant if rounding mode is "round to nearest". +;; If the DI->DF conversion is inexact, then it's possible to suffer +;; from double rounding. +;; Instead of creating a new cpu type for two FP operations, just use fp +(define_insn_and_split "floatdisf2_internal1" + [(set (match_operand:SF 0 "gpc_reg_operand" "=f") + (float:SF (match_operand:DI 1 "gpc_reg_operand" "d"))) + (clobber (match_scratch:DF 2 "=d"))] + "TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT + && !TARGET_FCFIDS" + "#" + "&& reload_completed" + [(set (match_dup 2) + (float:DF (match_dup 1))) + (set (match_dup 0) + (float_truncate:SF (match_dup 2)))] + "" + [(set_attr "length" "8") + (set_attr "type" "fp")]) + +;; Twiddles bits to avoid double rounding. +;; Bits that might be truncated when converting to DFmode are replaced +;; by a bit that won't be lost at that stage, but is below the SFmode +;; rounding position. 
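What floatdisf2_internal2 does, stated loosely: when the 64-bit value is too large to be exact in DFmode, any low-order bits the DImode -> DFmode rounding could lose are collapsed into a single sticky bit that DFmode will keep but that sits below the SFmode rounding position, so the final rounding never sees a manufactured halfway case. An approximate C sketch of the twiddle (not the exact register sequence in the expander):

static long long sticky_twiddle (long long x)
{
  if (x & 0x7ff)                    /* low 11 bits; cf. the 2047 / -2048 masks */
    x = (x & ~0x7ffLL) | 0x800;     /* replace them with one surviving bit     */
  return x;
}

Values that already fit exactly in a double (the x >> 53 test) skip the twiddle and are converted directly.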
+(define_expand "floatdisf2_internal2" + [(parallel [(set (match_dup 3) (ashiftrt:DI (match_operand:DI 1 "" "") + (const_int 53))) + (clobber (reg:DI CA_REGNO))]) + (set (match_operand:DI 0 "" "") (and:DI (match_dup 1) + (const_int 2047))) + (set (match_dup 3) (plus:DI (match_dup 3) + (const_int 1))) + (set (match_dup 0) (plus:DI (match_dup 0) + (const_int 2047))) + (set (match_dup 4) (compare:CCUNS (match_dup 3) + (const_int 2))) + (set (match_dup 0) (ior:DI (match_dup 0) + (match_dup 1))) + (set (match_dup 0) (and:DI (match_dup 0) + (const_int -2048))) + (set (pc) (if_then_else (geu (match_dup 4) (const_int 0)) + (label_ref (match_operand:DI 2 "" "")) + (pc))) + (set (match_dup 0) (match_dup 1))] + "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT + && !TARGET_FCFIDS" + " +{ + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (CCUNSmode); +}") + +(define_expand "floatunsdisf2" + [(set (match_operand:SF 0 "gpc_reg_operand" "") + (unsigned_float:SF (match_operand:DI 1 "gpc_reg_operand" "")))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT + && TARGET_DOUBLE_FLOAT && TARGET_FCFIDUS" + "") + +(define_insn "floatunsdisf2_fcfidus" + [(set (match_operand:SF 0 "gpc_reg_operand" "=f,wu") + (unsigned_float:SF (match_operand:DI 1 "gpc_reg_operand" "d,wi")))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT + && TARGET_DOUBLE_FLOAT && TARGET_FCFIDUS" + "@ + fcfidus %0,%1 + xscvuxdsp %x0,%x1" + [(set_attr "type" "fp")]) + +(define_insn_and_split "*floatunsdisf2_mem" + [(set (match_operand:SF 0 "gpc_reg_operand" "=f,wy,wy") + (unsigned_float:SF (match_operand:DI 1 "memory_operand" "m,m,Z"))) + (clobber (match_scratch:DI 2 "=d,d,wi"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT + && TARGET_DOUBLE_FLOAT && TARGET_FCFIDUS" + "#" + "&& reload_completed" + [(pc)] + " +{ + emit_move_insn (operands[2], operands[1]); + emit_insn (gen_floatunsdisf2_fcfidus (operands[0], operands[2])); + DONE; +}" + [(set_attr "length" "8") + (set_attr "type" "fpload")]) + +;; Define the TImode operations that can be done in a small number +;; of instructions. The & constraints are to prevent the register +;; allocator from allocating registers that overlap with the inputs +;; (for example, having an input in 7,8 and an output in 6,7). We +;; also allow for the output being the same as one of the inputs. 
+ +(define_expand "addti3" + [(set (match_operand:TI 0 "gpc_reg_operand" "") + (plus:TI (match_operand:TI 1 "gpc_reg_operand" "") + (match_operand:TI 2 "reg_or_short_operand" "")))] + "TARGET_64BIT" +{ + rtx lo0 = gen_lowpart (DImode, operands[0]); + rtx lo1 = gen_lowpart (DImode, operands[1]); + rtx lo2 = gen_lowpart (DImode, operands[2]); + rtx hi0 = gen_highpart (DImode, operands[0]); + rtx hi1 = gen_highpart (DImode, operands[1]); + rtx hi2 = gen_highpart_mode (DImode, TImode, operands[2]); + + if (!reg_or_short_operand (lo2, DImode)) + lo2 = force_reg (DImode, lo2); + if (!adde_operand (hi2, DImode)) + hi2 = force_reg (DImode, hi2); + + emit_insn (gen_adddi3_carry (lo0, lo1, lo2)); + emit_insn (gen_adddi3_carry_in (hi0, hi1, hi2)); + DONE; +}) + +(define_expand "subti3" + [(set (match_operand:TI 0 "gpc_reg_operand" "") + (minus:TI (match_operand:TI 1 "reg_or_short_operand" "") + (match_operand:TI 2 "gpc_reg_operand" "")))] + "TARGET_64BIT" +{ + rtx lo0 = gen_lowpart (DImode, operands[0]); + rtx lo1 = gen_lowpart (DImode, operands[1]); + rtx lo2 = gen_lowpart (DImode, operands[2]); + rtx hi0 = gen_highpart (DImode, operands[0]); + rtx hi1 = gen_highpart_mode (DImode, TImode, operands[1]); + rtx hi2 = gen_highpart (DImode, operands[2]); + + if (!reg_or_short_operand (lo1, DImode)) + lo1 = force_reg (DImode, lo1); + if (!adde_operand (hi1, DImode)) + hi1 = force_reg (DImode, hi1); + + emit_insn (gen_subfdi3_carry (lo0, lo2, lo1)); + emit_insn (gen_subfdi3_carry_in (hi0, hi2, hi1)); + DONE; +}) + +;; 128-bit logical operations expanders + +(define_expand "and<mode>3" + [(set (match_operand:BOOL_128 0 "vlogical_operand" "") + (and:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "") + (match_operand:BOOL_128 2 "vlogical_operand" "")))] + "" + "") + +(define_expand "ior<mode>3" + [(set (match_operand:BOOL_128 0 "vlogical_operand" "") + (ior:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "") + (match_operand:BOOL_128 2 "vlogical_operand" "")))] + "" + "") + +(define_expand "xor<mode>3" + [(set (match_operand:BOOL_128 0 "vlogical_operand" "") + (xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "") + (match_operand:BOOL_128 2 "vlogical_operand" "")))] + "" + "") + +(define_expand "one_cmpl<mode>2" + [(set (match_operand:BOOL_128 0 "vlogical_operand" "") + (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "")))] + "" + "") + +(define_expand "nor<mode>3" + [(set (match_operand:BOOL_128 0 "vlogical_operand" "") + (and:BOOL_128 + (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "")) + (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand" ""))))] + "" + "") + +(define_expand "andc<mode>3" + [(set (match_operand:BOOL_128 0 "vlogical_operand" "") + (and:BOOL_128 + (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand" "")) + (match_operand:BOOL_128 1 "vlogical_operand" "")))] + "" + "") + +;; Power8 vector logical instructions. 
+(define_expand "eqv<mode>3" + [(set (match_operand:BOOL_128 0 "vlogical_operand" "") + (not:BOOL_128 + (xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "") + (match_operand:BOOL_128 2 "vlogical_operand" ""))))] + "<MODE>mode == TImode || <MODE>mode == PTImode || TARGET_P8_VECTOR" + "") + +;; Rewrite nand into canonical form +(define_expand "nand<mode>3" + [(set (match_operand:BOOL_128 0 "vlogical_operand" "") + (ior:BOOL_128 + (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "")) + (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand" ""))))] + "<MODE>mode == TImode || <MODE>mode == PTImode || TARGET_P8_VECTOR" + "") + +;; The canonical form is to have the negated element first, so we need to +;; reverse arguments. +(define_expand "orc<mode>3" + [(set (match_operand:BOOL_128 0 "vlogical_operand" "") + (ior:BOOL_128 + (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand" "")) + (match_operand:BOOL_128 1 "vlogical_operand" "")))] + "<MODE>mode == TImode || <MODE>mode == PTImode || TARGET_P8_VECTOR" + "") + +;; 128-bit logical operations insns and split operations +(define_insn_and_split "*and<mode>3_internal" + [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") + (and:BOOL_128 + (match_operand:BOOL_128 1 "vlogical_operand" "%<BOOL_REGS_OP1>") + (match_operand:BOOL_128 2 "vlogical_operand" "<BOOL_REGS_OP2>")))] + "" +{ + if (TARGET_VSX && vsx_register_operand (operands[0], <MODE>mode)) + return "xxland %x0,%x1,%x2"; + + if (TARGET_ALTIVEC && altivec_register_operand (operands[0], <MODE>mode)) + return "vand %0,%1,%2"; + + return "#"; +} + "reload_completed && int_reg_operand (operands[0], <MODE>mode)" + [(const_int 0)] +{ + rs6000_split_logical (operands, AND, false, false, false); + DONE; +} + [(set (attr "type") + (if_then_else + (match_test "vsx_register_operand (operands[0], <MODE>mode)") + (const_string "veclogical") + (const_string "integer"))) + (set (attr "length") + (if_then_else + (match_test "vsx_register_operand (operands[0], <MODE>mode)") + (const_string "4") + (if_then_else + (match_test "TARGET_POWERPC64") + (const_string "8") + (const_string "16"))))]) + +;; 128-bit IOR/XOR +(define_insn_and_split "*bool<mode>3_internal" + [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") + (match_operator:BOOL_128 3 "boolean_or_operator" + [(match_operand:BOOL_128 1 "vlogical_operand" "%<BOOL_REGS_OP1>") + (match_operand:BOOL_128 2 "vlogical_operand" "<BOOL_REGS_OP2>")]))] + "" +{ + if (TARGET_VSX && vsx_register_operand (operands[0], <MODE>mode)) + return "xxl%q3 %x0,%x1,%x2"; + + if (TARGET_ALTIVEC && altivec_register_operand (operands[0], <MODE>mode)) + return "v%q3 %0,%1,%2"; + + return "#"; +} + "reload_completed && int_reg_operand (operands[0], <MODE>mode)" + [(const_int 0)] +{ + rs6000_split_logical (operands, GET_CODE (operands[3]), false, false, false); + DONE; +} + [(set (attr "type") + (if_then_else + (match_test "vsx_register_operand (operands[0], <MODE>mode)") + (const_string "veclogical") + (const_string "integer"))) + (set (attr "length") + (if_then_else + (match_test "vsx_register_operand (operands[0], <MODE>mode)") + (const_string "4") + (if_then_else + (match_test "TARGET_POWERPC64") + (const_string "8") + (const_string "16"))))]) + +;; 128-bit ANDC/ORC +(define_insn_and_split "*boolc<mode>3_internal1" + [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") + (match_operator:BOOL_128 3 "boolean_operator" + [(not:BOOL_128 + (match_operand:BOOL_128 2 "vlogical_operand" 
"<BOOL_REGS_OP2>")) + (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_OP1>")]))] + "TARGET_P8_VECTOR || (GET_CODE (operands[3]) == AND)" +{ + if (TARGET_VSX && vsx_register_operand (operands[0], <MODE>mode)) + return "xxl%q3 %x0,%x1,%x2"; + + if (TARGET_ALTIVEC && altivec_register_operand (operands[0], <MODE>mode)) + return "v%q3 %0,%1,%2"; + + return "#"; +} + "(TARGET_P8_VECTOR || (GET_CODE (operands[3]) == AND)) + && reload_completed && int_reg_operand (operands[0], <MODE>mode)" + [(const_int 0)] +{ + rs6000_split_logical (operands, GET_CODE (operands[3]), false, false, true); + DONE; +} + [(set (attr "type") + (if_then_else + (match_test "vsx_register_operand (operands[0], <MODE>mode)") + (const_string "veclogical") + (const_string "integer"))) + (set (attr "length") + (if_then_else + (match_test "vsx_register_operand (operands[0], <MODE>mode)") + (const_string "4") + (if_then_else + (match_test "TARGET_POWERPC64") + (const_string "8") + (const_string "16"))))]) + +(define_insn_and_split "*boolc<mode>3_internal2" + [(set (match_operand:TI2 0 "int_reg_operand" "=&r,r,r") + (match_operator:TI2 3 "boolean_operator" + [(not:TI2 + (match_operand:TI2 2 "int_reg_operand" "r,0,r")) + (match_operand:TI2 1 "int_reg_operand" "r,r,0")]))] + "!TARGET_P8_VECTOR && (GET_CODE (operands[3]) != AND)" + "#" + "reload_completed && !TARGET_P8_VECTOR && (GET_CODE (operands[3]) != AND)" + [(const_int 0)] +{ + rs6000_split_logical (operands, GET_CODE (operands[3]), false, false, true); + DONE; +} + [(set_attr "type" "integer") + (set (attr "length") + (if_then_else + (match_test "TARGET_POWERPC64") + (const_string "8") + (const_string "16")))]) + +;; 128-bit NAND/NOR +(define_insn_and_split "*boolcc<mode>3_internal1" + [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") + (match_operator:BOOL_128 3 "boolean_operator" + [(not:BOOL_128 + (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_OP1>")) + (not:BOOL_128 + (match_operand:BOOL_128 2 "vlogical_operand" "<BOOL_REGS_OP2>"))]))] + "TARGET_P8_VECTOR || (GET_CODE (operands[3]) == AND)" +{ + if (TARGET_VSX && vsx_register_operand (operands[0], <MODE>mode)) + return "xxl%q3 %x0,%x1,%x2"; + + if (TARGET_ALTIVEC && altivec_register_operand (operands[0], <MODE>mode)) + return "v%q3 %0,%1,%2"; + + return "#"; +} + "(TARGET_P8_VECTOR || (GET_CODE (operands[3]) == AND)) + && reload_completed && int_reg_operand (operands[0], <MODE>mode)" + [(const_int 0)] +{ + rs6000_split_logical (operands, GET_CODE (operands[3]), false, true, true); + DONE; +} + [(set (attr "type") + (if_then_else + (match_test "vsx_register_operand (operands[0], <MODE>mode)") + (const_string "veclogical") + (const_string "integer"))) + (set (attr "length") + (if_then_else + (match_test "vsx_register_operand (operands[0], <MODE>mode)") + (const_string "4") + (if_then_else + (match_test "TARGET_POWERPC64") + (const_string "8") + (const_string "16"))))]) + +(define_insn_and_split "*boolcc<mode>3_internal2" + [(set (match_operand:TI2 0 "int_reg_operand" "=&r,r,r") + (match_operator:TI2 3 "boolean_operator" + [(not:TI2 + (match_operand:TI2 1 "int_reg_operand" "r,0,r")) + (not:TI2 + (match_operand:TI2 2 "int_reg_operand" "r,r,0"))]))] + "!TARGET_P8_VECTOR && (GET_CODE (operands[3]) != AND)" + "#" + "reload_completed && !TARGET_P8_VECTOR && (GET_CODE (operands[3]) != AND)" + [(const_int 0)] +{ + rs6000_split_logical (operands, GET_CODE (operands[3]), false, true, true); + DONE; +} + [(set_attr "type" "integer") + (set (attr "length") + (if_then_else + (match_test 
"TARGET_POWERPC64") + (const_string "8") + (const_string "16")))]) + + +;; 128-bit EQV +(define_insn_and_split "*eqv<mode>3_internal1" + [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") + (not:BOOL_128 + (xor:BOOL_128 + (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_OP1>") + (match_operand:BOOL_128 2 "vlogical_operand" "<BOOL_REGS_OP2>"))))] + "TARGET_P8_VECTOR" +{ + if (vsx_register_operand (operands[0], <MODE>mode)) + return "xxleqv %x0,%x1,%x2"; + + return "#"; +} + "TARGET_P8_VECTOR && reload_completed + && int_reg_operand (operands[0], <MODE>mode)" + [(const_int 0)] +{ + rs6000_split_logical (operands, XOR, true, false, false); + DONE; +} + [(set (attr "type") + (if_then_else + (match_test "vsx_register_operand (operands[0], <MODE>mode)") + (const_string "veclogical") + (const_string "integer"))) + (set (attr "length") + (if_then_else + (match_test "vsx_register_operand (operands[0], <MODE>mode)") + (const_string "4") + (if_then_else + (match_test "TARGET_POWERPC64") + (const_string "8") + (const_string "16"))))]) + +(define_insn_and_split "*eqv<mode>3_internal2" + [(set (match_operand:TI2 0 "int_reg_operand" "=&r,r,r") + (not:TI2 + (xor:TI2 + (match_operand:TI2 1 "int_reg_operand" "r,0,r") + (match_operand:TI2 2 "int_reg_operand" "r,r,0"))))] + "!TARGET_P8_VECTOR" + "#" + "reload_completed && !TARGET_P8_VECTOR" + [(const_int 0)] +{ + rs6000_split_logical (operands, XOR, true, false, false); + DONE; +} + [(set_attr "type" "integer") + (set (attr "length") + (if_then_else + (match_test "TARGET_POWERPC64") + (const_string "8") + (const_string "16")))]) + +;; 128-bit one's complement +(define_insn_and_split "*one_cmpl<mode>3_internal" + [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") + (not:BOOL_128 + (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))] + "" +{ + if (TARGET_VSX && vsx_register_operand (operands[0], <MODE>mode)) + return "xxlnor %x0,%x1,%x1"; + + if (TARGET_ALTIVEC && altivec_register_operand (operands[0], <MODE>mode)) + return "vnor %0,%1,%1"; + + return "#"; +} + "reload_completed && int_reg_operand (operands[0], <MODE>mode)" + [(const_int 0)] +{ + rs6000_split_logical (operands, NOT, false, false, false); + DONE; +} + [(set (attr "type") + (if_then_else + (match_test "vsx_register_operand (operands[0], <MODE>mode)") + (const_string "veclogical") + (const_string "integer"))) + (set (attr "length") + (if_then_else + (match_test "vsx_register_operand (operands[0], <MODE>mode)") + (const_string "4") + (if_then_else + (match_test "TARGET_POWERPC64") + (const_string "8") + (const_string "16"))))]) + + +;; Now define ways of moving data around. + +;; Set up a register with a value from the GOT table + +(define_expand "movsi_got" + [(set (match_operand:SI 0 "gpc_reg_operand" "") + (unspec:SI [(match_operand:SI 1 "got_operand" "") + (match_dup 2)] UNSPEC_MOVSI_GOT))] + "DEFAULT_ABI == ABI_V4 && flag_pic == 1" + " +{ + if (GET_CODE (operands[1]) == CONST) + { + rtx offset = const0_rtx; + HOST_WIDE_INT value; + + operands[1] = eliminate_constant_term (XEXP (operands[1], 0), &offset); + value = INTVAL (offset); + if (value != 0) + { + rtx tmp = (!can_create_pseudo_p () + ? 
operands[0] + : gen_reg_rtx (Pmode)); + emit_insn (gen_movsi_got (tmp, operands[1])); + emit_insn (gen_addsi3 (operands[0], tmp, offset)); + DONE; + } + } + + operands[2] = rs6000_got_register (operands[1]); +}") + +(define_insn "*movsi_got_internal" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (unspec:SI [(match_operand:SI 1 "got_no_const_operand" "") + (match_operand:SI 2 "gpc_reg_operand" "b")] + UNSPEC_MOVSI_GOT))] + "DEFAULT_ABI == ABI_V4 && flag_pic == 1" + "lwz %0,%a1@got(%2)" + [(set_attr "type" "load")]) + +;; Used by sched, shorten_branches and final when the GOT pseudo reg +;; didn't get allocated to a hard register. +(define_split + [(set (match_operand:SI 0 "gpc_reg_operand" "") + (unspec:SI [(match_operand:SI 1 "got_no_const_operand" "") + (match_operand:SI 2 "memory_operand" "")] + UNSPEC_MOVSI_GOT))] + "DEFAULT_ABI == ABI_V4 + && flag_pic == 1 + && (reload_in_progress || reload_completed)" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 0) (unspec:SI [(match_dup 1)(match_dup 0)] + UNSPEC_MOVSI_GOT))] + "") + +;; For SI, we special-case integers that can't be loaded in one insn. We +;; do the load 16-bits at a time. We could do this by loading from memory, +;; and this is even supposed to be faster, but it is simpler not to get +;; integers in the TOC. +(define_insn "movsi_low" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (mem:SI (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand 2 "" ""))))] + "TARGET_MACHO && ! TARGET_64BIT" + "lwz %0,lo16(%2)(%1)" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +;; MR LA LWZ LFIWZX LXSIWZX +;; STW STFIWX STXSIWX LI LIS +;; # XXLOR XXSPLTIB 0 XXSPLTIB -1 VSPLTISW +;; XXLXOR 0 XXLORC -1 P9 const MTVSRWZ MFVSRWZ +;; MF%1 MT%0 MT%0 NOP +(define_insn "*movsi_internal1" + [(set (match_operand:SI 0 "rs6000_nonimmediate_operand" + "=r, r, r, ?*wI, ?*wH, + m, ?Z, ?Z, r, r, + r, ?*wIwH, ?*wJwK, ?*wJwK, ?*wu, + ?*wJwK, ?*wH, ?*wK, ?*wIwH, ?r, + r, *c*l, *h, *h") + + (match_operand:SI 1 "input_operand" + "r, U, m, Z, Z, + r, wI, wH, I, L, + n, wIwH, O, wM, wB, + O, wM, wS, r, wIwH, + *h, r, r, 0"))] + + "!TARGET_SINGLE_FPU && + (gpc_reg_operand (operands[0], SImode) || gpc_reg_operand (operands[1], SImode))" + "@ + mr %0,%1 + la %0,%a1 + lwz%U1%X1 %0,%1 + lfiwzx %0,%y1 + lxsiwzx %x0,%y1 + stw%U0%X0 %1,%0 + stfiwx %1,%y0 + stxsiwx %x1,%y0 + li %0,%1 + lis %0,%v1 + # + xxlor %x0,%x1,%x1 + xxspltib %x0,0 + xxspltib %x0,255 + vspltisw %0,%1 + xxlxor %x0,%x0,%x0 + xxlorc %x0,%x0,%x0 + # + mtvsrwz %x0,%1 + mfvsrwz %0,%x1 + mf%1 %0 + mt%0 %1 + mt%0 %1 + nop" + [(set_attr "type" + "*, *, load, fpload, fpload, + store, fpstore, fpstore, *, *, + *, veclogical, vecsimple, vecsimple, vecsimple, + veclogical, veclogical, vecsimple, mffgpr, mftgpr, + *, *, *, *") + + (set_attr "length" + "4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, + 8, 4, 4, 4, 4, + 4, 4, 8, 4, 4, + 4, 4, 4, 4")]) + +(define_insn "*movsi_internal1_single" + [(set (match_operand:SI 0 "rs6000_nonimmediate_operand" "=r,r,r,m,r,r,r,r,*c*l,*h,*h,m,*f") + (match_operand:SI 1 "input_operand" "r,U,m,r,I,L,n,*h,r,r,0,f,m"))] + "TARGET_SINGLE_FPU && + (gpc_reg_operand (operands[0], SImode) || gpc_reg_operand (operands[1], SImode))" + "@ + mr %0,%1 + la %0,%a1 + lwz%U1%X1 %0,%1 + stw%U0%X0 %1,%0 + li %0,%1 + lis %0,%v1 + # + mf%1 %0 + mt%0 %1 + mt%0 %1 + nop + stfs%U0%X0 %1,%0 + lfs%U1%X1 %0,%1" + [(set_attr "type" "*,*,load,store,*,*,*,mfjmpr,mtjmpr,*,*,fpstore,fpload") + (set_attr "length" "4,4,4,4,4,4,8,4,4,4,4,4,4")]) + +;; Like movsi, but adjust a SF value to 
be used in a SI context, i.e. +;; (set (reg:SI ...) (subreg:SI (reg:SF ...) 0)) +;; +;; Because SF values are actually stored as DF values within the vector +;; registers, we need to convert the value to the vector SF format when +;; we need to use the bits in a union or similar cases. We only need +;; to do this transformation when the value is a vector register. Loads, +;; stores, and transfers within GPRs are assumed to be safe. +;; +;; This is a more general case of reload_gpr_from_vsxsf. That insn must have +;; no alternatives, because the call is created as part of secondary_reload, +;; and operand #2's register class is used to allocate the temporary register. +;; This function is called before reload, and it creates the temporary as +;; needed. + +;; MR LWZ LFIWZX LXSIWZX STW +;; STFS STXSSP STXSSPX VSX->GPR MTVSRWZ +;; VSX->VSX + +(define_insn_and_split "movsi_from_sf" + [(set (match_operand:SI 0 "rs6000_nonimmediate_operand" + "=r, r, ?*wI, ?*wH, m, + m, wY, Z, r, wIwH, + ?wK") + + (unspec:SI [(match_operand:SF 1 "input_operand" + "r, m, Z, Z, r, + f, wb, wu, wIwH, r, + wK")] + UNSPEC_SI_FROM_SF)) + + (clobber (match_scratch:V4SF 2 + "=X, X, X, X, X, + X, X, X, wa, X, + wa"))] + + "TARGET_NO_SF_SUBREG + && (register_operand (operands[0], SImode) + || register_operand (operands[1], SFmode))" + "@ + mr %0,%1 + lwz%U1%X1 %0,%1 + lfiwzx %0,%y1 + lxsiwzx %x0,%y1 + stw%U0%X0 %1,%0 + stfs%U0%X0 %1,%0 + stxssp %1,%0 + stxsspx %x1,%y0 + # + mtvsrwz %x0,%1 + #" + "&& reload_completed + && register_operand (operands[0], SImode) + && vsx_reg_sfsubreg_ok (operands[1], SFmode)" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op0_di = gen_rtx_REG (DImode, REGNO (op0)); + + emit_insn (gen_vsx_xscvdpspn_scalar (op2, op1)); + + if (int_reg_operand (op0, SImode)) + { + emit_insn (gen_p8_mfvsrd_4_disf (op0_di, op2)); + emit_insn (gen_lshrdi3 (op0_di, op0_di, GEN_INT (32))); + } + else + { + rtx op1_v16qi = gen_rtx_REG (V16QImode, REGNO (op1)); + rtx byte_off = VECTOR_ELT_ORDER_BIG ? const0_rtx : GEN_INT (12); + emit_insn (gen_vextract4b (op0_di, op1_v16qi, byte_off)); + } + + DONE; +} + [(set_attr "type" + "*, load, fpload, fpload, store, + fpstore, fpstore, fpstore, mftgpr, mffgpr, + veclogical") + + (set_attr "length" + "4, 4, 4, 4, 4, + 4, 4, 4, 12, 4, + 8")]) + +;; movsi_from_sf with zero extension +;; +;; RLDICL LWZ LFIWZX LXSIWZX VSX->GPR +;; MTVSRWZ VSX->VSX + +(define_insn_and_split "*movdi_from_sf_zero_ext" + [(set (match_operand:DI 0 "gpc_reg_operand" + "=r, r, ?*wI, ?*wH, r, + wIwH, ?wK") + + (zero_extend:DI + (unspec:SI [(match_operand:SF 1 "input_operand" + "r, m, Z, Z, wIwH, + r, wK")] + UNSPEC_SI_FROM_SF))) + + (clobber (match_scratch:V4SF 2 + "=X, X, X, X, wa, + X, wa"))] + + "TARGET_DIRECT_MOVE_64BIT + && (register_operand (operands[0], DImode) + || register_operand (operands[1], SImode))" + "@ + rldicl %0,%1,0,32 + lwz%U1%X1 %0,%1 + lfiwzx %0,%y1 + lxsiwzx %x0,%y1 + # + mtvsrwz %x0,%1 + #" + "&& reload_completed + && vsx_reg_sfsubreg_ok (operands[1], SFmode)" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + + emit_insn (gen_vsx_xscvdpspn_scalar (op2, op1)); + + if (int_reg_operand (op0, DImode)) + { + emit_insn (gen_p8_mfvsrd_4_disf (op0, op2)); + emit_insn (gen_lshrdi3 (op0, op0, GEN_INT (32))); + } + else + { + rtx op0_si = gen_rtx_REG (SImode, REGNO (op0)); + rtx op1_v16qi = gen_rtx_REG (V16QImode, REGNO (op1)); + rtx byte_off = VECTOR_ELT_ORDER_BIG ? 
const0_rtx : GEN_INT (12); + emit_insn (gen_vextract4b (op0_si, op1_v16qi, byte_off)); + } + + DONE; +} + [(set_attr "type" + "*, load, fpload, fpload, mftgpr, + mffgpr, veclogical") + + (set_attr "length" + "4, 4, 4, 4, 12, + 4, 8")]) + +;; Split a load of a large constant into the appropriate two-insn +;; sequence. + +(define_split + [(set (match_operand:SI 0 "gpc_reg_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "(unsigned HOST_WIDE_INT) (INTVAL (operands[1]) + 0x8000) >= 0x10000 + && (INTVAL (operands[1]) & 0xffff) != 0" + [(set (match_dup 0) + (match_dup 2)) + (set (match_dup 0) + (ior:SI (match_dup 0) + (match_dup 3)))] + " +{ + if (rs6000_emit_set_const (operands[0], operands[1])) + DONE; + else + FAIL; +}") + +;; Split loading -128..127 to use XXSPLITB and VEXTSW2D +(define_split + [(set (match_operand:DI 0 "altivec_register_operand") + (match_operand:DI 1 "xxspltib_constant_split"))] + "TARGET_VSX_SMALL_INTEGER && TARGET_P9_VECTOR && reload_completed" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + int r = REGNO (op0); + rtx op0_v16qi = gen_rtx_REG (V16QImode, r); + + emit_insn (gen_xxspltib_v16qi (op0_v16qi, op1)); + emit_insn (gen_vsx_sign_extend_qi_si (operands[0], op0_v16qi)); + DONE; +}) + +(define_insn "*mov<mode>_internal2" + [(set (match_operand:CC 2 "cc_reg_operand" "=y,x,?y") + (compare:CC (match_operand:P 1 "gpc_reg_operand" "0,r,r") + (const_int 0))) + (set (match_operand:P 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))] + "" + "@ + cmp<wd>i %2,%0,0 + mr. %0,%1 + #" + [(set_attr "type" "cmp,logical,cmp") + (set_attr "dot" "yes") + (set_attr "length" "4,4,8")]) + +(define_split + [(set (match_operand:CC 2 "cc_reg_not_micro_cr0_operand" "") + (compare:CC (match_operand:P 1 "gpc_reg_operand" "") + (const_int 0))) + (set (match_operand:P 0 "gpc_reg_operand" "") (match_dup 1))] + "reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) + (compare:CC (match_dup 0) + (const_int 0)))] + "") + +(define_expand "mov<mode>" + [(set (match_operand:INT 0 "general_operand" "") + (match_operand:INT 1 "any_operand" ""))] + "" + "{ rs6000_emit_move (operands[0], operands[1], <MODE>mode); DONE; }") + +;; MR LHZ/LBZ LXSI*ZX STH/STB STXSI*X LI +;; XXLOR load 0 load -1 VSPLTI* # MFVSRWZ +;; MTVSRWZ MF%1 MT%1 NOP +(define_insn "*mov<mode>_internal" + [(set (match_operand:QHI 0 "nonimmediate_operand" + "=r, r, ?*wJwK, m, Z, r, + ?*wJwK, ?*wJwK, ?*wJwK, ?*wK, ?*wK, r, + ?*wJwK, r, *c*l, *h") + + (match_operand:QHI 1 "input_operand" + "r, m, Z, r, wJwK, i, + wJwK, O, wM, wB, wS, ?*wJwK, + r, *h, r, 0"))] + + "gpc_reg_operand (operands[0], <MODE>mode) + || gpc_reg_operand (operands[1], <MODE>mode)" + "@ + mr %0,%1 + l<wd>z%U1%X1 %0,%1 + lxsi<wd>zx %x0,%y1 + st<wd>%U0%X0 %1,%0 + stxsi<wd>x %x1,%y0 + li %0,%1 + xxlor %x0,%x1,%x1 + xxspltib %x0,0 + xxspltib %x0,255 + vspltis<wd> %0,%1 + # + mfvsrwz %0,%x1 + mtvsrwz %x0,%1 + mf%1 %0 + mt%0 %1 + nop" + [(set_attr "type" + "*, load, fpload, store, fpstore, *, + vecsimple, vecperm, vecperm, vecperm, vecperm, mftgpr, + mffgpr, mfjmpr, mtjmpr, *") + + (set_attr "length" + "4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 8, 4, + 4, 4, 4, 4")]) + + +;; Here is how to move condition codes around. When we store CC data in +;; an integer register or memory, we store just the high-order 4 bits. +;; This lets us not shift in the most common case of CR0. 
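+;; As an illustration of that convention (register numbers arbitrary): the
+;; value of CR0 can be read with just
+;;     mfcr 9
+;; because CR0 already occupies the four high-order bits of the CR image,
+;; while another field such as CR6 also needs the rotate emitted by the
+;; mfcr/rlwinm alternative of *movcc_internal1 below to bring its bits to
+;; the top of the register.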
+(define_expand "movcc" + [(set (match_operand:CC 0 "nonimmediate_operand" "") + (match_operand:CC 1 "nonimmediate_operand" ""))] + "" + "") + +(define_insn "*movcc_internal1" + [(set (match_operand:CC 0 "nonimmediate_operand" + "=y,x,?y,y,r,r,r,r,r,*c*l,r,m") + (match_operand:CC 1 "general_operand" + " y,r, r,O,x,y,r,I,h, r,m,r"))] + "register_operand (operands[0], CCmode) + || register_operand (operands[1], CCmode)" + "@ + mcrf %0,%1 + mtcrf 128,%1 + rlwinm %1,%1,%F0,0xffffffff\;mtcrf %R0,%1\;rlwinm %1,%1,%f0,0xffffffff + crxor %0,%0,%0 + mfcr %0%Q1 + mfcr %0%Q1\;rlwinm %0,%0,%f1,0xf0000000 + mr %0,%1 + li %0,%1 + mf%1 %0 + mt%0 %1 + lwz%U1%X1 %0,%1 + stw%U0%X0 %1,%0" + [(set (attr "type") + (cond [(eq_attr "alternative" "0,3") + (const_string "cr_logical") + (eq_attr "alternative" "1,2") + (const_string "mtcr") + (eq_attr "alternative" "6,7") + (const_string "integer") + (eq_attr "alternative" "8") + (const_string "mfjmpr") + (eq_attr "alternative" "9") + (const_string "mtjmpr") + (eq_attr "alternative" "10") + (const_string "load") + (eq_attr "alternative" "11") + (const_string "store") + (match_test "TARGET_MFCRF") + (const_string "mfcrf") + ] + (const_string "mfcr"))) + (set_attr "length" "4,4,12,4,4,8,4,4,4,4,4,4")]) + +;; For floating-point, we normally deal with the floating-point registers +;; unless -msoft-float is used. The sole exception is that parameter passing +;; can produce floating-point values in fixed-point registers. Unless the +;; value is a simple constant or already in memory, we deal with this by +;; allocating memory and copying the value explicitly via that memory location. + +;; Move 32-bit binary/decimal floating point +(define_expand "mov<mode>" + [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "") + (match_operand:FMOVE32 1 "any_operand" ""))] + "<fmove_ok>" + "{ rs6000_emit_move (operands[0], operands[1], <MODE>mode); DONE; }") + +(define_split + [(set (match_operand:FMOVE32 0 "gpc_reg_operand" "") + (match_operand:FMOVE32 1 "const_double_operand" ""))] + "reload_completed + && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31) + || (GET_CODE (operands[0]) == SUBREG + && GET_CODE (SUBREG_REG (operands[0])) == REG + && REGNO (SUBREG_REG (operands[0])) <= 31))" + [(set (match_dup 2) (match_dup 3))] + " +{ + long l; + + <real_value_to_target> (*CONST_DOUBLE_REAL_VALUE (operands[1]), l); + + if (! TARGET_POWERPC64) + operands[2] = operand_subword (operands[0], 0, 0, <MODE>mode); + else + operands[2] = gen_lowpart (SImode, operands[0]); + + operands[3] = gen_int_mode (l, SImode); +}") + +;; Originally, we tried to keep movsf and movsd common, but the differences +;; addressing was making it rather difficult to hide with mode attributes. In +;; particular for SFmode, on ISA 2.07 (power8) systems, having the GPR store +;; before the VSX stores meant that the register allocator would tend to do a +;; direct move to the GPR (which involves conversion from scalar to +;; vector/memory formats) to save values in the traditional Altivec registers, +;; while SDmode had problems on power6 if the GPR store was not first due to +;; the power6 not having an integer store operation. 
+;; +;; LWZ LFS LXSSP LXSSPX STFS STXSSP +;; STXSSPX STW XXLXOR LI FMR XSCPSGNDP +;; MR MT<x> MF<x> NOP + +(define_insn "movsf_hardfloat" + [(set (match_operand:SF 0 "nonimmediate_operand" + "=!r, f, wb, wu, m, wY, + Z, m, ww, !r, f, ww, + !r, *c*l, !r, *h") + (match_operand:SF 1 "input_operand" + "m, m, wY, Z, f, wb, + wu, r, j, j, f, ww, + r, r, *h, 0"))] + "(register_operand (operands[0], SFmode) + || register_operand (operands[1], SFmode)) + && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT + && (TARGET_ALLOW_SF_SUBREG + || valid_sf_si_move (operands[0], operands[1], SFmode))" + "@ + lwz%U1%X1 %0,%1 + lfs%U1%X1 %0,%1 + lxssp %0,%1 + lxsspx %x0,%y1 + stfs%U0%X0 %1,%0 + stxssp %1,%0 + stxsspx %x1,%y0 + stw%U0%X0 %1,%0 + xxlxor %x0,%x0,%x0 + li %0,0 + fmr %0,%1 + xscpsgndp %x0,%x1,%x1 + mr %0,%1 + mt%0 %1 + mf%1 %0 + nop" + [(set_attr "type" + "load, fpload, fpload, fpload, fpstore, fpstore, + fpstore, store, veclogical, integer, fpsimple, fpsimple, + *, mtjmpr, mfjmpr, *")]) + +;; LWZ LFIWZX STW STFIWX MTVSRWZ MFVSRWZ +;; FMR MR MT%0 MF%1 NOP +(define_insn "movsd_hardfloat" + [(set (match_operand:SD 0 "nonimmediate_operand" + "=!r, wz, m, Z, ?wh, ?r, + f, !r, *c*l, !r, *h") + (match_operand:SD 1 "input_operand" + "m, Z, r, wx, r, wh, + f, r, r, *h, 0"))] + "(register_operand (operands[0], SDmode) + || register_operand (operands[1], SDmode)) + && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT" + "@ + lwz%U1%X1 %0,%1 + lfiwzx %0,%y1 + stw%U0%X0 %1,%0 + stfiwx %1,%y0 + mtvsrwz %x0,%1 + mfvsrwz %0,%x1 + fmr %0,%1 + mr %0,%1 + mt%0 %1 + mf%1 %0 + nop" + [(set_attr "type" + "load, fpload, store, fpstore, mffgpr, mftgpr, + fpsimple, *, mtjmpr, mfjmpr, *")]) + +(define_insn "*mov<mode>_softfloat" + [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,*h") + (match_operand:FMOVE32 1 "input_operand" "r,r,h,m,r,I,L,G,Fn,0"))] + "(gpc_reg_operand (operands[0], <MODE>mode) + || gpc_reg_operand (operands[1], <MODE>mode)) + && (TARGET_SOFT_FLOAT || !TARGET_FPRS)" + "@ + mr %0,%1 + mt%0 %1 + mf%1 %0 + lwz%U1%X1 %0,%1 + stw%U0%X0 %1,%0 + li %0,%1 + lis %0,%v1 + # + # + nop" + [(set_attr "type" "*,mtjmpr,mfjmpr,load,store,*,*,*,*,*") + (set_attr "length" "4,4,4,4,4,4,4,4,8,4")]) + +;; Like movsf, but adjust a SI value to be used in a SF context, i.e. +;; (set (reg:SF ...) (subreg:SF (reg:SI ...) 0)) +;; +;; Because SF values are actually stored as DF values within the vector +;; registers, we need to convert the value to the vector SF format when +;; we need to use the bits in a union or similar cases. We only need +;; to do this transformation when the value is a vector register. Loads, +;; stores, and transfers within GPRs are assumed to be safe. +;; +;; This is a more general case of reload_vsx_from_gprsf. That insn must have +;; no alternatives, because the call is created as part of secondary_reload, +;; and operand #2's register class is used to allocate the temporary register. +;; This function is called before reload, and it creates the temporary as +;; needed. 
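+;; For the register-to-register case, the split below expands to roughly the
+;; following sequence (illustrative register numbers, power8-style target):
+;;     sldi 9,3,32          # move the SF bit pattern to the upper 32 bits
+;;     mtvsrd 0,9           # direct move GPR -> VSX register
+;;     xscvspdpn 0,0        # convert vector single to scalar double format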
+ +;; LWZ LFS LXSSP LXSSPX STW STFIWX +;; STXSIWX GPR->VSX VSX->GPR GPR->GPR +(define_insn_and_split "movsf_from_si" + [(set (match_operand:SF 0 "rs6000_nonimmediate_operand" + "=!r, f, wb, wu, m, Z, + Z, wy, ?r, !r") + + (unspec:SF [(match_operand:SI 1 "input_operand" + "m, m, wY, Z, r, f, + wu, r, wy, r")] + UNSPEC_SF_FROM_SI)) + + (clobber (match_scratch:DI 2 + "=X, X, X, X, X, X, + X, r, X, X"))] + + "TARGET_NO_SF_SUBREG + && (register_operand (operands[0], SFmode) + || register_operand (operands[1], SImode))" + "@ + lwz%U1%X1 %0,%1 + lfs%U1%X1 %0,%1 + lxssp %0,%1 + lxsspx %x0,%y1 + stw%U0%X0 %1,%0 + stfiwx %1,%y0 + stxsiwx %x1,%y0 + # + mfvsrwz %0,%x1 + mr %0,%1" + + "&& reload_completed + && vsx_reg_sfsubreg_ok (operands[0], SFmode) + && int_reg_operand_not_pseudo (operands[1], SImode)" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op1_di = gen_rtx_REG (DImode, REGNO (op1)); + + /* Move SF value to upper 32-bits for xscvspdpn. */ + emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32))); + emit_insn (gen_p8_mtvsrd_sf (op0, op2)); + emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0)); + DONE; +} + [(set_attr "length" + "4, 4, 4, 4, 4, 4, + 4, 12, 4, 4") + (set_attr "type" + "load, fpload, fpload, fpload, store, fpstore, + fpstore, vecfloat, mffgpr, *")]) + + +;; Move 64-bit binary/decimal floating point +(define_expand "mov<mode>" + [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "") + (match_operand:FMOVE64 1 "any_operand" ""))] + "" + "{ rs6000_emit_move (operands[0], operands[1], <MODE>mode); DONE; }") + +(define_split + [(set (match_operand:FMOVE64 0 "gpc_reg_operand" "") + (match_operand:FMOVE64 1 "const_int_operand" ""))] + "! TARGET_POWERPC64 && reload_completed + && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31) + || (GET_CODE (operands[0]) == SUBREG + && GET_CODE (SUBREG_REG (operands[0])) == REG + && REGNO (SUBREG_REG (operands[0])) <= 31))" + [(set (match_dup 2) (match_dup 4)) + (set (match_dup 3) (match_dup 1))] + " +{ + int endian = (WORDS_BIG_ENDIAN == 0); + HOST_WIDE_INT value = INTVAL (operands[1]); + + operands[2] = operand_subword (operands[0], endian, 0, <MODE>mode); + operands[3] = operand_subword (operands[0], 1 - endian, 0, <MODE>mode); + operands[4] = GEN_INT (value >> 32); + operands[1] = GEN_INT (((value & 0xffffffff) ^ 0x80000000) - 0x80000000); +}") + +(define_split + [(set (match_operand:FMOVE64 0 "gpc_reg_operand" "") + (match_operand:FMOVE64 1 "const_double_operand" ""))] + "! 
TARGET_POWERPC64 && reload_completed + && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31) + || (GET_CODE (operands[0]) == SUBREG + && GET_CODE (SUBREG_REG (operands[0])) == REG + && REGNO (SUBREG_REG (operands[0])) <= 31))" + [(set (match_dup 2) (match_dup 4)) + (set (match_dup 3) (match_dup 5))] + " +{ + int endian = (WORDS_BIG_ENDIAN == 0); + long l[2]; + + <real_value_to_target> (*CONST_DOUBLE_REAL_VALUE (operands[1]), l); + + operands[2] = operand_subword (operands[0], endian, 0, <MODE>mode); + operands[3] = operand_subword (operands[0], 1 - endian, 0, <MODE>mode); + operands[4] = gen_int_mode (l[endian], SImode); + operands[5] = gen_int_mode (l[1 - endian], SImode); +}") + +(define_split + [(set (match_operand:FMOVE64 0 "gpc_reg_operand" "") + (match_operand:FMOVE64 1 "const_double_operand" ""))] + "TARGET_POWERPC64 && reload_completed + && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31) + || (GET_CODE (operands[0]) == SUBREG + && GET_CODE (SUBREG_REG (operands[0])) == REG + && REGNO (SUBREG_REG (operands[0])) <= 31))" + [(set (match_dup 2) (match_dup 3))] + " +{ + int endian = (WORDS_BIG_ENDIAN == 0); + long l[2]; + HOST_WIDE_INT val; + + <real_value_to_target> (*CONST_DOUBLE_REAL_VALUE (operands[1]), l); + + operands[2] = gen_lowpart (DImode, operands[0]); + /* HIGHPART is lower memory address when WORDS_BIG_ENDIAN. */ + val = ((HOST_WIDE_INT)(unsigned long)l[endian] << 32 + | ((HOST_WIDE_INT)(unsigned long)l[1 - endian])); + + operands[3] = gen_int_mode (val, DImode); +}") + +;; Don't have reload use general registers to load a constant. It is +;; less efficient than loading the constant into an FP register, since +;; it will probably be used there. + +;; The move constraints are ordered to prefer floating point registers before +;; general purpose registers to avoid doing a store and a load to get the value +;; into a floating point register when it is needed for a floating point +;; operation. Prefer traditional floating point registers over VSX registers, +;; since the D-form version of the memory instructions does not need a GPR for +;; reloading. ISA 3.0 (power9) adds D-form addressing for scalars to Altivec +;; registers. + +;; If we have FPR registers, rs6000_emit_move has moved all constants to memory, +;; except for 0.0 which can be created on VSX with an xor instruction. + +(define_insn "*mov<mode>_hardfloat32" + [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,<f64_av>,Z,<f64_p9>,wY,<f64_vsx>,<f64_vsx>,!r,Y,r,!r") + (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,<f64_av>,wY,<f64_p9>,<f64_vsx>,<zero_fp>,<zero_fp>,r,Y,r"))] + "! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && (gpc_reg_operand (operands[0], <MODE>mode) + || gpc_reg_operand (operands[1], <MODE>mode))" + "@ + stfd%U0%X0 %1,%0 + lfd%U1%X1 %0,%1 + fmr %0,%1 + lxsd%U1x %x0,%y1 + stxsd%U0x %x1,%y0 + lxsd %0,%1 + stxsd %1,%0 + xxlor %x0,%x1,%x1 + xxlxor %x0,%x0,%x0 + # + # + # + #" + [(set_attr "type" "fpstore,fpload,fpsimple,fpload,fpstore,fpload,fpstore,veclogical,veclogical,two,store,load,two") + (set_attr "size" "64") + (set_attr "length" "4,4,4,4,4,4,4,4,4,8,8,8,8")]) + +(define_insn "*mov<mode>_softfloat32" + [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=Y,r,r,r,r,r") + (match_operand:FMOVE64 1 "input_operand" "r,Y,r,G,H,F"))] + "! 
TARGET_POWERPC64 + && ((TARGET_FPRS && TARGET_SINGLE_FLOAT) + || TARGET_SOFT_FLOAT || TARGET_E500_SINGLE + || (<MODE>mode == DDmode && TARGET_E500_DOUBLE)) + && (gpc_reg_operand (operands[0], <MODE>mode) + || gpc_reg_operand (operands[1], <MODE>mode))" + "#" + [(set_attr "type" "store,load,two,*,*,*") + (set_attr "length" "8,8,8,8,12,16")]) + +; ld/std require word-aligned displacements -> 'Y' constraint. +; List Y->r and r->Y before r->r for reload. +(define_insn "*mov<mode>_hardfloat64" + [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,<f64_p9>,wY,<f64_av>,Z,<f64_vsx>,<f64_vsx>,!r,Y,r,!r,*c*l,!r,*h,r,wg,r,<f64_dm>") + (match_operand:FMOVE64 1 "input_operand" "d,m,d,wY,<f64_p9>,Z,<f64_av>,<f64_vsx>,<zero_fp>,<zero_fp>,r,Y,r,r,h,0,wg,r,<f64_dm>,r"))] + "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && (gpc_reg_operand (operands[0], <MODE>mode) + || gpc_reg_operand (operands[1], <MODE>mode))" + "@ + stfd%U0%X0 %1,%0 + lfd%U1%X1 %0,%1 + fmr %0,%1 + lxsd %0,%1 + stxsd %1,%0 + lxsd%U1x %x0,%y1 + stxsd%U0x %x1,%y0 + xxlor %x0,%x1,%x1 + xxlxor %x0,%x0,%x0 + li %0,0 + std%U0%X0 %1,%0 + ld%U1%X1 %0,%1 + mr %0,%1 + mt%0 %1 + mf%1 %0 + nop + mftgpr %0,%1 + mffgpr %0,%1 + mfvsrd %0,%x1 + mtvsrd %x0,%1" + [(set_attr "type" "fpstore,fpload,fpsimple,fpload,fpstore,fpload,fpstore,veclogical,veclogical,integer,store,load,*,mtjmpr,mfjmpr,*,mftgpr,mffgpr,mftgpr,mffgpr") + (set_attr "size" "64") + (set_attr "length" "4")]) + +(define_insn "*mov<mode>_softfloat64" + [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=Y,r,r,cl,r,r,r,r,*h") + (match_operand:FMOVE64 1 "input_operand" "r,Y,r,r,h,G,H,F,0"))] + "TARGET_POWERPC64 && (TARGET_SOFT_FLOAT || !TARGET_FPRS) + && (gpc_reg_operand (operands[0], <MODE>mode) + || gpc_reg_operand (operands[1], <MODE>mode))" + "@ + std%U0%X0 %1,%0 + ld%U1%X1 %0,%1 + mr %0,%1 + mt%0 %1 + mf%1 %0 + # + # + # + nop" + [(set_attr "type" "store,load,*,mtjmpr,mfjmpr,*,*,*,*") + (set_attr "length" "4,4,4,4,4,8,12,16,4")]) + +(define_expand "mov<mode>" + [(set (match_operand:FMOVE128 0 "general_operand" "") + (match_operand:FMOVE128 1 "any_operand" ""))] + "" + "{ rs6000_emit_move (operands[0], operands[1], <MODE>mode); DONE; }") + +;; It's important to list Y->r and r->Y before r->r because otherwise +;; reload, given m->r, will try to pick r->r and reload it, which +;; doesn't make progress. + +;; We can't split little endian direct moves of TDmode, because the words are +;; not swapped like they are for TImode or TFmode. Subregs therefore are +;; problematical. Don't allow direct move for this case. 
+ +(define_insn_and_split "*mov<mode>_64bit_dm" + [(set (match_operand:FMOVE128_FPR 0 "nonimmediate_operand" "=m,d,d,d,Y,r,r,r,wh") + (match_operand:FMOVE128_FPR 1 "input_operand" "d,m,d,<zero_fp>,r,<zero_fp>Y,r,wh,r"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_POWERPC64 + && FLOAT128_2REG_P (<MODE>mode) + && (<MODE>mode != TDmode || WORDS_BIG_ENDIAN) + && (gpc_reg_operand (operands[0], <MODE>mode) + || gpc_reg_operand (operands[1], <MODE>mode))" + "#" + "&& reload_completed" + [(pc)] +{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; } + [(set_attr "length" "8,8,8,8,12,12,8,8,8")]) + +(define_insn_and_split "*movtd_64bit_nodm" + [(set (match_operand:TD 0 "nonimmediate_operand" "=m,d,d,Y,r,r") + (match_operand:TD 1 "input_operand" "d,m,d,r,Y,r"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_POWERPC64 && !WORDS_BIG_ENDIAN + && (gpc_reg_operand (operands[0], TDmode) + || gpc_reg_operand (operands[1], TDmode))" + "#" + "&& reload_completed" + [(pc)] +{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; } + [(set_attr "length" "8,8,8,12,12,8")]) + +(define_insn_and_split "*mov<mode>_32bit" + [(set (match_operand:FMOVE128_FPR 0 "nonimmediate_operand" "=m,d,d,d,Y,r,r") + (match_operand:FMOVE128_FPR 1 "input_operand" "d,m,d,<zero_fp>,r,<zero_fp>Y,r"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && !TARGET_POWERPC64 + && (FLOAT128_2REG_P (<MODE>mode) + || int_reg_operand_not_pseudo (operands[0], <MODE>mode) + || int_reg_operand_not_pseudo (operands[1], <MODE>mode)) + && (gpc_reg_operand (operands[0], <MODE>mode) + || gpc_reg_operand (operands[1], <MODE>mode))" + "#" + "&& reload_completed" + [(pc)] +{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; } + [(set_attr "length" "8,8,8,8,20,20,16")]) + +(define_insn_and_split "*mov<mode>_softfloat" + [(set (match_operand:FMOVE128 0 "rs6000_nonimmediate_operand" "=Y,r,r") + (match_operand:FMOVE128 1 "input_operand" "r,YGHF,r"))] + "(TARGET_SOFT_FLOAT || !TARGET_FPRS) + && (gpc_reg_operand (operands[0], <MODE>mode) + || gpc_reg_operand (operands[1], <MODE>mode))" + "#" + "&& reload_completed" + [(pc)] +{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; } + [(set_attr "length" "20,20,16")]) + +(define_expand "extenddf<mode>2" + [(set (match_operand:FLOAT128 0 "gpc_reg_operand" "") + (float_extend:FLOAT128 (match_operand:DF 1 "gpc_reg_operand" "")))] + "TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE) + && TARGET_LONG_DOUBLE_128" +{ + if (FLOAT128_IEEE_P (<MODE>mode)) + rs6000_expand_float128_convert (operands[0], operands[1], false); + else if (TARGET_E500_DOUBLE) + { + gcc_assert (<MODE>mode == TFmode); + emit_insn (gen_spe_extenddftf2 (operands[0], operands[1])); + } + else if (TARGET_VSX) + { + if (<MODE>mode == TFmode) + emit_insn (gen_extenddftf2_vsx (operands[0], operands[1])); + else if (<MODE>mode == IFmode) + emit_insn (gen_extenddfif2_vsx (operands[0], operands[1])); + else + gcc_unreachable (); + } + else + { + rtx zero = gen_reg_rtx (DFmode); + rs6000_emit_move (zero, CONST0_RTX (DFmode), DFmode); + + if (<MODE>mode == TFmode) + emit_insn (gen_extenddftf2_fprs (operands[0], operands[1], zero)); + else if (<MODE>mode == IFmode) + emit_insn (gen_extenddfif2_fprs (operands[0], operands[1], zero)); + else + gcc_unreachable (); + } + DONE; +}) + +;; Allow memory operands for the source to be created by the combiner. 
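+;; To illustrate the FPR-only path of extenddf<mode>2 above: an IBM extended
+;; double whose low-order half is +0.0 has exactly the value of the double in
+;; its high-order half, so widening only has to copy the DF value into the
+;; first register of the pair and load +0.0 into the second, which is what
+;; the split below does with its "zero" operand.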
+(define_insn_and_split "extenddf<mode>2_fprs" + [(set (match_operand:IBM128 0 "gpc_reg_operand" "=d,d,&d") + (float_extend:IBM128 + (match_operand:DF 1 "nonimmediate_operand" "d,m,d"))) + (use (match_operand:DF 2 "nonimmediate_operand" "m,m,d"))] + "!TARGET_VSX && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && TARGET_LONG_DOUBLE_128 && FLOAT128_IBM_P (<MODE>mode)" + "#" + "&& reload_completed" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 4) (match_dup 2))] +{ + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0; + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode); + + operands[3] = simplify_gen_subreg (DFmode, operands[0], <MODE>mode, hi_word); + operands[4] = simplify_gen_subreg (DFmode, operands[0], <MODE>mode, lo_word); +}) + +(define_insn_and_split "extenddf<mode>2_vsx" + [(set (match_operand:IBM128 0 "gpc_reg_operand" "=d,d") + (float_extend:IBM128 + (match_operand:DF 1 "nonimmediate_operand" "ws,m")))] + "TARGET_LONG_DOUBLE_128 && TARGET_VSX && FLOAT128_IBM_P (<MODE>mode)" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 3) (match_dup 4))] +{ + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0; + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode); + + operands[2] = simplify_gen_subreg (DFmode, operands[0], <MODE>mode, hi_word); + operands[3] = simplify_gen_subreg (DFmode, operands[0], <MODE>mode, lo_word); + operands[4] = CONST0_RTX (DFmode); +}) + +(define_expand "extendsf<mode>2" + [(set (match_operand:FLOAT128 0 "gpc_reg_operand" "") + (float_extend:FLOAT128 (match_operand:SF 1 "gpc_reg_operand" "")))] + "TARGET_HARD_FLOAT + && (TARGET_FPRS || TARGET_E500_DOUBLE) + && TARGET_LONG_DOUBLE_128" +{ + if (FLOAT128_IEEE_P (<MODE>mode)) + rs6000_expand_float128_convert (operands[0], operands[1], false); + else + { + rtx tmp = gen_reg_rtx (DFmode); + emit_insn (gen_extendsfdf2 (tmp, operands[1])); + emit_insn (gen_extenddf<mode>2 (operands[0], tmp)); + } + DONE; +}) + +(define_expand "trunc<mode>df2" + [(set (match_operand:DF 0 "gpc_reg_operand" "") + (float_truncate:DF (match_operand:FLOAT128 1 "gpc_reg_operand" "")))] + "TARGET_HARD_FLOAT + && (TARGET_FPRS || TARGET_E500_DOUBLE) + && TARGET_LONG_DOUBLE_128" +{ + if (FLOAT128_IEEE_P (<MODE>mode)) + { + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; + } +}) + +(define_insn_and_split "trunc<mode>df2_internal1" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d") + (float_truncate:DF + (match_operand:IBM128 1 "gpc_reg_operand" "0,d")))] + "FLOAT128_IBM_P (<MODE>mode) && !TARGET_XL_COMPAT + && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128" + "@ + # + fmr %0,%1" + "&& reload_completed && REGNO (operands[0]) == REGNO (operands[1])" + [(const_int 0)] +{ + emit_note (NOTE_INSN_DELETED); + DONE; +} + [(set_attr "type" "fpsimple")]) + +(define_insn "trunc<mode>df2_internal2" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d") + (float_truncate:DF (match_operand:IBM128 1 "gpc_reg_operand" "d")))] + "FLOAT128_IBM_P (<MODE>mode) && TARGET_XL_COMPAT && TARGET_HARD_FLOAT + && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LONG_DOUBLE_128" + "fadd %0,%1,%L1" + [(set_attr "type" "fp") + (set_attr "fp_type" "fp_addsub_d")]) + +(define_expand "trunc<mode>sf2" + [(set (match_operand:SF 0 "gpc_reg_operand" "") + (float_truncate:SF (match_operand:FLOAT128 1 "gpc_reg_operand" "")))] + "TARGET_HARD_FLOAT + && (TARGET_FPRS || TARGET_E500_DOUBLE) + && TARGET_LONG_DOUBLE_128" 
+{ + if (FLOAT128_IEEE_P (<MODE>mode)) + rs6000_expand_float128_convert (operands[0], operands[1], false); + else if (TARGET_E500_DOUBLE) + { + gcc_assert (<MODE>mode == TFmode); + emit_insn (gen_spe_trunctfsf2 (operands[0], operands[1])); + } + else if (<MODE>mode == TFmode) + emit_insn (gen_trunctfsf2_fprs (operands[0], operands[1])); + else if (<MODE>mode == IFmode) + emit_insn (gen_truncifsf2_fprs (operands[0], operands[1])); + else + gcc_unreachable (); + DONE; +}) + +(define_insn_and_split "trunc<mode>sf2_fprs" + [(set (match_operand:SF 0 "gpc_reg_operand" "=f") + (float_truncate:SF (match_operand:IBM128 1 "gpc_reg_operand" "d"))) + (clobber (match_scratch:DF 2 "=d"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT + && TARGET_LONG_DOUBLE_128 && FLOAT128_IBM_P (<MODE>mode)" + "#" + "&& reload_completed" + [(set (match_dup 2) + (float_truncate:DF (match_dup 1))) + (set (match_dup 0) + (float_truncate:SF (match_dup 2)))] + "") + +(define_expand "floatsi<mode>2" + [(parallel [(set (match_operand:FLOAT128 0 "gpc_reg_operand") + (float:FLOAT128 (match_operand:SI 1 "gpc_reg_operand"))) + (clobber (match_scratch:DI 2))])] + "TARGET_HARD_FLOAT + && (TARGET_FPRS || TARGET_E500_DOUBLE) + && TARGET_LONG_DOUBLE_128" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + + if (TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)) + ; + else if (FLOAT128_IEEE_P (<MODE>mode)) + { + rs6000_expand_float128_convert (op0, op1, false); + DONE; + } + else + { + rtx tmp = gen_reg_rtx (DFmode); + expand_float (tmp, op1, false); + if (<MODE>mode == TFmode) + emit_insn (gen_extenddftf2 (op0, tmp)); + else if (<MODE>mode == IFmode) + emit_insn (gen_extenddfif2 (op0, tmp)); + else + gcc_unreachable (); + DONE; + } +}) + +; fadd, but rounding towards zero. +; This is probably not the optimal code sequence. 
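+;; The helper below saves the FPSCR with mffs, forces the rounding mode to
+;; round-toward-zero (mtfsb1 31 / mtfsb0 30 set the RN field to 0b01), adds
+;; the two halves of the IBM long double while that mode is in effect, and
+;; finally restores the saved rounding-mode field with mtfsf.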
+(define_insn "fix_trunc_helper<mode>" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d") + (unspec:DF [(match_operand:IBM128 1 "gpc_reg_operand" "d")] + UNSPEC_FIX_TRUNC_TF)) + (clobber (match_operand:DF 2 "gpc_reg_operand" "=&d"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && FLOAT128_IBM_P (<MODE>mode)" + "mffs %2\n\tmtfsb1 31\n\tmtfsb0 30\n\tfadd %0,%1,%L1\n\tmtfsf 1,%2" + [(set_attr "type" "fp") + (set_attr "length" "20")]) + +(define_expand "fix_trunc<mode>si2" + [(set (match_operand:SI 0 "gpc_reg_operand" "") + (fix:SI (match_operand:FLOAT128 1 "gpc_reg_operand" "")))] + "TARGET_HARD_FLOAT + && (TARGET_FPRS || TARGET_E500_DOUBLE) && TARGET_LONG_DOUBLE_128" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + + if (TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)) + ; + else + { + if (FLOAT128_IEEE_P (<MODE>mode)) + rs6000_expand_float128_convert (op0, op1, false); + else if (TARGET_E500_DOUBLE && <MODE>mode == TFmode) + emit_insn (gen_spe_fix_trunctfsi2 (op0, op1)); + else if (<MODE>mode == TFmode) + emit_insn (gen_fix_trunctfsi2_fprs (op0, op1)); + else if (<MODE>mode == IFmode) + emit_insn (gen_fix_truncifsi2_fprs (op0, op1)); + else + gcc_unreachable (); + DONE; + } +}) + +(define_expand "fix_trunc<mode>si2_fprs" + [(parallel [(set (match_operand:SI 0 "gpc_reg_operand" "") + (fix:SI (match_operand:IBM128 1 "gpc_reg_operand" ""))) + (clobber (match_dup 2)) + (clobber (match_dup 3)) + (clobber (match_dup 4)) + (clobber (match_dup 5))])] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128" +{ + operands[2] = gen_reg_rtx (DFmode); + operands[3] = gen_reg_rtx (DFmode); + operands[4] = gen_reg_rtx (DImode); + operands[5] = assign_stack_temp (DImode, GET_MODE_SIZE (DImode)); +}) + +(define_insn_and_split "*fix_trunc<mode>si2_internal" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (fix:SI (match_operand:IBM128 1 "gpc_reg_operand" "d"))) + (clobber (match_operand:DF 2 "gpc_reg_operand" "=d")) + (clobber (match_operand:DF 3 "gpc_reg_operand" "=&d")) + (clobber (match_operand:DI 4 "gpc_reg_operand" "=d")) + (clobber (match_operand:DI 5 "offsettable_mem_operand" "=o"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128" + "#" + "" + [(pc)] +{ + rtx lowword; + emit_insn (gen_fix_trunc_helper<mode> (operands[2], operands[1], + operands[3])); + + gcc_assert (MEM_P (operands[5])); + lowword = adjust_address (operands[5], SImode, WORDS_BIG_ENDIAN ? 
4 : 0); + + emit_insn (gen_fctiwz_df (operands[4], operands[2])); + emit_move_insn (operands[5], operands[4]); + emit_move_insn (operands[0], lowword); + DONE; +}) + +(define_expand "fix_trunc<mode>di2" + [(set (match_operand:DI 0 "gpc_reg_operand" "") + (fix:DI (match_operand:IEEE128 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128_TYPE" +{ + if (!TARGET_FLOAT128_HW) + { + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; + } +}) + +(define_expand "fixuns_trunc<IEEE128:mode><SDI:mode>2" + [(set (match_operand:SDI 0 "gpc_reg_operand" "") + (unsigned_fix:SDI (match_operand:IEEE128 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128_TYPE" +{ + rs6000_expand_float128_convert (operands[0], operands[1], true); + DONE; +}) + +(define_expand "floatdi<mode>2" + [(set (match_operand:IEEE128 0 "gpc_reg_operand" "") + (float:IEEE128 (match_operand:DI 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128_TYPE" +{ + if (!TARGET_FLOAT128_HW) + { + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; + } +}) + +(define_expand "floatunsdi<IEEE128:mode>2" + [(set (match_operand:IEEE128 0 "gpc_reg_operand" "") + (unsigned_float:IEEE128 (match_operand:DI 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128_TYPE" +{ + if (!TARGET_FLOAT128_HW) + { + rs6000_expand_float128_convert (operands[0], operands[1], true); + DONE; + } +}) + +(define_expand "floatuns<IEEE128:mode>2" + [(set (match_operand:IEEE128 0 "gpc_reg_operand" "") + (unsigned_float:IEEE128 (match_operand:SI 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128_TYPE" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + + if (TARGET_FLOAT128_HW) + emit_insn (gen_floatuns_<IEEE128:mode>si2_hw (op0, op1)); + else + rs6000_expand_float128_convert (op0, op1, true); + DONE; +}) + +(define_expand "neg<mode>2" + [(set (match_operand:FLOAT128 0 "gpc_reg_operand" "") + (neg:FLOAT128 (match_operand:FLOAT128 1 "gpc_reg_operand" "")))] + "FLOAT128_IEEE_P (<MODE>mode) + || (FLOAT128_IBM_P (<MODE>mode) + && TARGET_HARD_FLOAT + && (TARGET_FPRS || TARGET_E500_DOUBLE))" + " +{ + if (FLOAT128_IEEE_P (<MODE>mode)) + { + if (TARGET_FLOAT128_HW) + { + if (<MODE>mode == TFmode) + emit_insn (gen_negtf2_hw (operands[0], operands[1])); + else if (<MODE>mode == KFmode) + emit_insn (gen_negkf2_hw (operands[0], operands[1])); + else + gcc_unreachable (); + } + else if (TARGET_FLOAT128_TYPE) + { + if (<MODE>mode == TFmode) + emit_insn (gen_ieee_128bit_vsx_negtf2 (operands[0], operands[1])); + else if (<MODE>mode == KFmode) + emit_insn (gen_ieee_128bit_vsx_negkf2 (operands[0], operands[1])); + else + gcc_unreachable (); + } + else + { + rtx libfunc = optab_libfunc (neg_optab, <MODE>mode); + rtx target = emit_library_call_value (libfunc, operands[0], LCT_CONST, + <MODE>mode, 1, + operands[1], <MODE>mode); + + if (target && !rtx_equal_p (target, operands[0])) + emit_move_insn (operands[0], target); + } + DONE; + } +}") + +(define_insn "neg<mode>2_internal" + [(set (match_operand:IBM128 0 "gpc_reg_operand" "=d") + (neg:IBM128 (match_operand:IBM128 1 "gpc_reg_operand" "d")))] + "TARGET_HARD_FLOAT && TARGET_FPRS && FLOAT128_IBM_P (TFmode)" + "* +{ + if (REGNO (operands[0]) == REGNO (operands[1]) + 1) + return \"fneg %L0,%L1\;fneg %0,%1\"; + else + return \"fneg %0,%1\;fneg %L0,%L1\"; +}" + [(set_attr "type" "fpsimple") + (set_attr "length" "8")]) + +(define_expand "abs<mode>2" + [(set (match_operand:FLOAT128 0 "gpc_reg_operand" "") + (abs:FLOAT128 (match_operand:FLOAT128 1 "gpc_reg_operand" "")))] + "FLOAT128_IEEE_P (<MODE>mode) + || (FLOAT128_IBM_P (<MODE>mode) + && 
TARGET_HARD_FLOAT + && (TARGET_FPRS || TARGET_E500_DOUBLE))" + " +{ + rtx label; + + if (FLOAT128_IEEE_P (<MODE>mode)) + { + if (TARGET_FLOAT128_HW) + { + if (<MODE>mode == TFmode) + emit_insn (gen_abstf2_hw (operands[0], operands[1])); + else if (<MODE>mode == KFmode) + emit_insn (gen_abskf2_hw (operands[0], operands[1])); + else + FAIL; + DONE; + } + else if (TARGET_FLOAT128_TYPE) + { + if (<MODE>mode == TFmode) + emit_insn (gen_ieee_128bit_vsx_abstf2 (operands[0], operands[1])); + else if (<MODE>mode == KFmode) + emit_insn (gen_ieee_128bit_vsx_abskf2 (operands[0], operands[1])); + else + FAIL; + DONE; + } + else + FAIL; + } + + label = gen_label_rtx (); + if (TARGET_E500_DOUBLE && <MODE>mode == TFmode) + { + if (flag_finite_math_only && !flag_trapping_math) + emit_insn (gen_spe_abstf2_tst (operands[0], operands[1], label)); + else + emit_insn (gen_spe_abstf2_cmp (operands[0], operands[1], label)); + } + else if (<MODE>mode == TFmode) + emit_insn (gen_abstf2_internal (operands[0], operands[1], label)); + else if (<MODE>mode == TFmode) + emit_insn (gen_absif2_internal (operands[0], operands[1], label)); + else + FAIL; + emit_label (label); + DONE; +}") + +(define_expand "abs<mode>2_internal" + [(set (match_operand:IBM128 0 "gpc_reg_operand" "") + (match_operand:IBM128 1 "gpc_reg_operand" "")) + (set (match_dup 3) (match_dup 5)) + (set (match_dup 5) (abs:DF (match_dup 5))) + (set (match_dup 4) (compare:CCFP (match_dup 3) (match_dup 5))) + (set (pc) (if_then_else (eq (match_dup 4) (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc))) + (set (match_dup 6) (neg:DF (match_dup 6)))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && TARGET_LONG_DOUBLE_128" + " +{ + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode); + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0; + operands[3] = gen_reg_rtx (DFmode); + operands[4] = gen_reg_rtx (CCFPmode); + operands[5] = simplify_gen_subreg (DFmode, operands[0], <MODE>mode, hi_word); + operands[6] = simplify_gen_subreg (DFmode, operands[0], <MODE>mode, lo_word); +}") + + +;; Generate IEEE 128-bit -0.0 (0x80000000000000000000000000000000) in a vector +;; register + +(define_expand "ieee_128bit_negative_zero" + [(set (match_operand:V16QI 0 "register_operand" "") (match_dup 1))] + "TARGET_FLOAT128_TYPE" +{ + rtvec v = rtvec_alloc (16); + int i, high; + + for (i = 0; i < 16; i++) + RTVEC_ELT (v, i) = const0_rtx; + + high = (BYTES_BIG_ENDIAN) ? 0 : 15; + RTVEC_ELT (v, high) = GEN_INT (0x80); + + rs6000_expand_vector_init (operands[0], gen_rtx_PARALLEL (V16QImode, v)); + DONE; +}) + +;; IEEE 128-bit negate + +;; We have 2 insns here for negate and absolute value. The first uses +;; match_scratch so that phases like combine can recognize neg/abs as generic +;; insns, and second insn after the first split pass loads up the bit to +;; twiddle the sign bit. Later GCSE passes can then combine multiple uses of +;; neg/abs to create the constant just once. 
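+;; Put differently: after the first split pass a negation is a use of a V16QI
+;; mask with only the sign bit set (built by ieee_128bit_negative_zero above)
+;; followed by an xxlxor of source and mask; abs and nabs use xxlandc and
+;; xxlor with the same mask, as the *_internal patterns below show, and GCSE
+;; can share a single copy of the mask among them.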
+ +(define_insn_and_split "ieee_128bit_vsx_neg<mode>2" + [(set (match_operand:IEEE128 0 "register_operand" "=wa") + (neg:IEEE128 (match_operand:IEEE128 1 "register_operand" "wa"))) + (clobber (match_scratch:V16QI 2 "=v"))] + "TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW" + "#" + "&& 1" + [(parallel [(set (match_dup 0) + (neg:IEEE128 (match_dup 1))) + (use (match_dup 2))])] +{ + if (GET_CODE (operands[2]) == SCRATCH) + operands[2] = gen_reg_rtx (V16QImode); + + operands[3] = gen_reg_rtx (V16QImode); + emit_insn (gen_ieee_128bit_negative_zero (operands[2])); +} + [(set_attr "length" "8") + (set_attr "type" "vecsimple")]) + +(define_insn "*ieee_128bit_vsx_neg<mode>2_internal" + [(set (match_operand:IEEE128 0 "register_operand" "=wa") + (neg:IEEE128 (match_operand:IEEE128 1 "register_operand" "wa"))) + (use (match_operand:V16QI 2 "register_operand" "v"))] + "TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW" + "xxlxor %x0,%x1,%x2" + [(set_attr "type" "veclogical")]) + +;; IEEE 128-bit absolute value +(define_insn_and_split "ieee_128bit_vsx_abs<mode>2" + [(set (match_operand:IEEE128 0 "register_operand" "=wa") + (abs:IEEE128 (match_operand:IEEE128 1 "register_operand" "wa"))) + (clobber (match_scratch:V16QI 2 "=v"))] + "TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "#" + "&& 1" + [(parallel [(set (match_dup 0) + (abs:IEEE128 (match_dup 1))) + (use (match_dup 2))])] +{ + if (GET_CODE (operands[2]) == SCRATCH) + operands[2] = gen_reg_rtx (V16QImode); + + operands[3] = gen_reg_rtx (V16QImode); + emit_insn (gen_ieee_128bit_negative_zero (operands[2])); +} + [(set_attr "length" "8") + (set_attr "type" "vecsimple")]) + +(define_insn "*ieee_128bit_vsx_abs<mode>2_internal" + [(set (match_operand:IEEE128 0 "register_operand" "=wa") + (abs:IEEE128 (match_operand:IEEE128 1 "register_operand" "wa"))) + (use (match_operand:V16QI 2 "register_operand" "v"))] + "TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW" + "xxlandc %x0,%x1,%x2" + [(set_attr "type" "veclogical")]) + +;; IEEE 128-bit negative absolute value +(define_insn_and_split "*ieee_128bit_vsx_nabs<mode>2" + [(set (match_operand:IEEE128 0 "register_operand" "=wa") + (neg:IEEE128 + (abs:IEEE128 + (match_operand:IEEE128 1 "register_operand" "wa")))) + (clobber (match_scratch:V16QI 2 "=v"))] + "TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW + && FLOAT128_IEEE_P (<MODE>mode)" + "#" + "&& 1" + [(parallel [(set (match_dup 0) + (neg:IEEE128 (abs:IEEE128 (match_dup 1)))) + (use (match_dup 2))])] +{ + if (GET_CODE (operands[2]) == SCRATCH) + operands[2] = gen_reg_rtx (V16QImode); + + operands[3] = gen_reg_rtx (V16QImode); + emit_insn (gen_ieee_128bit_negative_zero (operands[2])); +} + [(set_attr "length" "8") + (set_attr "type" "vecsimple")]) + +(define_insn "*ieee_128bit_vsx_nabs<mode>2_internal" + [(set (match_operand:IEEE128 0 "register_operand" "=wa") + (neg:IEEE128 + (abs:IEEE128 + (match_operand:IEEE128 1 "register_operand" "wa")))) + (use (match_operand:V16QI 2 "register_operand" "v"))] + "TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW" + "xxlor %x0,%x1,%x2" + [(set_attr "type" "veclogical")]) + +;; Float128 conversion functions. These expand to library function calls. +;; We use expand to convert from IBM double double to IEEE 128-bit +;; and trunc for the opposite. 
+(define_expand "extendiftf2" + [(set (match_operand:TF 0 "gpc_reg_operand" "") + (float_extend:TF (match_operand:IF 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128_TYPE" +{ + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; +}) + +(define_expand "extendifkf2" + [(set (match_operand:KF 0 "gpc_reg_operand" "") + (float_extend:KF (match_operand:IF 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128_TYPE" +{ + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; +}) + +(define_expand "extendtfkf2" + [(set (match_operand:KF 0 "gpc_reg_operand" "") + (float_extend:KF (match_operand:TF 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128_TYPE" +{ + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; +}) + +(define_expand "trunciftf2" + [(set (match_operand:IF 0 "gpc_reg_operand" "") + (float_truncate:IF (match_operand:TF 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128_TYPE" +{ + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; +}) + +(define_expand "truncifkf2" + [(set (match_operand:IF 0 "gpc_reg_operand" "") + (float_truncate:IF (match_operand:KF 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128_TYPE" +{ + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; +}) + +(define_expand "trunckftf2" + [(set (match_operand:TF 0 "gpc_reg_operand" "") + (float_truncate:TF (match_operand:KF 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128_TYPE" +{ + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; +}) + +(define_expand "trunctfif2" + [(set (match_operand:IF 0 "gpc_reg_operand" "") + (float_truncate:IF (match_operand:TF 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128_TYPE" +{ + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; +}) + + +;; Reload helper functions used by rs6000_secondary_reload. The patterns all +;; must have 3 arguments, and scratch register constraint must be a single +;; constraint. + +;; Reload patterns to support gpr load/store with misaligned mem. +;; and multiple gpr load/store at offset >= 0xfffc +(define_expand "reload_<mode>_store" + [(parallel [(match_operand 0 "memory_operand" "=m") + (match_operand 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "register_operand" "=&b")])] + "" +{ + rs6000_secondary_reload_gpr (operands[1], operands[0], operands[2], true); + DONE; +}) + +(define_expand "reload_<mode>_load" + [(parallel [(match_operand 0 "gpc_reg_operand" "=r") + (match_operand 1 "memory_operand" "m") + (match_operand:GPR 2 "register_operand" "=b")])] + "" +{ + rs6000_secondary_reload_gpr (operands[0], operands[1], operands[2], false); + DONE; +}) + + +;; Reload patterns for various types using the vector registers. We may need +;; an additional base register to convert the reg+offset addressing to reg+reg +;; for vector registers and reg+reg or (reg+reg)&(-16) addressing to just an +;; index register for gpr registers. 
+(define_expand "reload_<RELOAD:mode>_<P:mptrsize>_store" + [(parallel [(match_operand:RELOAD 0 "memory_operand" "m") + (match_operand:RELOAD 1 "gpc_reg_operand" "wa") + (match_operand:P 2 "register_operand" "=b")])] + "<P:tptrsize>" +{ + rs6000_secondary_reload_inner (operands[1], operands[0], operands[2], true); + DONE; +}) + +(define_expand "reload_<RELOAD:mode>_<P:mptrsize>_load" + [(parallel [(match_operand:RELOAD 0 "gpc_reg_operand" "wa") + (match_operand:RELOAD 1 "memory_operand" "m") + (match_operand:P 2 "register_operand" "=b")])] + "<P:tptrsize>" +{ + rs6000_secondary_reload_inner (operands[0], operands[1], operands[2], false); + DONE; +}) + + +;; Reload sometimes tries to move the address to a GPR, and can generate +;; invalid RTL for addresses involving AND -16. Allow addresses involving +;; reg+reg, reg+small constant, or just reg, all wrapped in an AND -16. + +(define_insn_and_split "*vec_reload_and_plus_<mptrsize>" + [(set (match_operand:P 0 "gpc_reg_operand" "=b") + (and:P (plus:P (match_operand:P 1 "gpc_reg_operand" "r") + (match_operand:P 2 "reg_or_cint_operand" "rI")) + (const_int -16)))] + "TARGET_ALTIVEC && (reload_in_progress || reload_completed)" + "#" + "&& reload_completed" + [(set (match_dup 0) + (plus:P (match_dup 1) + (match_dup 2))) + (set (match_dup 0) + (and:P (match_dup 0) + (const_int -16)))]) + +;; Power8 merge instructions to allow direct move to/from floating point +;; registers in 32-bit mode. We use TF mode to get two registers to move the +;; individual 32-bit parts across. Subreg doesn't work too well on the TF +;; value, since it is allocated in reload and not all of the flow information +;; is setup for it. We have two patterns to do the two moves between gprs and +;; fprs. There isn't a dependancy between the two, but we could potentially +;; schedule other instructions between the two instructions. 
+ +(define_insn "p8_fmrgow_<mode>" + [(set (match_operand:FMOVE64X 0 "register_operand" "=d") + (unspec:FMOVE64X [ + (match_operand:DF 1 "register_operand" "d") + (match_operand:DF 2 "register_operand" "d")] + UNSPEC_P8V_FMRGOW))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "fmrgow %0,%1,%2" + [(set_attr "type" "fpsimple")]) + +(define_insn "p8_mtvsrwz" + [(set (match_operand:DF 0 "register_operand" "=d") + (unspec:DF [(match_operand:SI 1 "register_operand" "r")] + UNSPEC_P8V_MTVSRWZ))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mtvsrwz %x0,%1" + [(set_attr "type" "mftgpr")]) + +(define_insn_and_split "reload_fpr_from_gpr<mode>" + [(set (match_operand:FMOVE64X 0 "register_operand" "=d") + (unspec:FMOVE64X [(match_operand:FMOVE64X 1 "register_operand" "r")] + UNSPEC_P8V_RELOAD_FROM_GPR)) + (clobber (match_operand:IF 2 "register_operand" "=d"))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp_hi = simplify_gen_subreg (DFmode, operands[2], IFmode, 0); + rtx tmp_lo = simplify_gen_subreg (DFmode, operands[2], IFmode, 8); + rtx gpr_hi_reg = gen_highpart (SImode, src); + rtx gpr_lo_reg = gen_lowpart (SImode, src); + + emit_insn (gen_p8_mtvsrwz (tmp_hi, gpr_hi_reg)); + emit_insn (gen_p8_mtvsrwz (tmp_lo, gpr_lo_reg)); + emit_insn (gen_p8_fmrgow_<mode> (dest, tmp_hi, tmp_lo)); + DONE; +} + [(set_attr "length" "12") + (set_attr "type" "three")]) + +;; Move 128 bit values from GPRs to VSX registers in 64-bit mode +(define_insn "p8_mtvsrd_df" + [(set (match_operand:DF 0 "register_operand" "=wa") + (unspec:DF [(match_operand:DI 1 "register_operand" "r")] + UNSPEC_P8V_MTVSRD))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mtvsrd %x0,%1" + [(set_attr "type" "mftgpr")]) + +(define_insn "p8_xxpermdi_<mode>" + [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa") + (unspec:FMOVE128_GPR [ + (match_operand:DF 1 "register_operand" "wa") + (match_operand:DF 2 "register_operand" "wa")] + UNSPEC_P8V_XXPERMDI))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "xxpermdi %x0,%x1,%x2,0" + [(set_attr "type" "vecperm")]) + +(define_insn_and_split "reload_vsx_from_gpr<mode>" + [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa") + (unspec:FMOVE128_GPR + [(match_operand:FMOVE128_GPR 1 "register_operand" "r")] + UNSPEC_P8V_RELOAD_FROM_GPR)) + (clobber (match_operand:IF 2 "register_operand" "=wa"))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + /* You might think that we could use op0 as one temp and a DF clobber + as op2, but you'd be wrong. Secondary reload move patterns don't + check for overlap of the clobber and the destination. 
*/ + rtx tmp_hi = simplify_gen_subreg (DFmode, operands[2], IFmode, 0); + rtx tmp_lo = simplify_gen_subreg (DFmode, operands[2], IFmode, 8); + rtx gpr_hi_reg = gen_highpart (DImode, src); + rtx gpr_lo_reg = gen_lowpart (DImode, src); + + emit_insn (gen_p8_mtvsrd_df (tmp_hi, gpr_hi_reg)); + emit_insn (gen_p8_mtvsrd_df (tmp_lo, gpr_lo_reg)); + emit_insn (gen_p8_xxpermdi_<mode> (dest, tmp_hi, tmp_lo)); + DONE; +} + [(set_attr "length" "12") + (set_attr "type" "three")]) + +(define_split + [(set (match_operand:FMOVE128_GPR 0 "nonimmediate_operand" "") + (match_operand:FMOVE128_GPR 1 "input_operand" ""))] + "reload_completed + && (int_reg_operand (operands[0], <MODE>mode) + || int_reg_operand (operands[1], <MODE>mode)) + && (!TARGET_DIRECT_MOVE_128 + || (!vsx_register_operand (operands[0], <MODE>mode) + && !vsx_register_operand (operands[1], <MODE>mode)))" + [(pc)] +{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) + +;; Move SFmode to a VSX from a GPR register. Because scalar floating point +;; type is stored internally as double precision in the VSX registers, we have +;; to convert it from the vector format. +(define_insn "p8_mtvsrd_sf" + [(set (match_operand:SF 0 "register_operand" "=wa") + (unspec:SF [(match_operand:DI 1 "register_operand" "r")] + UNSPEC_P8V_MTVSRD))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mtvsrd %x0,%1" + [(set_attr "type" "mftgpr")]) + +(define_insn_and_split "reload_vsx_from_gprsf" + [(set (match_operand:SF 0 "register_operand" "=wa") + (unspec:SF [(match_operand:SF 1 "register_operand" "r")] + UNSPEC_P8V_RELOAD_FROM_GPR)) + (clobber (match_operand:DI 2 "register_operand" "=r"))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op1_di = simplify_gen_subreg (DImode, op1, SFmode, 0); + + /* Move SF value to upper 32-bits for xscvspdpn. */ + emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32))); + emit_insn (gen_p8_mtvsrd_sf (op0, op2)); + emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0)); + DONE; +} + [(set_attr "length" "8") + (set_attr "type" "two")]) + +;; Move 128 bit values from VSX registers to GPRs in 64-bit mode by doing a +;; normal 64-bit move, followed by an xxpermdi to get the bottom 64-bit value, +;; and then doing a move of that. +(define_insn "p8_mfvsrd_3_<mode>" + [(set (match_operand:DF 0 "register_operand" "=r") + (unspec:DF [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")] + UNSPEC_P8V_RELOAD_FROM_VSX))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mfvsrd %0,%x1" + [(set_attr "type" "mftgpr")]) + +(define_insn_and_split "reload_gpr_from_vsx<mode>" + [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=r") + (unspec:FMOVE128_GPR + [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")] + UNSPEC_P8V_RELOAD_FROM_VSX)) + (clobber (match_operand:FMOVE128_GPR 2 "register_operand" "=wa"))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = operands[2]; + rtx gpr_hi_reg = gen_highpart (DFmode, dest); + rtx gpr_lo_reg = gen_lowpart (DFmode, dest); + + emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_hi_reg, src)); + emit_insn (gen_vsx_xxpermdi_<mode>_be (tmp, src, src, GEN_INT (3))); + emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_lo_reg, tmp)); + DONE; +} + [(set_attr "length" "12") + (set_attr "type" "three")]) + +;; Move SFmode to a GPR from a VSX register. 
Because scalar floating point +;; type is stored internally as double precision, we have to convert it to the +;; vector format. + +(define_insn_and_split "reload_gpr_from_vsxsf" + [(set (match_operand:SF 0 "register_operand" "=r") + (unspec:SF [(match_operand:SF 1 "register_operand" "wa")] + UNSPEC_P8V_RELOAD_FROM_VSX)) + (clobber (match_operand:V4SF 2 "register_operand" "=wa"))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx diop0 = simplify_gen_subreg (DImode, op0, SFmode, 0); + + emit_insn (gen_vsx_xscvdpspn_scalar (op2, op1)); + emit_insn (gen_p8_mfvsrd_4_disf (diop0, op2)); + emit_insn (gen_lshrdi3 (diop0, diop0, GEN_INT (32))); + DONE; +} + [(set_attr "length" "12") + (set_attr "type" "three")]) + +(define_insn "p8_mfvsrd_4_disf" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:V4SF 1 "register_operand" "wa")] + UNSPEC_P8V_RELOAD_FROM_VSX))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mfvsrd %0,%x1" + [(set_attr "type" "mftgpr")]) + + +;; Next come the multi-word integer load and store and the load and store +;; multiple insns. + +;; List r->r after r->Y, otherwise reload will try to reload a +;; non-offsettable address by using r->r which won't make progress. +;; Use of fprs is disparaged slightly otherwise reload prefers to reload +;; a gpr into a fpr instead of reloading an invalid 'Y' address + +;; GPR store GPR load GPR move FPR store FPR load FPR move +;; GPR const AVX store AVX store AVX load AVX load VSX move +;; P9 0 P9 -1 AVX 0/-1 VSX 0 VSX -1 P9 const +;; AVX const + +(define_insn "*movdi_internal32" + [(set (match_operand:DI 0 "rs6000_nonimmediate_operand" + "=Y, r, r, ^m, ^d, ^d, + r, ^wY, $Z, ^wb, $wv, ^wi, + *wo, *wo, *wv, *wi, *wi, *wv, + *wv") + + (match_operand:DI 1 "input_operand" + "r, Y, r, d, m, d, + IJKnGHF, wb, wv, wY, Z, wi, + Oj, wM, OjwM, Oj, wM, wS, + wB"))] + + "! TARGET_POWERPC64 + && (gpc_reg_operand (operands[0], DImode) + || gpc_reg_operand (operands[1], DImode))" + "@ + # + # + # + stfd%U0%X0 %1,%0 + lfd%U1%X1 %0,%1 + fmr %0,%1 + # + stxsd %1,%0 + stxsdx %x1,%y0 + lxsd %0,%1 + lxsdx %x0,%y1 + xxlor %x0,%x1,%x1 + xxspltib %x0,0 + xxspltib %x0,255 + vspltisw %0,%1 + xxlxor %x0,%x0,%x0 + xxlorc %x0,%x0,%x0 + # + #" + [(set_attr "type" + "store, load, *, fpstore, fpload, fpsimple, + *, fpstore, fpstore, fpload, fpload, veclogical, + vecsimple, vecsimple, vecsimple, veclogical, veclogical, vecsimple, + vecsimple") + (set_attr "size" "64")]) + +(define_split + [(set (match_operand:DI 0 "gpc_reg_operand" "") + (match_operand:DI 1 "const_int_operand" ""))] + "! 
TARGET_POWERPC64 && reload_completed + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1])" + [(set (match_dup 2) (match_dup 4)) + (set (match_dup 3) (match_dup 1))] + " +{ + HOST_WIDE_INT value = INTVAL (operands[1]); + operands[2] = operand_subword_force (operands[0], WORDS_BIG_ENDIAN == 0, + DImode); + operands[3] = operand_subword_force (operands[0], WORDS_BIG_ENDIAN != 0, + DImode); + operands[4] = GEN_INT (value >> 32); + operands[1] = GEN_INT (((value & 0xffffffff) ^ 0x80000000) - 0x80000000); +}") + +(define_split + [(set (match_operand:DIFD 0 "rs6000_nonimmediate_operand" "") + (match_operand:DIFD 1 "input_operand" ""))] + "reload_completed && !TARGET_POWERPC64 + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1])" + [(pc)] +{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) + +;; GPR store GPR load GPR move GPR li GPR lis GPR # +;; FPR store FPR load FPR move AVX store AVX store AVX load +;; AVX load VSX move P9 0 P9 -1 AVX 0/-1 VSX 0 +;; VSX -1 P9 const AVX const From SPR To SPR SPR<->SPR +;; FPR->GPR GPR->FPR VSX->GPR GPR->VSX +(define_insn "*movdi_internal64" + [(set (match_operand:DI 0 "nonimmediate_operand" + "=Y, r, r, r, r, r, + ^m, ^d, ^d, ^wY, $Z, $wb, + $wv, ^wi, *wo, *wo, *wv, *wi, + *wi, *wv, *wv, r, *h, *h, + ?*r, ?*wg, ?*r, ?*wj") + + (match_operand:DI 1 "input_operand" + "r, Y, r, I, L, nF, + d, m, d, wb, wv, wY, + Z, wi, Oj, wM, OjwM, Oj, + wM, wS, wB, *h, r, 0, + wg, r, wj, r"))] + + "TARGET_POWERPC64 + && (gpc_reg_operand (operands[0], DImode) + || gpc_reg_operand (operands[1], DImode))" + "@ + std%U0%X0 %1,%0 + ld%U1%X1 %0,%1 + mr %0,%1 + li %0,%1 + lis %0,%v1 + # + stfd%U0%X0 %1,%0 + lfd%U1%X1 %0,%1 + fmr %0,%1 + stxsd %1,%0 + stxsdx %x1,%y0 + lxsd %0,%1 + lxsdx %x0,%y1 + xxlor %x0,%x1,%x1 + xxspltib %x0,0 + xxspltib %x0,255 + # + xxlxor %x0,%x0,%x0 + xxlorc %x0,%x0,%x0 + # + # + mf%1 %0 + mt%0 %1 + nop + mftgpr %0,%1 + mffgpr %0,%1 + mfvsrd %0,%x1 + mtvsrd %x0,%1" + [(set_attr "type" + "store, load, *, *, *, *, + fpstore, fpload, fpsimple, fpstore, fpstore, fpload, + fpload, veclogical, vecsimple, vecsimple, vecsimple, veclogical, + veclogical, vecsimple, vecsimple, mfjmpr, mtjmpr, *, + mftgpr, mffgpr, mftgpr, mffgpr") + + (set_attr "size" "64") + (set_attr "length" + "4, 4, 4, 4, 4, 20, + 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 8, + 8, 4, 4, 4, 4, 4, + 4, 4, 4, 4")]) + +; Some DImode loads are best done as a load of -1 followed by a mask +; instruction. +(define_split + [(set (match_operand:DI 0 "int_reg_operand_not_pseudo") + (match_operand:DI 1 "const_int_operand"))] + "TARGET_POWERPC64 + && num_insns_constant (operands[1], DImode) > 1 + && !IN_RANGE (INTVAL (operands[1]), -0x80000000, 0xffffffff) + && rs6000_is_valid_and_mask (operands[1], DImode)" + [(set (match_dup 0) + (const_int -1)) + (set (match_dup 0) + (and:DI (match_dup 0) + (match_dup 1)))] + "") + +;; Split a load of a large constant into the appropriate five-instruction +;; sequence. Handle anything in a constant number of insns. +;; When non-easy constants can go in the TOC, this should use +;; easy_fp_constant predicate. 
+(define_split + [(set (match_operand:DI 0 "int_reg_operand_not_pseudo" "") + (match_operand:DI 1 "const_int_operand" ""))] + "TARGET_POWERPC64 && num_insns_constant (operands[1], DImode) > 1" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 3)))] + " +{ + if (rs6000_emit_set_const (operands[0], operands[1])) + DONE; + else + FAIL; +}") + +(define_split + [(set (match_operand:DI 0 "int_reg_operand_not_pseudo" "") + (match_operand:DI 1 "const_scalar_int_operand" ""))] + "TARGET_POWERPC64 && num_insns_constant (operands[1], DImode) > 1" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 3)))] + " +{ + if (rs6000_emit_set_const (operands[0], operands[1])) + DONE; + else + FAIL; +}") + +(define_split + [(set (match_operand:DI 0 "altivec_register_operand" "") + (match_operand:DI 1 "s5bit_cint_operand" ""))] + "TARGET_UPPER_REGS_DI && TARGET_VSX && reload_completed" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + int r = REGNO (op0); + rtx op0_v4si = gen_rtx_REG (V4SImode, r); + + emit_insn (gen_altivec_vspltisw (op0_v4si, op1)); + if (op1 != const0_rtx && op1 != constm1_rtx) + { + rtx op0_v2di = gen_rtx_REG (V2DImode, r); + emit_insn (gen_altivec_vupkhsw (op0_v2di, op0_v4si)); + } + DONE; +}) + +;; Split integer constants that can be loaded with XXSPLTIB and a +;; sign extend operation. +(define_split + [(set (match_operand:INT_ISA3 0 "altivec_register_operand" "") + (match_operand:INT_ISA3 1 "xxspltib_constant_split" ""))] + "TARGET_UPPER_REGS_DI && TARGET_P9_VECTOR && reload_completed" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + int r = REGNO (op0); + rtx op0_v16qi = gen_rtx_REG (V16QImode, r); + + emit_insn (gen_xxspltib_v16qi (op0_v16qi, op1)); + if (<MODE>mode == DImode) + emit_insn (gen_vsx_sign_extend_qi_di (operands[0], op0_v16qi)); + else if (<MODE>mode == SImode) + emit_insn (gen_vsx_sign_extend_qi_si (operands[0], op0_v16qi)); + else if (<MODE>mode == HImode) + { + rtx op0_v8hi = gen_rtx_REG (V8HImode, r); + emit_insn (gen_altivec_vupkhsb (op0_v8hi, op0_v16qi)); + } + DONE; +}) + + +;; TImode/PTImode is similar, except that we usually want to compute the +;; address into a register and use lsi/stsi (the exception is during reload). + +(define_insn "*mov<mode>_string" + [(set (match_operand:TI2 0 "reg_or_mem_operand" "=Q,Y,????r,????r,????r,r") + (match_operand:TI2 1 "input_operand" "r,r,Q,Y,r,n"))] + "! TARGET_POWERPC64 + && (<MODE>mode != TImode || VECTOR_MEM_NONE_P (TImode)) + && (gpc_reg_operand (operands[0], <MODE>mode) + || gpc_reg_operand (operands[1], <MODE>mode))" + "* +{ + switch (which_alternative) + { + default: + gcc_unreachable (); + case 0: + if (TARGET_STRING) + return \"stswi %1,%P0,16\"; + /* FALLTHRU */ + case 1: + return \"#\"; + case 2: + /* If the address is not used in the output, we can use lsi. Otherwise, + fall through to generating four loads. */ + if (TARGET_STRING + && ! 
reg_overlap_mentioned_p (operands[0], operands[1])) + return \"lswi %0,%P1,16\"; + /* fall through */ + case 3: + case 4: + case 5: + return \"#\"; + } +}" + [(set_attr "type" "store,store,load,load,*,*") + (set_attr "update" "yes") + (set_attr "indexed" "yes") + (set (attr "cell_micro") (if_then_else (match_test "TARGET_STRING") + (const_string "always") + (const_string "conditional")))]) + +(define_insn "*mov<mode>_ppc64" + [(set (match_operand:TI2 0 "nonimmediate_operand" "=wQ,Y,r,r,r,r") + (match_operand:TI2 1 "input_operand" "r,r,wQ,Y,r,n"))] + "(TARGET_POWERPC64 && VECTOR_MEM_NONE_P (<MODE>mode) + && (gpc_reg_operand (operands[0], <MODE>mode) + || gpc_reg_operand (operands[1], <MODE>mode)))" +{ + return rs6000_output_move_128bit (operands); +} + [(set_attr "type" "store,store,load,load,*,*") + (set_attr "length" "8")]) + +(define_split + [(set (match_operand:TI2 0 "int_reg_operand" "") + (match_operand:TI2 1 "const_scalar_int_operand" ""))] + "TARGET_POWERPC64 + && (VECTOR_MEM_NONE_P (<MODE>mode) + || (reload_completed && INT_REGNO_P (REGNO (operands[0]))))" + [(set (match_dup 2) (match_dup 4)) + (set (match_dup 3) (match_dup 5))] + " +{ + operands[2] = operand_subword_force (operands[0], WORDS_BIG_ENDIAN == 0, + <MODE>mode); + operands[3] = operand_subword_force (operands[0], WORDS_BIG_ENDIAN != 0, + <MODE>mode); + if (CONST_WIDE_INT_P (operands[1])) + { + operands[4] = GEN_INT (CONST_WIDE_INT_ELT (operands[1], 1)); + operands[5] = GEN_INT (CONST_WIDE_INT_ELT (operands[1], 0)); + } + else if (CONST_INT_P (operands[1])) + { + operands[4] = GEN_INT (- (INTVAL (operands[1]) < 0)); + operands[5] = operands[1]; + } + else + FAIL; +}") + +(define_split + [(set (match_operand:TI2 0 "nonimmediate_operand" "") + (match_operand:TI2 1 "input_operand" ""))] + "reload_completed + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1]) + && !quad_load_store_p (operands[0], operands[1])" + [(pc)] +{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) + +(define_expand "load_multiple" + [(match_par_dup 3 [(set (match_operand:SI 0 "" "") + (match_operand:SI 1 "" "")) + (use (match_operand:SI 2 "" ""))])] + "TARGET_STRING && !TARGET_POWERPC64" + " +{ + int regno; + int count; + rtx op1; + int i; + + /* Support only loading a constant number of fixed-point registers from + memory and only bother with this if more than two; the machine + doesn't support more than eight. 
*/ + if (GET_CODE (operands[2]) != CONST_INT + || INTVAL (operands[2]) <= 2 + || INTVAL (operands[2]) > 8 + || GET_CODE (operands[1]) != MEM + || GET_CODE (operands[0]) != REG + || REGNO (operands[0]) >= 32) + FAIL; + + count = INTVAL (operands[2]); + regno = REGNO (operands[0]); + + operands[3] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); + op1 = replace_equiv_address (operands[1], + force_reg (SImode, XEXP (operands[1], 0))); + + for (i = 0; i < count; i++) + XVECEXP (operands[3], 0, i) + = gen_rtx_SET (gen_rtx_REG (SImode, regno + i), + adjust_address_nv (op1, SImode, i * 4)); +}") + +(define_insn "*ldmsi8" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 2 "gpc_reg_operand" "") + (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b"))) + (set (match_operand:SI 3 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 4)))) + (set (match_operand:SI 4 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 8)))) + (set (match_operand:SI 5 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 12)))) + (set (match_operand:SI 6 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 16)))) + (set (match_operand:SI 7 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 20)))) + (set (match_operand:SI 8 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 24)))) + (set (match_operand:SI 9 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 28))))])] + "TARGET_STRING && XVECLEN (operands[0], 0) == 8" + "* +{ return rs6000_output_load_multiple (operands); }" + [(set_attr "type" "load") + (set_attr "update" "yes") + (set_attr "indexed" "yes") + (set_attr "length" "32")]) + +(define_insn "*ldmsi7" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 2 "gpc_reg_operand" "") + (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b"))) + (set (match_operand:SI 3 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 4)))) + (set (match_operand:SI 4 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 8)))) + (set (match_operand:SI 5 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 12)))) + (set (match_operand:SI 6 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 16)))) + (set (match_operand:SI 7 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 20)))) + (set (match_operand:SI 8 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 24))))])] + "TARGET_STRING && XVECLEN (operands[0], 0) == 7" + "* +{ return rs6000_output_load_multiple (operands); }" + [(set_attr "type" "load") + (set_attr "update" "yes") + (set_attr "indexed" "yes") + (set_attr "length" "32")]) + +(define_insn "*ldmsi6" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 2 "gpc_reg_operand" "") + (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b"))) + (set (match_operand:SI 3 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 4)))) + (set (match_operand:SI 4 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 8)))) + (set (match_operand:SI 5 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 12)))) + (set (match_operand:SI 6 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 16)))) + (set (match_operand:SI 7 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 20))))])] + "TARGET_STRING && XVECLEN (operands[0], 0) == 6" + "* +{ return rs6000_output_load_multiple (operands); }" + [(set_attr "type" "load") + (set_attr "update" "yes") + 
(set_attr "indexed" "yes") + (set_attr "length" "32")]) + +(define_insn "*ldmsi5" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 2 "gpc_reg_operand" "") + (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b"))) + (set (match_operand:SI 3 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 4)))) + (set (match_operand:SI 4 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 8)))) + (set (match_operand:SI 5 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 12)))) + (set (match_operand:SI 6 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 16))))])] + "TARGET_STRING && XVECLEN (operands[0], 0) == 5" + "* +{ return rs6000_output_load_multiple (operands); }" + [(set_attr "type" "load") + (set_attr "update" "yes") + (set_attr "indexed" "yes") + (set_attr "length" "32")]) + +(define_insn "*ldmsi4" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 2 "gpc_reg_operand" "") + (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b"))) + (set (match_operand:SI 3 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 4)))) + (set (match_operand:SI 4 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 8)))) + (set (match_operand:SI 5 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 12))))])] + "TARGET_STRING && XVECLEN (operands[0], 0) == 4" + "* +{ return rs6000_output_load_multiple (operands); }" + [(set_attr "type" "load") + (set_attr "update" "yes") + (set_attr "indexed" "yes") + (set_attr "length" "32")]) + +(define_insn "*ldmsi3" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 2 "gpc_reg_operand" "") + (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b"))) + (set (match_operand:SI 3 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 4)))) + (set (match_operand:SI 4 "gpc_reg_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 8))))])] + "TARGET_STRING && XVECLEN (operands[0], 0) == 3" + "* +{ return rs6000_output_load_multiple (operands); }" + [(set_attr "type" "load") + (set_attr "update" "yes") + (set_attr "indexed" "yes") + (set_attr "length" "32")]) + +(define_expand "store_multiple" + [(match_par_dup 3 [(set (match_operand:SI 0 "" "") + (match_operand:SI 1 "" "")) + (clobber (scratch:SI)) + (use (match_operand:SI 2 "" ""))])] + "TARGET_STRING && !TARGET_POWERPC64" + " +{ + int regno; + int count; + rtx to; + rtx op0; + int i; + + /* Support only storing a constant number of fixed-point registers to + memory and only bother with this if more than two; the machine + doesn't support more than eight. 
*/ + if (GET_CODE (operands[2]) != CONST_INT + || INTVAL (operands[2]) <= 2 + || INTVAL (operands[2]) > 8 + || GET_CODE (operands[0]) != MEM + || GET_CODE (operands[1]) != REG + || REGNO (operands[1]) >= 32) + FAIL; + + count = INTVAL (operands[2]); + regno = REGNO (operands[1]); + + operands[3] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 1)); + to = force_reg (SImode, XEXP (operands[0], 0)); + op0 = replace_equiv_address (operands[0], to); + + XVECEXP (operands[3], 0, 0) + = gen_rtx_SET (adjust_address_nv (op0, SImode, 0), operands[1]); + XVECEXP (operands[3], 0, 1) = gen_rtx_CLOBBER (VOIDmode, + gen_rtx_SCRATCH (SImode)); + + for (i = 1; i < count; i++) + XVECEXP (operands[3], 0, i + 1) + = gen_rtx_SET (adjust_address_nv (op0, SImode, i * 4), + gen_rtx_REG (SImode, regno + i)); +}") + +(define_insn "*stmsi8" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b")) + (match_operand:SI 2 "gpc_reg_operand" "r")) + (clobber (match_scratch:SI 3 "=X")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) + (match_operand:SI 4 "gpc_reg_operand" "r")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) + (match_operand:SI 5 "gpc_reg_operand" "r")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) + (match_operand:SI 6 "gpc_reg_operand" "r")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) + (match_operand:SI 7 "gpc_reg_operand" "r")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) + (match_operand:SI 8 "gpc_reg_operand" "r")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) + (match_operand:SI 9 "gpc_reg_operand" "r")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 28))) + (match_operand:SI 10 "gpc_reg_operand" "r"))])] + "TARGET_STRING && XVECLEN (operands[0], 0) == 9" + "stswi %2,%1,%O0" + [(set_attr "type" "store") + (set_attr "update" "yes") + (set_attr "indexed" "yes") + (set_attr "cell_micro" "always")]) + +(define_insn "*stmsi7" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b")) + (match_operand:SI 2 "gpc_reg_operand" "r")) + (clobber (match_scratch:SI 3 "=X")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) + (match_operand:SI 4 "gpc_reg_operand" "r")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) + (match_operand:SI 5 "gpc_reg_operand" "r")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) + (match_operand:SI 6 "gpc_reg_operand" "r")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) + (match_operand:SI 7 "gpc_reg_operand" "r")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) + (match_operand:SI 8 "gpc_reg_operand" "r")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) + (match_operand:SI 9 "gpc_reg_operand" "r"))])] + "TARGET_STRING && XVECLEN (operands[0], 0) == 8" + "stswi %2,%1,%O0" + [(set_attr "type" "store") + (set_attr "update" "yes") + (set_attr "indexed" "yes") + (set_attr "cell_micro" "always")]) + +(define_insn "*stmsi6" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b")) + (match_operand:SI 2 "gpc_reg_operand" "r")) + (clobber (match_scratch:SI 3 "=X")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) + (match_operand:SI 4 "gpc_reg_operand" "r")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) + (match_operand:SI 5 "gpc_reg_operand" "r")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) + (match_operand:SI 6 "gpc_reg_operand" "r")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) + (match_operand:SI 7 
"gpc_reg_operand" "r")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) + (match_operand:SI 8 "gpc_reg_operand" "r"))])] + "TARGET_STRING && XVECLEN (operands[0], 0) == 7" + "stswi %2,%1,%O0" + [(set_attr "type" "store") + (set_attr "update" "yes") + (set_attr "indexed" "yes") + (set_attr "cell_micro" "always")]) + +(define_insn "*stmsi5" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b")) + (match_operand:SI 2 "gpc_reg_operand" "r")) + (clobber (match_scratch:SI 3 "=X")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) + (match_operand:SI 4 "gpc_reg_operand" "r")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) + (match_operand:SI 5 "gpc_reg_operand" "r")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) + (match_operand:SI 6 "gpc_reg_operand" "r")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) + (match_operand:SI 7 "gpc_reg_operand" "r"))])] + "TARGET_STRING && XVECLEN (operands[0], 0) == 6" + "stswi %2,%1,%O0" + [(set_attr "type" "store") + (set_attr "update" "yes") + (set_attr "indexed" "yes") + (set_attr "cell_micro" "always")]) + +(define_insn "*stmsi4" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b")) + (match_operand:SI 2 "gpc_reg_operand" "r")) + (clobber (match_scratch:SI 3 "=X")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) + (match_operand:SI 4 "gpc_reg_operand" "r")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) + (match_operand:SI 5 "gpc_reg_operand" "r")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) + (match_operand:SI 6 "gpc_reg_operand" "r"))])] + "TARGET_STRING && XVECLEN (operands[0], 0) == 5" + "stswi %2,%1,%O0" + [(set_attr "type" "store") + (set_attr "update" "yes") + (set_attr "indexed" "yes") + (set_attr "cell_micro" "always")]) + +(define_insn "*stmsi3" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b")) + (match_operand:SI 2 "gpc_reg_operand" "r")) + (clobber (match_scratch:SI 3 "=X")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) + (match_operand:SI 4 "gpc_reg_operand" "r")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) + (match_operand:SI 5 "gpc_reg_operand" "r"))])] + "TARGET_STRING && XVECLEN (operands[0], 0) == 4" + "stswi %2,%1,%O0" + [(set_attr "type" "store") + (set_attr "update" "yes") + (set_attr "indexed" "yes") + (set_attr "cell_micro" "always")]) + +(define_expand "setmemsi" + [(parallel [(set (match_operand:BLK 0 "" "") + (match_operand 2 "const_int_operand" "")) + (use (match_operand:SI 1 "" "")) + (use (match_operand:SI 3 "" ""))])] + "" + " +{ + /* If value to set is not zero, use the library routine. */ + if (operands[2] != const0_rtx) + FAIL; + + if (expand_block_clear (operands)) + DONE; + else + FAIL; +}") + +;; String compare N insn. +;; Argument 0 is the target (result) +;; Argument 1 is the destination +;; Argument 2 is the source +;; Argument 3 is the length +;; Argument 4 is the alignment + +(define_expand "cmpstrnsi" + [(parallel [(set (match_operand:SI 0) + (compare:SI (match_operand:BLK 1) + (match_operand:BLK 2))) + (use (match_operand:SI 3)) + (use (match_operand:SI 4))])] + "TARGET_CMPB && (BYTES_BIG_ENDIAN || TARGET_LDBRX)" +{ + if (optimize_insn_for_size_p ()) + FAIL; + + if (expand_strn_compare (operands, 0)) + DONE; + else + FAIL; +}) + +;; String compare insn. 
+;; Argument 0 is the target (result) +;; Argument 1 is the destination +;; Argument 2 is the source +;; Argument 3 is the alignment + +(define_expand "cmpstrsi" + [(parallel [(set (match_operand:SI 0) + (compare:SI (match_operand:BLK 1) + (match_operand:BLK 2))) + (use (match_operand:SI 3))])] + "TARGET_CMPB && (BYTES_BIG_ENDIAN || TARGET_LDBRX)" +{ + if (optimize_insn_for_size_p ()) + FAIL; + + if (expand_strn_compare (operands, 1)) + DONE; + else + FAIL; +}) + +;; Block compare insn. +;; Argument 0 is the target (result) +;; Argument 1 is the destination +;; Argument 2 is the source +;; Argument 3 is the length +;; Argument 4 is the alignment + +(define_expand "cmpmemsi" + [(parallel [(set (match_operand:SI 0) + (compare:SI (match_operand:BLK 1) + (match_operand:BLK 2))) + (use (match_operand:SI 3)) + (use (match_operand:SI 4))])] + "TARGET_POPCNTD" +{ + if (expand_block_compare (operands)) + DONE; + else + FAIL; +}) + +;; String/block move insn. +;; Argument 0 is the destination +;; Argument 1 is the source +;; Argument 2 is the length +;; Argument 3 is the alignment + +(define_expand "movmemsi" + [(parallel [(set (match_operand:BLK 0 "" "") + (match_operand:BLK 1 "" "")) + (use (match_operand:SI 2 "" "")) + (use (match_operand:SI 3 "" ""))])] + "" + " +{ + if (expand_block_move (operands)) + DONE; + else + FAIL; +}") + +;; Move up to 32 bytes at a time. The fixed registers are needed because the +;; register allocator doesn't have a clue about allocating 8 word registers. +;; rD/rS = r5 is preferred, efficient form. +(define_expand "movmemsi_8reg" + [(parallel [(set (match_operand 0 "" "") + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (use (match_operand 3 "" "")) + (clobber (reg:SI 5)) + (clobber (reg:SI 6)) + (clobber (reg:SI 7)) + (clobber (reg:SI 8)) + (clobber (reg:SI 9)) + (clobber (reg:SI 10)) + (clobber (reg:SI 11)) + (clobber (reg:SI 12)) + (clobber (match_scratch:SI 4 ""))])] + "TARGET_STRING" + "") + +(define_insn "" + [(set (mem:BLK (match_operand:P 0 "gpc_reg_operand" "b")) + (mem:BLK (match_operand:P 1 "gpc_reg_operand" "b"))) + (use (match_operand:SI 2 "immediate_operand" "i")) + (use (match_operand:SI 3 "immediate_operand" "i")) + (clobber (match_operand:SI 4 "gpc_reg_operand" "=&r")) + (clobber (reg:SI 6)) + (clobber (reg:SI 7)) + (clobber (reg:SI 8)) + (clobber (reg:SI 9)) + (clobber (reg:SI 10)) + (clobber (reg:SI 11)) + (clobber (reg:SI 12)) + (clobber (match_scratch:SI 5 "=X"))] + "TARGET_STRING + && ((INTVAL (operands[2]) > 24 && INTVAL (operands[2]) < 32) + || INTVAL (operands[2]) == 0) + && (REGNO (operands[0]) < 5 || REGNO (operands[0]) > 12) + && (REGNO (operands[1]) < 5 || REGNO (operands[1]) > 12) + && REGNO (operands[4]) == 5" + "lswi %4,%1,%2\;stswi %4,%0,%2" + [(set_attr "type" "store") + (set_attr "update" "yes") + (set_attr "indexed" "yes") + (set_attr "cell_micro" "always") + (set_attr "length" "8")]) + +;; Move up to 24 bytes at a time. The fixed registers are needed because the +;; register allocator doesn't have a clue about allocating 6 word registers. +;; rD/rS = r5 is preferred, efficient form. 
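+;;
+;; Illustrative note (register numbers assumed, not from the patch): the
+;; lswi/stswi template below copies up to 24 bytes in one pair of string
+;; instructions, e.g. for a 24-byte copy with the source address in r4 and
+;; the destination address in r3:
+;;     lswi  5,4,24    # load 24 bytes from (r4) into r5..r10
+;;     stswi 5,3,24    # store those 24 bytes to (r3)
+;; which is why r5 must be the first transfer register here.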
+(define_expand "movmemsi_6reg" + [(parallel [(set (match_operand 0 "" "") + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (use (match_operand 3 "" "")) + (clobber (reg:SI 5)) + (clobber (reg:SI 6)) + (clobber (reg:SI 7)) + (clobber (reg:SI 8)) + (clobber (reg:SI 9)) + (clobber (reg:SI 10)) + (clobber (match_scratch:SI 4 ""))])] + "TARGET_STRING" + "") + +(define_insn "" + [(set (mem:BLK (match_operand:P 0 "gpc_reg_operand" "b")) + (mem:BLK (match_operand:P 1 "gpc_reg_operand" "b"))) + (use (match_operand:SI 2 "immediate_operand" "i")) + (use (match_operand:SI 3 "immediate_operand" "i")) + (clobber (match_operand:SI 4 "gpc_reg_operand" "=&r")) + (clobber (reg:SI 6)) + (clobber (reg:SI 7)) + (clobber (reg:SI 8)) + (clobber (reg:SI 9)) + (clobber (reg:SI 10)) + (clobber (match_scratch:SI 5 "=X"))] + "TARGET_STRING + && INTVAL (operands[2]) > 16 && INTVAL (operands[2]) <= 32 + && (REGNO (operands[0]) < 5 || REGNO (operands[0]) > 10) + && (REGNO (operands[1]) < 5 || REGNO (operands[1]) > 10) + && REGNO (operands[4]) == 5" + "lswi %4,%1,%2\;stswi %4,%0,%2" + [(set_attr "type" "store") + (set_attr "update" "yes") + (set_attr "indexed" "yes") + (set_attr "cell_micro" "always") + (set_attr "length" "8")]) + +;; Move up to 16 bytes at a time, using 4 fixed registers to avoid spill +;; problems with TImode. +;; rD/rS = r5 is preferred, efficient form. +(define_expand "movmemsi_4reg" + [(parallel [(set (match_operand 0 "" "") + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (use (match_operand 3 "" "")) + (clobber (reg:SI 5)) + (clobber (reg:SI 6)) + (clobber (reg:SI 7)) + (clobber (reg:SI 8)) + (clobber (match_scratch:SI 4 ""))])] + "TARGET_STRING" + "") + +(define_insn "" + [(set (mem:BLK (match_operand:P 0 "gpc_reg_operand" "b")) + (mem:BLK (match_operand:P 1 "gpc_reg_operand" "b"))) + (use (match_operand:SI 2 "immediate_operand" "i")) + (use (match_operand:SI 3 "immediate_operand" "i")) + (clobber (match_operand:SI 4 "gpc_reg_operand" "=&r")) + (clobber (reg:SI 6)) + (clobber (reg:SI 7)) + (clobber (reg:SI 8)) + (clobber (match_scratch:SI 5 "=X"))] + "TARGET_STRING + && INTVAL (operands[2]) > 8 && INTVAL (operands[2]) <= 16 + && (REGNO (operands[0]) < 5 || REGNO (operands[0]) > 8) + && (REGNO (operands[1]) < 5 || REGNO (operands[1]) > 8) + && REGNO (operands[4]) == 5" + "lswi %4,%1,%2\;stswi %4,%0,%2" + [(set_attr "type" "store") + (set_attr "update" "yes") + (set_attr "indexed" "yes") + (set_attr "cell_micro" "always") + (set_attr "length" "8")]) + +;; Move up to 8 bytes at a time. +(define_expand "movmemsi_2reg" + [(parallel [(set (match_operand 0 "" "") + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (use (match_operand 3 "" "")) + (clobber (match_scratch:DI 4 "")) + (clobber (match_scratch:SI 5 ""))])] + "TARGET_STRING && ! TARGET_POWERPC64" + "") + +(define_insn "" + [(set (mem:BLK (match_operand:SI 0 "gpc_reg_operand" "b")) + (mem:BLK (match_operand:SI 1 "gpc_reg_operand" "b"))) + (use (match_operand:SI 2 "immediate_operand" "i")) + (use (match_operand:SI 3 "immediate_operand" "i")) + (clobber (match_scratch:DI 4 "=&r")) + (clobber (match_scratch:SI 5 "=X"))] + "TARGET_STRING && ! TARGET_POWERPC64 + && INTVAL (operands[2]) > 4 && INTVAL (operands[2]) <= 8" + "lswi %4,%1,%2\;stswi %4,%0,%2" + [(set_attr "type" "store") + (set_attr "update" "yes") + (set_attr "indexed" "yes") + (set_attr "cell_micro" "always") + (set_attr "length" "8")]) + +;; Move up to 4 bytes at a time. 
+(define_expand "movmemsi_1reg" + [(parallel [(set (match_operand 0 "" "") + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (use (match_operand 3 "" "")) + (clobber (match_scratch:SI 4 "")) + (clobber (match_scratch:SI 5 ""))])] + "TARGET_STRING" + "") + +(define_insn "" + [(set (mem:BLK (match_operand:P 0 "gpc_reg_operand" "b")) + (mem:BLK (match_operand:P 1 "gpc_reg_operand" "b"))) + (use (match_operand:SI 2 "immediate_operand" "i")) + (use (match_operand:SI 3 "immediate_operand" "i")) + (clobber (match_scratch:SI 4 "=&r")) + (clobber (match_scratch:SI 5 "=X"))] + "TARGET_STRING && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 4" + "lswi %4,%1,%2\;stswi %4,%0,%2" + [(set_attr "type" "store") + (set_attr "update" "yes") + (set_attr "indexed" "yes") + (set_attr "cell_micro" "always") + (set_attr "length" "8")]) + +;; Define insns that do load or store with update. Some of these we can +;; get by using pre-decrement or pre-increment, but the hardware can also +;; do cases where the increment is not the size of the object. +;; +;; In all these cases, we use operands 0 and 1 for the register being +;; incremented because those are the operands that local-alloc will +;; tie and these are the pair most likely to be tieable (and the ones +;; that will benefit the most). + +(define_insn "*movdi_update1" + [(set (match_operand:DI 3 "gpc_reg_operand" "=r,r") + (mem:DI (plus:DI (match_operand:DI 1 "gpc_reg_operand" "0,0") + (match_operand:DI 2 "reg_or_aligned_short_operand" "r,I")))) + (set (match_operand:DI 0 "gpc_reg_operand" "=b,b") + (plus:DI (match_dup 1) (match_dup 2)))] + "TARGET_POWERPC64 && TARGET_UPDATE + && (!avoiding_indexed_address_p (DImode) + || !gpc_reg_operand (operands[2], DImode))" + "@ + ldux %3,%0,%2 + ldu %3,%2(%0)" + [(set_attr "type" "load") + (set_attr "update" "yes") + (set_attr "indexed" "yes,no")]) + +(define_insn "movdi_<mode>_update" + [(set (mem:DI (plus:P (match_operand:P 1 "gpc_reg_operand" "0,0") + (match_operand:P 2 "reg_or_aligned_short_operand" "r,I"))) + (match_operand:DI 3 "gpc_reg_operand" "r,r")) + (set (match_operand:P 0 "gpc_reg_operand" "=b,b") + (plus:P (match_dup 1) (match_dup 2)))] + "TARGET_POWERPC64 && TARGET_UPDATE + && (!avoiding_indexed_address_p (Pmode) + || !gpc_reg_operand (operands[2], Pmode) + || (REG_P (operands[0]) + && REGNO (operands[0]) == STACK_POINTER_REGNUM))" + "@ + stdux %3,%0,%2 + stdu %3,%2(%0)" + [(set_attr "type" "store") + (set_attr "update" "yes") + (set_attr "indexed" "yes,no")]) + +;; This pattern is only conditional on TARGET_POWERPC64, as it is +;; needed for stack allocation, even if the user passes -mno-update. 
+(define_insn "movdi_<mode>_update_stack" + [(set (mem:DI (plus:P (match_operand:P 1 "gpc_reg_operand" "0,0") + (match_operand:P 2 "reg_or_aligned_short_operand" "r,I"))) + (match_operand:DI 3 "gpc_reg_operand" "r,r")) + (set (match_operand:P 0 "gpc_reg_operand" "=b,b") + (plus:P (match_dup 1) (match_dup 2)))] + "TARGET_POWERPC64" + "@ + stdux %3,%0,%2 + stdu %3,%2(%0)" + [(set_attr "type" "store") + (set_attr "update" "yes") + (set_attr "indexed" "yes,no")]) + +(define_insn "*movsi_update1" + [(set (match_operand:SI 3 "gpc_reg_operand" "=r,r") + (mem:SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") + (match_operand:SI 2 "reg_or_short_operand" "r,I")))) + (set (match_operand:SI 0 "gpc_reg_operand" "=b,b") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_UPDATE + && (!avoiding_indexed_address_p (SImode) + || !gpc_reg_operand (operands[2], SImode))" + "@ + lwzux %3,%0,%2 + lwzu %3,%2(%0)" + [(set_attr "type" "load") + (set_attr "update" "yes") + (set_attr "indexed" "yes,no")]) + +(define_insn "*movsi_update2" + [(set (match_operand:DI 3 "gpc_reg_operand" "=r") + (sign_extend:DI + (mem:SI (plus:DI (match_operand:DI 1 "gpc_reg_operand" "0") + (match_operand:DI 2 "gpc_reg_operand" "r"))))) + (set (match_operand:DI 0 "gpc_reg_operand" "=b") + (plus:DI (match_dup 1) (match_dup 2)))] + "TARGET_POWERPC64 && rs6000_gen_cell_microcode + && !avoiding_indexed_address_p (DImode)" + "lwaux %3,%0,%2" + [(set_attr "type" "load") + (set_attr "sign_extend" "yes") + (set_attr "update" "yes") + (set_attr "indexed" "yes")]) + +(define_insn "movsi_update" + [(set (mem:SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") + (match_operand:SI 2 "reg_or_short_operand" "r,I"))) + (match_operand:SI 3 "gpc_reg_operand" "r,r")) + (set (match_operand:SI 0 "gpc_reg_operand" "=b,b") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_UPDATE + && (!avoiding_indexed_address_p (SImode) + || !gpc_reg_operand (operands[2], SImode) + || (REG_P (operands[0]) + && REGNO (operands[0]) == STACK_POINTER_REGNUM))" + "@ + stwux %3,%0,%2 + stwu %3,%2(%0)" + [(set_attr "type" "store") + (set_attr "update" "yes") + (set_attr "indexed" "yes,no")]) + +;; This is an unconditional pattern; needed for stack allocation, even +;; if the user passes -mno-update. 
+(define_insn "movsi_update_stack" + [(set (mem:SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") + (match_operand:SI 2 "reg_or_short_operand" "r,I"))) + (match_operand:SI 3 "gpc_reg_operand" "r,r")) + (set (match_operand:SI 0 "gpc_reg_operand" "=b,b") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "@ + stwux %3,%0,%2 + stwu %3,%2(%0)" + [(set_attr "type" "store") + (set_attr "update" "yes") + (set_attr "indexed" "yes,no")]) + +(define_insn "*movhi_update1" + [(set (match_operand:HI 3 "gpc_reg_operand" "=r,r") + (mem:HI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") + (match_operand:SI 2 "reg_or_short_operand" "r,I")))) + (set (match_operand:SI 0 "gpc_reg_operand" "=b,b") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_UPDATE + && (!avoiding_indexed_address_p (SImode) + || !gpc_reg_operand (operands[2], SImode))" + "@ + lhzux %3,%0,%2 + lhzu %3,%2(%0)" + [(set_attr "type" "load") + (set_attr "update" "yes") + (set_attr "indexed" "yes,no")]) + +(define_insn "*movhi_update2" + [(set (match_operand:SI 3 "gpc_reg_operand" "=r,r") + (zero_extend:SI + (mem:HI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") + (match_operand:SI 2 "reg_or_short_operand" "r,I"))))) + (set (match_operand:SI 0 "gpc_reg_operand" "=b,b") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_UPDATE + && (!avoiding_indexed_address_p (SImode) + || !gpc_reg_operand (operands[2], SImode))" + "@ + lhzux %3,%0,%2 + lhzu %3,%2(%0)" + [(set_attr "type" "load") + (set_attr "update" "yes") + (set_attr "indexed" "yes,no")]) + +(define_insn "*movhi_update3" + [(set (match_operand:SI 3 "gpc_reg_operand" "=r,r") + (sign_extend:SI + (mem:HI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") + (match_operand:SI 2 "reg_or_short_operand" "r,I"))))) + (set (match_operand:SI 0 "gpc_reg_operand" "=b,b") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_UPDATE && rs6000_gen_cell_microcode + && (!avoiding_indexed_address_p (SImode) + || !gpc_reg_operand (operands[2], SImode))" + "@ + lhaux %3,%0,%2 + lhau %3,%2(%0)" + [(set_attr "type" "load") + (set_attr "sign_extend" "yes") + (set_attr "update" "yes") + (set_attr "indexed" "yes,no")]) + +(define_insn "*movhi_update4" + [(set (mem:HI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") + (match_operand:SI 2 "reg_or_short_operand" "r,I"))) + (match_operand:HI 3 "gpc_reg_operand" "r,r")) + (set (match_operand:SI 0 "gpc_reg_operand" "=b,b") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_UPDATE + && (!avoiding_indexed_address_p (SImode) + || !gpc_reg_operand (operands[2], SImode))" + "@ + sthux %3,%0,%2 + sthu %3,%2(%0)" + [(set_attr "type" "store") + (set_attr "update" "yes") + (set_attr "indexed" "yes,no")]) + +(define_insn "*movqi_update1" + [(set (match_operand:QI 3 "gpc_reg_operand" "=r,r") + (mem:QI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") + (match_operand:SI 2 "reg_or_short_operand" "r,I")))) + (set (match_operand:SI 0 "gpc_reg_operand" "=b,b") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_UPDATE + && (!avoiding_indexed_address_p (SImode) + || !gpc_reg_operand (operands[2], SImode))" + "@ + lbzux %3,%0,%2 + lbzu %3,%2(%0)" + [(set_attr "type" "load") + (set_attr "update" "yes") + (set_attr "indexed" "yes,no")]) + +(define_insn "*movqi_update2" + [(set (match_operand:SI 3 "gpc_reg_operand" "=r,r") + (zero_extend:SI + (mem:QI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") + (match_operand:SI 2 "reg_or_short_operand" "r,I"))))) + (set (match_operand:SI 0 "gpc_reg_operand" "=b,b") + (plus:SI (match_dup 1) (match_dup 2)))] + 
"TARGET_UPDATE + && (!avoiding_indexed_address_p (SImode) + || !gpc_reg_operand (operands[2], SImode))" + "@ + lbzux %3,%0,%2 + lbzu %3,%2(%0)" + [(set_attr "type" "load") + (set_attr "update" "yes") + (set_attr "indexed" "yes,no")]) + +(define_insn "*movqi_update3" + [(set (mem:QI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") + (match_operand:SI 2 "reg_or_short_operand" "r,I"))) + (match_operand:QI 3 "gpc_reg_operand" "r,r")) + (set (match_operand:SI 0 "gpc_reg_operand" "=b,b") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_UPDATE + && (!avoiding_indexed_address_p (SImode) + || !gpc_reg_operand (operands[2], SImode))" + "@ + stbux %3,%0,%2 + stbu %3,%2(%0)" + [(set_attr "type" "store") + (set_attr "update" "yes") + (set_attr "indexed" "yes,no")]) + +(define_insn "*movsf_update1" + [(set (match_operand:SF 3 "gpc_reg_operand" "=f,f") + (mem:SF (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") + (match_operand:SI 2 "reg_or_short_operand" "r,I")))) + (set (match_operand:SI 0 "gpc_reg_operand" "=b,b") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT && TARGET_UPDATE + && (!avoiding_indexed_address_p (SImode) + || !gpc_reg_operand (operands[2], SImode))" + "@ + lfsux %3,%0,%2 + lfsu %3,%2(%0)" + [(set_attr "type" "fpload") + (set_attr "update" "yes") + (set_attr "indexed" "yes,no")]) + +(define_insn "*movsf_update2" + [(set (mem:SF (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") + (match_operand:SI 2 "reg_or_short_operand" "r,I"))) + (match_operand:SF 3 "gpc_reg_operand" "f,f")) + (set (match_operand:SI 0 "gpc_reg_operand" "=b,b") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT && TARGET_UPDATE + && (!avoiding_indexed_address_p (SImode) + || !gpc_reg_operand (operands[2], SImode))" + "@ + stfsux %3,%0,%2 + stfsu %3,%2(%0)" + [(set_attr "type" "fpstore") + (set_attr "update" "yes") + (set_attr "indexed" "yes,no")]) + +(define_insn "*movsf_update3" + [(set (match_operand:SF 3 "gpc_reg_operand" "=r,r") + (mem:SF (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") + (match_operand:SI 2 "reg_or_short_operand" "r,I")))) + (set (match_operand:SI 0 "gpc_reg_operand" "=b,b") + (plus:SI (match_dup 1) (match_dup 2)))] + "(TARGET_SOFT_FLOAT || !TARGET_FPRS) && TARGET_UPDATE + && (!avoiding_indexed_address_p (SImode) + || !gpc_reg_operand (operands[2], SImode))" + "@ + lwzux %3,%0,%2 + lwzu %3,%2(%0)" + [(set_attr "type" "load") + (set_attr "update" "yes") + (set_attr "indexed" "yes,no")]) + +(define_insn "*movsf_update4" + [(set (mem:SF (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") + (match_operand:SI 2 "reg_or_short_operand" "r,I"))) + (match_operand:SF 3 "gpc_reg_operand" "r,r")) + (set (match_operand:SI 0 "gpc_reg_operand" "=b,b") + (plus:SI (match_dup 1) (match_dup 2)))] + "(TARGET_SOFT_FLOAT || !TARGET_FPRS) && TARGET_UPDATE + && (!avoiding_indexed_address_p (SImode) + || !gpc_reg_operand (operands[2], SImode))" + "@ + stwux %3,%0,%2 + stwu %3,%2(%0)" + [(set_attr "type" "store") + (set_attr "update" "yes") + (set_attr "indexed" "yes,no")]) + +(define_insn "*movdf_update1" + [(set (match_operand:DF 3 "gpc_reg_operand" "=d,d") + (mem:DF (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") + (match_operand:SI 2 "reg_or_short_operand" "r,I")))) + (set (match_operand:SI 0 "gpc_reg_operand" "=b,b") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_UPDATE + && (!avoiding_indexed_address_p (SImode) + || 
!gpc_reg_operand (operands[2], SImode))" + "@ + lfdux %3,%0,%2 + lfdu %3,%2(%0)" + [(set_attr "type" "fpload") + (set_attr "update" "yes") + (set_attr "indexed" "yes,no") + (set_attr "size" "64")]) + +(define_insn "*movdf_update2" + [(set (mem:DF (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") + (match_operand:SI 2 "reg_or_short_operand" "r,I"))) + (match_operand:DF 3 "gpc_reg_operand" "d,d")) + (set (match_operand:SI 0 "gpc_reg_operand" "=b,b") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_UPDATE + && (!avoiding_indexed_address_p (SImode) + || !gpc_reg_operand (operands[2], SImode))" + "@ + stfdux %3,%0,%2 + stfdu %3,%2(%0)" + [(set_attr "type" "fpstore") + (set_attr "update" "yes") + (set_attr "indexed" "yes,no")]) + + +;; After inserting conditional returns we can sometimes have +;; unnecessary register moves. Unfortunately we cannot have a +;; modeless peephole here, because some single SImode sets have early +;; clobber outputs. Although those sets expand to multi-ppc-insn +;; sequences, using get_attr_length here will smash the operands +;; array. Neither is there an early_cobbler_p predicate. +;; Disallow subregs for E500 so we don't munge frob_di_df_2. +;; Also this optimization interferes with scalars going into +;; altivec registers (the code does reloading through the FPRs). +(define_peephole2 + [(set (match_operand:DF 0 "gpc_reg_operand" "") + (match_operand:DF 1 "any_operand" "")) + (set (match_operand:DF 2 "gpc_reg_operand" "") + (match_dup 0))] + "!(TARGET_E500_DOUBLE && GET_CODE (operands[2]) == SUBREG) + && !TARGET_UPPER_REGS_DF + && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) (match_dup 1))]) + +(define_peephole2 + [(set (match_operand:SF 0 "gpc_reg_operand" "") + (match_operand:SF 1 "any_operand" "")) + (set (match_operand:SF 2 "gpc_reg_operand" "") + (match_dup 0))] + "!TARGET_UPPER_REGS_SF + && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) (match_dup 1))]) + + +;; TLS support. + +;; Mode attributes for different ABIs. +(define_mode_iterator TLSmode [(SI "! 
TARGET_64BIT") (DI "TARGET_64BIT")]) +(define_mode_attr tls_abi_suffix [(SI "32") (DI "64")]) +(define_mode_attr tls_sysv_suffix [(SI "si") (DI "di")]) +(define_mode_attr tls_insn_suffix [(SI "wz") (DI "d")]) + +(define_insn_and_split "tls_gd_aix<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b") + (call (mem:TLSmode (match_operand:TLSmode 3 "symbol_ref_operand" "s")) + (match_operand 4 "" "g"))) + (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b") + (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")] + UNSPEC_TLSGD) + (clobber (reg:SI LR_REGNO))] + "HAVE_AS_TLS && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)" +{ + if (TARGET_CMODEL != CMODEL_SMALL) + return "addis %0,%1,%2@got@tlsgd@ha\;addi %0,%0,%2@got@tlsgd@l\;" + "bl %z3\;nop"; + else + return "addi %0,%1,%2@got@tlsgd\;bl %z3\;nop"; +} + "&& TARGET_TLS_MARKERS" + [(set (match_dup 0) + (unspec:TLSmode [(match_dup 1) + (match_dup 2)] + UNSPEC_TLSGD)) + (parallel [(set (match_dup 0) + (call (mem:TLSmode (match_dup 3)) + (match_dup 4))) + (unspec:TLSmode [(match_dup 2)] UNSPEC_TLSGD) + (clobber (reg:SI LR_REGNO))])] + "" + [(set_attr "type" "two") + (set (attr "length") + (if_then_else (ne (symbol_ref "TARGET_CMODEL") (symbol_ref "CMODEL_SMALL")) + (const_int 16) + (const_int 12)))]) + +(define_insn_and_split "tls_gd_sysv<TLSmode:tls_sysv_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b") + (call (mem:TLSmode (match_operand:TLSmode 3 "symbol_ref_operand" "s")) + (match_operand 4 "" "g"))) + (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b") + (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")] + UNSPEC_TLSGD) + (clobber (reg:SI LR_REGNO))] + "HAVE_AS_TLS && DEFAULT_ABI == ABI_V4" +{ + if (flag_pic) + { + if (TARGET_SECURE_PLT && flag_pic == 2) + return "addi %0,%1,%2@got@tlsgd\;bl %z3+32768@plt"; + else + return "addi %0,%1,%2@got@tlsgd\;bl %z3@plt"; + } + else + return "addi %0,%1,%2@got@tlsgd\;bl %z3"; +} + "&& TARGET_TLS_MARKERS" + [(set (match_dup 0) + (unspec:TLSmode [(match_dup 1) + (match_dup 2)] + UNSPEC_TLSGD)) + (parallel [(set (match_dup 0) + (call (mem:TLSmode (match_dup 3)) + (match_dup 4))) + (unspec:TLSmode [(match_dup 2)] UNSPEC_TLSGD) + (clobber (reg:SI LR_REGNO))])] + "" + [(set_attr "type" "two") + (set_attr "length" "8")]) + +(define_insn_and_split "*tls_gd<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b") + (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b") + (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")] + UNSPEC_TLSGD))] + "HAVE_AS_TLS && TARGET_TLS_MARKERS" + "addi %0,%1,%2@got@tlsgd" + "&& TARGET_CMODEL != CMODEL_SMALL" + [(set (match_dup 3) + (high:TLSmode + (unspec:TLSmode [(match_dup 1) (match_dup 2)] UNSPEC_TLSGD))) + (set (match_dup 0) + (lo_sum:TLSmode (match_dup 3) + (unspec:TLSmode [(match_dup 1) (match_dup 2)] UNSPEC_TLSGD)))] + " +{ + operands[3] = gen_reg_rtx (TARGET_64BIT ? 
DImode : SImode); +}" + [(set (attr "length") + (if_then_else (ne (symbol_ref "TARGET_CMODEL") (symbol_ref "CMODEL_SMALL")) + (const_int 8) + (const_int 4)))]) + +(define_insn "*tls_gd_high<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b") + (high:TLSmode + (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b") + (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")] + UNSPEC_TLSGD)))] + "HAVE_AS_TLS && TARGET_TLS_MARKERS && TARGET_CMODEL != CMODEL_SMALL" + "addis %0,%1,%2@got@tlsgd@ha" + [(set_attr "length" "4")]) + +(define_insn "*tls_gd_low<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b") + (lo_sum:TLSmode (match_operand:TLSmode 1 "gpc_reg_operand" "b") + (unspec:TLSmode [(match_operand:TLSmode 3 "gpc_reg_operand" "b") + (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")] + UNSPEC_TLSGD)))] + "HAVE_AS_TLS && TARGET_TLS_MARKERS && TARGET_CMODEL != CMODEL_SMALL" + "addi %0,%1,%2@got@tlsgd@l" + [(set_attr "length" "4")]) + +(define_insn "*tls_gd_call_aix<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b") + (call (mem:TLSmode (match_operand:TLSmode 1 "symbol_ref_operand" "s")) + (match_operand 2 "" "g"))) + (unspec:TLSmode [(match_operand:TLSmode 3 "rs6000_tls_symbol_ref" "")] + UNSPEC_TLSGD) + (clobber (reg:SI LR_REGNO))] + "HAVE_AS_TLS && TARGET_TLS_MARKERS + && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)" + "bl %z1(%3@tlsgd)\;nop" + [(set_attr "type" "branch") + (set_attr "length" "8")]) + +(define_insn "*tls_gd_call_sysv<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b") + (call (mem:TLSmode (match_operand:TLSmode 1 "symbol_ref_operand" "s")) + (match_operand 2 "" "g"))) + (unspec:TLSmode [(match_operand:TLSmode 3 "rs6000_tls_symbol_ref" "")] + UNSPEC_TLSGD) + (clobber (reg:SI LR_REGNO))] + "HAVE_AS_TLS && DEFAULT_ABI == ABI_V4 && TARGET_TLS_MARKERS" +{ + if (flag_pic) + { + if (TARGET_SECURE_PLT && flag_pic == 2) + return "bl %z1+32768(%3@tlsgd)@plt"; + return "bl %z1(%3@tlsgd)@plt"; + } + return "bl %z1(%3@tlsgd)"; +} + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn_and_split "tls_ld_aix<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b") + (call (mem:TLSmode (match_operand:TLSmode 2 "symbol_ref_operand" "s")) + (match_operand 3 "" "g"))) + (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b")] + UNSPEC_TLSLD) + (clobber (reg:SI LR_REGNO))] + "HAVE_AS_TLS && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)" +{ + if (TARGET_CMODEL != CMODEL_SMALL) + return "addis %0,%1,%&@got@tlsld@ha\;addi %0,%0,%&@got@tlsld@l\;" + "bl %z2\;nop"; + else + return "addi %0,%1,%&@got@tlsld\;bl %z2\;nop"; +} + "&& TARGET_TLS_MARKERS" + [(set (match_dup 0) + (unspec:TLSmode [(match_dup 1)] + UNSPEC_TLSLD)) + (parallel [(set (match_dup 0) + (call (mem:TLSmode (match_dup 2)) + (match_dup 3))) + (unspec:TLSmode [(const_int 0)] UNSPEC_TLSLD) + (clobber (reg:SI LR_REGNO))])] + "" + [(set_attr "type" "two") + (set (attr "length") + (if_then_else (ne (symbol_ref "TARGET_CMODEL") (symbol_ref "CMODEL_SMALL")) + (const_int 16) + (const_int 12)))]) + +(define_insn_and_split "tls_ld_sysv<TLSmode:tls_sysv_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b") + (call (mem:TLSmode (match_operand:TLSmode 2 "symbol_ref_operand" "s")) + (match_operand 3 "" "g"))) + (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b")] + UNSPEC_TLSLD) + (clobber (reg:SI LR_REGNO))] + "HAVE_AS_TLS && 
DEFAULT_ABI == ABI_V4" +{ + if (flag_pic) + { + if (TARGET_SECURE_PLT && flag_pic == 2) + return "addi %0,%1,%&@got@tlsld\;bl %z2+32768@plt"; + else + return "addi %0,%1,%&@got@tlsld\;bl %z2@plt"; + } + else + return "addi %0,%1,%&@got@tlsld\;bl %z2"; +} + "&& TARGET_TLS_MARKERS" + [(set (match_dup 0) + (unspec:TLSmode [(match_dup 1)] + UNSPEC_TLSLD)) + (parallel [(set (match_dup 0) + (call (mem:TLSmode (match_dup 2)) + (match_dup 3))) + (unspec:TLSmode [(const_int 0)] UNSPEC_TLSLD) + (clobber (reg:SI LR_REGNO))])] + "" + [(set_attr "length" "8")]) + +(define_insn_and_split "*tls_ld<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b") + (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b")] + UNSPEC_TLSLD))] + "HAVE_AS_TLS && TARGET_TLS_MARKERS" + "addi %0,%1,%&@got@tlsld" + "&& TARGET_CMODEL != CMODEL_SMALL" + [(set (match_dup 2) + (high:TLSmode + (unspec:TLSmode [(const_int 0) (match_dup 1)] UNSPEC_TLSLD))) + (set (match_dup 0) + (lo_sum:TLSmode (match_dup 2) + (unspec:TLSmode [(const_int 0) (match_dup 1)] UNSPEC_TLSLD)))] + " +{ + operands[2] = gen_reg_rtx (TARGET_64BIT ? DImode : SImode); +}" + [(set (attr "length") + (if_then_else (ne (symbol_ref "TARGET_CMODEL") (symbol_ref "CMODEL_SMALL")) + (const_int 8) + (const_int 4)))]) + +(define_insn "*tls_ld_high<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b") + (high:TLSmode + (unspec:TLSmode [(const_int 0) + (match_operand:TLSmode 1 "gpc_reg_operand" "b")] + UNSPEC_TLSLD)))] + "HAVE_AS_TLS && TARGET_TLS_MARKERS && TARGET_CMODEL != CMODEL_SMALL" + "addis %0,%1,%&@got@tlsld@ha" + [(set_attr "length" "4")]) + +(define_insn "*tls_ld_low<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b") + (lo_sum:TLSmode (match_operand:TLSmode 1 "gpc_reg_operand" "b") + (unspec:TLSmode [(const_int 0) + (match_operand:TLSmode 2 "gpc_reg_operand" "b")] + UNSPEC_TLSLD)))] + "HAVE_AS_TLS && TARGET_TLS_MARKERS && TARGET_CMODEL != CMODEL_SMALL" + "addi %0,%1,%&@got@tlsld@l" + [(set_attr "length" "4")]) + +(define_insn "*tls_ld_call_aix<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b") + (call (mem:TLSmode (match_operand:TLSmode 1 "symbol_ref_operand" "s")) + (match_operand 2 "" "g"))) + (unspec:TLSmode [(const_int 0)] UNSPEC_TLSLD) + (clobber (reg:SI LR_REGNO))] + "HAVE_AS_TLS && TARGET_TLS_MARKERS + && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)" + "bl %z1(%&@tlsld)\;nop" + [(set_attr "type" "branch") + (set_attr "length" "8")]) + +(define_insn "*tls_ld_call_sysv<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b") + (call (mem:TLSmode (match_operand:TLSmode 1 "symbol_ref_operand" "s")) + (match_operand 2 "" "g"))) + (unspec:TLSmode [(const_int 0)] UNSPEC_TLSLD) + (clobber (reg:SI LR_REGNO))] + "HAVE_AS_TLS && DEFAULT_ABI == ABI_V4 && TARGET_TLS_MARKERS" +{ + if (flag_pic) + { + if (TARGET_SECURE_PLT && flag_pic == 2) + return "bl %z1+32768(%&@tlsld)@plt"; + return "bl %z1(%&@tlsld)@plt"; + } + return "bl %z1(%&@tlsld)"; +} + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "tls_dtprel_<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=r") + (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b") + (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")] + UNSPEC_TLSDTPREL))] + "HAVE_AS_TLS" + "addi %0,%1,%2@dtprel") + +(define_insn "tls_dtprel_ha_<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=r") + 
(unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b") + (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")] + UNSPEC_TLSDTPRELHA))] + "HAVE_AS_TLS" + "addis %0,%1,%2@dtprel@ha") + +(define_insn "tls_dtprel_lo_<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=r") + (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b") + (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")] + UNSPEC_TLSDTPRELLO))] + "HAVE_AS_TLS" + "addi %0,%1,%2@dtprel@l") + +(define_insn_and_split "tls_got_dtprel_<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=r") + (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b") + (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")] + UNSPEC_TLSGOTDTPREL))] + "HAVE_AS_TLS" + "l<TLSmode:tls_insn_suffix> %0,%2@got@dtprel(%1)" + "&& TARGET_CMODEL != CMODEL_SMALL" + [(set (match_dup 3) + (high:TLSmode + (unspec:TLSmode [(match_dup 1) (match_dup 2)] UNSPEC_TLSGOTDTPREL))) + (set (match_dup 0) + (lo_sum:TLSmode (match_dup 3) + (unspec:TLSmode [(match_dup 1) (match_dup 2)] UNSPEC_TLSGOTDTPREL)))] + " +{ + operands[3] = gen_reg_rtx (TARGET_64BIT ? DImode : SImode); +}" + [(set (attr "length") + (if_then_else (ne (symbol_ref "TARGET_CMODEL") (symbol_ref "CMODEL_SMALL")) + (const_int 8) + (const_int 4)))]) + +(define_insn "*tls_got_dtprel_high<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b") + (high:TLSmode + (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b") + (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")] + UNSPEC_TLSGOTDTPREL)))] + "HAVE_AS_TLS && TARGET_CMODEL != CMODEL_SMALL" + "addis %0,%1,%2@got@dtprel@ha" + [(set_attr "length" "4")]) + +(define_insn "*tls_got_dtprel_low<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=r") + (lo_sum:TLSmode (match_operand:TLSmode 1 "gpc_reg_operand" "b") + (unspec:TLSmode [(match_operand:TLSmode 3 "gpc_reg_operand" "b") + (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")] + UNSPEC_TLSGOTDTPREL)))] + "HAVE_AS_TLS && TARGET_CMODEL != CMODEL_SMALL" + "l<TLSmode:tls_insn_suffix> %0,%2@got@dtprel@l(%1)" + [(set_attr "length" "4")]) + +(define_insn "tls_tprel_<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=r") + (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b") + (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")] + UNSPEC_TLSTPREL))] + "HAVE_AS_TLS" + "addi %0,%1,%2@tprel") + +(define_insn "tls_tprel_ha_<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=r") + (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b") + (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")] + UNSPEC_TLSTPRELHA))] + "HAVE_AS_TLS" + "addis %0,%1,%2@tprel@ha") + +(define_insn "tls_tprel_lo_<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=r") + (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b") + (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")] + UNSPEC_TLSTPRELLO))] + "HAVE_AS_TLS" + "addi %0,%1,%2@tprel@l") + +;; "b" output constraint here and on tls_tls input to support linker tls +;; optimization. The linker may edit the instructions emitted by a +;; tls_got_tprel/tls_tls pair to addis,addi. 
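+;; As a rough sketch of that edit (assuming a 64-bit ELF target and a
+;; thread-local variable "x"), the initial-exec pair emitted below,
+;;   ld   9,x@got@tprel(2)
+;;   add  3,9,x@tls
+;; may be rewritten by the linker into the local-exec form
+;;   addis 9,13,x@tprel@ha
+;;   addi  3,9,x@tprel@l
+;; which requires the intermediate register to be usable as a base (not r0).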
+(define_insn_and_split "tls_got_tprel_<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b") + (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b") + (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")] + UNSPEC_TLSGOTTPREL))] + "HAVE_AS_TLS" + "l<TLSmode:tls_insn_suffix> %0,%2@got@tprel(%1)" + "&& TARGET_CMODEL != CMODEL_SMALL" + [(set (match_dup 3) + (high:TLSmode + (unspec:TLSmode [(match_dup 1) (match_dup 2)] UNSPEC_TLSGOTTPREL))) + (set (match_dup 0) + (lo_sum:TLSmode (match_dup 3) + (unspec:TLSmode [(match_dup 1) (match_dup 2)] UNSPEC_TLSGOTTPREL)))] + " +{ + operands[3] = gen_reg_rtx (TARGET_64BIT ? DImode : SImode); +}" + [(set (attr "length") + (if_then_else (ne (symbol_ref "TARGET_CMODEL") (symbol_ref "CMODEL_SMALL")) + (const_int 8) + (const_int 4)))]) + +(define_insn "*tls_got_tprel_high<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b") + (high:TLSmode + (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b") + (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")] + UNSPEC_TLSGOTTPREL)))] + "HAVE_AS_TLS && TARGET_CMODEL != CMODEL_SMALL" + "addis %0,%1,%2@got@tprel@ha" + [(set_attr "length" "4")]) + +(define_insn "*tls_got_tprel_low<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=r") + (lo_sum:TLSmode (match_operand:TLSmode 1 "gpc_reg_operand" "b") + (unspec:TLSmode [(match_operand:TLSmode 3 "gpc_reg_operand" "b") + (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")] + UNSPEC_TLSGOTTPREL)))] + "HAVE_AS_TLS && TARGET_CMODEL != CMODEL_SMALL" + "l<TLSmode:tls_insn_suffix> %0,%2@got@tprel@l(%1)" + [(set_attr "length" "4")]) + +(define_insn "tls_tls_<TLSmode:tls_abi_suffix>" + [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=r") + (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b") + (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")] + UNSPEC_TLSTLS))] + "TARGET_ELF && HAVE_AS_TLS" + "add %0,%1,%2@tls") + +(define_expand "tls_get_tpointer" + [(set (match_operand:SI 0 "gpc_reg_operand" "") + (unspec:SI [(const_int 0)] UNSPEC_TLSTLS))] + "TARGET_XCOFF && HAVE_AS_TLS" + " +{ + emit_insn (gen_tls_get_tpointer_internal ()); + emit_move_insn (operands[0], gen_rtx_REG (SImode, 3)); + DONE; +}") + +(define_insn "tls_get_tpointer_internal" + [(set (reg:SI 3) + (unspec:SI [(const_int 0)] UNSPEC_TLSTLS)) + (clobber (reg:SI LR_REGNO))] + "TARGET_XCOFF && HAVE_AS_TLS" + "bla __get_tpointer") + +(define_expand "tls_get_addr<mode>" + [(set (match_operand:P 0 "gpc_reg_operand" "") + (unspec:P [(match_operand:P 1 "gpc_reg_operand" "") + (match_operand:P 2 "gpc_reg_operand" "")] UNSPEC_TLSTLS))] + "TARGET_XCOFF && HAVE_AS_TLS" + " +{ + emit_move_insn (gen_rtx_REG (Pmode, 3), operands[1]); + emit_move_insn (gen_rtx_REG (Pmode, 4), operands[2]); + emit_insn (gen_tls_get_addr_internal<mode> ()); + emit_move_insn (operands[0], gen_rtx_REG (Pmode, 3)); + DONE; +}") + +(define_insn "tls_get_addr_internal<mode>" + [(set (reg:P 3) + (unspec:P [(reg:P 3) (reg:P 4)] UNSPEC_TLSTLS)) + (clobber (reg:P 0)) + (clobber (reg:P 4)) + (clobber (reg:P 5)) + (clobber (reg:P 11)) + (clobber (reg:CC CR0_REGNO)) + (clobber (reg:P LR_REGNO))] + "TARGET_XCOFF && HAVE_AS_TLS" + "bla __tls_get_addr") + +;; Next come insns related to the calling sequence. +;; +;; First, an insn to allocate new stack space for dynamic use (e.g., alloca). +;; We move the back-chain and decrement the stack pointer. 
+ +(define_expand "allocate_stack" + [(set (match_operand 0 "gpc_reg_operand" "") + (minus (reg 1) (match_operand 1 "reg_or_short_operand" ""))) + (set (reg 1) + (minus (reg 1) (match_dup 1)))] + "" + " +{ rtx chain = gen_reg_rtx (Pmode); + rtx stack_bot = gen_rtx_MEM (Pmode, stack_pointer_rtx); + rtx neg_op0; + rtx insn, par, set, mem; + + emit_move_insn (chain, stack_bot); + + /* Check stack bounds if necessary. */ + if (crtl->limit_stack) + { + rtx available; + available = expand_binop (Pmode, sub_optab, + stack_pointer_rtx, stack_limit_rtx, + NULL_RTX, 1, OPTAB_WIDEN); + emit_insn (gen_cond_trap (LTU, available, operands[1], const0_rtx)); + } + + if (GET_CODE (operands[1]) != CONST_INT + || INTVAL (operands[1]) < -32767 + || INTVAL (operands[1]) > 32768) + { + neg_op0 = gen_reg_rtx (Pmode); + if (TARGET_32BIT) + emit_insn (gen_negsi2 (neg_op0, operands[1])); + else + emit_insn (gen_negdi2 (neg_op0, operands[1])); + } + else + neg_op0 = GEN_INT (- INTVAL (operands[1])); + + insn = emit_insn ((* ((TARGET_32BIT) ? gen_movsi_update_stack + : gen_movdi_di_update_stack)) + (stack_pointer_rtx, stack_pointer_rtx, neg_op0, + chain)); + /* Since we didn't use gen_frame_mem to generate the MEM, grab + it now and set the alias set/attributes. The above gen_*_update + calls will generate a PARALLEL with the MEM set being the first + operation. */ + par = PATTERN (insn); + gcc_assert (GET_CODE (par) == PARALLEL); + set = XVECEXP (par, 0, 0); + gcc_assert (GET_CODE (set) == SET); + mem = SET_DEST (set); + gcc_assert (MEM_P (mem)); + MEM_NOTRAP_P (mem) = 1; + set_mem_alias_set (mem, get_frame_alias_set ()); + + emit_move_insn (operands[0], virtual_stack_dynamic_rtx); + DONE; +}") + +;; These patterns say how to save and restore the stack pointer. We need not +;; save the stack pointer at function level since we are careful to +;; preserve the backchain. At block level, we have to restore the backchain +;; when we restore the stack pointer. +;; +;; For nonlocal gotos, we must save both the stack pointer and its +;; backchain and restore both. Note that in the nonlocal case, the +;; save area is a memory location. + +(define_expand "save_stack_function" + [(match_operand 0 "any_operand" "") + (match_operand 1 "any_operand" "")] + "" + "DONE;") + +(define_expand "restore_stack_function" + [(match_operand 0 "any_operand" "") + (match_operand 1 "any_operand" "")] + "" + "DONE;") + +;; Adjust stack pointer (op0) to a new value (op1). +;; First copy old stack backchain to new location, and ensure that the +;; scheduler won't reorder the sp assignment before the backchain write. +(define_expand "restore_stack_block" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 2)) + (match_dup 5) + (set (match_operand 0 "register_operand" "") + (match_operand 1 "register_operand" ""))] + "" + " +{ + rtvec p; + + operands[1] = force_reg (Pmode, operands[1]); + operands[2] = gen_reg_rtx (Pmode); + operands[3] = gen_frame_mem (Pmode, operands[0]); + operands[4] = gen_frame_mem (Pmode, operands[1]); + p = rtvec_alloc (1); + RTVEC_ELT (p, 0) = gen_rtx_SET (gen_frame_mem (BLKmode, operands[0]), + const0_rtx); + operands[5] = gen_rtx_PARALLEL (VOIDmode, p); +}") + +(define_expand "save_stack_nonlocal" + [(set (match_dup 3) (match_dup 4)) + (set (match_operand 0 "memory_operand" "") (match_dup 3)) + (set (match_dup 2) (match_operand 1 "register_operand" ""))] + "" + " +{ + int units_per_word = (TARGET_32BIT) ? 4 : 8; + + /* Copy the backchain to the first word, sp to the second. 
*/ + operands[0] = adjust_address_nv (operands[0], Pmode, 0); + operands[2] = adjust_address_nv (operands[0], Pmode, units_per_word); + operands[3] = gen_reg_rtx (Pmode); + operands[4] = gen_frame_mem (Pmode, operands[1]); +}") + +(define_expand "restore_stack_nonlocal" + [(set (match_dup 2) (match_operand 1 "memory_operand" "")) + (set (match_dup 3) (match_dup 4)) + (set (match_dup 5) (match_dup 2)) + (match_dup 6) + (set (match_operand 0 "register_operand" "") (match_dup 3))] + "" + " +{ + int units_per_word = (TARGET_32BIT) ? 4 : 8; + rtvec p; + + /* Restore the backchain from the first word, sp from the second. */ + operands[2] = gen_reg_rtx (Pmode); + operands[3] = gen_reg_rtx (Pmode); + operands[1] = adjust_address_nv (operands[1], Pmode, 0); + operands[4] = adjust_address_nv (operands[1], Pmode, units_per_word); + operands[5] = gen_frame_mem (Pmode, operands[3]); + p = rtvec_alloc (1); + RTVEC_ELT (p, 0) = gen_rtx_SET (gen_frame_mem (BLKmode, operands[0]), + const0_rtx); + operands[6] = gen_rtx_PARALLEL (VOIDmode, p); +}") + +;; TOC register handling. + +;; Code to initialize the TOC register... + +(define_insn "load_toc_aix_si" + [(parallel [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (unspec:SI [(const_int 0)] UNSPEC_TOC)) + (use (reg:SI 2))])] + "(DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) && TARGET_32BIT" + "* +{ + char buf[30]; + extern int need_toc_init; + need_toc_init = 1; + ASM_GENERATE_INTERNAL_LABEL (buf, \"LCTOC\", 1); + operands[1] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf)); + operands[2] = gen_rtx_REG (Pmode, 2); + return \"lwz %0,%1(%2)\"; +}" + [(set_attr "type" "load") + (set_attr "update" "no") + (set_attr "indexed" "no")]) + +(define_insn "load_toc_aix_di" + [(parallel [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_TOC)) + (use (reg:DI 2))])] + "(DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) && TARGET_64BIT" + "* +{ + char buf[30]; + extern int need_toc_init; + need_toc_init = 1; + ASM_GENERATE_INTERNAL_LABEL (buf, \"LCTOC\", + !TARGET_ELF || !TARGET_MINIMAL_TOC); + if (TARGET_ELF) + strcat (buf, \"@toc\"); + operands[1] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf)); + operands[2] = gen_rtx_REG (Pmode, 2); + return \"ld %0,%1(%2)\"; +}" + [(set_attr "type" "load") + (set_attr "update" "no") + (set_attr "indexed" "no")]) + +(define_insn "load_toc_v4_pic_si" + [(set (reg:SI LR_REGNO) + (unspec:SI [(const_int 0)] UNSPEC_TOC))] + "DEFAULT_ABI == ABI_V4 && flag_pic == 1 && TARGET_32BIT" + "bl _GLOBAL_OFFSET_TABLE_@local-4" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_expand "load_toc_v4_PIC_1" + [(parallel [(set (reg:SI LR_REGNO) + (match_operand:SI 0 "immediate_operand" "s")) + (use (unspec [(match_dup 0)] UNSPEC_TOC))])] + "TARGET_ELF && DEFAULT_ABI == ABI_V4 + && (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))" + "") + +(define_insn "load_toc_v4_PIC_1_normal" + [(set (reg:SI LR_REGNO) + (match_operand:SI 0 "immediate_operand" "s")) + (use (unspec [(match_dup 0)] UNSPEC_TOC))] + "!TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI == ABI_V4 + && (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))" + "bcl 20,31,%0\\n%0:" + [(set_attr "type" "branch") + (set_attr "length" "4") + (set_attr "cannot_copy" "yes")]) + +(define_insn "load_toc_v4_PIC_1_476" + [(set (reg:SI LR_REGNO) + (match_operand:SI 0 "immediate_operand" "s")) + (use (unspec [(match_dup 0)] UNSPEC_TOC))] + "TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI == ABI_V4 + && (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))" 
+ "* +{ + char name[32]; + static char templ[32]; + + get_ppc476_thunk_name (name); + sprintf (templ, \"bl %s\\n%%0:\", name); + return templ; +}" + [(set_attr "type" "branch") + (set_attr "length" "4") + (set_attr "cannot_copy" "yes")]) + +(define_expand "load_toc_v4_PIC_1b" + [(parallel [(set (reg:SI LR_REGNO) + (unspec:SI [(match_operand:SI 0 "immediate_operand" "s") + (label_ref (match_operand 1 "" ""))] + UNSPEC_TOCPTR)) + (match_dup 1)])] + "TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2" + "") + +(define_insn "load_toc_v4_PIC_1b_normal" + [(set (reg:SI LR_REGNO) + (unspec:SI [(match_operand:SI 0 "immediate_operand" "s") + (label_ref (match_operand 1 "" ""))] + UNSPEC_TOCPTR)) + (match_dup 1)] + "!TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2" + "bcl 20,31,$+8\;.long %0-$" + [(set_attr "type" "branch") + (set_attr "length" "8")]) + +(define_insn "load_toc_v4_PIC_1b_476" + [(set (reg:SI LR_REGNO) + (unspec:SI [(match_operand:SI 0 "immediate_operand" "s") + (label_ref (match_operand 1 "" ""))] + UNSPEC_TOCPTR)) + (match_dup 1)] + "TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2" + "* +{ + char name[32]; + static char templ[32]; + + get_ppc476_thunk_name (name); + sprintf (templ, \"bl %s\\n\\tb $+8\\n\\t.long %%0-$\", name); + return templ; +}" + [(set_attr "type" "branch") + (set_attr "length" "16")]) + +(define_insn "load_toc_v4_PIC_2" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (mem:SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (minus:SI (match_operand:SI 2 "immediate_operand" "s") + (match_operand:SI 3 "immediate_operand" "s")))))] + "TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2" + "lwz %0,%2-%3(%1)" + [(set_attr "type" "load")]) + +(define_insn "load_toc_v4_PIC_3b" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (high:SI + (minus:SI (match_operand:SI 2 "symbol_ref_operand" "s") + (match_operand:SI 3 "symbol_ref_operand" "s")))))] + "TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic" + "addis %0,%1,%2-%3@ha") + +(define_insn "load_toc_v4_PIC_3c" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (minus:SI (match_operand:SI 2 "symbol_ref_operand" "s") + (match_operand:SI 3 "symbol_ref_operand" "s"))))] + "TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic" + "addi %0,%1,%2-%3@l") + +;; If the TOC is shared over a translation unit, as happens with all +;; the kinds of PIC that we support, we need to restore the TOC +;; pointer only when jumping over units of translation. +;; On Darwin, we need to reload the picbase. 
+ +(define_expand "builtin_setjmp_receiver" + [(use (label_ref (match_operand 0 "" "")))] + "(DEFAULT_ABI == ABI_V4 && flag_pic == 1) + || (TARGET_TOC && TARGET_MINIMAL_TOC) + || (DEFAULT_ABI == ABI_DARWIN && flag_pic)" + " +{ +#if TARGET_MACHO + if (DEFAULT_ABI == ABI_DARWIN) + { + rtx picrtx = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME); + rtx picreg = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM); + rtx tmplabrtx; + char tmplab[20]; + + crtl->uses_pic_offset_table = 1; + ASM_GENERATE_INTERNAL_LABEL(tmplab, \"LSJR\", + CODE_LABEL_NUMBER (operands[0])); + tmplabrtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tmplab)); + + emit_insn (gen_load_macho_picbase (tmplabrtx)); + emit_move_insn (picreg, gen_rtx_REG (Pmode, LR_REGNO)); + emit_insn (gen_macho_correct_pic (picreg, picreg, picrtx, tmplabrtx)); + } + else +#endif + rs6000_emit_load_toc_table (FALSE); + DONE; +}") + +;; Largetoc support +(define_insn "*largetoc_high" + [(set (match_operand:DI 0 "gpc_reg_operand" "=b*r") + (high:DI + (unspec [(match_operand:DI 1 "" "") + (match_operand:DI 2 "gpc_reg_operand" "b")] + UNSPEC_TOCREL)))] + "TARGET_ELF && TARGET_CMODEL != CMODEL_SMALL" + "addis %0,%2,%1@toc@ha") + +(define_insn "*largetoc_high_aix<mode>" + [(set (match_operand:P 0 "gpc_reg_operand" "=b*r") + (high:P + (unspec [(match_operand:P 1 "" "") + (match_operand:P 2 "gpc_reg_operand" "b")] + UNSPEC_TOCREL)))] + "TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL" + "addis %0,%1@u(%2)") + +(define_insn "*largetoc_high_plus" + [(set (match_operand:DI 0 "gpc_reg_operand" "=b*r") + (high:DI + (plus:DI + (unspec [(match_operand:DI 1 "" "") + (match_operand:DI 2 "gpc_reg_operand" "b")] + UNSPEC_TOCREL) + (match_operand:DI 3 "add_cint_operand" "n"))))] + "TARGET_ELF && TARGET_CMODEL != CMODEL_SMALL" + "addis %0,%2,%1+%3@toc@ha") + +(define_insn "*largetoc_high_plus_aix<mode>" + [(set (match_operand:P 0 "gpc_reg_operand" "=b*r") + (high:P + (plus:P + (unspec [(match_operand:P 1 "" "") + (match_operand:P 2 "gpc_reg_operand" "b")] + UNSPEC_TOCREL) + (match_operand:P 3 "add_cint_operand" "n"))))] + "TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL" + "addis %0,%1+%3@u(%2)") + +(define_insn "*largetoc_low" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "gpc_reg_operand" "b") + (match_operand:DI 2 "" "")))] + "TARGET_ELF && TARGET_CMODEL != CMODEL_SMALL" + "addi %0,%1,%2@l") + +(define_insn "*largetoc_low_aix<mode>" + [(set (match_operand:P 0 "gpc_reg_operand" "=r") + (lo_sum:P (match_operand:P 1 "gpc_reg_operand" "b") + (match_operand:P 2 "" "")))] + "TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL" + "la %0,%2@l(%1)") + +(define_insn_and_split "*tocref<mode>" + [(set (match_operand:P 0 "gpc_reg_operand" "=b") + (match_operand:P 1 "small_toc_ref" "R"))] + "TARGET_TOC" + "la %0,%a1" + "&& TARGET_CMODEL != CMODEL_SMALL && reload_completed" + [(set (match_dup 0) (high:P (match_dup 1))) + (set (match_dup 0) (lo_sum:P (match_dup 0) (match_dup 1)))]) + +;; Elf specific ways of loading addresses for non-PIC code. +;; The output of this could be r0, but we make a very strong +;; preference for a base register because it will usually +;; be needed there. 
+(define_insn "elf_high" + [(set (match_operand:SI 0 "gpc_reg_operand" "=b*r") + (high:SI (match_operand 1 "" "")))] + "TARGET_ELF && !TARGET_64BIT && !flag_pic" + "lis %0,%1@ha") + +(define_insn "elf_low" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand 2 "" "")))] + "TARGET_ELF && !TARGET_64BIT && !flag_pic" + "la %0,%2@l(%1)") + +;; Call and call_value insns +(define_expand "call" + [(parallel [(call (mem:SI (match_operand 0 "address_operand" "")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNO))])] + "" + " +{ +#if TARGET_MACHO + if (MACHOPIC_INDIRECT) + operands[0] = machopic_indirect_call_target (operands[0]); +#endif + + gcc_assert (GET_CODE (operands[0]) == MEM); + gcc_assert (GET_CODE (operands[1]) == CONST_INT); + + operands[0] = XEXP (operands[0], 0); + + if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + { + rs6000_call_aix (NULL_RTX, operands[0], operands[1], operands[2]); + DONE; + } + + if (GET_CODE (operands[0]) != SYMBOL_REF + || (DEFAULT_ABI != ABI_DARWIN && (INTVAL (operands[2]) & CALL_LONG) != 0)) + { + if (INTVAL (operands[2]) & CALL_LONG) + operands[0] = rs6000_longcall_ref (operands[0]); + + switch (DEFAULT_ABI) + { + case ABI_V4: + case ABI_DARWIN: + operands[0] = force_reg (Pmode, operands[0]); + break; + + default: + gcc_unreachable (); + } + } +}") + +(define_expand "call_value" + [(parallel [(set (match_operand 0 "" "") + (call (mem:SI (match_operand 1 "address_operand" "")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNO))])] + "" + " +{ +#if TARGET_MACHO + if (MACHOPIC_INDIRECT) + operands[1] = machopic_indirect_call_target (operands[1]); +#endif + + gcc_assert (GET_CODE (operands[1]) == MEM); + gcc_assert (GET_CODE (operands[2]) == CONST_INT); + + operands[1] = XEXP (operands[1], 0); + + if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + { + rs6000_call_aix (operands[0], operands[1], operands[2], operands[3]); + DONE; + } + + if (GET_CODE (operands[1]) != SYMBOL_REF + || (DEFAULT_ABI != ABI_DARWIN && (INTVAL (operands[3]) & CALL_LONG) != 0)) + { + if (INTVAL (operands[3]) & CALL_LONG) + operands[1] = rs6000_longcall_ref (operands[1]); + + switch (DEFAULT_ABI) + { + case ABI_V4: + case ABI_DARWIN: + operands[1] = force_reg (Pmode, operands[1]); + break; + + default: + gcc_unreachable (); + } + } +}") + +;; Call to function in current module. No TOC pointer reload needed. +;; Operand2 is nonzero if we are using the V.4 calling sequence and +;; either the function was not prototyped, or it was prototyped as a +;; variable argument function. It is > 0 if FP registers were passed +;; and < 0 if they were not. + +(define_insn "*call_local32" + [(call (mem:SI (match_operand:SI 0 "current_file_function_operand" "s,s")) + (match_operand 1 "" "g,g")) + (use (match_operand:SI 2 "immediate_operand" "O,n")) + (clobber (reg:SI LR_REGNO))] + "(INTVAL (operands[2]) & CALL_LONG) == 0" + "* +{ + if (INTVAL (operands[2]) & CALL_V4_SET_FP_ARGS) + output_asm_insn (\"crxor 6,6,6\", operands); + + else if (INTVAL (operands[2]) & CALL_V4_CLEAR_FP_ARGS) + output_asm_insn (\"creqv 6,6,6\", operands); + + return (DEFAULT_ABI == ABI_V4 && flag_pic) ? 
\"bl %z0@local\" : \"bl %z0\"; +}" + [(set_attr "type" "branch") + (set_attr "length" "4,8")]) + +(define_insn "*call_local64" + [(call (mem:SI (match_operand:DI 0 "current_file_function_operand" "s,s")) + (match_operand 1 "" "g,g")) + (use (match_operand:SI 2 "immediate_operand" "O,n")) + (clobber (reg:SI LR_REGNO))] + "TARGET_64BIT && (INTVAL (operands[2]) & CALL_LONG) == 0" + "* +{ + if (INTVAL (operands[2]) & CALL_V4_SET_FP_ARGS) + output_asm_insn (\"crxor 6,6,6\", operands); + + else if (INTVAL (operands[2]) & CALL_V4_CLEAR_FP_ARGS) + output_asm_insn (\"creqv 6,6,6\", operands); + + return (DEFAULT_ABI == ABI_V4 && flag_pic) ? \"bl %z0@local\" : \"bl %z0\"; +}" + [(set_attr "type" "branch") + (set_attr "length" "4,8")]) + +(define_insn "*call_value_local32" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "current_file_function_operand" "s,s")) + (match_operand 2 "" "g,g"))) + (use (match_operand:SI 3 "immediate_operand" "O,n")) + (clobber (reg:SI LR_REGNO))] + "(INTVAL (operands[3]) & CALL_LONG) == 0" + "* +{ + if (INTVAL (operands[3]) & CALL_V4_SET_FP_ARGS) + output_asm_insn (\"crxor 6,6,6\", operands); + + else if (INTVAL (operands[3]) & CALL_V4_CLEAR_FP_ARGS) + output_asm_insn (\"creqv 6,6,6\", operands); + + return (DEFAULT_ABI == ABI_V4 && flag_pic) ? \"bl %z1@local\" : \"bl %z1\"; +}" + [(set_attr "type" "branch") + (set_attr "length" "4,8")]) + + +(define_insn "*call_value_local64" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:DI 1 "current_file_function_operand" "s,s")) + (match_operand 2 "" "g,g"))) + (use (match_operand:SI 3 "immediate_operand" "O,n")) + (clobber (reg:SI LR_REGNO))] + "TARGET_64BIT && (INTVAL (operands[3]) & CALL_LONG) == 0" + "* +{ + if (INTVAL (operands[3]) & CALL_V4_SET_FP_ARGS) + output_asm_insn (\"crxor 6,6,6\", operands); + + else if (INTVAL (operands[3]) & CALL_V4_CLEAR_FP_ARGS) + output_asm_insn (\"creqv 6,6,6\", operands); + + return (DEFAULT_ABI == ABI_V4 && flag_pic) ? 
\"bl %z1@local\" : \"bl %z1\"; +}" + [(set_attr "type" "branch") + (set_attr "length" "4,8")]) + + +;; A function pointer under System V is just a normal pointer +;; operands[0] is the function pointer +;; operands[1] is the stack size to clean up +;; operands[2] is the value FUNCTION_ARG returns for the VOID argument +;; which indicates how to set cr1 + +(define_insn "*call_indirect_nonlocal_sysv<mode>" + [(call (mem:SI (match_operand:P 0 "register_operand" "c,*l,c,*l")) + (match_operand 1 "" "g,g,g,g")) + (use (match_operand:SI 2 "immediate_operand" "O,O,n,n")) + (clobber (reg:SI LR_REGNO))] + "DEFAULT_ABI == ABI_V4 + || DEFAULT_ABI == ABI_DARWIN" +{ + if (INTVAL (operands[2]) & CALL_V4_SET_FP_ARGS) + output_asm_insn ("crxor 6,6,6", operands); + + else if (INTVAL (operands[2]) & CALL_V4_CLEAR_FP_ARGS) + output_asm_insn ("creqv 6,6,6", operands); + + return "b%T0l"; +} + [(set_attr "type" "jmpreg,jmpreg,jmpreg,jmpreg") + (set_attr "length" "4,4,8,8")]) + +(define_insn_and_split "*call_nonlocal_sysv<mode>" + [(call (mem:SI (match_operand:P 0 "symbol_ref_operand" "s,s")) + (match_operand 1 "" "g,g")) + (use (match_operand:SI 2 "immediate_operand" "O,n")) + (clobber (reg:SI LR_REGNO))] + "(DEFAULT_ABI == ABI_DARWIN + || (DEFAULT_ABI == ABI_V4 + && (INTVAL (operands[2]) & CALL_LONG) == 0))" +{ + if (INTVAL (operands[2]) & CALL_V4_SET_FP_ARGS) + output_asm_insn ("crxor 6,6,6", operands); + + else if (INTVAL (operands[2]) & CALL_V4_CLEAR_FP_ARGS) + output_asm_insn ("creqv 6,6,6", operands); + +#if TARGET_MACHO + return output_call(insn, operands, 0, 2); +#else + if (DEFAULT_ABI == ABI_V4 && flag_pic) + { + gcc_assert (!TARGET_SECURE_PLT); + return "bl %z0@plt"; + } + else + return "bl %z0"; +#endif +} + "DEFAULT_ABI == ABI_V4 + && TARGET_SECURE_PLT && flag_pic && !SYMBOL_REF_LOCAL_P (operands[0]) + && (INTVAL (operands[2]) & CALL_LONG) == 0" + [(parallel [(call (mem:SI (match_dup 0)) + (match_dup 1)) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (reg:SI LR_REGNO))])] +{ + operands[3] = pic_offset_table_rtx; +} + [(set_attr "type" "branch,branch") + (set_attr "length" "4,8")]) + +(define_insn "*call_nonlocal_sysv_secure<mode>" + [(call (mem:SI (match_operand:P 0 "symbol_ref_operand" "s,s")) + (match_operand 1 "" "g,g")) + (use (match_operand:SI 2 "immediate_operand" "O,n")) + (use (match_operand:SI 3 "register_operand" "r,r")) + (clobber (reg:SI LR_REGNO))] + "(DEFAULT_ABI == ABI_V4 + && TARGET_SECURE_PLT && flag_pic && !SYMBOL_REF_LOCAL_P (operands[0]) + && (INTVAL (operands[2]) & CALL_LONG) == 0)" +{ + if (INTVAL (operands[2]) & CALL_V4_SET_FP_ARGS) + output_asm_insn ("crxor 6,6,6", operands); + + else if (INTVAL (operands[2]) & CALL_V4_CLEAR_FP_ARGS) + output_asm_insn ("creqv 6,6,6", operands); + + if (flag_pic == 2) + /* The magic 32768 offset here and in the other sysv call insns + corresponds to the offset of r30 in .got2, as given by LCTOC1. + See sysv4.h:toc_section. 
*/ + return "bl %z0+32768@plt"; + else + return "bl %z0@plt"; +} + [(set_attr "type" "branch,branch") + (set_attr "length" "4,8")]) + +(define_insn "*call_value_indirect_nonlocal_sysv<mode>" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:P 1 "register_operand" "c,*l,c,*l")) + (match_operand 2 "" "g,g,g,g"))) + (use (match_operand:SI 3 "immediate_operand" "O,O,n,n")) + (clobber (reg:SI LR_REGNO))] + "DEFAULT_ABI == ABI_V4 + || DEFAULT_ABI == ABI_DARWIN" +{ + if (INTVAL (operands[3]) & CALL_V4_SET_FP_ARGS) + output_asm_insn ("crxor 6,6,6", operands); + + else if (INTVAL (operands[3]) & CALL_V4_CLEAR_FP_ARGS) + output_asm_insn ("creqv 6,6,6", operands); + + return "b%T1l"; +} + [(set_attr "type" "jmpreg,jmpreg,jmpreg,jmpreg") + (set_attr "length" "4,4,8,8")]) + +(define_insn_and_split "*call_value_nonlocal_sysv<mode>" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:P 1 "symbol_ref_operand" "s,s")) + (match_operand 2 "" "g,g"))) + (use (match_operand:SI 3 "immediate_operand" "O,n")) + (clobber (reg:SI LR_REGNO))] + "(DEFAULT_ABI == ABI_DARWIN + || (DEFAULT_ABI == ABI_V4 + && (INTVAL (operands[3]) & CALL_LONG) == 0))" +{ + if (INTVAL (operands[3]) & CALL_V4_SET_FP_ARGS) + output_asm_insn ("crxor 6,6,6", operands); + + else if (INTVAL (operands[3]) & CALL_V4_CLEAR_FP_ARGS) + output_asm_insn ("creqv 6,6,6", operands); + +#if TARGET_MACHO + return output_call(insn, operands, 1, 3); +#else + if (DEFAULT_ABI == ABI_V4 && flag_pic) + { + gcc_assert (!TARGET_SECURE_PLT); + return "bl %z1@plt"; + } + else + return "bl %z1"; +#endif +} + "DEFAULT_ABI == ABI_V4 + && TARGET_SECURE_PLT && flag_pic && !SYMBOL_REF_LOCAL_P (operands[1]) + && (INTVAL (operands[3]) & CALL_LONG) == 0" + [(parallel [(set (match_dup 0) + (call (mem:SI (match_dup 1)) + (match_dup 2))) + (use (match_dup 3)) + (use (match_dup 4)) + (clobber (reg:SI LR_REGNO))])] +{ + operands[4] = pic_offset_table_rtx; +} + [(set_attr "type" "branch,branch") + (set_attr "length" "4,8")]) + +(define_insn "*call_value_nonlocal_sysv_secure<mode>" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:P 1 "symbol_ref_operand" "s,s")) + (match_operand 2 "" "g,g"))) + (use (match_operand:SI 3 "immediate_operand" "O,n")) + (use (match_operand:SI 4 "register_operand" "r,r")) + (clobber (reg:SI LR_REGNO))] + "(DEFAULT_ABI == ABI_V4 + && TARGET_SECURE_PLT && flag_pic && !SYMBOL_REF_LOCAL_P (operands[1]) + && (INTVAL (operands[3]) & CALL_LONG) == 0)" +{ + if (INTVAL (operands[3]) & CALL_V4_SET_FP_ARGS) + output_asm_insn ("crxor 6,6,6", operands); + + else if (INTVAL (operands[3]) & CALL_V4_CLEAR_FP_ARGS) + output_asm_insn ("creqv 6,6,6", operands); + + if (flag_pic == 2) + return "bl %z1+32768@plt"; + else + return "bl %z1@plt"; +} + [(set_attr "type" "branch,branch") + (set_attr "length" "4,8")]) + + +;; Call to AIX abi function in the same module. 
+ +(define_insn "*call_local_aix<mode>" + [(call (mem:SI (match_operand:P 0 "current_file_function_operand" "s")) + (match_operand 1 "" "g")) + (clobber (reg:P LR_REGNO))] + "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2" + "bl %z0" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*call_value_local_aix<mode>" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:P 1 "current_file_function_operand" "s")) + (match_operand 2 "" "g"))) + (clobber (reg:P LR_REGNO))] + "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2" + "bl %z1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +;; Call to AIX abi function which may be in another module. +;; Restore the TOC pointer (r2) after the call. + +(define_insn "*call_nonlocal_aix<mode>" + [(call (mem:SI (match_operand:P 0 "symbol_ref_operand" "s")) + (match_operand 1 "" "g")) + (clobber (reg:P LR_REGNO))] + "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2" + "bl %z0\;nop" + [(set_attr "type" "branch") + (set_attr "length" "8")]) + +(define_insn "*call_value_nonlocal_aix<mode>" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:P 1 "symbol_ref_operand" "s")) + (match_operand 2 "" "g"))) + (clobber (reg:P LR_REGNO))] + "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2" + "bl %z1\;nop" + [(set_attr "type" "branch") + (set_attr "length" "8")]) + +;; Call to indirect functions with the AIX abi using a 3 word descriptor. +;; Operand0 is the addresss of the function to call +;; Operand2 is the location in the function descriptor to load r2 from +;; Operand3 is the offset of the stack location holding the current TOC pointer + +(define_insn "*call_indirect_aix<mode>" + [(call (mem:SI (match_operand:P 0 "register_operand" "c,*l")) + (match_operand 1 "" "g,g")) + (use (match_operand:P 2 "memory_operand" "<ptrm>,<ptrm>")) + (set (reg:P TOC_REGNUM) (unspec [(match_operand:P 3 "const_int_operand" "n,n")] UNSPEC_TOCSLOT)) + (clobber (reg:P LR_REGNO))] + "DEFAULT_ABI == ABI_AIX" + "<ptrload> 2,%2\;b%T0l\;<ptrload> 2,%3(1)" + [(set_attr "type" "jmpreg") + (set_attr "length" "12")]) + +(define_insn "*call_value_indirect_aix<mode>" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:P 1 "register_operand" "c,*l")) + (match_operand 2 "" "g,g"))) + (use (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>")) + (set (reg:P TOC_REGNUM) (unspec [(match_operand:P 4 "const_int_operand" "n,n")] UNSPEC_TOCSLOT)) + (clobber (reg:P LR_REGNO))] + "DEFAULT_ABI == ABI_AIX" + "<ptrload> 2,%3\;b%T1l\;<ptrload> 2,%4(1)" + [(set_attr "type" "jmpreg") + (set_attr "length" "12")]) + +;; Call to indirect functions with the ELFv2 ABI. 
+;; Operand0 is the addresss of the function to call +;; Operand2 is the offset of the stack location holding the current TOC pointer + +(define_insn "*call_indirect_elfv2<mode>" + [(call (mem:SI (match_operand:P 0 "register_operand" "c,*l")) + (match_operand 1 "" "g,g")) + (set (reg:P TOC_REGNUM) (unspec [(match_operand:P 2 "const_int_operand" "n,n")] UNSPEC_TOCSLOT)) + (clobber (reg:P LR_REGNO))] + "DEFAULT_ABI == ABI_ELFv2" + "b%T0l\;<ptrload> 2,%2(1)" + [(set_attr "type" "jmpreg") + (set_attr "length" "8")]) + +(define_insn "*call_value_indirect_elfv2<mode>" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:P 1 "register_operand" "c,*l")) + (match_operand 2 "" "g,g"))) + (set (reg:P TOC_REGNUM) (unspec [(match_operand:P 3 "const_int_operand" "n,n")] UNSPEC_TOCSLOT)) + (clobber (reg:P LR_REGNO))] + "DEFAULT_ABI == ABI_ELFv2" + "b%T1l\;<ptrload> 2,%3(1)" + [(set_attr "type" "jmpreg") + (set_attr "length" "8")]) + + +;; Call subroutine returning any type. +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "" "") + (const_int 0)) + (match_operand 1 "" "") + (match_operand 2 "" "")])] + "" + " +{ + int i; + + emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx)); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage ()); + + DONE; +}") + +;; sibling call patterns +(define_expand "sibcall" + [(parallel [(call (mem:SI (match_operand 0 "address_operand" "")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (simple_return)])] + "" + " +{ +#if TARGET_MACHO + if (MACHOPIC_INDIRECT) + operands[0] = machopic_indirect_call_target (operands[0]); +#endif + + gcc_assert (GET_CODE (operands[0]) == MEM); + gcc_assert (GET_CODE (operands[1]) == CONST_INT); + + operands[0] = XEXP (operands[0], 0); + + if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + { + rs6000_sibcall_aix (NULL_RTX, operands[0], operands[1], operands[2]); + DONE; + } +}") + +(define_expand "sibcall_value" + [(parallel [(set (match_operand 0 "register_operand" "") + (call (mem:SI (match_operand 1 "address_operand" "")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (simple_return)])] + "" + " +{ +#if TARGET_MACHO + if (MACHOPIC_INDIRECT) + operands[1] = machopic_indirect_call_target (operands[1]); +#endif + + gcc_assert (GET_CODE (operands[1]) == MEM); + gcc_assert (GET_CODE (operands[2]) == CONST_INT); + + operands[1] = XEXP (operands[1], 0); + + if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + { + rs6000_sibcall_aix (operands[0], operands[1], operands[2], operands[3]); + DONE; + } +}") + +(define_insn "*sibcall_local32" + [(call (mem:SI (match_operand:SI 0 "current_file_function_operand" "s,s")) + (match_operand 1 "" "g,g")) + (use (match_operand:SI 2 "immediate_operand" "O,n")) + (simple_return)] + "(INTVAL (operands[2]) & CALL_LONG) == 0" + "* +{ + if (INTVAL (operands[2]) & CALL_V4_SET_FP_ARGS) + output_asm_insn (\"crxor 6,6,6\", operands); + + else if (INTVAL (operands[2]) & CALL_V4_CLEAR_FP_ARGS) + output_asm_insn (\"creqv 6,6,6\", operands); + + return (DEFAULT_ABI == ABI_V4 && flag_pic) ? 
\"b %z0@local\" : \"b %z0\"; +}" + [(set_attr "type" "branch") + (set_attr "length" "4,8")]) + +(define_insn "*sibcall_local64" + [(call (mem:SI (match_operand:DI 0 "current_file_function_operand" "s,s")) + (match_operand 1 "" "g,g")) + (use (match_operand:SI 2 "immediate_operand" "O,n")) + (simple_return)] + "TARGET_64BIT && (INTVAL (operands[2]) & CALL_LONG) == 0" + "* +{ + if (INTVAL (operands[2]) & CALL_V4_SET_FP_ARGS) + output_asm_insn (\"crxor 6,6,6\", operands); + + else if (INTVAL (operands[2]) & CALL_V4_CLEAR_FP_ARGS) + output_asm_insn (\"creqv 6,6,6\", operands); + + return (DEFAULT_ABI == ABI_V4 && flag_pic) ? \"b %z0@local\" : \"b %z0\"; +}" + [(set_attr "type" "branch") + (set_attr "length" "4,8")]) + +(define_insn "*sibcall_value_local32" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "current_file_function_operand" "s,s")) + (match_operand 2 "" "g,g"))) + (use (match_operand:SI 3 "immediate_operand" "O,n")) + (simple_return)] + "(INTVAL (operands[3]) & CALL_LONG) == 0" + "* +{ + if (INTVAL (operands[3]) & CALL_V4_SET_FP_ARGS) + output_asm_insn (\"crxor 6,6,6\", operands); + + else if (INTVAL (operands[3]) & CALL_V4_CLEAR_FP_ARGS) + output_asm_insn (\"creqv 6,6,6\", operands); + + return (DEFAULT_ABI == ABI_V4 && flag_pic) ? \"b %z1@local\" : \"b %z1\"; +}" + [(set_attr "type" "branch") + (set_attr "length" "4,8")]) + +(define_insn "*sibcall_value_local64" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:DI 1 "current_file_function_operand" "s,s")) + (match_operand 2 "" "g,g"))) + (use (match_operand:SI 3 "immediate_operand" "O,n")) + (simple_return)] + "TARGET_64BIT && (INTVAL (operands[3]) & CALL_LONG) == 0" + "* +{ + if (INTVAL (operands[3]) & CALL_V4_SET_FP_ARGS) + output_asm_insn (\"crxor 6,6,6\", operands); + + else if (INTVAL (operands[3]) & CALL_V4_CLEAR_FP_ARGS) + output_asm_insn (\"creqv 6,6,6\", operands); + + return (DEFAULT_ABI == ABI_V4 && flag_pic) ? 
\"b %z1@local\" : \"b %z1\"; +}" + [(set_attr "type" "branch") + (set_attr "length" "4,8")]) + +(define_insn "*sibcall_nonlocal_sysv<mode>" + [(call (mem:SI (match_operand:P 0 "call_operand" "s,s,c,c")) + (match_operand 1 "" "")) + (use (match_operand 2 "immediate_operand" "O,n,O,n")) + (simple_return)] + "(DEFAULT_ABI == ABI_DARWIN + || DEFAULT_ABI == ABI_V4) + && (INTVAL (operands[2]) & CALL_LONG) == 0" + "* +{ + if (INTVAL (operands[2]) & CALL_V4_SET_FP_ARGS) + output_asm_insn (\"crxor 6,6,6\", operands); + + else if (INTVAL (operands[2]) & CALL_V4_CLEAR_FP_ARGS) + output_asm_insn (\"creqv 6,6,6\", operands); + + if (which_alternative >= 2) + return \"b%T0\"; + else if (DEFAULT_ABI == ABI_V4 && flag_pic) + { + gcc_assert (!TARGET_SECURE_PLT); + return \"b %z0@plt\"; + } + else + return \"b %z0\"; +}" + [(set_attr "type" "branch") + (set_attr "length" "4,8,4,8")]) + +(define_insn "*sibcall_value_nonlocal_sysv<mode>" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:P 1 "call_operand" "s,s,c,c")) + (match_operand 2 "" ""))) + (use (match_operand:SI 3 "immediate_operand" "O,n,O,n")) + (simple_return)] + "(DEFAULT_ABI == ABI_DARWIN + || DEFAULT_ABI == ABI_V4) + && (INTVAL (operands[3]) & CALL_LONG) == 0" + "* +{ + if (INTVAL (operands[3]) & CALL_V4_SET_FP_ARGS) + output_asm_insn (\"crxor 6,6,6\", operands); + + else if (INTVAL (operands[3]) & CALL_V4_CLEAR_FP_ARGS) + output_asm_insn (\"creqv 6,6,6\", operands); + + if (which_alternative >= 2) + return \"b%T1\"; + else if (DEFAULT_ABI == ABI_V4 && flag_pic) + { + gcc_assert (!TARGET_SECURE_PLT); + return \"b %z1@plt\"; + } + else + return \"b %z1\"; +}" + [(set_attr "type" "branch") + (set_attr "length" "4,8,4,8")]) + +;; AIX ABI sibling call patterns. + +(define_insn "*sibcall_aix<mode>" + [(call (mem:SI (match_operand:P 0 "call_operand" "s,c")) + (match_operand 1 "" "g,g")) + (simple_return)] + "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2" + "@ + b %z0 + b%T0" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*sibcall_value_aix<mode>" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:P 1 "call_operand" "s,c")) + (match_operand 2 "" "g,g"))) + (simple_return)] + "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2" + "@ + b %z1 + b%T1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_expand "sibcall_epilogue" + [(use (const_int 0))] + "" +{ + if (!TARGET_SCHED_PROLOG) + emit_insn (gen_blockage ()); + rs6000_emit_epilogue (TRUE); + DONE; +}) + +;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and +;; all of memory. This blocks insns from being moved across this point. 
+ +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCK)] + "" + "") + +(define_expand "probe_stack_address" + [(use (match_operand 0 "address_operand"))] + "" +{ + operands[0] = gen_rtx_MEM (Pmode, operands[0]); + MEM_VOLATILE_P (operands[0]) = 1; + + if (TARGET_64BIT) + emit_insn (gen_probe_stack_di (operands[0])); + else + emit_insn (gen_probe_stack_si (operands[0])); + DONE; +}) + +(define_insn "probe_stack_<mode>" + [(set (match_operand:P 0 "memory_operand" "=m") + (unspec:P [(const_int 0)] UNSPEC_PROBE_STACK))] + "" +{ + operands[1] = gen_rtx_REG (Pmode, 0); + return "st<wd>%U0%X0 %1,%0"; +} + [(set_attr "type" "store") + (set (attr "update") + (if_then_else (match_operand 0 "update_address_mem") + (const_string "yes") + (const_string "no"))) + (set (attr "indexed") + (if_then_else (match_operand 0 "indexed_address_mem") + (const_string "yes") + (const_string "no"))) + (set_attr "length" "4")]) + +(define_insn "probe_stack_range<P:mode>" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec_volatile:P [(match_operand:P 1 "register_operand" "0") + (match_operand:P 2 "register_operand" "r")] + UNSPECV_PROBE_STACK_RANGE))] + "" + "* return output_probe_stack_range (operands[0], operands[2]);" + [(set_attr "type" "three")]) + +;; Compare insns are next. Note that the RS/6000 has two types of compares, +;; signed & unsigned, and one type of branch. +;; +;; Start with the DEFINE_EXPANDs to generate the rtl for compares, scc +;; insns, and branches. + +(define_expand "cbranch<mode>4" + [(use (match_operator 0 "rs6000_cbranch_operator" + [(match_operand:GPR 1 "gpc_reg_operand" "") + (match_operand:GPR 2 "reg_or_short_operand" "")])) + (use (match_operand 3 ""))] + "" + " +{ + /* Take care of the possibility that operands[2] might be negative but + this might be a logical operation. That insn doesn't exist. 
*/ + if (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) < 0) + { + operands[2] = force_reg (<MODE>mode, operands[2]); + operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), + GET_MODE (operands[0]), + operands[1], operands[2]); + } + + rs6000_emit_cbranch (<MODE>mode, operands); + DONE; +}") + +(define_expand "cbranch<mode>4" + [(use (match_operator 0 "rs6000_cbranch_operator" + [(match_operand:FP 1 "gpc_reg_operand" "") + (match_operand:FP 2 "gpc_reg_operand" "")])) + (use (match_operand 3 ""))] + "" + " +{ + rs6000_emit_cbranch (<MODE>mode, operands); + DONE; +}") + +(define_expand "cstore<mode>4_signed" + [(use (match_operator 1 "signed_comparison_operator" + [(match_operand:P 2 "gpc_reg_operand") + (match_operand:P 3 "gpc_reg_operand")])) + (clobber (match_operand:P 0 "gpc_reg_operand"))] + "" +{ + enum rtx_code cond_code = GET_CODE (operands[1]); + + rtx op0 = operands[0]; + rtx op1 = operands[2]; + rtx op2 = operands[3]; + + if (cond_code == GE || cond_code == LT) + { + cond_code = swap_condition (cond_code); + std::swap (op1, op2); + } + + rtx tmp1 = gen_reg_rtx (<MODE>mode); + rtx tmp2 = gen_reg_rtx (<MODE>mode); + rtx tmp3 = gen_reg_rtx (<MODE>mode); + + int sh = GET_MODE_BITSIZE (<MODE>mode) - 1; + emit_insn (gen_lshr<mode>3 (tmp1, op1, GEN_INT (sh))); + emit_insn (gen_ashr<mode>3 (tmp2, op2, GEN_INT (sh))); + + emit_insn (gen_subf<mode>3_carry (tmp3, op1, op2)); + + if (cond_code == LE) + emit_insn (gen_add<mode>3_carry_in (op0, tmp1, tmp2)); + else + { + rtx tmp4 = gen_reg_rtx (<MODE>mode); + emit_insn (gen_add<mode>3_carry_in (tmp4, tmp1, tmp2)); + emit_insn (gen_xor<mode>3 (op0, tmp4, const1_rtx)); + } + + DONE; +}) + +(define_expand "cstore<mode>4_unsigned" + [(use (match_operator 1 "unsigned_comparison_operator" + [(match_operand:P 2 "gpc_reg_operand") + (match_operand:P 3 "reg_or_short_operand")])) + (clobber (match_operand:P 0 "gpc_reg_operand"))] + "" +{ + enum rtx_code cond_code = GET_CODE (operands[1]); + + rtx op0 = operands[0]; + rtx op1 = operands[2]; + rtx op2 = operands[3]; + + if (cond_code == GEU || cond_code == LTU) + { + cond_code = swap_condition (cond_code); + std::swap (op1, op2); + } + + if (!gpc_reg_operand (op1, <MODE>mode)) + op1 = force_reg (<MODE>mode, op1); + if (!reg_or_short_operand (op2, <MODE>mode)) + op2 = force_reg (<MODE>mode, op2); + + rtx tmp = gen_reg_rtx (<MODE>mode); + rtx tmp2 = gen_reg_rtx (<MODE>mode); + + emit_insn (gen_subf<mode>3_carry (tmp, op1, op2)); + emit_insn (gen_subf<mode>3_carry_in_xx (tmp2)); + + if (cond_code == LEU) + emit_insn (gen_add<mode>3 (op0, tmp2, const1_rtx)); + else + emit_insn (gen_neg<mode>2 (op0, tmp2)); + + DONE; +}) + +(define_expand "cstore_si_as_di" + [(use (match_operator 1 "unsigned_comparison_operator" + [(match_operand:SI 2 "gpc_reg_operand") + (match_operand:SI 3 "reg_or_short_operand")])) + (clobber (match_operand:SI 0 "gpc_reg_operand"))] + "" +{ + int uns_flag = unsigned_comparison_operator (operands[1], VOIDmode) ? 
1 : 0; + enum rtx_code cond_code = signed_condition (GET_CODE (operands[1])); + + operands[2] = force_reg (SImode, operands[2]); + operands[3] = force_reg (SImode, operands[3]); + rtx op1 = gen_reg_rtx (DImode); + rtx op2 = gen_reg_rtx (DImode); + convert_move (op1, operands[2], uns_flag); + convert_move (op2, operands[3], uns_flag); + + if (cond_code == GT || cond_code == LE) + { + cond_code = swap_condition (cond_code); + std::swap (op1, op2); + } + + rtx tmp = gen_reg_rtx (DImode); + rtx tmp2 = gen_reg_rtx (DImode); + emit_insn (gen_subdi3 (tmp, op1, op2)); + emit_insn (gen_lshrdi3 (tmp2, tmp, GEN_INT (63))); + + rtx tmp3; + switch (cond_code) + { + default: + gcc_unreachable (); + case LT: + tmp3 = tmp2; + break; + case GE: + tmp3 = gen_reg_rtx (DImode); + emit_insn (gen_xordi3 (tmp3, tmp2, const1_rtx)); + break; + } + + convert_move (operands[0], tmp3, 1); + + DONE; +}) + +(define_expand "cstore<mode>4_signed_imm" + [(use (match_operator 1 "signed_comparison_operator" + [(match_operand:GPR 2 "gpc_reg_operand") + (match_operand:GPR 3 "immediate_operand")])) + (clobber (match_operand:GPR 0 "gpc_reg_operand"))] + "" +{ + bool invert = false; + + enum rtx_code cond_code = GET_CODE (operands[1]); + + rtx op0 = operands[0]; + rtx op1 = operands[2]; + HOST_WIDE_INT val = INTVAL (operands[3]); + + if (cond_code == GE || cond_code == GT) + { + cond_code = reverse_condition (cond_code); + invert = true; + } + + if (cond_code == LE) + val++; + + rtx tmp = gen_reg_rtx (<MODE>mode); + emit_insn (gen_add<mode>3 (tmp, op1, GEN_INT (-val))); + rtx x = gen_reg_rtx (<MODE>mode); + if (val < 0) + emit_insn (gen_and<mode>3 (x, op1, tmp)); + else + emit_insn (gen_ior<mode>3 (x, op1, tmp)); + + if (invert) + { + rtx tmp = gen_reg_rtx (<MODE>mode); + emit_insn (gen_one_cmpl<mode>2 (tmp, x)); + x = tmp; + } + + int sh = GET_MODE_BITSIZE (<MODE>mode) - 1; + emit_insn (gen_lshr<mode>3 (op0, x, GEN_INT (sh))); + + DONE; +}) + +(define_expand "cstore<mode>4_unsigned_imm" + [(use (match_operator 1 "unsigned_comparison_operator" + [(match_operand:GPR 2 "gpc_reg_operand") + (match_operand:GPR 3 "immediate_operand")])) + (clobber (match_operand:GPR 0 "gpc_reg_operand"))] + "" +{ + bool invert = false; + + enum rtx_code cond_code = GET_CODE (operands[1]); + + rtx op0 = operands[0]; + rtx op1 = operands[2]; + HOST_WIDE_INT val = INTVAL (operands[3]); + + if (cond_code == GEU || cond_code == GTU) + { + cond_code = reverse_condition (cond_code); + invert = true; + } + + if (cond_code == LEU) + val++; + + rtx tmp = gen_reg_rtx (<MODE>mode); + rtx tmp2 = gen_reg_rtx (<MODE>mode); + emit_insn (gen_add<mode>3 (tmp, op1, GEN_INT (-val))); + emit_insn (gen_one_cmpl<mode>2 (tmp2, op1)); + rtx x = gen_reg_rtx (<MODE>mode); + if (val < 0) + emit_insn (gen_ior<mode>3 (x, tmp, tmp2)); + else + emit_insn (gen_and<mode>3 (x, tmp, tmp2)); + + if (invert) + { + rtx tmp = gen_reg_rtx (<MODE>mode); + emit_insn (gen_one_cmpl<mode>2 (tmp, x)); + x = tmp; + } + + int sh = GET_MODE_BITSIZE (<MODE>mode) - 1; + emit_insn (gen_lshr<mode>3 (op0, x, GEN_INT (sh))); + + DONE; +}) + +(define_expand "cstore<mode>4" + [(use (match_operator 1 "rs6000_cbranch_operator" + [(match_operand:GPR 2 "gpc_reg_operand") + (match_operand:GPR 3 "reg_or_short_operand")])) + (clobber (match_operand:GPR 0 "gpc_reg_operand"))] + "" +{ + /* Use ISEL if the user asked for it. */ + if (TARGET_ISEL) + rs6000_emit_sISEL (<MODE>mode, operands); + + /* Expanding EQ and NE directly to some machine instructions does not help + but does hurt combine. So don't. 
*/ + else if (GET_CODE (operands[1]) == EQ) + emit_insn (gen_eq<mode>3 (operands[0], operands[2], operands[3])); + else if (<MODE>mode == Pmode + && GET_CODE (operands[1]) == NE) + emit_insn (gen_ne<mode>3 (operands[0], operands[2], operands[3])); + else if (GET_CODE (operands[1]) == NE) + { + rtx tmp = gen_reg_rtx (<MODE>mode); + emit_insn (gen_eq<mode>3 (tmp, operands[2], operands[3])); + emit_insn (gen_xor<mode>3 (operands[0], tmp, const1_rtx)); + } + + /* Expanding the unsigned comparisons however helps a lot: all the neg_ltu + etc. combinations magically work out just right. */ + else if (<MODE>mode == Pmode + && unsigned_comparison_operator (operands[1], VOIDmode)) + emit_insn (gen_cstore<mode>4_unsigned (operands[0], operands[1], + operands[2], operands[3])); + + /* For comparisons smaller than Pmode we can cheaply do things in Pmode. */ + else if (<MODE>mode == SImode && Pmode == DImode) + emit_insn (gen_cstore_si_as_di (operands[0], operands[1], + operands[2], operands[3])); + + /* For signed comparisons against a constant, we can do some simple + bit-twiddling. */ + else if (signed_comparison_operator (operands[1], VOIDmode) + && CONST_INT_P (operands[3])) + emit_insn (gen_cstore<mode>4_signed_imm (operands[0], operands[1], + operands[2], operands[3])); + + /* And similarly for unsigned comparisons. */ + else if (unsigned_comparison_operator (operands[1], VOIDmode) + && CONST_INT_P (operands[3])) + emit_insn (gen_cstore<mode>4_unsigned_imm (operands[0], operands[1], + operands[2], operands[3])); + + /* We also do not want to use mfcr for signed comparisons. */ + else if (<MODE>mode == Pmode + && signed_comparison_operator (operands[1], VOIDmode)) + emit_insn (gen_cstore<mode>4_signed (operands[0], operands[1], + operands[2], operands[3])); + + /* Everything else, use the mfcr brute force. 
*/ + else + rs6000_emit_sCOND (<MODE>mode, operands); + + DONE; +}) + +(define_expand "cstore<mode>4" + [(use (match_operator 1 "rs6000_cbranch_operator" + [(match_operand:FP 2 "gpc_reg_operand") + (match_operand:FP 3 "gpc_reg_operand")])) + (clobber (match_operand:SI 0 "gpc_reg_operand"))] + "" +{ + rs6000_emit_sCOND (<MODE>mode, operands); + DONE; +}) + + +(define_expand "stack_protect_set" + [(match_operand 0 "memory_operand") + (match_operand 1 "memory_operand")] + "" +{ + if (rs6000_stack_protector_guard == SSP_TLS) + { + rtx reg = gen_rtx_REG (Pmode, rs6000_stack_protector_guard_reg); + rtx offset = GEN_INT (rs6000_stack_protector_guard_offset); + rtx addr = gen_rtx_PLUS (Pmode, reg, offset); + operands[1] = gen_rtx_MEM (Pmode, addr); + } + + if (TARGET_64BIT) + emit_insn (gen_stack_protect_setdi (operands[0], operands[1])); + else + emit_insn (gen_stack_protect_setsi (operands[0], operands[1])); + + DONE; +}) + +(define_insn "stack_protect_setsi" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET)) + (set (match_scratch:SI 2 "=&r") (const_int 0))] + "TARGET_32BIT" + "lwz%U1%X1 %2,%1\;stw%U0%X0 %2,%0\;li %2,0" + [(set_attr "type" "three") + (set_attr "length" "12")]) + +(define_insn "stack_protect_setdi" + [(set (match_operand:DI 0 "memory_operand" "=Y") + (unspec:DI [(match_operand:DI 1 "memory_operand" "Y")] UNSPEC_SP_SET)) + (set (match_scratch:DI 2 "=&r") (const_int 0))] + "TARGET_64BIT" + "ld%U1%X1 %2,%1\;std%U0%X0 %2,%0\;li %2,0" + [(set_attr "type" "three") + (set_attr "length" "12")]) + +(define_expand "stack_protect_test" + [(match_operand 0 "memory_operand") + (match_operand 1 "memory_operand") + (match_operand 2 "")] + "" +{ + rtx guard = operands[1]; + + if (rs6000_stack_protector_guard == SSP_TLS) + { + rtx reg = gen_rtx_REG (Pmode, rs6000_stack_protector_guard_reg); + rtx offset = GEN_INT (rs6000_stack_protector_guard_offset); + rtx addr = gen_rtx_PLUS (Pmode, reg, offset); + guard = gen_rtx_MEM (Pmode, addr); + } + + operands[1] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, guard), UNSPEC_SP_TEST); + rtx test = gen_rtx_EQ (VOIDmode, operands[0], operands[1]); + rtx jump = gen_cbranchsi4 (test, operands[0], operands[1], operands[2]); + emit_jump_insn (jump); + + DONE; +}) + +(define_insn "stack_protect_testsi" + [(set (match_operand:CCEQ 0 "cc_reg_operand" "=x,?y") + (unspec:CCEQ [(match_operand:SI 1 "memory_operand" "m,m") + (match_operand:SI 2 "memory_operand" "m,m")] + UNSPEC_SP_TEST)) + (set (match_scratch:SI 4 "=r,r") (const_int 0)) + (clobber (match_scratch:SI 3 "=&r,&r"))] + "TARGET_32BIT" + "@ + lwz%U1%X1 %3,%1\;lwz%U2%X2 %4,%2\;xor. %3,%3,%4\;li %4,0 + lwz%U1%X1 %3,%1\;lwz%U2%X2 %4,%2\;cmplw %0,%3,%4\;li %3,0\;li %4,0" + [(set_attr "length" "16,20")]) + +(define_insn "stack_protect_testdi" + [(set (match_operand:CCEQ 0 "cc_reg_operand" "=x,?y") + (unspec:CCEQ [(match_operand:DI 1 "memory_operand" "Y,Y") + (match_operand:DI 2 "memory_operand" "Y,Y")] + UNSPEC_SP_TEST)) + (set (match_scratch:DI 4 "=r,r") (const_int 0)) + (clobber (match_scratch:DI 3 "=&r,&r"))] + "TARGET_64BIT" + "@ + ld%U1%X1 %3,%1\;ld%U2%X2 %4,%2\;xor. %3,%3,%4\;li %4,0 + ld%U1%X1 %3,%1\;ld%U2%X2 %4,%2\;cmpld %0,%3,%4\;li %3,0\;li %4,0" + [(set_attr "length" "16,20")]) + + +;; Here are the actual compare insns. 
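+;; A quick illustration (sketch): a signed word compare of r3 against 5 is
+;;   cmpwi 0,3,5          ; cmpdi for DImode, cmpw/cmpd for a register RHS
+;; while the unsigned pattern uses cmplwi/cmpldi (cmplw/cmpld for registers);
+;; the %I2 output modifier appends the "i" when operand 2 is a constant.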
+(define_insn "*cmp<mode>_signed" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (compare:CC (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "reg_or_short_operand" "rI")))] + "" + "cmp<wd>%I2 %0,%1,%2" + [(set_attr "type" "cmp")]) + +(define_insn "*cmp<mode>_unsigned" + [(set (match_operand:CCUNS 0 "cc_reg_operand" "=y") + (compare:CCUNS (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "reg_or_u_short_operand" "rK")))] + "" + "cmpl<wd>%I2 %0,%1,%2" + [(set_attr "type" "cmp")]) + +;; If we are comparing a register for equality with a large constant, +;; we can do this with an XOR followed by a compare. But this is profitable +;; only if the large constant is only used for the comparison (and in this +;; case we already have a register to reuse as scratch). +;; +;; For 64-bit registers, we could only do so if the constant's bit 15 is clear: +;; otherwise we'd need to XOR with FFFFFFFF????0000 which is not available. + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "logical_const_operand" "")) + (set (match_dup 0) (match_operator:SI 3 "boolean_or_operator" + [(match_dup 0) + (match_operand:SI 2 "logical_const_operand" "")])) + (set (match_operand:CC 4 "cc_reg_operand" "") + (compare:CC (match_operand:SI 5 "gpc_reg_operand" "") + (match_dup 0))) + (set (pc) + (if_then_else (match_operator 6 "equality_operator" + [(match_dup 4) (const_int 0)]) + (match_operand 7 "" "") + (match_operand 8 "" "")))] + "peep2_reg_dead_p (3, operands[0]) + && peep2_reg_dead_p (4, operands[4]) + && REGNO (operands[0]) != REGNO (operands[5])" + [(set (match_dup 0) (xor:SI (match_dup 5) (match_dup 9))) + (set (match_dup 4) (compare:CC (match_dup 0) (match_dup 10))) + (set (pc) (if_then_else (match_dup 6) (match_dup 7) (match_dup 8)))] + +{ + /* Get the constant we are comparing against, and see what it looks like + when sign-extended from 16 to 32 bits. Then see what constant we could + XOR with SEXTC to get the sign-extended value. */ + rtx cnst = simplify_const_binary_operation (GET_CODE (operands[3]), + SImode, + operands[1], operands[2]); + HOST_WIDE_INT c = INTVAL (cnst); + HOST_WIDE_INT sextc = ((c & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT xorv = c ^ sextc; + + operands[9] = GEN_INT (xorv); + operands[10] = GEN_INT (sextc); +}) + +;; The following two insns don't exist as single insns, but if we provide +;; them, we can swap an add and compare, which will enable us to overlap more +;; of the required delay between a compare and branch. We generate code for +;; them by splitting. 
+ +(define_insn "" + [(set (match_operand:CC 3 "cc_reg_operand" "=y") + (compare:CC (match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "short_cint_operand" "i"))) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (plus:SI (match_dup 1) (match_operand:SI 4 "short_cint_operand" "i")))] + "" + "#" + [(set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:CCUNS 3 "cc_reg_operand" "=y") + (compare:CCUNS (match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "u_short_cint_operand" "i"))) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (plus:SI (match_dup 1) (match_operand:SI 4 "short_cint_operand" "i")))] + "" + "#" + [(set_attr "length" "8")]) + +(define_split + [(set (match_operand:CC 3 "cc_reg_operand" "") + (compare:CC (match_operand:SI 1 "gpc_reg_operand" "") + (match_operand:SI 2 "short_cint_operand" ""))) + (set (match_operand:SI 0 "gpc_reg_operand" "") + (plus:SI (match_dup 1) (match_operand:SI 4 "short_cint_operand" "")))] + "" + [(set (match_dup 3) (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 4)))]) + +(define_split + [(set (match_operand:CCUNS 3 "cc_reg_operand" "") + (compare:CCUNS (match_operand:SI 1 "gpc_reg_operand" "") + (match_operand:SI 2 "u_short_cint_operand" ""))) + (set (match_operand:SI 0 "gpc_reg_operand" "") + (plus:SI (match_dup 1) (match_operand:SI 4 "short_cint_operand" "")))] + "" + [(set (match_dup 3) (compare:CCUNS (match_dup 1) (match_dup 2))) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 4)))]) + +;; Only need to compare second words if first words equal +(define_insn "*cmp<mode>_internal1" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (compare:CCFP (match_operand:IBM128 1 "gpc_reg_operand" "d") + (match_operand:IBM128 2 "gpc_reg_operand" "d")))] + "!TARGET_XL_COMPAT && FLOAT128_IBM_P (<MODE>mode) + && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LONG_DOUBLE_128" + "fcmpu %0,%1,%2\;bne %0,$+8\;fcmpu %0,%L1,%L2" + [(set_attr "type" "fpcompare") + (set_attr "length" "12")]) + +(define_insn_and_split "*cmp<mode>_internal2" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (compare:CCFP (match_operand:IBM128 1 "gpc_reg_operand" "d") + (match_operand:IBM128 2 "gpc_reg_operand" "d"))) + (clobber (match_scratch:DF 3 "=d")) + (clobber (match_scratch:DF 4 "=d")) + (clobber (match_scratch:DF 5 "=d")) + (clobber (match_scratch:DF 6 "=d")) + (clobber (match_scratch:DF 7 "=d")) + (clobber (match_scratch:DF 8 "=d")) + (clobber (match_scratch:DF 9 "=d")) + (clobber (match_scratch:DF 10 "=d")) + (clobber (match_scratch:GPR 11 "=b"))] + "TARGET_XL_COMPAT && FLOAT128_IBM_P (<MODE>mode) + && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LONG_DOUBLE_128" + "#" + "&& reload_completed" + [(set (match_dup 3) (match_dup 14)) + (set (match_dup 4) (match_dup 15)) + (set (match_dup 9) (abs:DF (match_dup 5))) + (set (match_dup 0) (compare:CCFP (match_dup 9) (match_dup 3))) + (set (pc) (if_then_else (ne (match_dup 0) (const_int 0)) + (label_ref (match_dup 12)) + (pc))) + (set (match_dup 0) (compare:CCFP (match_dup 5) (match_dup 7))) + (set (pc) (label_ref (match_dup 13))) + (match_dup 12) + (set (match_dup 10) (minus:DF (match_dup 5) (match_dup 7))) + (set (match_dup 9) (minus:DF (match_dup 6) (match_dup 8))) + (set (match_dup 9) (plus:DF (match_dup 10) (match_dup 9))) + (set (match_dup 0) (compare:CCFP (match_dup 9) (match_dup 4))) + (match_dup 13)] +{ + REAL_VALUE_TYPE rv; + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? 
GET_MODE_SIZE (DFmode) : 0; + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode); + + operands[5] = simplify_gen_subreg (DFmode, operands[1], <MODE>mode, hi_word); + operands[6] = simplify_gen_subreg (DFmode, operands[1], <MODE>mode, lo_word); + operands[7] = simplify_gen_subreg (DFmode, operands[2], <MODE>mode, hi_word); + operands[8] = simplify_gen_subreg (DFmode, operands[2], <MODE>mode, lo_word); + operands[12] = gen_label_rtx (); + operands[13] = gen_label_rtx (); + real_inf (&rv); + operands[14] = force_const_mem (DFmode, + const_double_from_real_value (rv, DFmode)); + operands[15] = force_const_mem (DFmode, + const_double_from_real_value (dconst0, + DFmode)); + if (TARGET_TOC) + { + rtx tocref; + tocref = create_TOC_reference (XEXP (operands[14], 0), operands[11]); + operands[14] = gen_const_mem (DFmode, tocref); + tocref = create_TOC_reference (XEXP (operands[15], 0), operands[11]); + operands[15] = gen_const_mem (DFmode, tocref); + set_mem_alias_set (operands[14], get_TOC_alias_set ()); + set_mem_alias_set (operands[15], get_TOC_alias_set ()); + } +}) + +;; Now we have the scc insns. We can do some combinations because of the +;; way the machine works. +;; +;; Note that this is probably faster if we can put an insn between the +;; mfcr and rlinm, but this is tricky. Let's leave it for now. In most +;; cases the insns below which don't use an intermediate CR field will +;; be used instead. +(define_insn "" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (match_operator:SI 1 "scc_comparison_operator" + [(match_operand 2 "cc_reg_operand" "y") + (const_int 0)]))] + "" + "mfcr %0%Q2\;rlwinm %0,%0,%J1,1" + [(set (attr "type") + (cond [(match_test "TARGET_MFCRF") + (const_string "mfcrf") + ] + (const_string "mfcr"))) + (set_attr "length" "8")]) + +;; Same as above, but get the GT bit. +(define_insn "move_from_CR_gt_bit" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (unspec:SI [(match_operand 1 "cc_reg_operand" "y")] UNSPEC_MV_CR_GT))] + "TARGET_HARD_FLOAT && !TARGET_FPRS" + "mfcr %0\;rlwinm %0,%0,%D1,31,31" + [(set_attr "type" "mfcr") + (set_attr "length" "8")]) + +;; Same as above, but get the OV/ORDERED bit. +(define_insn "move_from_CR_ov_bit" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (unspec:SI [(match_operand:CC 1 "cc_reg_operand" "y")] + UNSPEC_MV_CR_OV))] + "TARGET_ISEL" + "mfcr %0\;rlwinm %0,%0,%t1,1" + [(set_attr "type" "mfcr") + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (match_operator:DI 1 "scc_comparison_operator" + [(match_operand 2 "cc_reg_operand" "y") + (const_int 0)]))] + "TARGET_POWERPC64" + "mfcr %0%Q2\;rlwinm %0,%0,%J1,1" + [(set (attr "type") + (cond [(match_test "TARGET_MFCRF") + (const_string "mfcrf") + ] + (const_string "mfcr"))) + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") + (compare:CC (match_operator:SI 1 "scc_comparison_operator" + [(match_operand 2 "cc_reg_operand" "y,y") + (const_int 0)]) + (const_int 0))) + (set (match_operand:SI 3 "gpc_reg_operand" "=r,r") + (match_op_dup 1 [(match_dup 2) (const_int 0)]))] + "TARGET_32BIT" + "@ + mfcr %3%Q2\;rlwinm. 
%3,%3,%J1,1 + #" + [(set_attr "type" "shift") + (set_attr "dot" "yes") + (set_attr "length" "8,16")]) + +(define_split + [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "") + (compare:CC (match_operator:SI 1 "scc_comparison_operator" + [(match_operand 2 "cc_reg_operand" "") + (const_int 0)]) + (const_int 0))) + (set (match_operand:SI 3 "gpc_reg_operand" "") + (match_op_dup 1 [(match_dup 2) (const_int 0)]))] + "TARGET_32BIT && reload_completed" + [(set (match_dup 3) + (match_op_dup 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (compare:CC (match_dup 3) + (const_int 0)))] + "") + +(define_insn "" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (ashift:SI (match_operator:SI 1 "scc_comparison_operator" + [(match_operand 2 "cc_reg_operand" "y") + (const_int 0)]) + (match_operand:SI 3 "const_int_operand" "n")))] + "" + "* +{ + int is_bit = ccr_bit (operands[1], 1); + int put_bit = 31 - (INTVAL (operands[3]) & 31); + int count; + + if (is_bit >= put_bit) + count = is_bit - put_bit; + else + count = 32 - (put_bit - is_bit); + + operands[4] = GEN_INT (count); + operands[5] = GEN_INT (put_bit); + + return \"mfcr %0%Q2\;rlwinm %0,%0,%4,%5,%5\"; +}" + [(set (attr "type") + (cond [(match_test "TARGET_MFCRF") + (const_string "mfcrf") + ] + (const_string "mfcr"))) + (set_attr "length" "8")]) + +(define_insn "" + [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") + (compare:CC + (ashift:SI (match_operator:SI 1 "scc_comparison_operator" + [(match_operand 2 "cc_reg_operand" "y,y") + (const_int 0)]) + (match_operand:SI 3 "const_int_operand" "n,n")) + (const_int 0))) + (set (match_operand:SI 4 "gpc_reg_operand" "=r,r") + (ashift:SI (match_op_dup 1 [(match_dup 2) (const_int 0)]) + (match_dup 3)))] + "" + "* +{ + int is_bit = ccr_bit (operands[1], 1); + int put_bit = 31 - (INTVAL (operands[3]) & 31); + int count; + + /* Force split for non-cc0 compare. */ + if (which_alternative == 1) + return \"#\"; + + if (is_bit >= put_bit) + count = is_bit - put_bit; + else + count = 32 - (put_bit - is_bit); + + operands[5] = GEN_INT (count); + operands[6] = GEN_INT (put_bit); + + return \"mfcr %4%Q2\;rlwinm. 
%4,%4,%5,%6,%6\"; +}" + [(set_attr "type" "shift") + (set_attr "dot" "yes") + (set_attr "length" "8,16")]) + +(define_split + [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "") + (compare:CC + (ashift:SI (match_operator:SI 1 "scc_comparison_operator" + [(match_operand 2 "cc_reg_operand" "") + (const_int 0)]) + (match_operand:SI 3 "const_int_operand" "")) + (const_int 0))) + (set (match_operand:SI 4 "gpc_reg_operand" "") + (ashift:SI (match_op_dup 1 [(match_dup 2) (const_int 0)]) + (match_dup 3)))] + "reload_completed" + [(set (match_dup 4) + (ashift:SI (match_op_dup 1 [(match_dup 2) (const_int 0)]) + (match_dup 3))) + (set (match_dup 0) + (compare:CC (match_dup 4) + (const_int 0)))] + "") + + +(define_mode_attr scc_eq_op2 [(SI "rKLI") + (DI "rKJI")]) + +(define_insn_and_split "eq<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (eq:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "scc_eq_operand" "<scc_eq_op2>"))) + (clobber (match_scratch:GPR 3 "=r")) + (clobber (match_scratch:GPR 4 "=r"))] + "" + "#" + "" + [(set (match_dup 4) + (clz:GPR (match_dup 3))) + (set (match_dup 0) + (lshiftrt:GPR (match_dup 4) + (match_dup 5)))] +{ + operands[3] = rs6000_emit_eqne (<MODE>mode, + operands[1], operands[2], operands[3]); + + if (GET_CODE (operands[4]) == SCRATCH) + operands[4] = gen_reg_rtx (<MODE>mode); + + operands[5] = GEN_INT (exact_log2 (GET_MODE_BITSIZE (<MODE>mode))); +} + [(set (attr "length") + (if_then_else (match_test "operands[2] == const0_rtx") + (const_string "8") + (const_string "12")))]) + +(define_insn_and_split "ne<mode>3" + [(set (match_operand:P 0 "gpc_reg_operand" "=r") + (ne:P (match_operand:P 1 "gpc_reg_operand" "r") + (match_operand:P 2 "scc_eq_operand" "<scc_eq_op2>"))) + (clobber (match_scratch:P 3 "=r")) + (clobber (match_scratch:P 4 "=r")) + (clobber (reg:P CA_REGNO))] + "!TARGET_ISEL" + "#" + "" + [(parallel [(set (match_dup 4) + (plus:P (match_dup 3) + (const_int -1))) + (set (reg:P CA_REGNO) + (ne:P (match_dup 3) + (const_int 0)))]) + (parallel [(set (match_dup 0) + (plus:P (plus:P (not:P (match_dup 4)) + (reg:P CA_REGNO)) + (match_dup 3))) + (clobber (reg:P CA_REGNO))])] +{ + operands[3] = rs6000_emit_eqne (<MODE>mode, + operands[1], operands[2], operands[3]); + + if (GET_CODE (operands[4]) == SCRATCH) + operands[4] = gen_reg_rtx (<MODE>mode); +} + [(set (attr "length") + (if_then_else (match_test "operands[2] == const0_rtx") + (const_string "8") + (const_string "12")))]) + +(define_insn_and_split "*neg_eq_<mode>" + [(set (match_operand:P 0 "gpc_reg_operand" "=r") + (neg:P (eq:P (match_operand:P 1 "gpc_reg_operand" "r") + (match_operand:P 2 "scc_eq_operand" "<scc_eq_op2>")))) + (clobber (match_scratch:P 3 "=r")) + (clobber (match_scratch:P 4 "=r")) + (clobber (reg:P CA_REGNO))] + "" + "#" + "" + [(parallel [(set (match_dup 4) + (plus:P (match_dup 3) + (const_int -1))) + (set (reg:P CA_REGNO) + (ne:P (match_dup 3) + (const_int 0)))]) + (parallel [(set (match_dup 0) + (plus:P (reg:P CA_REGNO) + (const_int -1))) + (clobber (reg:P CA_REGNO))])] +{ + operands[3] = rs6000_emit_eqne (<MODE>mode, + operands[1], operands[2], operands[3]); + + if (GET_CODE (operands[4]) == SCRATCH) + operands[4] = gen_reg_rtx (<MODE>mode); +} + [(set (attr "length") + (if_then_else (match_test "operands[2] == const0_rtx") + (const_string "8") + (const_string "12")))]) + +(define_insn_and_split "*neg_ne_<mode>" + [(set (match_operand:P 0 "gpc_reg_operand" "=r") + (neg:P (ne:P (match_operand:P 1 "gpc_reg_operand" "r") + (match_operand:P 2 
"scc_eq_operand" "<scc_eq_op2>")))) + (clobber (match_scratch:P 3 "=r")) + (clobber (match_scratch:P 4 "=r")) + (clobber (reg:P CA_REGNO))] + "" + "#" + "" + [(parallel [(set (match_dup 4) + (neg:P (match_dup 3))) + (set (reg:P CA_REGNO) + (eq:P (match_dup 3) + (const_int 0)))]) + (parallel [(set (match_dup 0) + (plus:P (reg:P CA_REGNO) + (const_int -1))) + (clobber (reg:P CA_REGNO))])] +{ + operands[3] = rs6000_emit_eqne (<MODE>mode, + operands[1], operands[2], operands[3]); + + if (GET_CODE (operands[4]) == SCRATCH) + operands[4] = gen_reg_rtx (<MODE>mode); +} + [(set (attr "length") + (if_then_else (match_test "operands[2] == const0_rtx") + (const_string "8") + (const_string "12")))]) + +(define_insn_and_split "*plus_eq_<mode>" + [(set (match_operand:P 0 "gpc_reg_operand" "=r") + (plus:P (eq:P (match_operand:P 1 "gpc_reg_operand" "r") + (match_operand:P 2 "scc_eq_operand" "<scc_eq_op2>")) + (match_operand:P 3 "gpc_reg_operand" "r"))) + (clobber (match_scratch:P 4 "=r")) + (clobber (match_scratch:P 5 "=r")) + (clobber (reg:P CA_REGNO))] + "" + "#" + "" + [(parallel [(set (match_dup 5) + (neg:P (match_dup 4))) + (set (reg:P CA_REGNO) + (eq:P (match_dup 4) + (const_int 0)))]) + (parallel [(set (match_dup 0) + (plus:P (match_dup 3) + (reg:P CA_REGNO))) + (clobber (reg:P CA_REGNO))])] +{ + operands[4] = rs6000_emit_eqne (<MODE>mode, + operands[1], operands[2], operands[4]); + + if (GET_CODE (operands[5]) == SCRATCH) + operands[5] = gen_reg_rtx (<MODE>mode); +} + [(set (attr "length") + (if_then_else (match_test "operands[2] == const0_rtx") + (const_string "8") + (const_string "12")))]) + +(define_insn_and_split "*plus_ne_<mode>" + [(set (match_operand:P 0 "gpc_reg_operand" "=r") + (plus:P (ne:P (match_operand:P 1 "gpc_reg_operand" "r") + (match_operand:P 2 "scc_eq_operand" "<scc_eq_op2>")) + (match_operand:P 3 "gpc_reg_operand" "r"))) + (clobber (match_scratch:P 4 "=r")) + (clobber (match_scratch:P 5 "=r")) + (clobber (reg:P CA_REGNO))] + "" + "#" + "" + [(parallel [(set (match_dup 5) + (plus:P (match_dup 4) + (const_int -1))) + (set (reg:P CA_REGNO) + (ne:P (match_dup 4) + (const_int 0)))]) + (parallel [(set (match_dup 0) + (plus:P (match_dup 3) + (reg:P CA_REGNO))) + (clobber (reg:P CA_REGNO))])] +{ + operands[4] = rs6000_emit_eqne (<MODE>mode, + operands[1], operands[2], operands[4]); + + if (GET_CODE (operands[5]) == SCRATCH) + operands[5] = gen_reg_rtx (<MODE>mode); +} + [(set (attr "length") + (if_then_else (match_test "operands[2] == const0_rtx") + (const_string "8") + (const_string "12")))]) + +(define_insn_and_split "*minus_eq_<mode>" + [(set (match_operand:P 0 "gpc_reg_operand" "=r") + (minus:P (match_operand:P 3 "gpc_reg_operand" "r") + (eq:P (match_operand:P 1 "gpc_reg_operand" "r") + (match_operand:P 2 "scc_eq_operand" "<scc_eq_op2>")))) + (clobber (match_scratch:P 4 "=r")) + (clobber (match_scratch:P 5 "=r")) + (clobber (reg:P CA_REGNO))] + "" + "#" + "" + [(parallel [(set (match_dup 5) + (plus:P (match_dup 4) + (const_int -1))) + (set (reg:P CA_REGNO) + (ne:P (match_dup 4) + (const_int 0)))]) + (parallel [(set (match_dup 0) + (plus:P (plus:P (match_dup 3) + (reg:P CA_REGNO)) + (const_int -1))) + (clobber (reg:P CA_REGNO))])] +{ + operands[4] = rs6000_emit_eqne (<MODE>mode, + operands[1], operands[2], operands[4]); + + if (GET_CODE (operands[5]) == SCRATCH) + operands[5] = gen_reg_rtx (<MODE>mode); +} + [(set (attr "length") + (if_then_else (match_test "operands[2] == const0_rtx") + (const_string "8") + (const_string "12")))]) + +(define_insn_and_split "*minus_ne_<mode>" + 
[(set (match_operand:P 0 "gpc_reg_operand" "=r") + (minus:P (match_operand:P 3 "gpc_reg_operand" "r") + (ne:P (match_operand:P 1 "gpc_reg_operand" "r") + (match_operand:P 2 "scc_eq_operand" "<scc_eq_op2>")))) + (clobber (match_scratch:P 4 "=r")) + (clobber (match_scratch:P 5 "=r")) + (clobber (reg:P CA_REGNO))] + "" + "#" + "" + [(parallel [(set (match_dup 5) + (neg:P (match_dup 4))) + (set (reg:P CA_REGNO) + (eq:P (match_dup 4) + (const_int 0)))]) + (parallel [(set (match_dup 0) + (plus:P (plus:P (match_dup 3) + (reg:P CA_REGNO)) + (const_int -1))) + (clobber (reg:P CA_REGNO))])] +{ + operands[4] = rs6000_emit_eqne (<MODE>mode, + operands[1], operands[2], operands[4]); + + if (GET_CODE (operands[5]) == SCRATCH) + operands[5] = gen_reg_rtx (<MODE>mode); +} + [(set (attr "length") + (if_then_else (match_test "operands[2] == const0_rtx") + (const_string "8") + (const_string "12")))]) + +(define_insn_and_split "*eqsi3_ext<mode>" + [(set (match_operand:EXTSI 0 "gpc_reg_operand" "=r") + (eq:EXTSI (match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "scc_eq_operand" "rKLI"))) + (clobber (match_scratch:SI 3 "=r")) + (clobber (match_scratch:SI 4 "=r"))] + "" + "#" + "" + [(set (match_dup 4) + (clz:SI (match_dup 3))) + (set (match_dup 0) + (zero_extend:EXTSI + (lshiftrt:SI (match_dup 4) + (const_int 5))))] +{ + operands[3] = rs6000_emit_eqne (SImode, + operands[1], operands[2], operands[3]); + + if (GET_CODE (operands[4]) == SCRATCH) + operands[4] = gen_reg_rtx (SImode); +} + [(set (attr "length") + (if_then_else (match_test "operands[2] == const0_rtx") + (const_string "8") + (const_string "12")))]) + +(define_insn_and_split "*nesi3_ext<mode>" + [(set (match_operand:EXTSI 0 "gpc_reg_operand" "=r") + (ne:EXTSI (match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "scc_eq_operand" "rKLI"))) + (clobber (match_scratch:SI 3 "=r")) + (clobber (match_scratch:SI 4 "=r")) + (clobber (match_scratch:EXTSI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 4) + (clz:SI (match_dup 3))) + (set (match_dup 5) + (zero_extend:EXTSI + (lshiftrt:SI (match_dup 4) + (const_int 5)))) + (set (match_dup 0) + (xor:EXTSI (match_dup 5) + (const_int 1)))] +{ + operands[3] = rs6000_emit_eqne (SImode, + operands[1], operands[2], operands[3]); + + if (GET_CODE (operands[4]) == SCRATCH) + operands[4] = gen_reg_rtx (SImode); + if (GET_CODE (operands[5]) == SCRATCH) + operands[5] = gen_reg_rtx (<MODE>mode); +} + [(set (attr "length") + (if_then_else (match_test "operands[2] == const0_rtx") + (const_string "12") + (const_string "16")))]) + +;; Define both directions of branch and return. If we need a reload +;; register, we'd rather use CR0 since it is much easier to copy a +;; register CC value to there. 
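;; Before the branch and return insns introduced in the comment just above, an
;; editorial sketch of the trick behind the eq<mode>3/ne<mode>3 splitters:
;; once rs6000_emit_eqne has produced a value that is zero exactly when the
;; comparison holds, the boolean is formed without touching a condition
;; register, using the fact that cntlzw of zero is 32 (cntlzd of zero is 64).
;; A rough C equivalent of the 32-bit case, for illustration only:

/* cntlzw semantics; __builtin_clz (0) is undefined in C, hence the guard.  */
static unsigned clz32 (unsigned x) { return x ? __builtin_clz (x) : 32; }

/* eq: cntlzw (a ^ b) >> 5 is 1 when a == b (32 >> 5) and 0 otherwise.  */
static unsigned scc_eq (unsigned a, unsigned b) { return clz32 (a ^ b) >> 5; }

/* ne is built with a carry-based add/subtract sequence in the patterns above;
   its value is simply the complement.  */
static unsigned scc_ne (unsigned a, unsigned b) { return scc_eq (a, b) ^ 1; }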
+ +(define_insn "" + [(set (pc) + (if_then_else (match_operator 1 "branch_comparison_operator" + [(match_operand 2 + "cc_reg_operand" "y") + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "* +{ + return output_cbranch (operands[1], \"%l0\", 0, insn); +}" + [(set_attr "type" "branch")]) + +(define_insn "" + [(set (pc) + (if_then_else (match_operator 0 "branch_comparison_operator" + [(match_operand 1 + "cc_reg_operand" "y") + (const_int 0)]) + (any_return) + (pc)))] + "<return_pred>" + "* +{ + return output_cbranch (operands[0], NULL, 0, insn); +}" + [(set_attr "type" "jmpreg") + (set_attr "length" "4")]) + +(define_insn "" + [(set (pc) + (if_then_else (match_operator 1 "branch_comparison_operator" + [(match_operand 2 + "cc_reg_operand" "y") + (const_int 0)]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + "* +{ + return output_cbranch (operands[1], \"%l0\", 1, insn); +}" + [(set_attr "type" "branch")]) + +(define_insn "" + [(set (pc) + (if_then_else (match_operator 0 "branch_comparison_operator" + [(match_operand 1 + "cc_reg_operand" "y") + (const_int 0)]) + (pc) + (any_return)))] + "<return_pred>" + "* +{ + return output_cbranch (operands[0], NULL, 1, insn); +}" + [(set_attr "type" "jmpreg") + (set_attr "length" "4")]) + +;; Logic on condition register values. + +; This pattern matches things like +; (set (reg:CCEQ 68) (compare:CCEQ (ior:SI (gt:SI (reg:CCFP 68) (const_int 0)) +; (eq:SI (reg:CCFP 68) (const_int 0))) +; (const_int 1))) +; which are generated by the branch logic. +; Prefer destructive operations where BT = BB (for crXX BT,BA,BB) + +(define_insn "*cceq_ior_compare" + [(set (match_operand:CCEQ 0 "cc_reg_operand" "=y,?y") + (compare:CCEQ (match_operator:SI 1 "boolean_operator" + [(match_operator:SI 2 + "branch_positive_comparison_operator" + [(match_operand 3 + "cc_reg_operand" "y,y") + (const_int 0)]) + (match_operator:SI 4 + "branch_positive_comparison_operator" + [(match_operand 5 + "cc_reg_operand" "0,y") + (const_int 0)])]) + (const_int 1)))] + "" + "cr%q1 %E0,%j2,%j4" + [(set_attr "type" "cr_logical,delayed_cr")]) + +; Why is the constant -1 here, but 1 in the previous pattern? +; Because ~1 has all but the low bit set. +(define_insn "" + [(set (match_operand:CCEQ 0 "cc_reg_operand" "=y,?y") + (compare:CCEQ (match_operator:SI 1 "boolean_or_operator" + [(not:SI (match_operator:SI 2 + "branch_positive_comparison_operator" + [(match_operand 3 + "cc_reg_operand" "y,y") + (const_int 0)])) + (match_operator:SI 4 + "branch_positive_comparison_operator" + [(match_operand 5 + "cc_reg_operand" "0,y") + (const_int 0)])]) + (const_int -1)))] + "" + "cr%q1 %E0,%j2,%j4" + [(set_attr "type" "cr_logical,delayed_cr")]) + +(define_insn "*cceq_rev_compare" + [(set (match_operand:CCEQ 0 "cc_reg_operand" "=y,?y") + (compare:CCEQ (match_operator:SI 1 + "branch_positive_comparison_operator" + [(match_operand 2 + "cc_reg_operand" "0,y") + (const_int 0)]) + (const_int 0)))] + "" + "crnot %E0,%j1" + [(set_attr "type" "cr_logical,delayed_cr")]) + +;; If we are comparing the result of two comparisons, this can be done +;; using creqv or crxor. 
+ +(define_insn_and_split "" + [(set (match_operand:CCEQ 0 "cc_reg_operand" "=y") + (compare:CCEQ (match_operator 1 "branch_comparison_operator" + [(match_operand 2 "cc_reg_operand" "y") + (const_int 0)]) + (match_operator 3 "branch_comparison_operator" + [(match_operand 4 "cc_reg_operand" "y") + (const_int 0)])))] + "" + "#" + "" + [(set (match_dup 0) (compare:CCEQ (xor:SI (match_dup 1) (match_dup 3)) + (match_dup 5)))] + " +{ + int positive_1, positive_2; + + positive_1 = branch_positive_comparison_operator (operands[1], + GET_MODE (operands[1])); + positive_2 = branch_positive_comparison_operator (operands[3], + GET_MODE (operands[3])); + + if (! positive_1) + operands[1] = gen_rtx_fmt_ee (rs6000_reverse_condition (GET_MODE (operands[2]), + GET_CODE (operands[1])), + SImode, + operands[2], const0_rtx); + else if (GET_MODE (operands[1]) != SImode) + operands[1] = gen_rtx_fmt_ee (GET_CODE (operands[1]), SImode, + operands[2], const0_rtx); + + if (! positive_2) + operands[3] = gen_rtx_fmt_ee (rs6000_reverse_condition (GET_MODE (operands[4]), + GET_CODE (operands[3])), + SImode, + operands[4], const0_rtx); + else if (GET_MODE (operands[3]) != SImode) + operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[3]), SImode, + operands[4], const0_rtx); + + if (positive_1 == positive_2) + { + operands[1] = gen_rtx_NOT (SImode, operands[1]); + operands[5] = constm1_rtx; + } + else + { + operands[5] = const1_rtx; + } +}") + +;; Unconditional branch and return. + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + "b %l0" + [(set_attr "type" "branch")]) + +(define_insn "<return_str>return" + [(any_return)] + "<return_pred>" + "blr" + [(set_attr "type" "jmpreg")]) + +(define_expand "indirect_jump" + [(set (pc) (match_operand 0 "register_operand" ""))]) + +(define_insn "*indirect_jump<mode>" + [(set (pc) (match_operand:P 0 "register_operand" "c,*l"))] + "" + "@ + bctr + blr" + [(set_attr "type" "jmpreg")]) + +;; Table jump for switch statements: +(define_expand "tablejump" + [(use (match_operand 0 "" "")) + (use (label_ref (match_operand 1 "" "")))] + "" + " +{ + if (TARGET_32BIT) + emit_jump_insn (gen_tablejumpsi (operands[0], operands[1])); + else + emit_jump_insn (gen_tablejumpdi (operands[0], operands[1])); + DONE; +}") + +(define_expand "tablejumpsi" + [(set (match_dup 3) + (plus:SI (match_operand:SI 0 "" "") + (match_dup 2))) + (parallel [(set (pc) (match_dup 3)) + (use (label_ref (match_operand 1 "" "")))])] + "TARGET_32BIT" + " +{ operands[0] = force_reg (SImode, operands[0]); + operands[2] = force_reg (SImode, gen_rtx_LABEL_REF (SImode, operands[1])); + operands[3] = gen_reg_rtx (SImode); +}") + +(define_expand "tablejumpdi" + [(set (match_dup 4) + (sign_extend:DI (match_operand:SI 0 "lwa_operand" ""))) + (set (match_dup 3) + (plus:DI (match_dup 4) + (match_dup 2))) + (parallel [(set (pc) (match_dup 3)) + (use (label_ref (match_operand 1 "" "")))])] + "TARGET_64BIT" + " +{ operands[2] = force_reg (DImode, gen_rtx_LABEL_REF (DImode, operands[1])); + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); +}") + +(define_insn "*tablejump<mode>_internal1" + [(set (pc) + (match_operand:P 0 "register_operand" "c,*l")) + (use (label_ref (match_operand 1 "" "")))] + "" + "@ + bctr + blr" + [(set_attr "type" "jmpreg")]) + +(define_insn "nop" + [(unspec [(const_int 0)] UNSPEC_NOP)] + "" + "nop") + +(define_insn "group_ending_nop" + [(unspec [(const_int 0)] UNSPEC_GRP_END_NOP)] + "" + "* +{ + if (rs6000_cpu_attr == CPU_POWER6) + return \"ori 1,1,0\"; + return 
\"ori 2,2,0\"; +}") + +;; Define the subtract-one-and-jump insns, starting with the template +;; so loop.c knows what to generate. + +(define_expand "doloop_end" + [(use (match_operand 0 "" "")) ; loop pseudo + (use (match_operand 1 "" ""))] ; label + "" + " +{ + if (TARGET_64BIT) + { + if (GET_MODE (operands[0]) != DImode) + FAIL; + emit_jump_insn (gen_ctrdi (operands[0], operands[1])); + } + else + { + if (GET_MODE (operands[0]) != SImode) + FAIL; + emit_jump_insn (gen_ctrsi (operands[0], operands[1])); + } + DONE; +}") + +(define_expand "ctr<mode>" + [(parallel [(set (pc) + (if_then_else (ne (match_operand:P 0 "register_operand" "") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) + (plus:P (match_dup 0) + (const_int -1))) + (clobber (match_scratch:CC 2 "")) + (clobber (match_scratch:P 3 ""))])] + "" + "") + +;; We need to be able to do this for any operand, including MEM, or we +;; will cause reload to blow up since we don't allow output reloads on +;; JUMP_INSNs. +;; For the length attribute to be calculated correctly, the +;; label MUST be operand 0. +;; rs6000_legitimate_combined_insn prevents combine creating any of +;; the ctr<mode> insns. + +(define_insn "ctr<mode>_internal1" + [(set (pc) + (if_then_else (ne (match_operand:P 1 "register_operand" "c,*b,*b,*b") + (const_int 1)) + (label_ref (match_operand 0 "" "")) + (pc))) + (set (match_operand:P 2 "nonimmediate_operand" "=1,*r,m,*d*wi*c*l") + (plus:P (match_dup 1) + (const_int -1))) + (clobber (match_scratch:CC 3 "=X,&x,&x,&x")) + (clobber (match_scratch:P 4 "=X,X,&r,r"))] + "" + "* +{ + if (which_alternative != 0) + return \"#\"; + else if (get_attr_length (insn) == 4) + return \"bdnz %l0\"; + else + return \"bdz $+8\;b %l0\"; +}" + [(set_attr "type" "branch") + (set_attr "length" "*,16,20,20")]) + +(define_insn "ctr<mode>_internal2" + [(set (pc) + (if_then_else (ne (match_operand:P 1 "register_operand" "c,*b,*b,*b") + (const_int 1)) + (pc) + (label_ref (match_operand 0 "" "")))) + (set (match_operand:P 2 "nonimmediate_operand" "=1,*r,m,*d*wi*c*l") + (plus:P (match_dup 1) + (const_int -1))) + (clobber (match_scratch:CC 3 "=X,&x,&x,&x")) + (clobber (match_scratch:P 4 "=X,X,&r,r"))] + "" + "* +{ + if (which_alternative != 0) + return \"#\"; + else if (get_attr_length (insn) == 4) + return \"bdz %l0\"; + else + return \"bdnz $+8\;b %l0\"; +}" + [(set_attr "type" "branch") + (set_attr "length" "*,16,20,20")]) + +;; Similar but use EQ + +(define_insn "ctr<mode>_internal3" + [(set (pc) + (if_then_else (eq (match_operand:P 1 "register_operand" "c,*b,*b,*b") + (const_int 1)) + (label_ref (match_operand 0 "" "")) + (pc))) + (set (match_operand:P 2 "nonimmediate_operand" "=1,*r,m,*d*wi*c*l") + (plus:P (match_dup 1) + (const_int -1))) + (clobber (match_scratch:CC 3 "=X,&x,&x,&x")) + (clobber (match_scratch:P 4 "=X,X,&r,r"))] + "" + "* +{ + if (which_alternative != 0) + return \"#\"; + else if (get_attr_length (insn) == 4) + return \"bdz %l0\"; + else + return \"bdnz $+8\;b %l0\"; +}" + [(set_attr "type" "branch") + (set_attr "length" "*,16,20,20")]) + +(define_insn "ctr<mode>_internal4" + [(set (pc) + (if_then_else (eq (match_operand:P 1 "register_operand" "c,*b,*b,*b") + (const_int 1)) + (pc) + (label_ref (match_operand 0 "" "")))) + (set (match_operand:P 2 "nonimmediate_operand" "=1,*r,m,*d*wi*c*l") + (plus:P (match_dup 1) + (const_int -1))) + (clobber (match_scratch:CC 3 "=X,&x,&x,&x")) + (clobber (match_scratch:P 4 "=X,X,&r,r"))] + "" + "* +{ + if (which_alternative != 0) + return \"#\"; + else if 
(get_attr_length (insn) == 4) + return \"bdnz %l0\"; + else + return \"bdz $+8\;b %l0\"; +}" + [(set_attr "type" "branch") + (set_attr "length" "*,16,20,20")]) + +;; Now the splitters if we could not allocate the CTR register + +(define_split + [(set (pc) + (if_then_else (match_operator 2 "comparison_operator" + [(match_operand:P 1 "gpc_reg_operand" "") + (const_int 1)]) + (match_operand 5 "" "") + (match_operand 6 "" ""))) + (set (match_operand:P 0 "int_reg_operand" "") + (plus:P (match_dup 1) (const_int -1))) + (clobber (match_scratch:CC 3 "")) + (clobber (match_scratch:P 4 ""))] + "reload_completed" + [(set (match_dup 3) + (compare:CC (match_dup 1) + (const_int 1))) + (set (match_dup 0) + (plus:P (match_dup 1) + (const_int -1))) + (set (pc) (if_then_else (match_dup 7) + (match_dup 5) + (match_dup 6)))] + " +{ operands[7] = gen_rtx_fmt_ee (GET_CODE (operands[2]), VOIDmode, + operands[3], const0_rtx); }") + +(define_split + [(set (pc) + (if_then_else (match_operator 2 "comparison_operator" + [(match_operand:P 1 "gpc_reg_operand" "") + (const_int 1)]) + (match_operand 5 "" "") + (match_operand 6 "" ""))) + (set (match_operand:P 0 "nonimmediate_operand" "") + (plus:P (match_dup 1) (const_int -1))) + (clobber (match_scratch:CC 3 "")) + (clobber (match_scratch:P 4 ""))] + "reload_completed && ! gpc_reg_operand (operands[0], SImode)" + [(set (match_dup 3) + (compare:CC (match_dup 1) + (const_int 1))) + (set (match_dup 4) + (plus:P (match_dup 1) + (const_int -1))) + (set (match_dup 0) + (match_dup 4)) + (set (pc) (if_then_else (match_dup 7) + (match_dup 5) + (match_dup 6)))] + " +{ operands[7] = gen_rtx_fmt_ee (GET_CODE (operands[2]), VOIDmode, + operands[3], const0_rtx); }") + +(define_insn "trap" + [(trap_if (const_int 1) (const_int 0))] + "" + "trap" + [(set_attr "type" "trap")]) + +(define_expand "ctrap<mode>4" + [(trap_if (match_operator 0 "ordered_comparison_operator" + [(match_operand:GPR 1 "register_operand") + (match_operand:GPR 2 "reg_or_short_operand")]) + (match_operand 3 "zero_constant" ""))] + "" + "") + +(define_insn "" + [(trap_if (match_operator 0 "ordered_comparison_operator" + [(match_operand:GPR 1 "register_operand" "r") + (match_operand:GPR 2 "reg_or_short_operand" "rI")]) + (const_int 0))] + "" + "t<wd>%V0%I2 %1,%2" + [(set_attr "type" "trap")]) + +;; Insns related to generating the function prologue and epilogue. 
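;; One more editorial aside before the prologue/epilogue insns: a C-level view
;; of the counted-loop (ctr<mode>) patterns above.  The loop count is placed
;; in the CTR register once, and each bdnz decrements it and branches while
;; the decremented value is still non-zero.  Sketch only, assuming a count of
;; at least 1:

void
count_down (long n, void (*body) (long))
{
  long ctr = n;                /* mtctr n */
  do
    body (ctr);
  while (--ctr != 0);          /* bdnz: decrement CTR, branch while non-zero */
}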
+ +(define_expand "prologue" + [(use (const_int 0))] + "" +{ + rs6000_emit_prologue (); + if (!TARGET_SCHED_PROLOG) + emit_insn (gen_blockage ()); + DONE; +}) + +(define_insn "*movesi_from_cr_one" + [(match_parallel 0 "mfcr_operation" + [(set (match_operand:SI 1 "gpc_reg_operand" "=r") + (unspec:SI [(match_operand:CC 2 "cc_reg_operand" "y") + (match_operand 3 "immediate_operand" "n")] + UNSPEC_MOVESI_FROM_CR))])] + "TARGET_MFCRF" + "* +{ + int mask = 0; + int i; + for (i = 0; i < XVECLEN (operands[0], 0); i++) + { + mask = INTVAL (XVECEXP (SET_SRC (XVECEXP (operands[0], 0, i)), 0, 1)); + operands[4] = GEN_INT (mask); + output_asm_insn (\"mfcr %1,%4\", operands); + } + return \"\"; +}" + [(set_attr "type" "mfcrf")]) + +(define_insn "movesi_from_cr" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (unspec:SI [(reg:CC CR0_REGNO) (reg:CC CR1_REGNO) + (reg:CC CR2_REGNO) (reg:CC CR3_REGNO) + (reg:CC CR4_REGNO) (reg:CC CR5_REGNO) + (reg:CC CR6_REGNO) (reg:CC CR7_REGNO)] + UNSPEC_MOVESI_FROM_CR))] + "" + "mfcr %0" + [(set_attr "type" "mfcr")]) + +(define_insn "*crsave" + [(match_parallel 0 "crsave_operation" + [(set (match_operand:SI 1 "memory_operand" "=m") + (match_operand:SI 2 "gpc_reg_operand" "r"))])] + "" + "stw %2,%1" + [(set_attr "type" "store")]) + +(define_insn "*stmw" + [(match_parallel 0 "stmw_operation" + [(set (match_operand:SI 1 "memory_operand" "=m") + (match_operand:SI 2 "gpc_reg_operand" "r"))])] + "TARGET_MULTIPLE" + "stmw %2,%1" + [(set_attr "type" "store") + (set_attr "update" "yes") + (set_attr "indexed" "yes")]) + +; The following comment applies to: +; save_gpregs_* +; save_fpregs_* +; restore_gpregs* +; return_and_restore_gpregs* +; return_and_restore_fpregs* +; return_and_restore_fpregs_aix* +; +; The out-of-line save / restore functions expects one input argument. +; Since those are not standard call_insn's, we must avoid using +; MATCH_OPERAND for that argument. That way the register rename +; optimization will not try to rename this register. 
+; Each pattern is repeated for each possible register number used in +; various ABIs (r11, r1, and for some functions r12) + +(define_insn "*save_gpregs_<mode>_r11" + [(match_parallel 0 "any_parallel_operand" + [(clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (use (reg:P 11)) + (set (match_operand:P 2 "memory_operand" "=m") + (match_operand:P 3 "gpc_reg_operand" "r"))])] + "" + "bl %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*save_gpregs_<mode>_r12" + [(match_parallel 0 "any_parallel_operand" + [(clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (use (reg:P 12)) + (set (match_operand:P 2 "memory_operand" "=m") + (match_operand:P 3 "gpc_reg_operand" "r"))])] + "" + "bl %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*save_gpregs_<mode>_r1" + [(match_parallel 0 "any_parallel_operand" + [(clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (use (reg:P 1)) + (set (match_operand:P 2 "memory_operand" "=m") + (match_operand:P 3 "gpc_reg_operand" "r"))])] + "" + "bl %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*save_fpregs_<mode>_r11" + [(match_parallel 0 "any_parallel_operand" + [(clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (use (reg:P 11)) + (set (match_operand:DF 2 "memory_operand" "=m") + (match_operand:DF 3 "gpc_reg_operand" "d"))])] + "" + "bl %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*save_fpregs_<mode>_r12" + [(match_parallel 0 "any_parallel_operand" + [(clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (use (reg:P 12)) + (set (match_operand:DF 2 "memory_operand" "=m") + (match_operand:DF 3 "gpc_reg_operand" "d"))])] + "" + "bl %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*save_fpregs_<mode>_r1" + [(match_parallel 0 "any_parallel_operand" + [(clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (use (reg:P 1)) + (set (match_operand:DF 2 "memory_operand" "=m") + (match_operand:DF 3 "gpc_reg_operand" "d"))])] + "" + "bl %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +; This is to explain that changes to the stack pointer should +; not be moved over loads from or stores to stack memory. +(define_insn "stack_tie" + [(match_parallel 0 "tie_operand" + [(set (mem:BLK (reg 1)) (const_int 0))])] + "" + "" + [(set_attr "length" "0")]) + +; Some 32-bit ABIs do not have a red zone, so the stack deallocation has to +; stay behind all restores from the stack, it cannot be reordered to before +; one. See PR77687. This insn is an add or mr, and a stack_tie on the +; operands of that. +(define_insn "stack_restore_tie" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r") + (plus:SI (match_operand:SI 1 "gpc_reg_operand" "r,r") + (match_operand:SI 2 "reg_or_cint_operand" "O,rI"))) + (set (mem:BLK (match_dup 0)) (const_int 0)) + (set (mem:BLK (match_dup 1)) (const_int 0))] + "TARGET_32BIT" + "@ + mr %0,%1 + add%I2 %0,%1,%2" + [(set_attr "type" "*,add")]) + +(define_expand "epilogue" + [(use (const_int 0))] + "" +{ + if (!TARGET_SCHED_PROLOG) + emit_insn (gen_blockage ()); + rs6000_emit_epilogue (FALSE); + DONE; +}) + +; On some processors, doing the mtcrf one CC register at a time is +; faster (like on the 604e). On others, doing them all at once is +; faster; for instance, on the 601 and 750. 
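;; For the mtcrf patterns that follow, a small sketch of the FXM mask
;; computation.  The CR fields occupy hard registers CR0_REGNO..CR7_REGNO
;; (68..75, as the "1 << (75 - REGNO)" expression below implies), and mtcrf
;; takes an 8-bit field mask whose most significant bit selects CR0.
;; Illustration only:

static unsigned
fxm_mask_for_cr (unsigned cr_regno)    /* 68 = CR0 ... 75 = CR7 */
{
  return 1u << (75 - cr_regno);        /* CR0 -> 0x80, CR7 -> 0x01 */
}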
+ +(define_expand "movsi_to_cr_one" + [(set (match_operand:CC 0 "cc_reg_operand" "") + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "") + (match_dup 2)] UNSPEC_MOVESI_TO_CR))] + "" + "operands[2] = GEN_INT (1 << (75 - REGNO (operands[0])));") + +(define_insn "*movsi_to_cr" + [(match_parallel 0 "mtcrf_operation" + [(set (match_operand:CC 1 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:SI 2 "gpc_reg_operand" "r") + (match_operand 3 "immediate_operand" "n")] + UNSPEC_MOVESI_TO_CR))])] + "" + "* +{ + int mask = 0; + int i; + for (i = 0; i < XVECLEN (operands[0], 0); i++) + mask |= INTVAL (XVECEXP (SET_SRC (XVECEXP (operands[0], 0, i)), 0, 1)); + operands[4] = GEN_INT (mask); + return \"mtcrf %4,%2\"; +}" + [(set_attr "type" "mtcr")]) + +(define_insn "*mtcrfsi" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand 2 "immediate_operand" "n")] + UNSPEC_MOVESI_TO_CR))] + "GET_CODE (operands[0]) == REG + && CR_REGNO_P (REGNO (operands[0])) + && GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 1 << (75 - REGNO (operands[0]))" + "mtcrf %R0,%1" + [(set_attr "type" "mtcr")]) + +; The load-multiple instructions have similar properties. +; Note that "load_multiple" is a name known to the machine-independent +; code that actually corresponds to the PowerPC load-string. + +(define_insn "*lmw" + [(match_parallel 0 "lmw_operation" + [(set (match_operand:SI 1 "gpc_reg_operand" "=r") + (match_operand:SI 2 "memory_operand" "m"))])] + "TARGET_MULTIPLE" + "lmw %1,%2" + [(set_attr "type" "load") + (set_attr "update" "yes") + (set_attr "indexed" "yes") + (set_attr "cell_micro" "always")]) + +; FIXME: This would probably be somewhat simpler if the Cygnus sibcall +; stuff was in GCC. Oh, and "any_parallel_operand" is a bit flexible... + +; The following comment applies to: +; save_gpregs_* +; save_fpregs_* +; restore_gpregs* +; return_and_restore_gpregs* +; return_and_restore_fpregs* +; return_and_restore_fpregs_aix* +; +; The out-of-line save / restore functions expects one input argument. +; Since those are not standard call_insn's, we must avoid using +; MATCH_OPERAND for that argument. That way the register rename +; optimization will not try to rename this register. 
+; Each pattern is repeated for each possible register number used in +; various ABIs (r11, r1, and for some functions r12) + +(define_insn "*restore_gpregs_<mode>_r11" + [(match_parallel 0 "any_parallel_operand" + [(clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (use (reg:P 11)) + (set (match_operand:P 2 "gpc_reg_operand" "=r") + (match_operand:P 3 "memory_operand" "m"))])] + "" + "bl %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*restore_gpregs_<mode>_r12" + [(match_parallel 0 "any_parallel_operand" + [(clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (use (reg:P 12)) + (set (match_operand:P 2 "gpc_reg_operand" "=r") + (match_operand:P 3 "memory_operand" "m"))])] + "" + "bl %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*restore_gpregs_<mode>_r1" + [(match_parallel 0 "any_parallel_operand" + [(clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (use (reg:P 1)) + (set (match_operand:P 2 "gpc_reg_operand" "=r") + (match_operand:P 3 "memory_operand" "m"))])] + "" + "bl %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*return_and_restore_gpregs_<mode>_r11" + [(match_parallel 0 "any_parallel_operand" + [(return) + (clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (use (reg:P 11)) + (set (match_operand:P 2 "gpc_reg_operand" "=r") + (match_operand:P 3 "memory_operand" "m"))])] + "" + "b %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*return_and_restore_gpregs_<mode>_r12" + [(match_parallel 0 "any_parallel_operand" + [(return) + (clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (use (reg:P 12)) + (set (match_operand:P 2 "gpc_reg_operand" "=r") + (match_operand:P 3 "memory_operand" "m"))])] + "" + "b %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*return_and_restore_gpregs_<mode>_r1" + [(match_parallel 0 "any_parallel_operand" + [(return) + (clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (use (reg:P 1)) + (set (match_operand:P 2 "gpc_reg_operand" "=r") + (match_operand:P 3 "memory_operand" "m"))])] + "" + "b %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*return_and_restore_fpregs_<mode>_r11" + [(match_parallel 0 "any_parallel_operand" + [(return) + (clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (use (reg:P 11)) + (set (match_operand:DF 2 "gpc_reg_operand" "=d") + (match_operand:DF 3 "memory_operand" "m"))])] + "" + "b %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*return_and_restore_fpregs_<mode>_r12" + [(match_parallel 0 "any_parallel_operand" + [(return) + (clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (use (reg:P 12)) + (set (match_operand:DF 2 "gpc_reg_operand" "=d") + (match_operand:DF 3 "memory_operand" "m"))])] + "" + "b %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*return_and_restore_fpregs_<mode>_r1" + [(match_parallel 0 "any_parallel_operand" + [(return) + (clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (use (reg:P 1)) + (set (match_operand:DF 2 "gpc_reg_operand" "=d") + (match_operand:DF 3 "memory_operand" "m"))])] + "" + "b %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn 
"*return_and_restore_fpregs_aix_<mode>_r11" + [(match_parallel 0 "any_parallel_operand" + [(return) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (use (reg:P 11)) + (set (match_operand:DF 2 "gpc_reg_operand" "=d") + (match_operand:DF 3 "memory_operand" "m"))])] + "" + "b %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*return_and_restore_fpregs_aix_<mode>_r1" + [(match_parallel 0 "any_parallel_operand" + [(return) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (use (reg:P 1)) + (set (match_operand:DF 2 "gpc_reg_operand" "=d") + (match_operand:DF 3 "memory_operand" "m"))])] + "" + "b %1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +; This is used in compiling the unwind routines. +(define_expand "eh_return" + [(use (match_operand 0 "general_operand" ""))] + "" + " +{ + if (TARGET_32BIT) + emit_insn (gen_eh_set_lr_si (operands[0])); + else + emit_insn (gen_eh_set_lr_di (operands[0])); + DONE; +}") + +; We can't expand this before we know where the link register is stored. +(define_insn "eh_set_lr_<mode>" + [(unspec_volatile [(match_operand:P 0 "register_operand" "r")] + UNSPECV_EH_RR) + (clobber (match_scratch:P 1 "=&b"))] + "" + "#") + +(define_split + [(unspec_volatile [(match_operand 0 "register_operand" "")] UNSPECV_EH_RR) + (clobber (match_scratch 1 ""))] + "reload_completed" + [(const_int 0)] + " +{ + rs6000_emit_eh_reg_restore (operands[0], operands[1]); + DONE; +}") + +(define_insn "prefetch" + [(prefetch (match_operand 0 "indexed_or_indirect_address" "a") + (match_operand:SI 1 "const_int_operand" "n") + (match_operand:SI 2 "const_int_operand" "n"))] + "" + "* +{ + if (GET_CODE (operands[0]) == REG) + return INTVAL (operands[1]) ? \"dcbtst 0,%0\" : \"dcbt 0,%0\"; + return INTVAL (operands[1]) ? \"dcbtst %a0\" : \"dcbt %a0\"; +}" + [(set_attr "type" "load")]) + +;; Handle -fsplit-stack. + +(define_expand "split_stack_prologue" + [(const_int 0)] + "" +{ + rs6000_expand_split_stack_prologue (); + DONE; +}) + +(define_expand "load_split_stack_limit" + [(set (match_operand 0) + (unspec [(const_int 0)] UNSPEC_STACK_CHECK))] + "" +{ + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_UNSPEC (Pmode, + gen_rtvec (1, const0_rtx), + UNSPEC_STACK_CHECK))); + DONE; +}) + +(define_insn "load_split_stack_limit_di" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_STACK_CHECK))] + "TARGET_64BIT" + "ld %0,-0x7040(13)" + [(set_attr "type" "load") + (set_attr "update" "no") + (set_attr "indexed" "no")]) + +(define_insn "load_split_stack_limit_si" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (unspec:SI [(const_int 0)] UNSPEC_STACK_CHECK))] + "!TARGET_64BIT" + "lwz %0,-0x7020(2)" + [(set_attr "type" "load") + (set_attr "update" "no") + (set_attr "indexed" "no")]) + +;; A return instruction which the middle-end doesn't see. +;; Use r0 to stop regrename twiddling with lr restore insns emitted +;; after the call to __morestack. +(define_insn "split_stack_return" + [(unspec_volatile [(use (reg:SI 0))] UNSPECV_SPLIT_STACK_RETURN)] + "" + "blr" + [(set_attr "type" "jmpreg")]) + +;; If there are operand 0 bytes available on the stack, jump to +;; operand 1. 
+(define_expand "split_stack_space_check" + [(set (match_dup 2) + (unspec [(const_int 0)] UNSPEC_STACK_CHECK)) + (set (match_dup 3) + (minus (reg STACK_POINTER_REGNUM) + (match_operand 0))) + (set (match_dup 4) (compare:CCUNS (match_dup 3) (match_dup 2))) + (set (pc) (if_then_else + (geu (match_dup 4) (const_int 0)) + (label_ref (match_operand 1)) + (pc)))] + "" +{ + rs6000_split_stack_space_check (operands[0], operands[1]); + DONE; +}) + +(define_insn "bpermd_<mode>" + [(set (match_operand:P 0 "gpc_reg_operand" "=r") + (unspec:P [(match_operand:P 1 "gpc_reg_operand" "r") + (match_operand:P 2 "gpc_reg_operand" "r")] UNSPEC_BPERM))] + "TARGET_POPCNTD" + "bpermd %0,%1,%2" + [(set_attr "type" "popcnt")]) + + +;; Builtin fma support. Handle +;; Note that the conditions for expansion are in the FMA_F iterator. + +(define_expand "fma<mode>4" + [(set (match_operand:FMA_F 0 "gpc_reg_operand" "") + (fma:FMA_F + (match_operand:FMA_F 1 "gpc_reg_operand" "") + (match_operand:FMA_F 2 "gpc_reg_operand" "") + (match_operand:FMA_F 3 "gpc_reg_operand" "")))] + "" + "") + +(define_insn "*fma<mode>4_fpr" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv2>,<Fv2>") + (fma:SFDF + (match_operand:SFDF 1 "gpc_reg_operand" "%<Ff>,<Fv2>,<Fv2>") + (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv2>,0") + (match_operand:SFDF 3 "gpc_reg_operand" "<Ff>,0,<Fv2>")))] + "TARGET_<MODE>_FPR" + "@ + fmadd<Ftrad> %0,%1,%2,%3 + xsmadda<Fvsx> %x0,%x1,%x2 + xsmaddm<Fvsx> %x0,%x1,%x3" + [(set_attr "type" "fp") + (set_attr "fp_type" "fp_maddsub_<Fs>")]) + +; Altivec only has fma and nfms. +(define_expand "fms<mode>4" + [(set (match_operand:FMA_F 0 "gpc_reg_operand" "") + (fma:FMA_F + (match_operand:FMA_F 1 "gpc_reg_operand" "") + (match_operand:FMA_F 2 "gpc_reg_operand" "") + (neg:FMA_F (match_operand:FMA_F 3 "gpc_reg_operand" ""))))] + "!VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + "") + +(define_insn "*fms<mode>4_fpr" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv2>,<Fv2>") + (fma:SFDF + (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv2>,<Fv2>") + (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv2>,0") + (neg:SFDF (match_operand:SFDF 3 "gpc_reg_operand" "<Ff>,0,<Fv2>"))))] + "TARGET_<MODE>_FPR" + "@ + fmsub<Ftrad> %0,%1,%2,%3 + xsmsuba<Fvsx> %x0,%x1,%x2 + xsmsubm<Fvsx> %x0,%x1,%x3" + [(set_attr "type" "fp") + (set_attr "fp_type" "fp_maddsub_<Fs>")]) + +;; If signed zeros are ignored, -(a * b - c) = -a * b + c. +(define_expand "fnma<mode>4" + [(set (match_operand:FMA_F 0 "gpc_reg_operand" "") + (neg:FMA_F + (fma:FMA_F + (match_operand:FMA_F 1 "gpc_reg_operand" "") + (match_operand:FMA_F 2 "gpc_reg_operand" "") + (neg:FMA_F (match_operand:FMA_F 3 "gpc_reg_operand" "")))))] + "!HONOR_SIGNED_ZEROS (<MODE>mode)" + "") + +;; If signed zeros are ignored, -(a * b + c) = -a * b - c. +(define_expand "fnms<mode>4" + [(set (match_operand:FMA_F 0 "gpc_reg_operand" "") + (neg:FMA_F + (fma:FMA_F + (match_operand:FMA_F 1 "gpc_reg_operand" "") + (match_operand:FMA_F 2 "gpc_reg_operand" "") + (match_operand:FMA_F 3 "gpc_reg_operand" ""))))] + "!HONOR_SIGNED_ZEROS (<MODE>mode) && !VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + "") + +; Not an official optab name, but used from builtins. 
+(define_expand "nfma<mode>4" + [(set (match_operand:FMA_F 0 "gpc_reg_operand" "") + (neg:FMA_F + (fma:FMA_F + (match_operand:FMA_F 1 "gpc_reg_operand" "") + (match_operand:FMA_F 2 "gpc_reg_operand" "") + (match_operand:FMA_F 3 "gpc_reg_operand" ""))))] + "!VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + "") + +(define_insn "*nfma<mode>4_fpr" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv2>,<Fv2>") + (neg:SFDF + (fma:SFDF + (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv2>,<Fv2>") + (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv2>,0") + (match_operand:SFDF 3 "gpc_reg_operand" "<Ff>,0,<Fv2>"))))] + "TARGET_<MODE>_FPR" + "@ + fnmadd<Ftrad> %0,%1,%2,%3 + xsnmadda<Fvsx> %x0,%x1,%x2 + xsnmaddm<Fvsx> %x0,%x1,%x3" + [(set_attr "type" "fp") + (set_attr "fp_type" "fp_maddsub_<Fs>")]) + +; Not an official optab name, but used from builtins. +(define_expand "nfms<mode>4" + [(set (match_operand:FMA_F 0 "gpc_reg_operand" "") + (neg:FMA_F + (fma:FMA_F + (match_operand:FMA_F 1 "gpc_reg_operand" "") + (match_operand:FMA_F 2 "gpc_reg_operand" "") + (neg:FMA_F (match_operand:FMA_F 3 "gpc_reg_operand" "")))))] + "" + "") + +(define_insn "*nfmssf4_fpr" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv2>,<Fv2>") + (neg:SFDF + (fma:SFDF + (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv2>,<Fv2>") + (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv2>,0") + (neg:SFDF + (match_operand:SFDF 3 "gpc_reg_operand" "<Ff>,0,<Fv2>")))))] + "TARGET_<MODE>_FPR" + "@ + fnmsub<Ftrad> %0,%1,%2,%3 + xsnmsuba<Fvsx> %x0,%x1,%x2 + xsnmsubm<Fvsx> %x0,%x1,%x3" + [(set_attr "type" "fp") + (set_attr "fp_type" "fp_maddsub_<Fs>")]) + + +(define_expand "rs6000_get_timebase" + [(use (match_operand:DI 0 "gpc_reg_operand" ""))] + "" +{ + if (TARGET_POWERPC64) + emit_insn (gen_rs6000_mftb_di (operands[0])); + else + emit_insn (gen_rs6000_get_timebase_ppc32 (operands[0])); + DONE; +}) + +(define_insn "rs6000_get_timebase_ppc32" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPECV_MFTB)) + (clobber (match_scratch:SI 1 "=r")) + (clobber (match_scratch:CC 2 "=y"))] + "!TARGET_POWERPC64" +{ + if (WORDS_BIG_ENDIAN) + if (TARGET_MFCRF) + { + return "mfspr %0,269\;" + "mfspr %L0,268\;" + "mfspr %1,269\;" + "cmpw %2,%0,%1\;" + "bne- %2,$-16"; + } + else + { + return "mftbu %0\;" + "mftb %L0\;" + "mftbu %1\;" + "cmpw %2,%0,%1\;" + "bne- %2,$-16"; + } + else + if (TARGET_MFCRF) + { + return "mfspr %L0,269\;" + "mfspr %0,268\;" + "mfspr %1,269\;" + "cmpw %2,%L0,%1\;" + "bne- %2,$-16"; + } + else + { + return "mftbu %L0\;" + "mftb %0\;" + "mftbu %1\;" + "cmpw %2,%L0,%1\;" + "bne- %2,$-16"; + } +} + [(set_attr "length" "20")]) + +(define_insn "rs6000_mftb_<mode>" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (unspec_volatile:GPR [(const_int 0)] UNSPECV_MFTB))] + "" +{ + if (TARGET_MFCRF) + return "mfspr %0,268"; + else + return "mftb %0"; +}) + + +(define_insn "rs6000_mffs" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d") + (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFS))] + "TARGET_HARD_FLOAT && TARGET_FPRS" + "mffs %0") + +(define_insn "rs6000_mtfsf" + [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "i") + (match_operand:DF 1 "gpc_reg_operand" "d")] + UNSPECV_MTFSF)] + "TARGET_HARD_FLOAT && TARGET_FPRS" + "mtfsf %0,%1") + + +;; Power8 fusion support for fusing an addis instruction with a D-form load of +;; a GPR. The addis instruction must be adjacent to the load, and use the same +;; register that is being loaded. The fused ops must be physically adjacent. 
+ +;; There are two parts to addis fusion. The support for fused TOCs occurs +;; before register allocation, and is meant to reduce the lifetime for the +;; temporary register that holds the ADDIS result. On Power8 GPR loads, we try +;; to use the register that is being loaded. The peephole2 then gathers any +;; other fused possibilities that it can find after register allocation. If +;; power9 fusion is selected, we also fuse floating point loads/stores. + +;; Fused TOC support: Replace simple GPR loads with a fused form. This is done +;; before register allocation, so that we can avoid allocating a temporary base +;; register that won't be used, and that we try to load into base registers, +;; and not register 0. If we can't get a fused GPR load, generate a P9 fusion +;; (addis followed by load) even on power8. + +(define_split + [(set (match_operand:INT1 0 "toc_fusion_or_p9_reg_operand" "") + (match_operand:INT1 1 "toc_fusion_mem_raw" ""))] + "TARGET_TOC_FUSION_INT && can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) (match_dup 2)) + (unspec [(const_int 0)] UNSPEC_FUSION_ADDIS) + (use (match_dup 3)) + (clobber (scratch:DI))])] +{ + operands[2] = fusion_wrap_memory_address (operands[1]); + operands[3] = gen_rtx_REG (Pmode, TOC_REGISTER); +}) + +(define_insn "*toc_fusionload_<mode>" + [(set (match_operand:QHSI 0 "int_reg_operand" "=&b,??r") + (match_operand:QHSI 1 "toc_fusion_mem_wrapped" "wG,wG")) + (unspec [(const_int 0)] UNSPEC_FUSION_ADDIS) + (use (match_operand:DI 2 "base_reg_operand" "r,r")) + (clobber (match_scratch:DI 3 "=X,&b"))] + "TARGET_TOC_FUSION_INT" +{ + if (base_reg_operand (operands[0], <MODE>mode)) + return emit_fusion_gpr_load (operands[0], operands[1]); + + return emit_fusion_p9_load (operands[0], operands[1], operands[3]); +} + [(set_attr "type" "load") + (set_attr "length" "8")]) + +(define_insn "*toc_fusionload_di" + [(set (match_operand:DI 0 "int_reg_operand" "=&b,??r,?d") + (match_operand:DI 1 "toc_fusion_mem_wrapped" "wG,wG,wG")) + (unspec [(const_int 0)] UNSPEC_FUSION_ADDIS) + (use (match_operand:DI 2 "base_reg_operand" "r,r,r")) + (clobber (match_scratch:DI 3 "=X,&b,&b"))] + "TARGET_TOC_FUSION_INT && TARGET_POWERPC64 + && (MEM_P (operands[1]) || int_reg_operand (operands[0], DImode))" +{ + if (base_reg_operand (operands[0], DImode)) + return emit_fusion_gpr_load (operands[0], operands[1]); + + return emit_fusion_p9_load (operands[0], operands[1], operands[3]); +} + [(set_attr "type" "load") + (set_attr "length" "8")]) + + +;; Find cases where the addis that feeds into a load instruction is either used +;; once or is the same as the target register, and replace it with the fusion +;; insn + +(define_peephole2 + [(set (match_operand:P 0 "base_reg_operand" "") + (match_operand:P 1 "fusion_gpr_addis" "")) + (set (match_operand:INT1 2 "base_reg_operand" "") + (match_operand:INT1 3 "fusion_gpr_mem_load" ""))] + "TARGET_P8_FUSION + && fusion_gpr_load_p (operands[0], operands[1], operands[2], + operands[3])" + [(const_int 0)] +{ + expand_fusion_gpr_load (operands); + DONE; +}) + +;; Fusion insn, created by the define_peephole2 above (and eventually by +;; reload) + +(define_insn "fusion_gpr_load_<mode>" + [(set (match_operand:INT1 0 "base_reg_operand" "=b") + (unspec:INT1 [(match_operand:INT1 1 "fusion_addis_mem_combo_load" "wF")] + UNSPEC_FUSION_GPR))] + "TARGET_P8_FUSION" +{ + return emit_fusion_gpr_load (operands[0], operands[1]); +} + [(set_attr "type" "load") + (set_attr "length" "8")]) + + +;; ISA 3.0 (power9) fusion support +;; Merge addis with floating
load/store to FPRs (or GPRs). +(define_peephole2 + [(set (match_operand:P 0 "base_reg_operand" "") + (match_operand:P 1 "fusion_gpr_addis" "")) + (set (match_operand:SFDF 2 "toc_fusion_or_p9_reg_operand" "") + (match_operand:SFDF 3 "fusion_offsettable_mem_operand" ""))] + "TARGET_P9_FUSION && peep2_reg_dead_p (2, operands[0]) + && fusion_p9_p (operands[0], operands[1], operands[2], operands[3])" + [(const_int 0)] +{ + expand_fusion_p9_load (operands); + DONE; +}) + +(define_peephole2 + [(set (match_operand:P 0 "base_reg_operand" "") + (match_operand:P 1 "fusion_gpr_addis" "")) + (set (match_operand:SFDF 2 "offsettable_mem_operand" "") + (match_operand:SFDF 3 "toc_fusion_or_p9_reg_operand" ""))] + "TARGET_P9_FUSION && peep2_reg_dead_p (2, operands[0]) + && fusion_p9_p (operands[0], operands[1], operands[2], operands[3]) + && !rtx_equal_p (operands[0], operands[3])" + [(const_int 0)] +{ + expand_fusion_p9_store (operands); + DONE; +}) + +(define_peephole2 + [(set (match_operand:SDI 0 "int_reg_operand" "") + (match_operand:SDI 1 "upper16_cint_operand" "")) + (set (match_dup 0) + (ior:SDI (match_dup 0) + (match_operand:SDI 2 "u_short_cint_operand" "")))] + "TARGET_P9_FUSION" + [(set (match_dup 0) + (unspec:SDI [(match_dup 1) + (match_dup 2)] UNSPEC_FUSION_P9))]) + +(define_peephole2 + [(set (match_operand:SDI 0 "int_reg_operand" "") + (match_operand:SDI 1 "upper16_cint_operand" "")) + (set (match_operand:SDI 2 "int_reg_operand" "") + (ior:SDI (match_dup 0) + (match_operand:SDI 3 "u_short_cint_operand" "")))] + "TARGET_P9_FUSION + && !rtx_equal_p (operands[0], operands[2]) + && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) + (unspec:SDI [(match_dup 1) + (match_dup 3)] UNSPEC_FUSION_P9))]) + +;; Fusion insns, created by the define_peephole2 above (and eventually by +;; reload). Because we want to eventually have secondary_reload generate +;; these, they have to have a single alternative that gives the register +;; classes. This means we need to have separate gpr/fpr/altivec versions. +(define_insn "fusion_gpr_<P:mode>_<GPR_FUSION:mode>_load" + [(set (match_operand:GPR_FUSION 0 "int_reg_operand" "=r") + (unspec:GPR_FUSION + [(match_operand:GPR_FUSION 1 "fusion_addis_mem_combo_load" "wF")] + UNSPEC_FUSION_P9)) + (clobber (match_operand:P 2 "base_reg_operand" "=b"))] + "TARGET_P9_FUSION" +{ + /* This insn is a secondary reload insn, which cannot have alternatives. + If we are not loading up register 0, use the power8 fusion instead. 
*/ + if (base_reg_operand (operands[0], <GPR_FUSION:MODE>mode)) + return emit_fusion_gpr_load (operands[0], operands[1]); + + return emit_fusion_p9_load (operands[0], operands[1], operands[2]); +} + [(set_attr "type" "load") + (set_attr "length" "8")]) + +(define_insn "fusion_gpr_<P:mode>_<GPR_FUSION:mode>_store" + [(set (match_operand:GPR_FUSION 0 "fusion_addis_mem_combo_store" "=wF") + (unspec:GPR_FUSION + [(match_operand:GPR_FUSION 1 "int_reg_operand" "r")] + UNSPEC_FUSION_P9)) + (clobber (match_operand:P 2 "base_reg_operand" "=b"))] + "TARGET_P9_FUSION" +{ + return emit_fusion_p9_store (operands[0], operands[1], operands[2]); +} + [(set_attr "type" "store") + (set_attr "length" "8")]) + +(define_insn "fusion_vsx_<P:mode>_<FPR_FUSION:mode>_load" + [(set (match_operand:FPR_FUSION 0 "vsx_register_operand" "=dwb") + (unspec:FPR_FUSION + [(match_operand:FPR_FUSION 1 "fusion_addis_mem_combo_load" "wF")] + UNSPEC_FUSION_P9)) + (clobber (match_operand:P 2 "base_reg_operand" "=b"))] + "TARGET_P9_FUSION" +{ + return emit_fusion_p9_load (operands[0], operands[1], operands[2]); +} + [(set_attr "type" "fpload") + (set_attr "length" "8")]) + +(define_insn "fusion_vsx_<P:mode>_<FPR_FUSION:mode>_store" + [(set (match_operand:FPR_FUSION 0 "fusion_addis_mem_combo_store" "=wF") + (unspec:FPR_FUSION + [(match_operand:FPR_FUSION 1 "vsx_register_operand" "dwb")] + UNSPEC_FUSION_P9)) + (clobber (match_operand:P 2 "base_reg_operand" "=b"))] + "TARGET_P9_FUSION" +{ + return emit_fusion_p9_store (operands[0], operands[1], operands[2]); +} + [(set_attr "type" "fpstore") + (set_attr "length" "8")]) + +(define_insn "*fusion_p9_<mode>_constant" + [(set (match_operand:SDI 0 "int_reg_operand" "=r") + (unspec:SDI [(match_operand:SDI 1 "upper16_cint_operand" "L") + (match_operand:SDI 2 "u_short_cint_operand" "K")] + UNSPEC_FUSION_P9))] + "TARGET_P9_FUSION" +{ + emit_fusion_addis (operands[0], operands[1], "constant", "<MODE>"); + return "ori %0,%0,%2"; +} + [(set_attr "type" "two") + (set_attr "length" "8")]) + + +;; Optimize cases where we want to do a D-form load (register+offset) on +;; ISA 2.06/2.07 to an Altivec register, and the register allocator +;; has generated: +;; LFD 0,32(3) +;; XXLOR 32,0,0 +;; +;; and we change this to: +;; LI 0,32 +;; LXSDX 32,3,9 + +(define_peephole2 + [(match_scratch:DI 0 "b") + (set (match_operand:ALTIVEC_DFORM 1 "fpr_reg_operand") + (match_operand:ALTIVEC_DFORM 2 "simple_offsettable_mem_operand")) + (set (match_operand:ALTIVEC_DFORM 3 "altivec_register_operand") + (match_dup 1))] + "TARGET_VSX && TARGET_POWERPC64 && TARGET_UPPER_REGS_<MODE> + && !TARGET_P9_DFORM_SCALAR && peep2_reg_dead_p (2, operands[1])" + [(set (match_dup 0) + (match_dup 4)) + (set (match_dup 3) + (match_dup 5))] +{ + rtx tmp_reg = operands[0]; + rtx mem = operands[2]; + rtx addr = XEXP (mem, 0); + rtx add_op0, add_op1, new_addr; + + gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM); + add_op0 = XEXP (addr, 0); + add_op1 = XEXP (addr, 1); + gcc_assert (REG_P (add_op0)); + new_addr = gen_rtx_PLUS (DImode, add_op0, tmp_reg); + + operands[4] = add_op1; + operands[5] = change_address (mem, <MODE>mode, new_addr); +}) + +;; Optimize cases were want to do a D-form store on ISA 2.06/2.07 from an +;; Altivec register, and the register allocator has generated: +;; XXLOR 0,32,32 +;; STFD 0,32(3) +;; +;; and we change this to: +;; LI 0,32 +;; STXSDX 32,3,9 + +(define_peephole2 + [(match_scratch:DI 0 "b") + (set (match_operand:ALTIVEC_DFORM 1 "fpr_reg_operand") + (match_operand:ALTIVEC_DFORM 2 
"altivec_register_operand")) + (set (match_operand:ALTIVEC_DFORM 3 "simple_offsettable_mem_operand") + (match_dup 1))] + "TARGET_VSX && TARGET_POWERPC64 && TARGET_UPPER_REGS_<MODE> + && !TARGET_P9_DFORM_SCALAR && peep2_reg_dead_p (2, operands[1])" + [(set (match_dup 0) + (match_dup 4)) + (set (match_dup 5) + (match_dup 2))] +{ + rtx tmp_reg = operands[0]; + rtx mem = operands[3]; + rtx addr = XEXP (mem, 0); + rtx add_op0, add_op1, new_addr; + + gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM); + add_op0 = XEXP (addr, 0); + add_op1 = XEXP (addr, 1); + gcc_assert (REG_P (add_op0)); + new_addr = gen_rtx_PLUS (DImode, add_op0, tmp_reg); + + operands[4] = add_op1; + operands[5] = change_address (mem, <MODE>mode, new_addr); +}) + + +;; Miscellaneous ISA 2.06 (power7) instructions +(define_insn "addg6s" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] + UNSPEC_ADDG6S))] + "TARGET_POPCNTD" + "addg6s %0,%1,%2" + [(set_attr "type" "integer") + (set_attr "length" "4")]) + +(define_insn "cdtbcd" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r")] + UNSPEC_CDTBCD))] + "TARGET_POPCNTD" + "cdtbcd %0,%1" + [(set_attr "type" "integer") + (set_attr "length" "4")]) + +(define_insn "cbcdtd" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r")] + UNSPEC_CBCDTD))] + "TARGET_POPCNTD" + "cbcdtd %0,%1" + [(set_attr "type" "integer") + (set_attr "length" "4")]) + +(define_int_iterator UNSPEC_DIV_EXTEND [UNSPEC_DIVE + UNSPEC_DIVEO + UNSPEC_DIVEU + UNSPEC_DIVEUO]) + +(define_int_attr div_extend [(UNSPEC_DIVE "e") + (UNSPEC_DIVEO "eo") + (UNSPEC_DIVEU "eu") + (UNSPEC_DIVEUO "euo")]) + +(define_insn "div<div_extend>_<mode>" + [(set (match_operand:GPR 0 "register_operand" "=r") + (unspec:GPR [(match_operand:GPR 1 "register_operand" "r") + (match_operand:GPR 2 "register_operand" "r")] + UNSPEC_DIV_EXTEND))] + "TARGET_POPCNTD" + "div<wd><div_extend> %0,%1,%2" + [(set_attr "type" "div") + (set_attr "size" "<bits>")]) + + +;; Pack/unpack 128-bit floating point types that take 2 scalar registers + +; Type of the 64-bit part when packing/unpacking 128-bit floating point types +(define_mode_attr FP128_64 [(TF "DF") + (IF "DF") + (TD "DI") + (KF "DI")]) + +(define_expand "unpack<mode>" + [(set (match_operand:<FP128_64> 0 "nonimmediate_operand" "") + (unspec:<FP128_64> + [(match_operand:FMOVE128 1 "register_operand" "") + (match_operand:QI 2 "const_0_to_1_operand" "")] + UNSPEC_UNPACK_128BIT))] + "FLOAT128_2REG_P (<MODE>mode)" + "") + +(define_insn_and_split "unpack<mode>_dm" + [(set (match_operand:<FP128_64> 0 "nonimmediate_operand" "=d,m,d,r,m") + (unspec:<FP128_64> + [(match_operand:FMOVE128 1 "register_operand" "d,d,r,d,r") + (match_operand:QI 2 "const_0_to_1_operand" "i,i,i,i,i")] + UNSPEC_UNPACK_128BIT))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && FLOAT128_2REG_P (<MODE>mode)" + "#" + "&& reload_completed" + [(set (match_dup 0) (match_dup 3))] +{ + unsigned fp_regno = REGNO (operands[1]) + UINTVAL (operands[2]); + + if (REG_P (operands[0]) && REGNO (operands[0]) == fp_regno) + { + emit_note (NOTE_INSN_DELETED); + DONE; + } + + operands[3] = gen_rtx_REG (<FP128_64>mode, fp_regno); +} + [(set_attr "type" "fp,fpstore,mffgpr,mftgpr,store") + (set_attr "length" "4")]) + +(define_insn_and_split "unpack<mode>_nodm" + [(set (match_operand:<FP128_64> 0 "nonimmediate_operand" "=d,m") + 
(unspec:<FP128_64> + [(match_operand:FMOVE128 1 "register_operand" "d,d") + (match_operand:QI 2 "const_0_to_1_operand" "i,i")] + UNSPEC_UNPACK_128BIT))] + "(!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE) && FLOAT128_2REG_P (<MODE>mode)" + "#" + "&& reload_completed" + [(set (match_dup 0) (match_dup 3))] +{ + unsigned fp_regno = REGNO (operands[1]) + UINTVAL (operands[2]); + + if (REG_P (operands[0]) && REGNO (operands[0]) == fp_regno) + { + emit_note (NOTE_INSN_DELETED); + DONE; + } + + operands[3] = gen_rtx_REG (<FP128_64>mode, fp_regno); +} + [(set_attr "type" "fp,fpstore") + (set_attr "length" "4")]) + +(define_insn_and_split "pack<mode>" + [(set (match_operand:FMOVE128 0 "register_operand" "=d,&d") + (unspec:FMOVE128 + [(match_operand:<FP128_64> 1 "register_operand" "0,d") + (match_operand:<FP128_64> 2 "register_operand" "d,d")] + UNSPEC_PACK_128BIT))] + "FLOAT128_2REG_P (<MODE>mode)" + "@ + fmr %L0,%2 + #" + "&& reload_completed && REGNO (operands[0]) != REGNO (operands[1])" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 4) (match_dup 2))] +{ + unsigned dest_hi = REGNO (operands[0]); + unsigned dest_lo = dest_hi + 1; + + gcc_assert (!IN_RANGE (REGNO (operands[1]), dest_hi, dest_lo)); + gcc_assert (!IN_RANGE (REGNO (operands[2]), dest_hi, dest_lo)); + + operands[3] = gen_rtx_REG (<FP128_64>mode, dest_hi); + operands[4] = gen_rtx_REG (<FP128_64>mode, dest_lo); +} + [(set_attr "type" "fpsimple,fp") + (set_attr "length" "4,8")]) + +(define_insn "unpack<mode>" + [(set (match_operand:DI 0 "register_operand" "=d,d") + (unspec:DI [(match_operand:FMOVE128_VSX 1 "register_operand" "0,wa") + (match_operand:QI 2 "const_0_to_1_operand" "O,i")] + UNSPEC_UNPACK_128BIT))] + "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + if (REGNO (operands[0]) == REGNO (operands[1]) && INTVAL (operands[2]) == 0) + return ASM_COMMENT_START " xxpermdi to same register"; + + operands[3] = GEN_INT (INTVAL (operands[2]) == 0 ? 0 : 3); + return "xxpermdi %x0,%x1,%x1,%3"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "pack<mode>" + [(set (match_operand:FMOVE128_VSX 0 "register_operand" "=wa") + (unspec:FMOVE128_VSX + [(match_operand:DI 1 "register_operand" "d") + (match_operand:DI 2 "register_operand" "d")] + UNSPEC_PACK_128BIT))] + "TARGET_VSX" + "xxpermdi %x0,%x1,%x2,0" + [(set_attr "type" "vecperm")]) + + + +;; ISA 2.08 IEEE 128-bit floating point support. 
+ +(define_insn "add<mode>3" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (plus:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "v") + (match_operand:IEEE128 2 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xsaddqp %0,%1,%2" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +(define_insn "sub<mode>3" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (minus:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "v") + (match_operand:IEEE128 2 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xssubqp %0,%1,%2" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +(define_insn "mul<mode>3" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (mult:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "v") + (match_operand:IEEE128 2 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xsmulqp %0,%1,%2" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +(define_insn "div<mode>3" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (div:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "v") + (match_operand:IEEE128 2 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xsdivqp %0,%1,%2" + [(set_attr "type" "vecdiv") + (set_attr "size" "128")]) + +(define_insn "sqrt<mode>2" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (sqrt:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xssqrtqp %0,%1" + [(set_attr "type" "vecdiv") + (set_attr "size" "128")]) + +(define_expand "copysign<mode>3" + [(use (match_operand:IEEE128 0 "altivec_register_operand")) + (use (match_operand:IEEE128 1 "altivec_register_operand")) + (use (match_operand:IEEE128 2 "altivec_register_operand"))] + "FLOAT128_IEEE_P (<MODE>mode)" +{ + if (TARGET_FLOAT128_HW) + emit_insn (gen_copysign<mode>3_hard (operands[0], operands[1], + operands[2])); + else + { + rtx tmp = gen_reg_rtx (<MODE>mode); + emit_insn (gen_copysign<mode>3_soft (operands[0], operands[1], + operands[2], tmp)); + } + DONE; +}) + +(define_insn "copysign<mode>3_hard" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (unspec:IEEE128 + [(match_operand:IEEE128 1 "altivec_register_operand" "v") + (match_operand:IEEE128 2 "altivec_register_operand" "v")] + UNSPEC_COPYSIGN))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xscpsgnqp %0,%2,%1" + [(set_attr "type" "vecmove") + (set_attr "size" "128")]) + +(define_insn "copysign<mode>3_soft" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (unspec:IEEE128 + [(match_operand:IEEE128 1 "altivec_register_operand" "v") + (match_operand:IEEE128 2 "altivec_register_operand" "v") + (match_operand:IEEE128 3 "altivec_register_operand" "+v")] + UNSPEC_COPYSIGN))] + "!TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xscpsgndp %x3,%x2,%x1\;xxpermdi %x0,%x3,%x1,1" + [(set_attr "type" "veccomplex") + (set_attr "length" "8")]) + +(define_insn "neg<mode>2_hw" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (neg:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xsnegqp %0,%1" + [(set_attr "type" "vecmove") + (set_attr "size" "128")]) + + +(define_insn "abs<mode>2_hw" + [(set 
(match_operand:IEEE128 0 "altivec_register_operand" "=v") + (abs:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xsabsqp %0,%1" + [(set_attr "type" "vecmove") + (set_attr "size" "128")]) + + +(define_insn "*nabs<mode>2_hw" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (neg:IEEE128 + (abs:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "v"))))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xsnabsqp %0,%1" + [(set_attr "type" "vecmove") + (set_attr "size" "128")]) + +;; Initially don't worry about doing fusion +(define_insn "*fma<mode>4_hw" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (fma:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "%v") + (match_operand:IEEE128 2 "altivec_register_operand" "v") + (match_operand:IEEE128 3 "altivec_register_operand" "0")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xsmaddqp %0,%1,%2" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +(define_insn "*fms<mode>4_hw" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (fma:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "%v") + (match_operand:IEEE128 2 "altivec_register_operand" "v") + (neg:IEEE128 + (match_operand:IEEE128 3 "altivec_register_operand" "0"))))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xsmsubqp %0,%1,%2" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +(define_insn "*nfma<mode>4_hw" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (neg:IEEE128 + (fma:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "%v") + (match_operand:IEEE128 2 "altivec_register_operand" "v") + (match_operand:IEEE128 3 "altivec_register_operand" "0"))))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xsnmaddqp %0,%1,%2" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +(define_insn "*nfms<mode>4_hw" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (neg:IEEE128 + (fma:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "%v") + (match_operand:IEEE128 2 "altivec_register_operand" "v") + (neg:IEEE128 + (match_operand:IEEE128 3 "altivec_register_operand" "0")))))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xsnmsubqp %0,%1,%2" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +(define_insn "extend<SFDF:mode><IEEE128:mode>2_hw" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (float_extend:IEEE128 + (match_operand:SFDF 1 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<IEEE128:MODE>mode)" + "xscvdpqp %0,%1" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +;; Conversion between KFmode and TFmode if TFmode is ieee 128-bit floating +;; point is a simple copy. 
+(define_insn_and_split "extendkftf2" + [(set (match_operand:TF 0 "vsx_register_operand" "=wa,?wa") + (float_extend:TF (match_operand:KF 1 "vsx_register_operand" "0,wa")))] + "TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD" + "@ + # + xxlor %x0,%x1,%x1" + "&& reload_completed && REGNO (operands[0]) == REGNO (operands[1])" + [(const_int 0)] +{ + emit_note (NOTE_INSN_DELETED); + DONE; +} + [(set_attr "type" "*,veclogical") + (set_attr "length" "0,4")]) + +(define_insn_and_split "trunctfkf2" + [(set (match_operand:KF 0 "vsx_register_operand" "=wa,?wa") + (float_extend:KF (match_operand:TF 1 "vsx_register_operand" "0,wa")))] + "TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD" + "@ + # + xxlor %x0,%x1,%x1" + "&& reload_completed && REGNO (operands[0]) == REGNO (operands[1])" + [(const_int 0)] +{ + emit_note (NOTE_INSN_DELETED); + DONE; +} + [(set_attr "type" "*,veclogical") + (set_attr "length" "0,4")]) + +(define_insn "trunc<mode>df2_hw" + [(set (match_operand:DF 0 "altivec_register_operand" "=v") + (float_truncate:DF + (match_operand:IEEE128 1 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xscvqpdp %0,%1" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +;; There is no KFmode -> SFmode instruction. Preserve the accuracy by doing +;; the KFmode -> DFmode conversion using round to odd rather than the normal +;; conversion +(define_insn_and_split "trunc<mode>sf2_hw" + [(set (match_operand:SF 0 "vsx_register_operand" "=wy") + (float_truncate:SF + (match_operand:IEEE128 1 "altivec_register_operand" "v"))) + (clobber (match_scratch:DF 2 "=v"))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "#" + "&& 1" + [(set (match_dup 2) + (unspec:DF [(match_dup 1)] UNSPEC_ROUND_TO_ODD)) + (set (match_dup 0) + (float_truncate:SF (match_dup 2)))] +{ + if (GET_CODE (operands[2]) == SCRATCH) + operands[2] = gen_reg_rtx (DFmode); +} + [(set_attr "type" "vecfloat") + (set_attr "length" "8")]) + +;; Conversion between IEEE 128-bit and integer types +(define_insn "fix_<mode>di2_hw" + [(set (match_operand:DI 0 "altivec_register_operand" "=v") + (fix:DI (match_operand:IEEE128 1 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xscvqpsdz %0,%1" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +(define_insn "fixuns_<mode>di2_hw" + [(set (match_operand:DI 0 "altivec_register_operand" "=v") + (unsigned_fix:DI (match_operand:IEEE128 1 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xscvqpudz %0,%1" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +(define_insn "fix_<mode>si2_hw" + [(set (match_operand:SI 0 "altivec_register_operand" "=v") + (fix:SI (match_operand:IEEE128 1 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xscvqpswz %0,%1" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +(define_insn "fixuns_<mode>si2_hw" + [(set (match_operand:SI 0 "altivec_register_operand" "=v") + (unsigned_fix:SI (match_operand:IEEE128 1 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xscvqpuwz %0,%1" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +;; Combiner pattern to prevent moving the result of converting an IEEE 128-bit +;; floating point value to 32-bit integer to GPR in order to save it. 
+(define_insn_and_split "*fix<uns>_<mode>_mem" + [(set (match_operand:SI 0 "memory_operand" "=Z") + (any_fix:SI (match_operand:IEEE128 1 "altivec_register_operand" "v"))) + (clobber (match_scratch:SI 2 "=v"))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "#" + "&& reload_completed" + [(set (match_dup 2) + (any_fix:SI (match_dup 1))) + (set (match_dup 0) + (match_dup 2))]) + +(define_insn "float_<mode>di2_hw" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (float:IEEE128 (match_operand:DI 1 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xscvsdqp %0,%1" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +(define_insn_and_split "float_<mode>si2_hw" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (float:IEEE128 (match_operand:SI 1 "nonimmediate_operand" "vrZ"))) + (clobber (match_scratch:DI 2 "=v"))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "#" + "&& 1" + [(set (match_dup 2) + (sign_extend:DI (match_dup 1))) + (set (match_dup 0) + (float:IEEE128 (match_dup 2)))] +{ + if (GET_CODE (operands[2]) == SCRATCH) + operands[2] = gen_reg_rtx (DImode); +}) + +(define_insn_and_split "float<QHI:mode><IEEE128:mode>2" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v,v,v") + (float:IEEE128 (match_operand:QHI 1 "nonimmediate_operand" "v,r,Z"))) + (clobber (match_scratch:DI 2 "=X,r,X"))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<IEEE128:MODE>mode)" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx dest_di = gen_rtx_REG (DImode, REGNO (dest)); + + if (altivec_register_operand (src, <QHI:MODE>mode)) + emit_insn (gen_extend<QHI:mode>di2 (dest_di, src)); + else if (int_reg_operand (src, <QHI:MODE>mode)) + { + rtx ext_di = operands[2]; + emit_insn (gen_extend<QHI:mode>di2 (ext_di, src)); + emit_move_insn (dest_di, ext_di); + } + else if (MEM_P (src)) + { + rtx dest_qhi = gen_rtx_REG (<QHI:MODE>mode, REGNO (dest)); + emit_move_insn (dest_qhi, src); + emit_insn (gen_extend<QHI:mode>di2 (dest_di, dest_qhi)); + } + else + gcc_unreachable (); + + emit_insn (gen_float_<IEEE128:mode>di2_hw (dest, dest_di)); + DONE; +} + [(set_attr "length" "8,12,12") + (set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +(define_insn "floatuns_<mode>di2_hw" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (unsigned_float:IEEE128 + (match_operand:DI 1 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xscvudqp %0,%1" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +(define_insn_and_split "floatuns_<mode>si2_hw" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (unsigned_float:IEEE128 + (match_operand:SI 1 "nonimmediate_operand" "vrZ"))) + (clobber (match_scratch:DI 2 "=v"))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "#" + "&& 1" + [(set (match_dup 2) + (zero_extend:DI (match_dup 1))) + (set (match_dup 0) + (float:IEEE128 (match_dup 2)))] +{ + if (GET_CODE (operands[2]) == SCRATCH) + operands[2] = gen_reg_rtx (DImode); +}) + +(define_insn_and_split "floatuns<QHI:mode><IEEE128:mode>2" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v,v,v") + (unsigned_float:IEEE128 + (match_operand:QHI 1 "nonimmediate_operand" "v,r,Z"))) + (clobber (match_scratch:DI 2 "=X,r,X"))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<IEEE128:MODE>mode)" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = 
operands[1]; + rtx dest_di = gen_rtx_REG (DImode, REGNO (dest)); + + if (altivec_register_operand (src, <QHI:MODE>mode) || MEM_P (src)) + emit_insn (gen_zero_extend<QHI:mode>di2 (dest_di, src)); + else if (int_reg_operand (src, <QHI:MODE>mode)) + { + rtx ext_di = operands[2]; + emit_insn (gen_zero_extend<QHI:mode>di2 (ext_di, src)); + emit_move_insn (dest_di, ext_di); + } + else + gcc_unreachable (); + + emit_insn (gen_floatuns_<IEEE128:mode>di2_hw (dest, dest_di)); + DONE; +} + [(set_attr "length" "8,12,8") + (set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +;; IEEE 128-bit instructions with round to odd semantics +(define_insn "*trunc<mode>df2_odd" + [(set (match_operand:DF 0 "vsx_register_operand" "=v") + (unspec:DF [(match_operand:IEEE128 1 "altivec_register_operand" "v")] + UNSPEC_ROUND_TO_ODD))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xscvqpdpo %0,%1" + [(set_attr "type" "vecfloat") + (set_attr "size" "128")]) + +;; IEEE 128-bit comparisons +(define_insn "*cmp<mode>_hw" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (compare:CCFP (match_operand:IEEE128 1 "altivec_register_operand" "v") + (match_operand:IEEE128 2 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xscmpuqp %0,%1,%2" + [(set_attr "type" "veccmp") + (set_attr "size" "128")]) + + + +(include "sync.md") +(include "vector.md") +(include "vsx.md") +(include "altivec.md") +(include "spe.md") +(include "dfp.md") +(include "paired.md") +(include "crypto.md") +(include "htm.md") diff --git a/gcc/config/powerpcspe/powerpcspe.opt b/gcc/config/powerpcspe/powerpcspe.opt new file mode 100644 index 000000000000..dc2fb11d1757 --- /dev/null +++ b/gcc/config/powerpcspe/powerpcspe.opt @@ -0,0 +1,702 @@ +; Options for the rs6000 port of the compiler +; +; Copyright (C) 2005-2017 Free Software Foundation, Inc. +; Contributed by Aldy Hernandez <aldy@quesejoda.com>. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. + +HeaderInclude +config/powerpcspe/powerpcspe-opts.h + +;; ISA flag bits (on/off) +Variable +HOST_WIDE_INT rs6000_isa_flags = TARGET_DEFAULT + +TargetSave +HOST_WIDE_INT x_rs6000_isa_flags + +;; Miscellaneous flag bits that were set explicitly by the user +Variable +HOST_WIDE_INT rs6000_isa_flags_explicit + +TargetSave +HOST_WIDE_INT x_rs6000_isa_flags_explicit + +;; Current processor +TargetVariable +enum processor_type rs6000_cpu = PROCESSOR_PPC603 + +;; Always emit branch hint bits. +TargetVariable +unsigned char rs6000_always_hint + +;; Schedule instructions for group formation. +TargetVariable +unsigned char rs6000_sched_groups + +;; Align branch targets. +TargetVariable +unsigned char rs6000_align_branch_targets + +;; Support for -msched-costly-dep option. +TargetVariable +enum rs6000_dependence_cost rs6000_sched_costly_dep = no_dep_costly + +;; Support for -minsert-sched-nops option. 
+TargetVariable +enum rs6000_nop_insertion rs6000_sched_insert_nops = sched_finish_none + +;; Non-zero to allow overriding loop alignment. +TargetVariable +unsigned char can_override_loop_align + +;; Which small data model to use (for System V targets only) +TargetVariable +enum rs6000_sdata_type rs6000_sdata = SDATA_DATA + +;; Bit size of immediate TLS offsets and string from which it is decoded. +TargetVariable +int rs6000_tls_size = 32 + +;; ABI enumeration available for subtarget to use. +TargetVariable +enum rs6000_abi rs6000_current_abi = ABI_NONE + +;; Type of traceback to use. +TargetVariable +enum rs6000_traceback_type rs6000_traceback = traceback_default + +;; Control alignment for fields within structures. +TargetVariable +unsigned char rs6000_alignment_flags + +;; Code model for 64-bit linux. +TargetVariable +enum rs6000_cmodel rs6000_current_cmodel = CMODEL_SMALL + +;; What type of reciprocal estimation instructions to generate +TargetVariable +unsigned int rs6000_recip_control + +;; Mask of what builtin functions are allowed +TargetVariable +HOST_WIDE_INT rs6000_builtin_mask + +;; Debug flags +TargetVariable +unsigned int rs6000_debug + +;; This option existed in the past, but now is always on. +mpowerpc +Target RejectNegative Undocumented Ignore + +mpowerpc64 +Target Report Mask(POWERPC64) Var(rs6000_isa_flags) +Use PowerPC-64 instruction set. + +mpowerpc-gpopt +Target Report Mask(PPC_GPOPT) Var(rs6000_isa_flags) +Use PowerPC General Purpose group optional instructions. + +mpowerpc-gfxopt +Target Report Mask(PPC_GFXOPT) Var(rs6000_isa_flags) +Use PowerPC Graphics group optional instructions. + +mmfcrf +Target Report Mask(MFCRF) Var(rs6000_isa_flags) +Use PowerPC V2.01 single field mfcr instruction. + +mpopcntb +Target Report Mask(POPCNTB) Var(rs6000_isa_flags) +Use PowerPC V2.02 popcntb instruction. + +mfprnd +Target Report Mask(FPRND) Var(rs6000_isa_flags) +Use PowerPC V2.02 floating point rounding instructions. + +mcmpb +Target Report Mask(CMPB) Var(rs6000_isa_flags) +Use PowerPC V2.05 compare bytes instruction. + +mmfpgpr +Target Report Mask(MFPGPR) Var(rs6000_isa_flags) +Use extended PowerPC V2.05 move floating point to/from GPR instructions. + +maltivec +Target Report Mask(ALTIVEC) Var(rs6000_isa_flags) +Use AltiVec instructions. + +maltivec=le +Target Report RejectNegative Var(rs6000_altivec_element_order, 1) Save +Generate AltiVec instructions using little-endian element order. + +maltivec=be +Target Report RejectNegative Var(rs6000_altivec_element_order, 2) +Generate AltiVec instructions using big-endian element order. + +mhard-dfp +Target Report Mask(DFP) Var(rs6000_isa_flags) +Use decimal floating point instructions. + +mmulhw +Target Report Mask(MULHW) Var(rs6000_isa_flags) +Use 4xx half-word multiply instructions. + +mdlmzb +Target Report Mask(DLMZB) Var(rs6000_isa_flags) +Use 4xx string-search dlmzb instruction. + +mmultiple +Target Report Mask(MULTIPLE) Var(rs6000_isa_flags) +Generate load/store multiple instructions. + +mstring +Target Report Mask(STRING) Var(rs6000_isa_flags) +Generate string instructions for block moves. + +msoft-float +Target Report RejectNegative Mask(SOFT_FLOAT) Var(rs6000_isa_flags) +Do not use hardware floating point. + +mhard-float +Target Report RejectNegative InverseMask(SOFT_FLOAT, HARD_FLOAT) Var(rs6000_isa_flags) +Use hardware floating point. + +mpopcntd +Target Report Mask(POPCNTD) Var(rs6000_isa_flags) +Use PowerPC V2.06 popcntd instruction. 
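Each Mask(...) entry carried in rs6000_isa_flags is turned by the options machinery into an OPTION_MASK_* bit and a TARGET_* macro, and it is those macros that gate the insn patterns earlier in this patch (the addg6s, cdtbcd and cbcdtd patterns, for instance, test TARGET_POPCNTD). A minimal sketch of that mapping, with the type alias and bit position assumed here rather than taken from the generated headers:

/* Sketch only: the real OPTION_MASK_/TARGET_ definitions are generated
   from this .opt file; the bit position below is arbitrary.  */
typedef long long HOST_WIDE_INT;        /* stand-in for GCC's hwint.h type */
extern HOST_WIDE_INT rs6000_isa_flags;  /* provided by the options machinery */

#define OPTION_MASK_POPCNTD (1LL << 0)
#define TARGET_POPCNTD ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)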
+ +mfriz +Target Report Var(TARGET_FRIZ) Init(-1) Save +Under -ffast-math, generate a FRIZ instruction for (double)(long long) conversions. + +mveclibabi= +Target RejectNegative Joined Var(rs6000_veclibabi_name) +Vector library ABI to use. + +mvsx +Target Report Mask(VSX) Var(rs6000_isa_flags) +Use vector/scalar (VSX) instructions. + +mvsx-scalar-float +Target Undocumented Report Var(TARGET_VSX_SCALAR_FLOAT) Init(1) +; If -mpower8-vector, use VSX arithmetic instructions for SFmode (on by default) + +mvsx-scalar-double +Target Undocumented Report Var(TARGET_VSX_SCALAR_DOUBLE) Init(1) +; If -mvsx, use VSX arithmetic instructions for DFmode (on by default) + +mvsx-scalar-memory +Target Undocumented Report Alias(mupper-regs-df) + +mvsx-align-128 +Target Undocumented Report Var(TARGET_VSX_ALIGN_128) Save +; If -mvsx, set alignment to 128 bits instead of 32/64 + +mallow-movmisalign +Target Undocumented Var(TARGET_ALLOW_MOVMISALIGN) Init(-1) Save +; Allow the movmisalign in DF/DI vectors + +mefficient-unaligned-vsx +Target Undocumented Report Mask(EFFICIENT_UNALIGNED_VSX) Var(rs6000_isa_flags) +; Consider unaligned VSX vector and fp accesses to be efficient + +mallow-df-permute +Target Undocumented Var(TARGET_ALLOW_DF_PERMUTE) Save +; Allow permutation of DF/DI vectors + +msched-groups +Target Undocumented Report Var(TARGET_SCHED_GROUPS) Init(-1) Save +; Explicitly set rs6000_sched_groups + +malways-hint +Target Undocumented Report Var(TARGET_ALWAYS_HINT) Init(-1) Save +; Explicitly set rs6000_always_hint + +malign-branch-targets +Target Undocumented Report Var(TARGET_ALIGN_BRANCH_TARGETS) Init(-1) Save +; Explicitly set rs6000_align_branch_targets + +mvectorize-builtins +Target Undocumented Report Var(TARGET_VECTORIZE_BUILTINS) Init(-1) Save +; Explicitly control whether we vectorize the builtins or not. + +mno-update +Target Report RejectNegative Mask(NO_UPDATE) Var(rs6000_isa_flags) +Do not generate load/store with update instructions. + +mupdate +Target Report RejectNegative InverseMask(NO_UPDATE, UPDATE) Var(rs6000_isa_flags) +Generate load/store with update instructions. + +msingle-pic-base +Target Report Var(TARGET_SINGLE_PIC_BASE) Init(0) +Do not load the PIC register in function prologues. + +mavoid-indexed-addresses +Target Report Var(TARGET_AVOID_XFORM) Init(-1) Save +Avoid generation of indexed load/store instructions when possible. + +mtls-markers +Target Report Var(tls_markers) Init(1) Save +Mark __tls_get_addr calls with argument info. + +msched-epilog +Target Undocumented Var(TARGET_SCHED_PROLOG) Init(1) Save + +msched-prolog +Target Report Var(TARGET_SCHED_PROLOG) Save +Schedule the start and end of the procedure. + +maix-struct-return +Target Report RejectNegative Var(aix_struct_return) Save +Return all structures in memory (AIX default). + +msvr4-struct-return +Target Report RejectNegative Var(aix_struct_return,0) Save +Return small structures in registers (SVR4 default). + +mxl-compat +Target Report Var(TARGET_XL_COMPAT) Save +Conform more closely to IBM XLC semantics. + +mrecip +Target Report +Generate software reciprocal divide and square root for better throughput. + +mrecip= +Target Report RejectNegative Joined Var(rs6000_recip_name) +Generate software reciprocal divide and square root for better throughput. + +mrecip-precision +Target Report Mask(RECIP_PRECISION) Var(rs6000_isa_flags) +Assume that the reciprocal estimate instructions provide more accuracy. 
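The -mrecip family replaces divide and square-root operations with hardware estimate instructions plus refinement steps, and -mrecip-precision asserts that the estimates are accurate enough to need fewer refinement iterations. A hedged example of the kind of user code this targets (the function and variable names are illustrative only):

/* Under -ffast-math together with -mrecip, the divide-by-sqrt below is
   a candidate for a reciprocal-square-root estimate refined by
   Newton-Raphson iterations instead of fdiv/fsqrt.  */
void
normalize (const float *x, float *y, int n)
{
  for (int i = 0; i < n; i++)
    y[i] = x[i] / __builtin_sqrtf (x[i] * x[i] + 1.0f);
}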
+ +mno-fp-in-toc +Target Report RejectNegative Var(TARGET_NO_FP_IN_TOC) Save +Do not place floating point constants in TOC. + +mfp-in-toc +Target Report RejectNegative Var(TARGET_NO_FP_IN_TOC,0) Save +Place floating point constants in TOC. + +mno-sum-in-toc +Target RejectNegative Var(TARGET_NO_SUM_IN_TOC) Save +Do not place symbol+offset constants in TOC. + +msum-in-toc +Target RejectNegative Var(TARGET_NO_SUM_IN_TOC,0) Save +Place symbol+offset constants in TOC. + +; Output only one TOC entry per module. Normally linking fails if +; there are more than 16K unique variables/constants in an executable. With +; this option, linking fails only if there are more than 16K modules, or +; if there are more than 16K unique variables/constant in a single module. +; +; This is at the cost of having 2 extra loads and one extra store per +; function, and one less allocable register. +mminimal-toc +Target Report Mask(MINIMAL_TOC) Var(rs6000_isa_flags) +Use only one TOC entry per procedure. + +mfull-toc +Target Report +Put everything in the regular TOC. + +mvrsave +Target Report Var(TARGET_ALTIVEC_VRSAVE) Save +Generate VRSAVE instructions when generating AltiVec code. + +mvrsave=no +Target RejectNegative Alias(mvrsave) NegativeAlias +Deprecated option. Use -mno-vrsave instead. + +mvrsave=yes +Target RejectNegative Alias(mvrsave) +Deprecated option. Use -mvrsave instead. + +mblock-move-inline-limit= +Target Report Var(rs6000_block_move_inline_limit) Init(0) RejectNegative Joined UInteger Save +Specify how many bytes should be moved inline before calling out to memcpy/memmove. + +mblock-compare-inline-limit= +Target Report Var(rs6000_block_compare_inline_limit) Init(5) RejectNegative Joined UInteger Save +Specify the maximum number pairs of load instructions that should be generated inline for the compare. If the number needed exceeds the limit, a call to memcmp will be generated instead. + +mstring-compare-inline-limit= +Target Report Var(rs6000_string_compare_inline_limit) Init(8) RejectNegative Joined UInteger Save +Specify the maximum number pairs of load instructions that should be generated inline for the compare. If the number needed exceeds the limit, a call to strncmp will be generated instead. + +misel +Target Report Mask(ISEL) Var(rs6000_isa_flags) +Generate isel instructions. + +misel=no +Target RejectNegative Alias(misel) NegativeAlias +Deprecated option. Use -mno-isel instead. + +misel=yes +Target RejectNegative Alias(misel) +Deprecated option. Use -misel instead. + +mspe +Target Var(rs6000_spe) Save +Generate SPE SIMD instructions on E500. + +mpaired +Target Var(rs6000_paired_float) Save +Generate PPC750CL paired-single instructions. + +mspe=no +Target RejectNegative Alias(mspe) NegativeAlias +Deprecated option. Use -mno-spe instead. + +mspe=yes +Target RejectNegative Alias(mspe) +Deprecated option. Use -mspe instead. + +mdebug= +Target RejectNegative Joined +-mdebug= Enable debug output. + +mabi=altivec +Target RejectNegative Var(rs6000_altivec_abi) Save +Use the AltiVec ABI extensions. + +mabi=no-altivec +Target RejectNegative Var(rs6000_altivec_abi, 0) +Do not use the AltiVec ABI extensions. + +mabi=spe +Target RejectNegative Var(rs6000_spe_abi) Save +Use the SPE ABI extensions. + +mabi=no-spe +Target RejectNegative Var(rs6000_spe_abi, 0) +Do not use the SPE ABI extensions. + +mabi=elfv1 +Target RejectNegative Var(rs6000_elf_abi, 1) Save +Use the ELFv1 ABI. + +mabi=elfv2 +Target RejectNegative Var(rs6000_elf_abi, 2) +Use the ELFv2 ABI. 
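-mblock-move-inline-limit= caps the size of block copies that are expanded inline rather than dispatched to memcpy. An illustrative example (the structure size here is an assumption, not a target default):

#include <string.h>

struct packet { char payload[64]; };

/* Whether this copy becomes an inline sequence of loads and stores or
   a call to memcpy depends on sizeof (struct packet) relative to the
   -mblock-move-inline-limit= setting.  */
void
copy_packet (struct packet *dst, const struct packet *src)
{
  memcpy (dst, src, sizeof *dst);
}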
+ +; These are here for testing during development only, do not document +; in the manual please. + +; If we want Darwin's struct-by-value-in-regs ABI. +mabi=d64 +Target RejectNegative Undocumented Warn(using darwin64 ABI) Var(rs6000_darwin64_abi) Save + +mabi=d32 +Target RejectNegative Undocumented Warn(using old darwin ABI) Var(rs6000_darwin64_abi, 0) + +mabi=ieeelongdouble +Target RejectNegative Undocumented Warn(using IEEE extended precision long double) Var(rs6000_ieeequad) Save + +mabi=ibmlongdouble +Target RejectNegative Undocumented Warn(using IBM extended precision long double) Var(rs6000_ieeequad, 0) + +mcpu= +Target RejectNegative Joined Var(rs6000_cpu_index) Init(-1) Enum(rs6000_cpu_opt_value) Save +-mcpu= Use features of and schedule code for given CPU. + +mtune= +Target RejectNegative Joined Var(rs6000_tune_index) Init(-1) Enum(rs6000_cpu_opt_value) Save +-mtune= Schedule code for given CPU. + +mtraceback= +Target RejectNegative Joined Enum(rs6000_traceback_type) Var(rs6000_traceback) +-mtraceback= Select full, part, or no traceback table. + +Enum +Name(rs6000_traceback_type) Type(enum rs6000_traceback_type) + +EnumValue +Enum(rs6000_traceback_type) String(full) Value(traceback_full) + +EnumValue +Enum(rs6000_traceback_type) String(part) Value(traceback_part) + +EnumValue +Enum(rs6000_traceback_type) String(no) Value(traceback_none) + +mlongcall +Target Report Var(rs6000_default_long_calls) Save +Avoid all range limits on call instructions. + +mgen-cell-microcode +Target Report Var(rs6000_gen_cell_microcode) Init(-1) Save +Generate Cell microcode. + +mwarn-cell-microcode +Target Var(rs6000_warn_cell_microcode) Init(0) Warning Save +Warn when a Cell microcoded instruction is emitted. + +mwarn-altivec-long +Target Var(rs6000_warn_altivec_long) Init(1) Save +Warn about deprecated 'vector long ...' AltiVec type usage. + +mfloat-gprs= +Target RejectNegative Joined Enum(rs6000_float_gprs) Var(rs6000_float_gprs) Save +-mfloat-gprs= Select GPR floating point method. + +Enum +Name(rs6000_float_gprs) Type(unsigned char) +Valid arguments to -mfloat-gprs=: + +EnumValue +Enum(rs6000_float_gprs) String(yes) Value(1) + +EnumValue +Enum(rs6000_float_gprs) String(single) Value(1) + +EnumValue +Enum(rs6000_float_gprs) String(double) Value(2) + +EnumValue +Enum(rs6000_float_gprs) String(no) Value(0) + +mlong-double- +Target RejectNegative Joined UInteger Var(rs6000_long_double_type_size) Save +-mlong-double-<n> Specify size of long double (64 or 128 bits). + +mlra +Target Report Mask(LRA) Var(rs6000_isa_flags) +Enable Local Register Allocation. + +msched-costly-dep= +Target RejectNegative Joined Var(rs6000_sched_costly_dep_str) +Determine which dependences between insns are considered costly. + +minsert-sched-nops= +Target RejectNegative Joined Var(rs6000_sched_insert_nops_str) +Specify which post scheduling nop insertion scheme to apply. + +malign- +Target RejectNegative Joined Enum(rs6000_alignment_flags) Var(rs6000_alignment_flags) +Specify alignment of structure fields default/natural. + +Enum +Name(rs6000_alignment_flags) Type(unsigned char) +Valid arguments to -malign-: + +EnumValue +Enum(rs6000_alignment_flags) String(power) Value(MASK_ALIGN_POWER) + +EnumValue +Enum(rs6000_alignment_flags) String(natural) Value(MASK_ALIGN_NATURAL) + +mprioritize-restricted-insns= +Target RejectNegative Joined UInteger Var(rs6000_sched_restricted_insns_priority) Save +Specify scheduling priority for dispatch slot restricted insns. 
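-mlong-double-<n> selects a 64-bit or 128-bit long double; a small check of the effect (subject to the ABI in use, so treat the printed values as expectations rather than guarantees):

#include <stdio.h>

/* Typically prints 8 under -mlong-double-64 and 16 under
   -mlong-double-128.  */
int
main (void)
{
  printf ("sizeof (long double) = %zu\n", sizeof (long double));
  return 0;
}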
+ +msingle-float +Target RejectNegative Var(rs6000_single_float) Save +Single-precision floating point unit. + +mdouble-float +Target RejectNegative Var(rs6000_double_float) Save +Double-precision floating point unit. + +msimple-fpu +Target RejectNegative Var(rs6000_simple_fpu) Save +Floating point unit does not support divide & sqrt. + +mfpu= +Target RejectNegative Joined Enum(fpu_type_t) Var(rs6000_fpu_type) Init(FPU_NONE) +-mfpu= Specify FP (sp, dp, sp-lite, dp-lite) (implies -mxilinx-fpu). + +Enum +Name(fpu_type_t) Type(enum fpu_type_t) + +EnumValue +Enum(fpu_type_t) String(none) Value(FPU_NONE) + +EnumValue +Enum(fpu_type_t) String(sp_lite) Value(FPU_SF_LITE) + +EnumValue +Enum(fpu_type_t) String(dp_lite) Value(FPU_DF_LITE) + +EnumValue +Enum(fpu_type_t) String(sp_full) Value(FPU_SF_FULL) + +EnumValue +Enum(fpu_type_t) String(dp_full) Value(FPU_DF_FULL) + +mxilinx-fpu +Target Var(rs6000_xilinx_fpu) Save +Specify Xilinx FPU. + +mpointers-to-nested-functions +Target Report Var(TARGET_POINTERS_TO_NESTED_FUNCTIONS) Init(1) Save +Use r11 to hold the static link in calls to functions via pointers. + +msave-toc-indirect +Target Report Mask(SAVE_TOC_INDIRECT) Var(rs6000_isa_flags) +Save the TOC in the prologue for indirect calls rather than inline. + +mvsx-timode +Target Undocumented Mask(VSX_TIMODE) Var(rs6000_isa_flags) +Allow 128-bit integers in VSX registers. + +mpower8-fusion +Target Report Mask(P8_FUSION) Var(rs6000_isa_flags) +Fuse certain integer operations together for better performance on power8. + +mpower8-fusion-sign +Target Undocumented Mask(P8_FUSION_SIGN) Var(rs6000_isa_flags) +Allow sign extension in fusion operations. + +mpower8-vector +Target Report Mask(P8_VECTOR) Var(rs6000_isa_flags) +Use vector and scalar instructions added in ISA 2.07. + +mcrypto +Target Report Mask(CRYPTO) Var(rs6000_isa_flags) +Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions. + +mdirect-move +Target Report Mask(DIRECT_MOVE) Var(rs6000_isa_flags) +Use ISA 2.07 direct move between GPR & VSX register instructions. + +mhtm +Target Report Mask(HTM) Var(rs6000_isa_flags) +Use ISA 2.07 transactional memory (HTM) instructions. + +mquad-memory +Target Report Mask(QUAD_MEMORY) Var(rs6000_isa_flags) +Generate the quad word memory instructions (lq/stq). + +mquad-memory-atomic +Target Report Mask(QUAD_MEMORY_ATOMIC) Var(rs6000_isa_flags) +Generate the quad word memory atomic instructions (lqarx/stqcx). + +mcompat-align-parm +Target Report Var(rs6000_compat_align_parm) Init(0) Save +Generate aggregate parameter passing code with at most 64-bit alignment. + +mupper-regs-df +Target Report Mask(UPPER_REGS_DF) Var(rs6000_isa_flags) +Allow double variables in upper registers with -mcpu=power7 or -mvsx. + +mupper-regs-sf +Target Report Mask(UPPER_REGS_SF) Var(rs6000_isa_flags) +Allow float variables in upper registers with -mcpu=power8 or -mpower8-vector. + +mupper-regs +Target Report Var(TARGET_UPPER_REGS) Init(-1) Save +Allow float/double variables in upper registers if cpu allows it. + +mupper-regs-di +Target Report Mask(UPPER_REGS_DI) Var(rs6000_isa_flags) +Allow 64-bit integer variables in upper registers with -mcpu=power7 or -mvsx. + +moptimize-swaps +Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save +Analyze and remove doubleword swaps from VSX computations. + +mpower9-fusion +Target Undocumented Report Mask(P9_FUSION) Var(rs6000_isa_flags) +Fuse certain operations together for better performance on power9. 
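-mquad-memory enables the lq/stq quad-word memory instructions mentioned above; a hedged sketch of code that can use them (whether the pair is actually emitted depends on alignment and tuning):

/* On a 64-bit target with -mquad-memory, a 16-byte aligned copy such
   as this one is a candidate for a single lq/stq pair; otherwise it
   is done as two doubleword accesses.  */
void
copy16 (unsigned __int128 *dst, const unsigned __int128 *src)
{
  *dst = *src;
}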
+ +mpower9-misc +Target Undocumented Report Mask(P9_MISC) Var(rs6000_isa_flags) +Use certain scalar instructions added in ISA 3.0. + +mpower9-vector +Target Undocumented Report Mask(P9_VECTOR) Var(rs6000_isa_flags) +Use vector instructions added in ISA 3.0. + +mpower9-dform-scalar +Target Undocumented Mask(P9_DFORM_SCALAR) Var(rs6000_isa_flags) +Use scalar register+offset memory instructions added in ISA 3.0. + +mpower9-dform-vector +Target Undocumented Mask(P9_DFORM_VECTOR) Var(rs6000_isa_flags) +Use vector register+offset memory instructions added in ISA 3.0. + +mpower9-dform +Target Undocumented Report Var(TARGET_P9_DFORM_BOTH) Init(-1) Save +Use register+offset memory instructions added in ISA 3.0. + +mpower9-minmax +Target Undocumented Mask(P9_MINMAX) Var(rs6000_isa_flags) +Use the new min/max instructions defined in ISA 3.0. + +mtoc-fusion +Target Undocumented Mask(TOC_FUSION) Var(rs6000_isa_flags) +Fuse medium/large code model toc references with the memory instruction. + +mmodulo +Target Undocumented Report Mask(MODULO) Var(rs6000_isa_flags) +Generate the integer modulo instructions. + +; We want to enable the internal support for the IEEE 128-bit floating point +; type without necessarily enabling the __float128 keyword. This is to allow +; Boost and other libraries that know about __float128 to work until the +; official library support is finished. +mfloat128-type +Target Undocumented Mask(FLOAT128_TYPE) Var(rs6000_isa_flags) +Allow the IEEE 128-bit types without requiring the __float128 keyword. + +mfloat128 +Target Report Mask(FLOAT128_KEYWORD) Var(rs6000_isa_flags) +Enable IEEE 128-bit floating point via the __float128 keyword. + +mfloat128-hardware +Target Report Mask(FLOAT128_HW) Var(rs6000_isa_flags) +Enable using IEEE 128-bit floating point instructions. + +mfloat128-convert +Target Undocumented Mask(FLOAT128_CVT) Var(rs6000_isa_flags) +Enable default conversions between __float128 & long double. + +mvsx-small-integer +Target Report Mask(VSX_SMALL_INTEGER) Var(rs6000_isa_flags) +Enable small integers to be in VSX registers. + +mstack-protector-guard= +Target RejectNegative Joined Enum(stack_protector_guard) Var(rs6000_stack_protector_guard) Init(SSP_TLS) +Use given stack-protector guard. + +Enum +Name(stack_protector_guard) Type(enum stack_protector_guard) +Valid arguments to -mstack-protector-guard=: + +EnumValue +Enum(stack_protector_guard) String(tls) Value(SSP_TLS) + +EnumValue +Enum(stack_protector_guard) String(global) Value(SSP_GLOBAL) + +mstack-protector-guard-reg= +Target RejectNegative Joined Var(rs6000_stack_protector_guard_reg_str) +Use the given base register for addressing the stack-protector guard. + +TargetVariable +int rs6000_stack_protector_guard_reg = 0 + +mstack-protector-guard-offset= +Target RejectNegative Joined Integer Var(rs6000_stack_protector_guard_offset_str) +Use the given offset for addressing the stack-protector guard. + +TargetVariable +long rs6000_stack_protector_guard_offset = 0 diff --git a/gcc/config/powerpcspe/ppc-asm.h b/gcc/config/powerpcspe/ppc-asm.h new file mode 100644 index 000000000000..33925e558b03 --- /dev/null +++ b/gcc/config/powerpcspe/ppc-asm.h @@ -0,0 +1,381 @@ +/* PowerPC asm definitions for GNU C. + +Copyright (C) 2002-2017 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. 
+ +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* Under winnt, 1) gas supports the following as names and 2) in particular + defining "toc" breaks the FUNC_START macro as ".toc" becomes ".2" */ + +#define r0 0 +#define sp 1 +#define toc 2 +#define r3 3 +#define r4 4 +#define r5 5 +#define r6 6 +#define r7 7 +#define r8 8 +#define r9 9 +#define r10 10 +#define r11 11 +#define r12 12 +#define r13 13 +#define r14 14 +#define r15 15 +#define r16 16 +#define r17 17 +#define r18 18 +#define r19 19 +#define r20 20 +#define r21 21 +#define r22 22 +#define r23 23 +#define r24 24 +#define r25 25 +#define r26 26 +#define r27 27 +#define r28 28 +#define r29 29 +#define r30 30 +#define r31 31 + +#define cr0 0 +#define cr1 1 +#define cr2 2 +#define cr3 3 +#define cr4 4 +#define cr5 5 +#define cr6 6 +#define cr7 7 + +#define f0 0 +#define f1 1 +#define f2 2 +#define f3 3 +#define f4 4 +#define f5 5 +#define f6 6 +#define f7 7 +#define f8 8 +#define f9 9 +#define f10 10 +#define f11 11 +#define f12 12 +#define f13 13 +#define f14 14 +#define f15 15 +#define f16 16 +#define f17 17 +#define f18 18 +#define f19 19 +#define f20 20 +#define f21 21 +#define f22 22 +#define f23 23 +#define f24 24 +#define f25 25 +#define f26 26 +#define f27 27 +#define f28 28 +#define f29 29 +#define f30 30 +#define f31 31 + +#ifdef __VSX__ +#define f32 32 +#define f33 33 +#define f34 34 +#define f35 35 +#define f36 36 +#define f37 37 +#define f38 38 +#define f39 39 +#define f40 40 +#define f41 41 +#define f42 42 +#define f43 43 +#define f44 44 +#define f45 45 +#define f46 46 +#define f47 47 +#define f48 48 +#define f49 49 +#define f50 30 +#define f51 51 +#define f52 52 +#define f53 53 +#define f54 54 +#define f55 55 +#define f56 56 +#define f57 57 +#define f58 58 +#define f59 59 +#define f60 60 +#define f61 61 +#define f62 62 +#define f63 63 +#endif + +#ifdef __ALTIVEC__ +#define v0 0 +#define v1 1 +#define v2 2 +#define v3 3 +#define v4 4 +#define v5 5 +#define v6 6 +#define v7 7 +#define v8 8 +#define v9 9 +#define v10 10 +#define v11 11 +#define v12 12 +#define v13 13 +#define v14 14 +#define v15 15 +#define v16 16 +#define v17 17 +#define v18 18 +#define v19 19 +#define v20 20 +#define v21 21 +#define v22 22 +#define v23 23 +#define v24 24 +#define v25 25 +#define v26 26 +#define v27 27 +#define v28 28 +#define v29 29 +#define v30 30 +#define v31 31 +#endif + +#ifdef __VSX__ +#define vs0 0 +#define vs1 1 +#define vs2 2 +#define vs3 3 +#define vs4 4 +#define vs5 5 +#define vs6 6 +#define vs7 7 +#define vs8 8 +#define vs9 9 +#define vs10 10 +#define vs11 11 +#define vs12 12 +#define vs13 13 +#define vs14 14 +#define vs15 15 +#define vs16 16 +#define vs17 17 +#define vs18 18 +#define vs19 19 +#define vs20 20 +#define vs21 21 +#define vs22 22 +#define vs23 23 +#define vs24 24 +#define vs25 25 +#define vs26 26 +#define vs27 27 +#define vs28 28 +#define vs29 29 +#define vs30 30 +#define vs31 31 +#define vs32 32 +#define 
vs33 33 +#define vs34 34 +#define vs35 35 +#define vs36 36 +#define vs37 37 +#define vs38 38 +#define vs39 39 +#define vs40 40 +#define vs41 41 +#define vs42 42 +#define vs43 43 +#define vs44 44 +#define vs45 45 +#define vs46 46 +#define vs47 47 +#define vs48 48 +#define vs49 49 +#define vs50 30 +#define vs51 51 +#define vs52 52 +#define vs53 53 +#define vs54 54 +#define vs55 55 +#define vs56 56 +#define vs57 57 +#define vs58 58 +#define vs59 59 +#define vs60 60 +#define vs61 61 +#define vs62 62 +#define vs63 63 +#endif + +/* + * Macros to glue together two tokens. + */ + +#ifdef __STDC__ +#define XGLUE(a,b) a##b +#else +#define XGLUE(a,b) a/**/b +#endif + +#define GLUE(a,b) XGLUE(a,b) + +/* + * Macros to begin and end a function written in assembler. If -mcall-aixdesc + * or -mcall-nt, create a function descriptor with the given name, and create + * the real function with one or two leading periods respectively. + */ + +#if defined(__powerpc64__) && _CALL_ELF == 2 + +/* Defining "toc" above breaks @toc in assembler code. */ +#undef toc + +#define FUNC_NAME(name) GLUE(__USER_LABEL_PREFIX__,name) +#define JUMP_TARGET(name) FUNC_NAME(name) +#define FUNC_START(name) \ + .type FUNC_NAME(name),@function; \ + .globl FUNC_NAME(name); \ +FUNC_NAME(name): \ +0: addis 2,12,(.TOC.-0b)@ha; \ + addi 2,2,(.TOC.-0b)@l; \ + .localentry FUNC_NAME(name),.-FUNC_NAME(name) + +#define HIDDEN_FUNC(name) \ + FUNC_START(name) \ + .hidden FUNC_NAME(name); + +#define FUNC_END(name) \ + .size FUNC_NAME(name),.-FUNC_NAME(name) + +#elif defined (__powerpc64__) + +#define FUNC_NAME(name) GLUE(.,name) +#define JUMP_TARGET(name) FUNC_NAME(name) +#define FUNC_START(name) \ + .section ".opd","aw"; \ +name: \ + .quad GLUE(.,name); \ + .quad .TOC.@tocbase; \ + .quad 0; \ + .previous; \ + .type GLUE(.,name),@function; \ + .globl name; \ + .globl GLUE(.,name); \ +GLUE(.,name): + +#define HIDDEN_FUNC(name) \ + FUNC_START(name) \ + .hidden name; \ + .hidden GLUE(.,name); + +#define FUNC_END(name) \ +GLUE(.L,name): \ + .size GLUE(.,name),GLUE(.L,name)-GLUE(.,name) + +#elif defined(_CALL_AIXDESC) + +#ifdef _RELOCATABLE +#define DESC_SECTION ".got2" +#else +#define DESC_SECTION ".got1" +#endif + +#define FUNC_NAME(name) GLUE(.,name) +#define JUMP_TARGET(name) FUNC_NAME(name) +#define FUNC_START(name) \ + .section DESC_SECTION,"aw"; \ +name: \ + .long GLUE(.,name); \ + .long _GLOBAL_OFFSET_TABLE_; \ + .long 0; \ + .previous; \ + .type GLUE(.,name),@function; \ + .globl name; \ + .globl GLUE(.,name); \ +GLUE(.,name): + +#define HIDDEN_FUNC(name) \ + FUNC_START(name) \ + .hidden name; \ + .hidden GLUE(.,name); + +#define FUNC_END(name) \ +GLUE(.L,name): \ + .size GLUE(.,name),GLUE(.L,name)-GLUE(.,name) + +#else + +#define FUNC_NAME(name) GLUE(__USER_LABEL_PREFIX__,name) +#if defined __PIC__ || defined __pic__ +#define JUMP_TARGET(name) FUNC_NAME(name@plt) +#else +#define JUMP_TARGET(name) FUNC_NAME(name) +#endif +#define FUNC_START(name) \ + .type FUNC_NAME(name),@function; \ + .globl FUNC_NAME(name); \ +FUNC_NAME(name): + +#define HIDDEN_FUNC(name) \ + FUNC_START(name) \ + .hidden FUNC_NAME(name); + +#define FUNC_END(name) \ +GLUE(.L,name): \ + .size FUNC_NAME(name),GLUE(.L,name)-FUNC_NAME(name) +#endif + +#ifdef IN_GCC +/* For HAVE_GAS_CFI_DIRECTIVE. 
*/ +#include "auto-host.h" + +#ifdef HAVE_GAS_CFI_DIRECTIVE +# define CFI_STARTPROC .cfi_startproc +# define CFI_ENDPROC .cfi_endproc +# define CFI_OFFSET(reg, off) .cfi_offset reg, off +# define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg +# define CFI_RESTORE(reg) .cfi_restore reg +#else +# define CFI_STARTPROC +# define CFI_ENDPROC +# define CFI_OFFSET(reg, off) +# define CFI_DEF_CFA_REGISTER(reg) +# define CFI_RESTORE(reg) +#endif +#endif + +#if defined __linux__ && !defined __powerpc64__ + .section .note.GNU-stack + .previous +#endif diff --git a/gcc/config/powerpcspe/ppc-auxv.h b/gcc/config/powerpcspe/ppc-auxv.h new file mode 100644 index 000000000000..c7e2e0bfbf2c --- /dev/null +++ b/gcc/config/powerpcspe/ppc-auxv.h @@ -0,0 +1,105 @@ +/* PowerPC support for accessing the AUXV AT_PLATFORM, AT_HWCAP and AT_HWCAP2 + values from the Thread Control Block (TCB). + + Copyright (C) 2016-2017 Free Software Foundation, Inc. + Contributed by Peter Bergner <bergner@vnet.ibm.com>. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _PPC_AUXV_H +#define _PPC_AUXV_H + +/* The PLATFORM value stored in the TCB is offset by _DL_FIRST_PLATFORM. */ +#define _DL_FIRST_PLATFORM 32 + +/* AT_PLATFORM bits. These must match the values defined in GLIBC. */ +#define PPC_PLATFORM_POWER4 0 +#define PPC_PLATFORM_PPC970 1 +#define PPC_PLATFORM_POWER5 2 +#define PPC_PLATFORM_POWER5_PLUS 3 +#define PPC_PLATFORM_POWER6 4 +#define PPC_PLATFORM_CELL_BE 5 +#define PPC_PLATFORM_POWER6X 6 +#define PPC_PLATFORM_POWER7 7 +#define PPC_PLATFORM_PPCA2 8 +#define PPC_PLATFORM_PPC405 9 +#define PPC_PLATFORM_PPC440 10 +#define PPC_PLATFORM_PPC464 11 +#define PPC_PLATFORM_PPC476 12 +#define PPC_PLATFORM_POWER8 13 +#define PPC_PLATFORM_POWER9 14 + +/* AT_HWCAP bits. These must match the values defined in the Linux kernel. 
*/ +#define PPC_FEATURE_32 0x80000000 +#define PPC_FEATURE_64 0x40000000 +#define PPC_FEATURE_601_INSTR 0x20000000 +#define PPC_FEATURE_HAS_ALTIVEC 0x10000000 +#define PPC_FEATURE_HAS_FPU 0x08000000 +#define PPC_FEATURE_HAS_MMU 0x04000000 +#define PPC_FEATURE_HAS_4xxMAC 0x02000000 +#define PPC_FEATURE_UNIFIED_CACHE 0x01000000 +#define PPC_FEATURE_HAS_SPE 0x00800000 +#define PPC_FEATURE_HAS_EFP_SINGLE 0x00400000 +#define PPC_FEATURE_HAS_EFP_DOUBLE 0x00200000 +#define PPC_FEATURE_NO_TB 0x00100000 +#define PPC_FEATURE_POWER4 0x00080000 +#define PPC_FEATURE_POWER5 0x00040000 +#define PPC_FEATURE_POWER5_PLUS 0x00020000 +#define PPC_FEATURE_CELL_BE 0x00010000 +#define PPC_FEATURE_BOOKE 0x00008000 +#define PPC_FEATURE_SMT 0x00004000 +#define PPC_FEATURE_ICACHE_SNOOP 0x00002000 +#define PPC_FEATURE_ARCH_2_05 0x00001000 +#define PPC_FEATURE_PA6T 0x00000800 +#define PPC_FEATURE_HAS_DFP 0x00000400 +#define PPC_FEATURE_POWER6_EXT 0x00000200 +#define PPC_FEATURE_ARCH_2_06 0x00000100 +#define PPC_FEATURE_HAS_VSX 0x00000080 +#define PPC_FEATURE_PERFMON_COMPAT 0x00000040 +#define PPC_FEATURE_TRUE_LE 0x00000002 +#define PPC_FEATURE_PPC_LE 0x00000001 + +/* AT_HWCAP2 bits. These must match the values defined in the Linux kernel. */ +#define PPC_FEATURE2_ARCH_2_07 0x80000000 +#define PPC_FEATURE2_HAS_HTM 0x40000000 +#define PPC_FEATURE2_HAS_DSCR 0x20000000 +#define PPC_FEATURE2_HAS_EBB 0x10000000 +#define PPC_FEATURE2_HAS_ISEL 0x08000000 +#define PPC_FEATURE2_HAS_TAR 0x04000000 +#define PPC_FEATURE2_HAS_VEC_CRYPTO 0x02000000 +#define PPC_FEATURE2_HTM_NOSC 0x01000000 +#define PPC_FEATURE2_ARCH_3_00 0x00800000 +#define PPC_FEATURE2_HAS_IEEE128 0x00400000 + + +/* Thread Control Block (TCB) offsets of the AT_PLATFORM, AT_HWCAP and + AT_HWCAP2 values. These must match the values defined in GLIBC. */ +#define TCB_PLATFORM_OFFSET ((TARGET_64BIT) ? -28764 : -28724) +#define TCB_HWCAP_BASE_OFFSET ((TARGET_64BIT) ? -28776 : -28736) +#define TCB_HWCAP1_OFFSET \ + ((BYTES_BIG_ENDIAN) ? TCB_HWCAP_BASE_OFFSET : TCB_HWCAP_BASE_OFFSET+4) +#define TCB_HWCAP2_OFFSET \ + ((BYTES_BIG_ENDIAN) ? TCB_HWCAP_BASE_OFFSET+4 : TCB_HWCAP_BASE_OFFSET) +#define TCB_HWCAP_OFFSET(ID) \ + (((ID) == 0) ? TCB_HWCAP1_OFFSET : TCB_HWCAP2_OFFSET) + +#endif /* _PPC_AUXV_H */ diff --git a/gcc/config/powerpcspe/ppu_intrinsics.h b/gcc/config/powerpcspe/ppu_intrinsics.h new file mode 100644 index 000000000000..d5ddbe6e1efa --- /dev/null +++ b/gcc/config/powerpcspe/ppu_intrinsics.h @@ -0,0 +1,727 @@ +/* PPU intrinsics as defined by the C/C++ Language extension for Cell BEA. + Copyright (C) 2007-2017 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +/* TODO: + misc ops (traps) + supervisor/hypervisor mode ops. */ + +#ifndef _PPU_INTRINSICS_H +#define _PPU_INTRINSICS_H + +#if !defined(__PPU__) && !defined(__ppc__) && !defined(__ppc64__) \ + && !defined(__GNUC__) + #error ppu_intrinsics.h included on wrong platform/compiler +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * unsigned int __cntlzw(unsigned int) + * unsigned int __cntlzd(unsigned long long) + * int __mulhw(int, int) + * unsigned int __mulhwu(unsigned int, unsigned int) + * long long __mulhd(long long, long long) + * unsigned long long __mulhdu(unsigned long long, unsigned long long) + * + * void __sync(void) + * void __isync(void) + * void __lwsync(void) + * void __eieio(void) + * + * void __nop(void) + * void __cctpl(void) + * void __cctpm(void) + * void __cctph(void) + * void __db8cyc(void) + * void __db10cyc(void) + * void __db12cyc(void) + * void __db16cyc(void) + * + * void __mtspr(unsigned int spr, unsigned long long value) + * unsigned long long __mfspr(unsigned int spr) + * unsigned long long __mftb(void) + * + * void __icbi(void *base) + * void __dcbi(void *base) + * + * void __dcbf(void *base) + * void __dcbz(void *base) + * void __dcbst(void *base) + * void __dcbtst(void *base) + * void __dcbt(void *base) + * void __dcbt_TH1000(void *EATRUNC, bool D, bool UG, int ID) + * void __dcbt_TH1010(bool GO, int S, int UNITCNT, bool T, bool U, int ID) + * + * unsigned __lwarx(void *base) + * unsigned long long __ldarx(void *base) + * bool __stwcx(void *base, unsigned value) + * bool __stdcx(void *base, unsigned long long value) + * + * unsigned short __lhbrx(void *base) + * unsigned int __lwbrx(void *base) + * unsigned long long __ldbrx(void *base) + * void __sthbrx(void *base, unsigned short value) + * void __stwbrx(void *base, unsigned int value) + * void __stdbrx(void *base, unsigned long long value) + * + * double __fabs(double x) + * float __fabsf(float x) + * double __fnabs(double x) + * float __fnabsf(float x) + * double __fmadd(double x, double y, double z) + * double __fmsub(double x, double y, double z) + * double __fnmadd(double x, double y, double z) + * double __fnmsub(double x, double y, double z) + * float __fmadds(float x, float y, float z) + * float __fmsubs(float x, float y, float z) + * float __fnmadds(float x, float y, float z) + * float __fnmsubs(float x, float y, float z) + * double __fsel(double x, double y, double z) + * float __fsels(float x, float y, float z) + * double __frsqrte(double x) + * float __fres(float x) + * double __fsqrt(double x) + * float __fsqrts(float x) + * long long __fctid(double x) + * long long __fctiw(double x) + * double __fcfid(long long x) + * double __mffs(void) + * void __mtfsf(int mask, double value) + * void __mtfsfi(int bits, int field) + * void __mtfsb0(int) + * void __mtfsb1(int) + * double __setflm(double) + * + * dcbt intrinsics + * void __protected_unlimited_stream_set (unsigned int direction, const void *add, unsigned int ID) + * void __protected_stream_set (unsigned int direction, const void *add, unsigned int ID) + * void __protected_stream_stop_all (void) + * void __protected_stream_stop (unsigned int ID) + * void __protected_stream_count (unsigned int unit_cnt, unsigned int ID) + * void __protected_stream_go (void) + */ + +typedef int __V4SI __attribute__((vector_size(16))); + +#define __cntlzw(v) __builtin_clz(v) +#define __cntlzd(v) __builtin_clzll(v) + +#define __mulhw(a,b) __extension__ \ + ({int result; \ + __asm__ ("mulhw %0,%1,%2" \ + : "=r" (result) \ + : "r" ((int) (a)), \ + "r" 
((int) (b))); \ + result; }) + +#define __mulhwu(a,b) __extension__ \ + ({unsigned int result; \ + __asm__ ("mulhwu %0,%1,%2" \ + : "=r" (result) \ + : "r" ((unsigned int) (a)), \ + "r" ((unsigned int) (b))); \ + result; }) + +#ifdef __powerpc64__ +#define __mulhd(a,b) __extension__ \ + ({ long long result; \ + __asm__ ("mulhd %0,%1,%2" \ + : "=r" (result) \ + : "r" ((long long) (a)), \ + "r" ((long long) (b))); \ + result; }) + +#define __mulhdu(a,b) __extension__ \ + ({unsigned long long result; \ + __asm__ ("mulhdu %0,%1,%2" \ + : "=r" (result) \ + : "r" ((unsigned long long) (a)), \ + "r" ((unsigned long long) (b))); \ + result; }) +#endif /* __powerpc64__ */ + +#define __sync() __asm__ volatile ("sync" : : : "memory") +#define __isync() __asm__ volatile ("isync" : : : "memory") +#define __lwsync() __asm__ volatile ("lwsync" : : : "memory") +#define __eieio() __asm__ volatile ("eieio" : : : "memory") + +#define __nop() __asm__ volatile ("ori 0,0,0" : : : "memory") +#define __cctpl() __asm__ volatile ("or 1,1,1" : : : "memory") +#define __cctpm() __asm__ volatile ("or 2,2,2" : : : "memory") +#define __cctph() __asm__ volatile ("or 3,3,3" : : : "memory") +#define __db8cyc() __asm__ volatile ("or 28,28,28" : : : "memory") +#define __db10cyc() __asm__ volatile ("or 29,29,29" : : : "memory") +#define __db12cyc() __asm__ volatile ("or 30,30,30" : : : "memory") +#define __db16cyc() __asm__ volatile ("or 31,31,31" : : : "memory") + +#ifdef __powerpc64__ +#define __mtspr(spr, value) \ + __asm__ volatile ("mtspr %0,%1" : : "n" (spr), "r" (value)) + +#define __mfspr(spr) __extension__ \ + ({ unsigned long long result; \ + __asm__ volatile ("mfspr %0,%1" : "=r" (result) : "n" (spr)); \ + result; }) +#endif /* __powerpc64__ */ + +#ifdef __powerpc64__ +/* Work around the hardware bug in the current Cell implementation. 
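+   The asm below retries the read (the beq- back to label 1) until the
+   time base comes back non-zero; a transient zero read is what the
+   affected hardware can return.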
*/ +#define __mftb() __extension__ \ + ({ unsigned long long result; \ + __asm__ volatile ("1: mftb %[current_tb]\n" \ + "\tcmpwi 7, %[current_tb], 0\n" \ + "\tbeq- 7, 1b" \ + : [current_tb] "=r" (result): \ + :"cr7"); \ + result; }) +#else +#define __mftb() __extension__ \ + ({ unsigned long long result; \ + unsigned long t; \ + __asm__ volatile ("1:\n" \ + "\tmftbu %0\n" \ + "\tmftb %L0\n" \ + "\tmftbu %1\n" \ + "\tcmpw %0,%1\n" \ + "\tbne 1b" \ + : "=r" (result), "=r" (t)); \ + result; }) +#endif /* __powerpc64__ */ + +#define __dcbf(base) \ + __asm__ volatile ("dcbf %y0" : "=Z" (*(__V4SI*) (base)) : : "memory") + +#define __dcbz(base) \ + __asm__ volatile ("dcbz %y0" : "=Z" (*(__V4SI*) (base)) : : "memory") + +#define __dcbst(base) \ + __asm__ volatile ("dcbst %y0" : "=Z" (*(__V4SI*) (base)) : : "memory") + +#define __dcbtst(base) \ + __asm__ volatile ("dcbtst %y0" : "=Z" (*(__V4SI*) (base)) : : "memory") + +#define __dcbt(base) \ + __asm__ volatile ("dcbt %y0" : "=Z" (*(__V4SI*) (base)) : : "memory") + +#define __icbi(base) \ + __asm__ volatile ("icbi %y0" : "=Z" (*(__V4SI*) (base)) : : "memory") + +#define __dcbt_TH1000(EATRUNC, D, UG, ID) \ + __asm__ volatile ("dcbt %y0,8" \ + : "=Z" (*(__V4SI*) (__SIZE_TYPE__)((((__SIZE_TYPE__) (EATRUNC)) & ~0x7F) \ + | ((((D) & 1) << 6) \ + | (((UG) & 1) << 5) \ + | ((ID) & 0xF)))) : : "memory") + +#define __dcbt_TH1010(GO, S, UNITCNT, T, U, ID) \ + __asm__ volatile ("dcbt %y0,10" \ + : "=Z" (*(__V4SI*) (__SIZE_TYPE__)((((__SIZE_TYPE__) (GO) & 1) << 31) \ + | (((S) & 0x3) << 29) \ + | (((UNITCNT) & 0x3FF) << 7) \ + | (((T) & 1) << 6) \ + | (((U) & 1) << 5) \ + | ((ID) & 0xF))) : : "memory") + +#define __protected_unlimited_stream_set(DIRECTION, ADDR, ID) \ + __dcbt_TH1000 ((ADDR), (DIRECTION)>>1, 1, (ID)) + +#define __protected_stream_set(DIRECTION, ADDR, ID) \ + __dcbt_TH1000 ((ADDR), (DIRECTION)>>1, 0, (ID)) + +#define __protected_stream_stop_all() \ + __dcbt_TH1010 (0, 3, 0, 0, 0, 0) + +#define __protected_stream_stop(ID) \ + __dcbt_TH1010 (0, 2, 0, 0, 0, (ID)) + +#define __protected_stream_count(COUNT, ID) \ + __dcbt_TH1010 (0, 0, (COUNT), 0, 0, (ID)) + +#define __protected_stream_go() \ + __dcbt_TH1010 (1, 0, 0, 0, 0, 0) + +#define __lhbrx(base) __extension__ \ + ({unsigned short result; \ + typedef struct {char a[2];} halfwordsize; \ + halfwordsize *ptrp = (halfwordsize*)(void*)(base); \ + __asm__ ("lhbrx %0,%y1" \ + : "=r" (result) \ + : "Z" (*ptrp)); \ + result; }) + +#define __lwbrx(base) __extension__ \ + ({unsigned int result; \ + typedef struct {char a[4];} wordsize; \ + wordsize *ptrp = (wordsize*)(void*)(base); \ + __asm__ ("lwbrx %0,%y1" \ + : "=r" (result) \ + : "Z" (*ptrp)); \ + result; }) + + +#ifdef __powerpc64__ +#define __ldbrx(base) __extension__ \ + ({unsigned long long result; \ + typedef struct {char a[8];} doublewordsize; \ + doublewordsize *ptrp = (doublewordsize*)(void*)(base); \ + __asm__ ("ldbrx %0,%y1" \ + : "=r" (result) \ + : "Z" (*ptrp)); \ + result; }) +#else +#define __ldbrx(base) __extension__ \ + ({unsigned long long result; \ + typedef struct {char a[8];} doublewordsize; \ + doublewordsize *ptrp = (doublewordsize*)(void*)(base); \ + __asm__ ("lwbrx %L0,%y1\n" \ + "\tlwbrx %0,%y2" \ + : "=&r" (result) \ + : "Z" (*ptrp), "Z" (*((char *) ptrp + 4))); \ + result; }) +#endif /* __powerpc64__ */ + + +#define __sthbrx(base, value) do { \ + typedef struct {char a[2];} halfwordsize; \ + halfwordsize *ptrp = (halfwordsize*)(void*)(base); \ + __asm__ ("sthbrx %1,%y0" \ + : "=Z" (*ptrp) \ + : "r" (value)); \ + } while 
(0) + +#define __stwbrx(base, value) do { \ + typedef struct {char a[4];} wordsize; \ + wordsize *ptrp = (wordsize*)(void*)(base); \ + __asm__ ("stwbrx %1,%y0" \ + : "=Z" (*ptrp) \ + : "r" (value)); \ + } while (0) + +#ifdef __powerpc64__ +#define __stdbrx(base, value) do { \ + typedef struct {char a[8];} doublewordsize; \ + doublewordsize *ptrp = (doublewordsize*)(void*)(base); \ + __asm__ ("stdbrx %1,%y0" \ + : "=Z" (*ptrp) \ + : "r" (value)); \ + } while (0) +#else +#define __stdbrx(base, value) do { \ + typedef struct {char a[8];} doublewordsize; \ + doublewordsize *ptrp = (doublewordsize*)(void*)(base); \ + __asm__ ("stwbrx %L2,%y0\n" \ + "\tstwbrx %2,%y1" \ + : "=Z" (*ptrp), "=Z" (*((char *) ptrp + 4)) \ + : "r" (value)); \ + } while (0) +#endif /* __powerpc64__ */ + + +#define __lwarx(base) __extension__ \ + ({unsigned int result; \ + typedef struct {char a[4];} wordsize; \ + wordsize *ptrp = (wordsize*)(void*)(base); \ + __asm__ volatile ("lwarx %0,%y1" \ + : "=r" (result) \ + : "Z" (*ptrp)); \ + result; }) + +#ifdef __powerpc64__ +#define __ldarx(base) __extension__ \ + ({unsigned long long result; \ + typedef struct {char a[8];} doublewordsize; \ + doublewordsize *ptrp = (doublewordsize*)(void*)(base); \ + __asm__ volatile ("ldarx %0,%y1" \ + : "=r" (result) \ + : "Z" (*ptrp)); \ + result; }) +#endif /* __powerpc64__ */ + +#define __stwcx(base, value) __extension__ \ + ({unsigned int result; \ + typedef struct {char a[4];} wordsize; \ + wordsize *ptrp = (wordsize*)(void*)(base); \ + __asm__ volatile ("stwcx. %2,%y1\n" \ + "\tmfocrf %0,0x80" \ + : "=r" (result), \ + "=Z" (*ptrp) \ + : "r" (value) : "cr0"); \ + ((result & 0x20000000) >> 29); }) + + +#ifdef __powerpc64__ +#define __stdcx(base, value) __extension__ \ + ({unsigned long long result; \ + typedef struct {char a[8];} doublewordsize; \ + doublewordsize *ptrp = (doublewordsize*)(void*)(base); \ + __asm__ volatile ("stdcx. %2,%y1\n" \ + "\tmfocrf %0,0x80" \ + : "=r" (result), \ + "=Z" (*ptrp) \ + : "r" (value) : "cr0"); \ + ((result & 0x20000000) >> 29); }) +#endif /* __powerpc64__ */ + +#define __mffs() __extension__ \ + ({double result; \ + __asm__ volatile ("mffs %0" : "=d" (result)); \ + result; }) + +#define __mtfsf(mask,value) \ + __asm__ volatile ("mtfsf %0,%1" : : "n" (mask), "d" ((double) (value))) + +#define __mtfsfi(bits,field) \ + __asm__ volatile ("mtfsfi %0,%1" : : "n" (bits), "n" (field)) + +#define __mtfsb0(bit) __asm__ volatile ("mtfsb0 %0" : : "n" (bit)) +#define __mtfsb1(bit) __asm__ volatile ("mtfsb1 %0" : : "n" (bit)) + +#define __setflm(v) __extension__ \ + ({double result; \ + __asm__ volatile ("mffs %0\n\tmtfsf 255,%1" \ + : "=&d" (result) \ + : "d" ((double) (v))); \ + result; }) + +/* __builtin_fabs may perform unnecessary rounding. */ + +/* Rename __fabs and __fabsf to work around internal prototypes defined + in bits/mathcalls.h with some glibc versions. 
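+   Without the rename, the always_inline definitions below would clash
+   with those prototypes.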
*/ +#define __fabs __ppu_fabs +#define __fabsf __ppu_fabsf + +static __inline__ double __fabs(double x) __attribute__((always_inline)); +static __inline__ double +__fabs(double x) +{ + double r; + __asm__("fabs %0,%1" : "=d"(r) : "d"(x)); + return r; +} + +static __inline__ float __fabsf(float x) __attribute__((always_inline)); +static __inline__ float +__fabsf(float x) +{ + float r; + __asm__("fabs %0,%1" : "=f"(r) : "f"(x)); + return r; +} + +static __inline__ double __fnabs(double x) __attribute__((always_inline)); +static __inline__ double +__fnabs(double x) +{ + double r; + __asm__("fnabs %0,%1" : "=d"(r) : "d"(x)); + return r; +} + +static __inline__ float __fnabsf(float x) __attribute__((always_inline)); +static __inline__ float +__fnabsf(float x) +{ + float r; + __asm__("fnabs %0,%1" : "=f"(r) : "f"(x)); + return r; +} + +static __inline__ double __fmadd(double x, double y, double z) + __attribute__((always_inline)); +static __inline__ double +__fmadd(double x, double y, double z) +{ + double r; + __asm__("fmadd %0,%1,%2,%3" : "=d"(r) : "d"(x),"d"(y),"d"(z)); + return r; +} + +static __inline__ double __fmsub(double x, double y, double z) + __attribute__((always_inline)); +static __inline__ double +__fmsub(double x, double y, double z) +{ + double r; + __asm__("fmsub %0,%1,%2,%3" : "=d"(r) : "d"(x),"d"(y),"d"(z)); + return r; +} + +static __inline__ double __fnmadd(double x, double y, double z) + __attribute__((always_inline)); +static __inline__ double +__fnmadd(double x, double y, double z) +{ + double r; + __asm__("fnmadd %0,%1,%2,%3" : "=d"(r) : "d"(x),"d"(y),"d"(z)); + return r; +} + +static __inline__ double __fnmsub(double x, double y, double z) + __attribute__((always_inline)); +static __inline__ double +__fnmsub(double x, double y, double z) +{ + double r; + __asm__("fnmsub %0,%1,%2,%3" : "=d"(r) : "d"(x),"d"(y),"d"(z)); + return r; +} + +static __inline__ float __fmadds(float x, float y, float z) + __attribute__((always_inline)); +static __inline__ float +__fmadds(float x, float y, float z) +{ + float r; + __asm__("fmadds %0,%1,%2,%3" : "=f"(r) : "f"(x),"f"(y),"f"(z)); + return r; +} + +static __inline__ float __fmsubs(float x, float y, float z) + __attribute__((always_inline)); +static __inline__ float +__fmsubs(float x, float y, float z) +{ + float r; + __asm__("fmsubs %0,%1,%2,%3" : "=f"(r) : "f"(x),"f"(y),"f"(z)); + return r; +} + +static __inline__ float __fnmadds(float x, float y, float z) + __attribute__((always_inline)); +static __inline__ float +__fnmadds(float x, float y, float z) +{ + float r; + __asm__("fnmadds %0,%1,%2,%3" : "=f"(r) : "f"(x),"f"(y),"f"(z)); + return r; +} + +static __inline__ float __fnmsubs(float x, float y, float z) + __attribute__((always_inline)); +static __inline__ float +__fnmsubs(float x, float y, float z) +{ + float r; + __asm__("fnmsubs %0,%1,%2,%3" : "=f"(r) : "f"(x),"f"(y),"f"(z)); + return r; +} + +static __inline__ double __fsel(double x, double y, double z) + __attribute__((always_inline)); +static __inline__ double +__fsel(double x, double y, double z) +{ + double r; + __asm__("fsel %0,%1,%2,%3" : "=d"(r) : "d"(x),"d"(y),"d"(z)); + return r; +} + +static __inline__ float __fsels(float x, float y, float z) + __attribute__((always_inline)); +static __inline__ float +__fsels(float x, float y, float z) +{ + float r; + __asm__("fsel %0,%1,%2,%3" : "=f"(r) : "f"(x),"f"(y),"f"(z)); + return r; +} + +static __inline__ double __frsqrte(double x) __attribute__((always_inline)); +static __inline__ double +__frsqrte(double x) +{ + double 
r; + __asm__("frsqrte %0,%1" : "=d" (r) : "d" (x)); + return r; +} + +static __inline__ float __fres(float x) __attribute__((always_inline)); +static __inline__ float +__fres(float x) +{ + float r; + __asm__("fres %0,%1" : "=f"(r) : "f"(x)); + return r; +} + +static __inline__ double __fsqrt(double x) __attribute__((always_inline)); +static __inline__ double +__fsqrt(double x) +{ + double r; + __asm__("fsqrt %0,%1" : "=d"(r) : "d"(x)); + return r; +} + +static __inline__ float __fsqrts(float x) __attribute__((always_inline)); +static __inline__ float +__fsqrts(float x) +{ + float r; + __asm__("fsqrts %0,%1" : "=f"(r) : "f"(x)); + return r; +} + +static __inline__ double __fmul (double a, double b) __attribute__ ((always_inline)); +static __inline__ double +__fmul(double a, double b) +{ + double d; + __asm__ ("fmul %0,%1,%2" : "=d" (d) : "d" (a), "d" (b)); + return d; +} + +static __inline__ float __fmuls (float a, float b) __attribute__ ((always_inline)); +static __inline__ float +__fmuls (float a, float b) +{ + float d; + __asm__ ("fmuls %0,%1,%2" : "=d" (d) : "f" (a), "f" (b)); + return d; +} + +static __inline__ float __frsp (float a) __attribute__ ((always_inline)); +static __inline__ float +__frsp (float a) +{ + float d; + __asm__ ("frsp %0,%1" : "=d" (d) : "f" (a)); + return d; +} + +static __inline__ double __fcfid (long long a) __attribute__((always_inline)); +static __inline__ double +__fcfid (long long a) +{ + double d; + __asm__ ("fcfid %0,%1" : "=d" (d) : "d" (a)); + return d; +} + +static __inline__ long long __fctid (double a) __attribute__ ((always_inline)); +static __inline__ long long +__fctid (double a) +{ + long long d; + __asm__ ("fctid %0,%1" : "=d" (d) : "d" (a)); + return d; +} + +static __inline__ long long __fctidz (double a) __attribute__ ((always_inline)); +static __inline__ long long +__fctidz (double a) +{ + long long d; + __asm__ ("fctidz %0,%1" : "=d" (d) : "d" (a)); + return d; +} + +static __inline__ int __fctiw (double a) __attribute__ ((always_inline)); +static __inline__ int +__fctiw (double a) +{ + unsigned long long d; + __asm__ ("fctiw %0,%1" : "=d" (d) : "d" (a)); + return (int) d; +} + +static __inline__ int __fctiwz (double a) __attribute__ ((always_inline)); +static __inline__ int +__fctiwz (double a) +{ + long long d; + __asm__ ("fctiwz %0,%1" : "=d" (d) : "d" (a)); + return (int) d; +} + +#ifdef __powerpc64__ +#define __rldcl(a,b,mb) __extension__ \ + ({ \ + unsigned long long d; \ + __asm__ ("rldcl %0,%1,%2,%3" : "=r" (d) : "r" (a), "r" (b), "i" (mb)); \ + d; \ + }) + +#define __rldcr(a,b,me) __extension__ \ + ({ \ + unsigned long long d; \ + __asm__ ("rldcr %0,%1,%2,%3" : "=r" (d) : "r" (a), "r" (b), "i" (me)); \ + d; \ + }) + +#define __rldic(a,sh,mb) __extension__ \ + ({ \ + unsigned long long d; \ + __asm__ ("rldic %0,%1,%2,%3" : "=r" (d) : "r" (a), "i" (sh), "i" (mb)); \ + d; \ + }) + +#define __rldicl(a,sh,mb) __extension__ \ + ({ \ + unsigned long long d; \ + __asm__ ("rldicl %0,%1,%2,%3" : "=r" (d) : "r" (a), "i" (sh), "i" (mb)); \ + d; \ + }) + +#define __rldicr(a,sh,me) __extension__ \ + ({ \ + unsigned long long d; \ + __asm__ ("rldicr %0,%1,%2,%3" : "=r" (d) : "r" (a), "i" (sh), "i" (me)); \ + d; \ + }) + +#define __rldimi(a,b,sh,mb) __extension__ \ + ({ \ + unsigned long long d; \ + __asm__ ("rldimi %0,%1,%2,%3" : "=r" (d) : "r" (b), "i" (sh), "i" (mb), "0" (a)); \ + d; \ + }) +#endif /* __powerpc64__ */ + +#define __rlwimi(a,b,sh,mb,me) __extension__ \ + ({ \ + unsigned int d; \ + __asm__ ("rlwimi %0,%1,%2,%3,%4" : "=r" (d) : "r" 
(b), "i" (sh), "i" (mb), "i" (me), "0" (a)); \ + d; \ + }) + +#define __rlwinm(a,sh,mb,me) __extension__ \ + ({ \ + unsigned int d; \ + __asm__ ("rlwinm %0,%1,%2,%3,%4" : "=r" (d) : "r" (a), "i" (sh), "i" (mb), "i" (me)); \ + d; \ + }) + +#define __rlwnm(a,b,mb,me) __extension__ \ + ({ \ + unsigned int d; \ + __asm__ ("rlwnm %0,%1,%2,%3,%4" : "=r" (d) : "r" (a), "r" (b), "i" (mb), "i" (me)); \ + d; \ + }) + +#ifdef __cplusplus +} +#endif + +#endif /* _PPU_INTRINSICS_H */ diff --git a/gcc/config/powerpcspe/predicates.md b/gcc/config/powerpcspe/predicates.md new file mode 100644 index 000000000000..0d816e5e6d97 --- /dev/null +++ b/gcc/config/powerpcspe/predicates.md @@ -0,0 +1,2124 @@ +;; Predicate definitions for POWER and PowerPC. +;; Copyright (C) 2005-2017 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; Return 1 for anything except PARALLEL. +(define_predicate "any_operand" + (match_code "const_int,const_double,const_wide_int,const,symbol_ref,label_ref,subreg,reg,mem")) + +;; Return 1 for any PARALLEL. +(define_predicate "any_parallel_operand" + (match_code "parallel")) + +;; Return 1 if op is COUNT register. +(define_predicate "count_register_operand" + (and (match_code "reg") + (match_test "REGNO (op) == CTR_REGNO + || REGNO (op) > LAST_VIRTUAL_REGISTER"))) + +;; Return 1 if op is a SUBREG that is used to look at a SFmode value as +;; and integer or vice versa. +;; +;; In the normal case where SFmode is in a floating point/vector register, it +;; is stored as a DFmode and has a different format. If we don't transform the +;; value, things that use logical operations on the values will get the wrong +;; value. +;; +;; If we don't have 64-bit and direct move, this conversion will be done by +;; store and load, instead of by fiddling with the bits within the register. +(define_predicate "sf_subreg_operand" + (match_code "subreg") +{ + rtx inner_reg = SUBREG_REG (op); + machine_mode inner_mode = GET_MODE (inner_reg); + + if (TARGET_ALLOW_SF_SUBREG || !REG_P (inner_reg)) + return 0; + + if ((mode == SFmode && GET_MODE_CLASS (inner_mode) == MODE_INT) + || (GET_MODE_CLASS (mode) == MODE_INT && inner_mode == SFmode)) + { + if (INT_REGNO_P (REGNO (inner_reg))) + return 0; + + return 1; + } + return 0; +}) + +;; Return 1 if op is an Altivec register. +(define_predicate "altivec_register_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + { + if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode)) + return 0; + + op = SUBREG_REG (op); + } + + if (!REG_P (op)) + return 0; + + if (REGNO (op) >= FIRST_PSEUDO_REGISTER) + return 1; + + return ALTIVEC_REGNO_P (REGNO (op)); +}) + +;; Return 1 if op is a VSX register. 
+(define_predicate "vsx_register_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + { + if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode)) + return 0; + + op = SUBREG_REG (op); + } + + if (!REG_P (op)) + return 0; + + if (REGNO (op) >= FIRST_PSEUDO_REGISTER) + return 1; + + return VSX_REGNO_P (REGNO (op)); +}) + +;; Like vsx_register_operand, but allow SF SUBREGS +(define_predicate "vsx_reg_sfsubreg_ok" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + if (REGNO (op) >= FIRST_PSEUDO_REGISTER) + return 1; + + return VSX_REGNO_P (REGNO (op)); +}) + +;; Return 1 if op is a vector register that operates on floating point vectors +;; (either altivec or VSX). +(define_predicate "vfloat_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + { + if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode)) + return 0; + + op = SUBREG_REG (op); + } + + if (!REG_P (op)) + return 0; + + if (REGNO (op) >= FIRST_PSEUDO_REGISTER) + return 1; + + return VFLOAT_REGNO_P (REGNO (op)); +}) + +;; Return 1 if op is a vector register that operates on integer vectors +;; (only altivec, VSX doesn't support integer vectors) +(define_predicate "vint_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + { + if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode)) + return 0; + + op = SUBREG_REG (op); + } + + if (!REG_P (op)) + return 0; + + if (REGNO (op) >= FIRST_PSEUDO_REGISTER) + return 1; + + return VINT_REGNO_P (REGNO (op)); +}) + +;; Return 1 if op is a vector register to do logical operations on (and, or, +;; xor, etc.) +(define_predicate "vlogical_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + { + if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode)) + return 0; + + op = SUBREG_REG (op); + } + + + if (!REG_P (op)) + return 0; + + if (REGNO (op) >= FIRST_PSEUDO_REGISTER) + return 1; + + return VLOGICAL_REGNO_P (REGNO (op)); +}) + +;; Return 1 if op is the carry register. +(define_predicate "ca_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + return CA_REGNO_P (REGNO (op)); +}) + +;; Return 1 if op is a signed 5-bit constant integer. +(define_predicate "s5bit_cint_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= -16 && INTVAL (op) <= 15"))) + +;; Return 1 if op is a unsigned 3-bit constant integer. +(define_predicate "u3bit_cint_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 7"))) + +;; Return 1 if op is a unsigned 5-bit constant integer. +(define_predicate "u5bit_cint_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 31"))) + +;; Return 1 if op is a unsigned 6-bit constant integer. +(define_predicate "u6bit_cint_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 63"))) + +;; Return 1 if op is an unsigned 7-bit constant integer. +(define_predicate "u7bit_cint_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 127)"))) + +;; Return 1 if op is a signed 8-bit constant integer. +;; Integer multiplication complete more quickly +(define_predicate "s8bit_cint_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= -128 && INTVAL (op) <= 127"))) + +;; Return 1 if op is a unsigned 10-bit constant integer. 
+(define_predicate "u10bit_cint_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 1023"))) + +;; Return 1 if op is a constant integer that can fit in a D field. +(define_predicate "short_cint_operand" + (and (match_code "const_int") + (match_test "satisfies_constraint_I (op)"))) + +;; Return 1 if op is a constant integer that can fit in an unsigned D field. +(define_predicate "u_short_cint_operand" + (and (match_code "const_int") + (match_test "satisfies_constraint_K (op)"))) + +;; Return 1 if op is a constant integer that is a signed 16-bit constant +;; shifted left 16 bits +(define_predicate "upper16_cint_operand" + (and (match_code "const_int") + (match_test "satisfies_constraint_L (op)"))) + +;; Return 1 if op is a constant integer that cannot fit in a signed D field. +(define_predicate "non_short_cint_operand" + (and (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) + (INTVAL (op) + 0x8000) >= 0x10000"))) + +;; Return 1 if op is a positive constant integer that is an exact power of 2. +(define_predicate "exact_log2_cint_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) > 0 && exact_log2 (INTVAL (op)) >= 0"))) + +;; Match op = 0 or op = 1. +(define_predicate "const_0_to_1_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 1)"))) + +;; Match op = 0..3. +(define_predicate "const_0_to_3_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 3)"))) + +;; Match op = 2 or op = 3. +(define_predicate "const_2_to_3_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 2, 3)"))) + +;; Match op = 0..7. +(define_predicate "const_0_to_7_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) + +;; Match op = 0..11 +(define_predicate "const_0_to_12_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 12)"))) + +;; Match op = 0..15 +(define_predicate "const_0_to_15_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 15)"))) + +;; Return 1 if op is a register that is not special. +;; Disallow (SUBREG:SF (REG:SI)) and (SUBREG:SI (REG:SF)) on VSX systems where +;; you need to be careful in moving a SFmode to SImode and vice versa due to +;; the fact that SFmode is represented as DFmode in the VSX registers. +(define_predicate "gpc_reg_operand" + (match_operand 0 "register_operand") +{ + if ((TARGET_E500_DOUBLE || TARGET_SPE) && invalid_e500_subreg (op, mode)) + return 0; + + if (GET_CODE (op) == SUBREG) + { + if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode)) + return 0; + + op = SUBREG_REG (op); + } + + if (!REG_P (op)) + return 0; + + if (REGNO (op) >= FIRST_PSEUDO_REGISTER) + return 1; + + if (TARGET_ALTIVEC && ALTIVEC_REGNO_P (REGNO (op))) + return 1; + + if (TARGET_VSX && VSX_REGNO_P (REGNO (op))) + return 1; + + return INT_REGNO_P (REGNO (op)) || FP_REGNO_P (REGNO (op)); +}) + +;; Return 1 if op is a general purpose register. Unlike gpc_reg_operand, don't +;; allow floating point or vector registers. Since vector registers are not +;; allowed, we don't have to reject SFmode/SImode subregs. 
+(define_predicate "int_reg_operand" + (match_operand 0 "register_operand") +{ + if ((TARGET_E500_DOUBLE || TARGET_SPE) && invalid_e500_subreg (op, mode)) + return 0; + + if (GET_CODE (op) == SUBREG) + { + if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode)) + return 0; + + op = SUBREG_REG (op); + } + + if (!REG_P (op)) + return 0; + + if (REGNO (op) >= FIRST_PSEUDO_REGISTER) + return 1; + + return INT_REGNO_P (REGNO (op)); +}) + +;; Like int_reg_operand, but don't return true for pseudo registers +;; We don't have to check for SF SUBREGS because pseudo registers +;; are not allowed, and SF SUBREGs are ok within GPR registers. +(define_predicate "int_reg_operand_not_pseudo" + (match_operand 0 "register_operand") +{ + if ((TARGET_E500_DOUBLE || TARGET_SPE) && invalid_e500_subreg (op, mode)) + return 0; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + if (REGNO (op) >= FIRST_PSEUDO_REGISTER) + return 0; + + return INT_REGNO_P (REGNO (op)); +}) + +;; Like int_reg_operand, but only return true for base registers +(define_predicate "base_reg_operand" + (match_operand 0 "int_reg_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + return (REGNO (op) != FIRST_GPR_REGNO); +}) + + +;; Return true if this is a traditional floating point register +(define_predicate "fpr_reg_operand" + (match_code "reg,subreg") +{ + HOST_WIDE_INT r; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + r = REGNO (op); + if (r >= FIRST_PSEUDO_REGISTER) + return 1; + + return FP_REGNO_P (r); +}) + +;; Return true if this is a register that can has D-form addressing (GPR and +;; traditional FPR registers for scalars). ISA 3.0 (power9) adds D-form +;; addressing for scalars in Altivec registers. +;; +;; If this is a pseudo only allow for GPR fusion in power8. If we have the +;; power9 fusion allow the floating point types. +(define_predicate "toc_fusion_or_p9_reg_operand" + (match_code "reg,subreg") +{ + HOST_WIDE_INT r; + bool gpr_p = (mode == QImode || mode == HImode || mode == SImode + || mode == SFmode + || (TARGET_POWERPC64 && (mode == DImode || mode == DFmode))); + bool fpr_p = (TARGET_P9_FUSION + && (mode == DFmode || mode == SFmode + || (TARGET_POWERPC64 && mode == DImode))); + bool vmx_p = (TARGET_P9_FUSION && TARGET_P9_VECTOR + && (mode == DFmode || mode == SFmode)); + + if (!TARGET_P8_FUSION) + return 0; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + r = REGNO (op); + if (r >= FIRST_PSEUDO_REGISTER) + return (gpr_p || fpr_p || vmx_p); + + if (INT_REGNO_P (r)) + return gpr_p; + + if (FP_REGNO_P (r)) + return fpr_p; + + if (ALTIVEC_REGNO_P (r)) + return vmx_p; + + return 0; +}) + +;; Return 1 if op is a HTM specific SPR register. +(define_predicate "htm_spr_reg_operand" + (match_operand 0 "register_operand") +{ + if (!TARGET_HTM) + return 0; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + switch (REGNO (op)) + { + case TFHAR_REGNO: + case TFIAR_REGNO: + case TEXASR_REGNO: + return 1; + default: + break; + } + + /* Unknown SPR. */ + return 0; +}) + +;; Return 1 if op is a general purpose register that is an even register +;; which suitable for a load/store quad operation +;; Subregs are not allowed here because when they are combine can +;; create (subreg:PTI (reg:TI pseudo)) which will cause reload to +;; think the innermost reg needs reloading, in TImode instead of +;; PTImode. 
So reload will choose a reg in TImode which has no +;; requirement that the reg be even. +(define_predicate "quad_int_reg_operand" + (match_code "reg") +{ + HOST_WIDE_INT r; + + if (!TARGET_QUAD_MEMORY && !TARGET_QUAD_MEMORY_ATOMIC) + return 0; + + r = REGNO (op); + if (r >= FIRST_PSEUDO_REGISTER) + return 1; + + return (INT_REGNO_P (r) && ((r & 1) == 0)); +}) + +;; Return 1 if op is a register that is a condition register field. +(define_predicate "cc_reg_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + if (REGNO (op) > LAST_VIRTUAL_REGISTER) + return 1; + + return CR_REGNO_P (REGNO (op)); +}) + +;; Return 1 if op is a register that is a condition register field not cr0. +(define_predicate "cc_reg_not_cr0_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + if (REGNO (op) > LAST_VIRTUAL_REGISTER) + return 1; + + return CR_REGNO_NOT_CR0_P (REGNO (op)); +}) + +;; Return 1 if op is a register that is a condition register field and if generating microcode, not cr0. +(define_predicate "cc_reg_not_micro_cr0_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + if (REGNO (op) > LAST_VIRTUAL_REGISTER) + return 1; + + if (rs6000_gen_cell_microcode) + return CR_REGNO_NOT_CR0_P (REGNO (op)); + else + return CR_REGNO_P (REGNO (op)); +}) + +;; Return 1 if op is a constant integer valid for D field +;; or non-special register register. +(define_predicate "reg_or_short_operand" + (if_then_else (match_code "const_int") + (match_operand 0 "short_cint_operand") + (match_operand 0 "gpc_reg_operand"))) + +;; Return 1 if op is a constant integer valid for DS field +;; or non-special register. +(define_predicate "reg_or_aligned_short_operand" + (if_then_else (match_code "const_int") + (and (match_operand 0 "short_cint_operand") + (match_test "!(INTVAL (op) & 3)")) + (match_operand 0 "gpc_reg_operand"))) + +;; Return 1 if op is a constant integer whose high-order 16 bits are zero +;; or non-special register. +(define_predicate "reg_or_u_short_operand" + (if_then_else (match_code "const_int") + (match_operand 0 "u_short_cint_operand") + (match_operand 0 "gpc_reg_operand"))) + +;; Return 1 if op is any constant integer +;; or non-special register. +(define_predicate "reg_or_cint_operand" + (ior (match_code "const_int") + (match_operand 0 "gpc_reg_operand"))) + +;; Return 1 if op is a constant integer valid for addition with addis, addi. +(define_predicate "add_cint_operand" + (and (match_code "const_int") + (match_test "((unsigned HOST_WIDE_INT) INTVAL (op) + + (mode == SImode ? 0x80000000 : 0x80008000)) + < (unsigned HOST_WIDE_INT) 0x100000000ll"))) + +;; Return 1 if op is a constant integer valid for addition +;; or non-special register. +(define_predicate "reg_or_add_cint_operand" + (if_then_else (match_code "const_int") + (match_operand 0 "add_cint_operand") + (match_operand 0 "gpc_reg_operand"))) + +;; Return 1 if op is a constant integer valid for subtraction +;; or non-special register. +(define_predicate "reg_or_sub_cint_operand" + (if_then_else (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) + (- UINTVAL (op) + (mode == SImode ? 
0x80000000 : 0x80008000)) + < (unsigned HOST_WIDE_INT) 0x100000000ll") + (match_operand 0 "gpc_reg_operand"))) + +;; Return 1 if op is any 32-bit unsigned constant integer +;; or non-special register. +(define_predicate "reg_or_logical_cint_operand" + (if_then_else (match_code "const_int") + (match_test "(GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT + && INTVAL (op) >= 0) + || ((INTVAL (op) & GET_MODE_MASK (mode) + & (~ (unsigned HOST_WIDE_INT) 0xffffffff)) == 0)") + (match_operand 0 "gpc_reg_operand"))) + +;; Like reg_or_logical_cint_operand, but allow vsx registers +(define_predicate "vsx_reg_or_cint_operand" + (ior (match_operand 0 "vsx_register_operand") + (match_operand 0 "reg_or_logical_cint_operand"))) + +;; Return 1 if operand is a CONST_DOUBLE that can be set in a register +;; with no more than one instruction per word. +(define_predicate "easy_fp_constant" + (match_code "const_double") +{ + if (GET_MODE (op) != mode + || (!SCALAR_FLOAT_MODE_P (mode) && mode != DImode)) + return 0; + + /* Consider all constants with -msoft-float to be easy. */ + if ((TARGET_SOFT_FLOAT || TARGET_E500_SINGLE + || (TARGET_HARD_FLOAT && (TARGET_SINGLE_FLOAT && ! TARGET_DOUBLE_FLOAT))) + && mode != DImode) + return 1; + + /* 0.0D is not all zero bits. */ + if (DECIMAL_FLOAT_MODE_P (mode)) + return 0; + + /* The constant 0.0 is easy under VSX. */ + if (TARGET_VSX && SCALAR_FLOAT_MODE_P (mode) && op == CONST0_RTX (mode)) + return 1; + + /* If we are using V.4 style PIC, consider all constants to be hard. */ + if (flag_pic && DEFAULT_ABI == ABI_V4) + return 0; + + /* If we have real FPRs, consider floating point constants hard (other than + 0.0 under VSX), so that the constant gets pushed to memory during the + early RTL phases. This has the advantage that double precision constants + that can be represented in single precision without a loss of precision + will use single precision loads. */ + + switch (mode) + { + case KFmode: + case IFmode: + case TFmode: + case DFmode: + case SFmode: + return 0; + + case DImode: + return (num_insns_constant (op, DImode) <= 2); + + case SImode: + return 1; + + default: + gcc_unreachable (); + } +}) + +;; Return 1 if the operand is a constant that can loaded with a XXSPLTIB +;; instruction and then a VUPKHSB, VECSB2W or VECSB2D instruction. + +(define_predicate "xxspltib_constant_split" + (match_code "const_vector,vec_duplicate,const_int") +{ + int value = 256; + int num_insns = -1; + + if (!xxspltib_constant_p (op, mode, &num_insns, &value)) + return false; + + return num_insns > 1; +}) + + +;; Return 1 if the operand is constant that can loaded directly with a XXSPLTIB +;; instruction. + +(define_predicate "xxspltib_constant_nosplit" + (match_code "const_vector,vec_duplicate,const_int") +{ + int value = 256; + int num_insns = -1; + + if (!xxspltib_constant_p (op, mode, &num_insns, &value)) + return false; + + return num_insns == 1; +}) + +;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a +;; vector register without using memory. +(define_predicate "easy_vector_constant" + (match_code "const_vector") +{ + /* As the paired vectors are actually FPRs it seems that there is + no easy way to load a CONST_VECTOR without using memory. */ + if (TARGET_PAIRED_FLOAT) + return false; + + /* Because IEEE 128-bit floating point is considered a vector type + in order to pass it in VSX registers, it might use this function + instead of easy_fp_constant. 
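+     In that case the constant is simply re-evaluated under the scalar
+     rules rather than the vector checks below.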
*/ + if (FLOAT128_VECTOR_P (mode)) + return easy_fp_constant (op, mode); + + if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)) + { + int value = 256; + int num_insns = -1; + + if (zero_constant (op, mode) || all_ones_constant (op, mode)) + return true; + + if (TARGET_P9_VECTOR + && xxspltib_constant_p (op, mode, &num_insns, &value)) + return true; + + return easy_altivec_constant (op, mode); + } + + if (SPE_VECTOR_MODE (mode)) + { + int cst, cst2; + if (zero_constant (op, mode)) + return true; + if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT) + return false; + + /* Limit SPE vectors to 15 bits signed. These we can generate with: + li r0, CONSTANT1 + evmergelo r0, r0, r0 + li r0, CONSTANT2 + + I don't know how efficient it would be to allow bigger constants, + considering we'll have an extra 'ori' for every 'li'. I doubt 5 + instructions is better than a 64-bit memory load, but I don't + have the e500 timing specs. */ + if (mode == V2SImode) + { + cst = INTVAL (CONST_VECTOR_ELT (op, 0)); + cst2 = INTVAL (CONST_VECTOR_ELT (op, 1)); + return cst >= -0x7fff && cst <= 0x7fff + && cst2 >= -0x7fff && cst2 <= 0x7fff; + } + } + + return false; +}) + +;; Same as easy_vector_constant but only for EASY_VECTOR_15_ADD_SELF. +(define_predicate "easy_vector_constant_add_self" + (and (match_code "const_vector") + (and (match_test "TARGET_ALTIVEC") + (match_test "easy_altivec_constant (op, mode)"))) +{ + HOST_WIDE_INT val; + int elt; + if (mode == V2DImode || mode == V2DFmode) + return 0; + elt = BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 : 0; + val = const_vector_elt_as_int (op, elt); + val = ((val & 0xff) ^ 0x80) - 0x80; + return EASY_VECTOR_15_ADD_SELF (val); +}) + +;; Same as easy_vector_constant but only for EASY_VECTOR_MSB. +(define_predicate "easy_vector_constant_msb" + (and (match_code "const_vector") + (and (match_test "TARGET_ALTIVEC") + (match_test "easy_altivec_constant (op, mode)"))) +{ + HOST_WIDE_INT val; + int elt; + if (mode == V2DImode || mode == V2DFmode) + return 0; + elt = BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 : 0; + val = const_vector_elt_as_int (op, elt); + return EASY_VECTOR_MSB (val, GET_MODE_INNER (mode)); +}) + +;; Return true if this is an easy altivec constant that we form +;; by using VSLDOI. +(define_predicate "easy_vector_constant_vsldoi" + (and (match_code "const_vector") + (and (match_test "TARGET_ALTIVEC") + (and (match_test "easy_altivec_constant (op, mode)") + (match_test "vspltis_shifted (op) != 0"))))) + +;; Return 1 if operand is constant zero (scalars and vectors). +(define_predicate "zero_constant" + (and (match_code "const_int,const_double,const_wide_int,const_vector") + (match_test "op == CONST0_RTX (mode)"))) + +;; Return 1 if operand is constant -1 (scalars and vectors). +(define_predicate "all_ones_constant" + (and (match_code "const_int,const_double,const_wide_int,const_vector") + (match_test "op == CONSTM1_RTX (mode) && !FLOAT_MODE_P (mode)"))) + +;; Return 1 if operand is a vector int register or is either a vector constant +;; of all 0 bits of a vector constant of all 1 bits. +(define_predicate "vector_int_reg_or_same_bit" + (match_code "reg,subreg,const_vector") +{ + if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT) + return 0; + + else if (REG_P (op) || SUBREG_P (op)) + return vint_operand (op, mode); + + else + return op == CONST0_RTX (mode) || op == CONSTM1_RTX (mode); +}) + +;; Return 1 if operand is 0.0. 
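+;; Only scalar floating-point zero is matched here; integer and vector
+;; zeros are handled by zero_constant above.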
+(define_predicate "zero_fp_constant" + (and (match_code "const_double") + (match_test "SCALAR_FLOAT_MODE_P (mode) + && op == CONST0_RTX (mode)"))) + +;; Return 1 if the operand is in volatile memory. Note that during the +;; RTL generation phase, memory_operand does not return TRUE for volatile +;; memory references. So this function allows us to recognize volatile +;; references where it's safe. +(define_predicate "volatile_mem_operand" + (and (and (match_code "mem") + (match_test "MEM_VOLATILE_P (op)")) + (if_then_else (match_test "reload_completed") + (match_operand 0 "memory_operand") + (if_then_else (match_test "reload_in_progress") + (match_test "strict_memory_address_p (mode, XEXP (op, 0))") + (match_test "memory_address_p (mode, XEXP (op, 0))"))))) + +;; Return 1 if the operand is an offsettable memory operand. +(define_predicate "offsettable_mem_operand" + (and (match_operand 0 "memory_operand") + (match_test "offsettable_nonstrict_memref_p (op)"))) + +;; Return 1 if the operand is a simple offsettable memory operand +;; that does not include pre-increment, post-increment, etc. +(define_predicate "simple_offsettable_mem_operand" + (match_operand 0 "offsettable_mem_operand") +{ + rtx addr = XEXP (op, 0); + + if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) + return 0; + + if (!CONSTANT_P (XEXP (addr, 1))) + return 0; + + return base_reg_operand (XEXP (addr, 0), Pmode); +}) + +;; Return 1 if the operand is suitable for load/store quad memory. +;; This predicate only checks for non-atomic loads/stores (not lqarx/stqcx). +(define_predicate "quad_memory_operand" + (match_code "mem") +{ + if (!TARGET_QUAD_MEMORY && !TARGET_SYNC_TI) + return false; + + if (GET_MODE_SIZE (mode) != 16 || !MEM_P (op) || MEM_ALIGN (op) < 128) + return false; + + return quad_address_p (XEXP (op, 0), mode, false); +}) + +;; Return 1 if the operand is suitable for load/store to vector registers with +;; d-form addressing (register+offset), which was added in ISA 3.0. +;; Unlike quad_memory_operand, we do not have to check for alignment. +(define_predicate "vsx_quad_dform_memory_operand" + (match_code "mem") +{ + if (!TARGET_P9_DFORM_VECTOR || !MEM_P (op) || GET_MODE_SIZE (mode) != 16) + return false; + + return quad_address_p (XEXP (op, 0), mode, false); +}) + +;; Return 1 if the operand is an indexed or indirect memory operand. +(define_predicate "indexed_or_indirect_operand" + (match_code "mem") +{ + op = XEXP (op, 0); + if (VECTOR_MEM_ALTIVEC_P (mode) + && GET_CODE (op) == AND + && GET_CODE (XEXP (op, 1)) == CONST_INT + && INTVAL (XEXP (op, 1)) == -16) + op = XEXP (op, 0); + + return indexed_or_indirect_address (op, mode); +}) + +;; Like indexed_or_indirect_operand, but also allow a GPR register if direct +;; moves are supported. 
+(define_predicate "reg_or_indexed_operand" + (match_code "mem,reg,subreg") +{ + if (MEM_P (op)) + return indexed_or_indirect_operand (op, mode); + else if (TARGET_DIRECT_MOVE) + return register_operand (op, mode); + return + 0; +}) + +;; Return 1 if the operand is an indexed or indirect memory operand with an +;; AND -16 in it, used to recognize when we need to switch to Altivec loads +;; to realign loops instead of VSX (altivec silently ignores the bottom bits, +;; while VSX uses the full address and traps) +(define_predicate "altivec_indexed_or_indirect_operand" + (match_code "mem") +{ + op = XEXP (op, 0); + if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) + && GET_CODE (op) == AND + && GET_CODE (XEXP (op, 1)) == CONST_INT + && INTVAL (XEXP (op, 1)) == -16) + return indexed_or_indirect_address (XEXP (op, 0), mode); + + return 0; +}) + +;; Return 1 if the operand is an indexed or indirect address. +(define_special_predicate "indexed_or_indirect_address" + (and (match_test "REG_P (op) + || (GET_CODE (op) == PLUS + /* Omit testing REG_P (XEXP (op, 0)). */ + && REG_P (XEXP (op, 1)))") + (match_operand 0 "address_operand"))) + +;; Return 1 if the operand is an index-form address. +(define_special_predicate "indexed_address" + (match_test "(GET_CODE (op) == PLUS + && REG_P (XEXP (op, 0)) + && REG_P (XEXP (op, 1)))")) + +;; Return 1 if the operand is a MEM with an update-form address. This may +;; also include update-indexed form. +(define_special_predicate "update_address_mem" + (match_test "(MEM_P (op) + && (GET_CODE (XEXP (op, 0)) == PRE_INC + || GET_CODE (XEXP (op, 0)) == PRE_DEC + || GET_CODE (XEXP (op, 0)) == PRE_MODIFY))")) + +;; Return 1 if the operand is a MEM with an indexed-form address. +(define_special_predicate "indexed_address_mem" + (match_test "(MEM_P (op) + && (indexed_address (XEXP (op, 0), mode) + || (GET_CODE (XEXP (op, 0)) == PRE_MODIFY + && indexed_address (XEXP (XEXP (op, 0), 1), mode))))")) + +;; Return 1 if the operand is either a non-special register or can be used +;; as the operand of a `mode' add insn. +(define_predicate "add_operand" + (if_then_else (match_code "const_int") + (match_test "satisfies_constraint_I (op) + || satisfies_constraint_L (op)") + (match_operand 0 "gpc_reg_operand"))) + +;; Return 1 if the operand is either a non-special register, or 0, or -1. +(define_predicate "adde_operand" + (if_then_else (match_code "const_int") + (match_test "INTVAL (op) == 0 || INTVAL (op) == -1") + (match_operand 0 "gpc_reg_operand"))) + +;; Return 1 if OP is a constant but not a valid add_operand. +(define_predicate "non_add_cint_operand" + (and (match_code "const_int") + (match_test "!satisfies_constraint_I (op) + && !satisfies_constraint_L (op)"))) + +;; Return 1 if the operand is a constant that can be used as the operand +;; of an OR or XOR. +(define_predicate "logical_const_operand" + (match_code "const_int") +{ + HOST_WIDE_INT opl; + + opl = INTVAL (op) & GET_MODE_MASK (mode); + + return ((opl & ~ (unsigned HOST_WIDE_INT) 0xffff) == 0 + || (opl & ~ (unsigned HOST_WIDE_INT) 0xffff0000) == 0); +}) + +;; Return 1 if the operand is a non-special register or a constant that +;; can be used as the operand of an OR or XOR. +(define_predicate "logical_operand" + (ior (match_operand 0 "gpc_reg_operand") + (match_operand 0 "logical_const_operand"))) + +;; Return 1 if op is a constant that is not a logical operand, but could +;; be split into one. 
+(define_predicate "non_logical_cint_operand" + (and (match_code "const_int,const_wide_int") + (and (not (match_operand 0 "logical_operand")) + (match_operand 0 "reg_or_logical_cint_operand")))) + +;; Return 1 if the operand is either a non-special register or a +;; constant that can be used as the operand of a logical AND. +(define_predicate "and_operand" + (ior (and (match_code "const_int") + (match_test "rs6000_is_valid_and_mask (op, mode)")) + (if_then_else (match_test "fixed_regs[CR0_REGNO]") + (match_operand 0 "gpc_reg_operand") + (match_operand 0 "logical_operand")))) + +;; Return 1 if the operand is either a logical operand or a short cint operand. +(define_predicate "scc_eq_operand" + (ior (match_operand 0 "logical_operand") + (match_operand 0 "short_cint_operand"))) + +;; Return 1 if the operand is a general non-special register or memory operand. +(define_predicate "reg_or_mem_operand" + (ior (match_operand 0 "memory_operand") + (ior (and (match_code "mem") + (match_test "macho_lo_sum_memory_operand (op, mode)")) + (ior (match_operand 0 "volatile_mem_operand") + (match_operand 0 "gpc_reg_operand"))))) + +;; Return 1 if the operand is either an easy FP constant or memory or reg. +(define_predicate "reg_or_none500mem_operand" + (if_then_else (match_code "mem") + (and (match_test "!TARGET_E500_DOUBLE") + (ior (match_operand 0 "memory_operand") + (ior (match_test "macho_lo_sum_memory_operand (op, mode)") + (match_operand 0 "volatile_mem_operand")))) + (match_operand 0 "gpc_reg_operand"))) + +;; Return 1 if the operand is CONST_DOUBLE 0, register or memory operand. +(define_predicate "zero_reg_mem_operand" + (ior (and (match_test "TARGET_VSX") + (match_operand 0 "zero_fp_constant")) + (match_operand 0 "reg_or_mem_operand"))) + +;; Return 1 if the operand is a CONST_INT and it is the element for 64-bit +;; data types inside of a vector that scalar instructions operate on +(define_predicate "vsx_scalar_64bit" + (match_code "const_int") +{ + return (INTVAL (op) == VECTOR_ELEMENT_SCALAR_64BIT); +}) + +;; Return 1 if the operand is a general register or memory operand without +;; pre_inc or pre_dec or pre_modify, which produces invalid form of PowerPC +;; lwa instruction. +(define_predicate "lwa_operand" + (match_code "reg,subreg,mem") +{ + rtx inner, addr, offset; + + inner = op; + if (reload_completed && GET_CODE (inner) == SUBREG) + inner = SUBREG_REG (inner); + + if (gpc_reg_operand (inner, mode)) + return true; + if (!memory_operand (inner, mode)) + return false; + if (!rs6000_gen_cell_microcode) + return false; + + addr = XEXP (inner, 0); + if (GET_CODE (addr) == PRE_INC + || GET_CODE (addr) == PRE_DEC + || (GET_CODE (addr) == PRE_MODIFY + && !legitimate_indexed_address_p (XEXP (addr, 1), 0))) + return false; + if (GET_CODE (addr) == LO_SUM + && GET_CODE (XEXP (addr, 0)) == REG + && GET_CODE (XEXP (addr, 1)) == CONST) + addr = XEXP (XEXP (addr, 1), 0); + if (GET_CODE (addr) != PLUS) + return true; + offset = XEXP (addr, 1); + if (GET_CODE (offset) != CONST_INT) + return true; + return INTVAL (offset) % 4 == 0; +}) + +;; Return 1 if the operand, used inside a MEM, is a SYMBOL_REF. +(define_predicate "symbol_ref_operand" + (and (match_code "symbol_ref") + (match_test "(mode == VOIDmode || GET_MODE (op) == mode) + && (DEFAULT_ABI != ABI_AIX || SYMBOL_REF_FUNCTION_P (op))"))) + +;; Return 1 if op is an operand that can be loaded via the GOT. 
+;; or non-special register register field no cr0 +(define_predicate "got_operand" + (match_code "symbol_ref,const,label_ref")) + +;; Return 1 if op is a simple reference that can be loaded via the GOT, +;; excluding labels involving addition. +(define_predicate "got_no_const_operand" + (match_code "symbol_ref,label_ref")) + +;; Return 1 if op is a SYMBOL_REF for a TLS symbol. +(define_predicate "rs6000_tls_symbol_ref" + (and (match_code "symbol_ref") + (match_test "RS6000_SYMBOL_REF_TLS_P (op)"))) + +;; Return 1 if the operand, used inside a MEM, is a valid first argument +;; to CALL. This is a SYMBOL_REF, a pseudo-register, LR or CTR. +(define_predicate "call_operand" + (if_then_else (match_code "reg") + (match_test "REGNO (op) == LR_REGNO + || REGNO (op) == CTR_REGNO + || REGNO (op) >= FIRST_PSEUDO_REGISTER") + (match_code "symbol_ref"))) + +;; Return 1 if the operand is a SYMBOL_REF for a function known to be in +;; this file. +(define_predicate "current_file_function_operand" + (and (match_code "symbol_ref") + (match_test "(DEFAULT_ABI != ABI_AIX || SYMBOL_REF_FUNCTION_P (op)) + && (SYMBOL_REF_LOCAL_P (op) + || (op == XEXP (DECL_RTL (current_function_decl), 0) + && !decl_replaceable_p (current_function_decl))) + && !((DEFAULT_ABI == ABI_AIX + || DEFAULT_ABI == ABI_ELFv2) + && (SYMBOL_REF_EXTERNAL_P (op) + || SYMBOL_REF_WEAK (op)))"))) + +;; Return 1 if this operand is a valid input for a move insn. +(define_predicate "input_operand" + (match_code "symbol_ref,const,reg,subreg,mem, + const_double,const_wide_int,const_vector,const_int") +{ + /* Memory is always valid. */ + if (memory_operand (op, mode)) + return 1; + + /* For floating-point, easy constants are valid. */ + if (SCALAR_FLOAT_MODE_P (mode) + && easy_fp_constant (op, mode)) + return 1; + + /* Allow any integer constant. */ + if (GET_MODE_CLASS (mode) == MODE_INT + && CONST_SCALAR_INT_P (op)) + return 1; + + /* Allow easy vector constants. */ + if (GET_CODE (op) == CONST_VECTOR + && easy_vector_constant (op, mode)) + return 1; + + /* Do not allow invalid E500 subregs. */ + if ((TARGET_E500_DOUBLE || TARGET_SPE) + && GET_CODE (op) == SUBREG + && invalid_e500_subreg (op, mode)) + return 0; + + /* For floating-point or multi-word mode, the only remaining valid type + is a register. */ + if (SCALAR_FLOAT_MODE_P (mode) + || GET_MODE_SIZE (mode) > UNITS_PER_WORD) + return register_operand (op, mode); + + /* We don't allow moving the carry bit around. */ + if (ca_operand (op, mode)) + return 0; + + /* The only cases left are integral modes one word or smaller (we + do not get called for MODE_CC values). These can be in any + register. */ + if (register_operand (op, mode)) + return 1; + + /* V.4 allows SYMBOL_REFs and CONSTs that are in the small data region + to be valid. */ + if (DEFAULT_ABI == ABI_V4 + && (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST) + && small_data_operand (op, Pmode)) + return 1; + + return 0; +}) + +;; Return 1 if this operand is a valid input for a vsx_splat insn. +(define_predicate "splat_input_operand" + (match_code "reg,subreg,mem") +{ + machine_mode vmode; + + if (mode == DFmode) + vmode = V2DFmode; + else if (mode == DImode) + vmode = V2DImode; + else if (mode == SImode && TARGET_P9_VECTOR) + vmode = V4SImode; + else if (mode == SFmode && TARGET_P9_VECTOR) + vmode = V4SFmode; + else + return false; + + if (MEM_P (op)) + { + rtx addr = XEXP (op, 0); + + if (! 
volatile_ok && MEM_VOLATILE_P (op)) + return 0; + + if (reload_in_progress || lra_in_progress || reload_completed) + return indexed_or_indirect_address (addr, vmode); + else + return memory_address_addr_space_p (vmode, addr, MEM_ADDR_SPACE (op)); + } + return gpc_reg_operand (op, mode); +}) + +;; Return true if OP is a non-immediate operand and not an invalid +;; SUBREG operation on the e500. +(define_predicate "rs6000_nonimmediate_operand" + (match_code "reg,subreg,mem") +{ + if ((TARGET_E500_DOUBLE || TARGET_SPE) + && GET_CODE (op) == SUBREG + && invalid_e500_subreg (op, mode)) + return 0; + + return nonimmediate_operand (op, mode); +}) + +;; Return true if operand is an operator used in rotate-and-mask instructions. +(define_predicate "rotate_mask_operator" + (match_code "rotate,ashift,lshiftrt")) + +;; Return true if operand is boolean operator. +(define_predicate "boolean_operator" + (match_code "and,ior,xor")) + +;; Return true if operand is OR-form of boolean operator. +(define_predicate "boolean_or_operator" + (match_code "ior,xor")) + +;; Return true if operand is an equality operator. +(define_special_predicate "equality_operator" + (match_code "eq,ne")) + +;; Return 1 if OP is a comparison operation that is valid for a branch +;; instruction. We check the opcode against the mode of the CC value. +;; validate_condition_mode is an assertion. +(define_predicate "branch_comparison_operator" + (and (match_operand 0 "comparison_operator") + (and (match_test "GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_CC") + (match_test "validate_condition_mode (GET_CODE (op), + GET_MODE (XEXP (op, 0))), + 1")))) + +;; Return 1 if OP is a valid comparison operator for "cbranch" instructions. +;; If we're assuming that FP operations cannot generate user-visible traps, +;; then on e500 we can use the ordered-signaling instructions to implement +;; the unordered-quiet FP comparison predicates modulo a reversal. +(define_predicate "rs6000_cbranch_operator" + (if_then_else (match_test "TARGET_HARD_FLOAT && !TARGET_FPRS") + (if_then_else (match_test "flag_trapping_math") + (match_operand 0 "ordered_comparison_operator") + (ior (match_operand 0 "ordered_comparison_operator") + (match_code ("unlt,unle,ungt,unge")))) + (match_operand 0 "comparison_operator"))) + +;; Return 1 if OP is an unsigned comparison operator. +(define_predicate "unsigned_comparison_operator" + (match_code "ltu,gtu,leu,geu")) + +;; Return 1 if OP is a signed comparison operator. +(define_predicate "signed_comparison_operator" + (match_code "lt,gt,le,ge")) + +;; Return 1 if OP is a comparison operation that is valid for an SCC insn -- +;; it must be a positive comparison. +(define_predicate "scc_comparison_operator" + (and (match_operand 0 "branch_comparison_operator") + (match_code "eq,lt,gt,ltu,gtu,unordered"))) + +;; Return 1 if OP is a comparison operation whose inverse would be valid for +;; an SCC insn. +(define_predicate "scc_rev_comparison_operator" + (and (match_operand 0 "branch_comparison_operator") + (match_code "ne,le,ge,leu,geu,ordered"))) + +;; Return 1 if OP is a comparison operator suitable for floating point +;; vector/scalar comparisons that generate a -1/0 mask. +(define_predicate "fpmask_comparison_operator" + (match_code "eq,gt,ge")) + +;; Return 1 if OP is a comparison operator suitable for vector/scalar +;; comparisons that generate a 0/-1 mask (i.e. the inverse of +;; fpmask_comparison_operator). 
+(define_predicate "invert_fpmask_comparison_operator" + (match_code "ne,unlt,unle")) + +;; Return 1 if OP is a comparison operation suitable for integer vector/scalar +;; comparisons that generate a -1/0 mask. +(define_predicate "vecint_comparison_operator" + (match_code "eq,gt,gtu")) + +;; Return 1 if OP is a comparison operation that is valid for a branch +;; insn, which is true if the corresponding bit in the CC register is set. +(define_predicate "branch_positive_comparison_operator" + (and (match_operand 0 "branch_comparison_operator") + (match_code "eq,lt,gt,ltu,gtu,unordered"))) + +;; Return 1 if OP is a load multiple operation, known to be a PARALLEL. +(define_predicate "load_multiple_operation" + (match_code "parallel") +{ + int count = XVECLEN (op, 0); + unsigned int dest_regno; + rtx src_addr; + int i; + + /* Perform a quick check so we don't blow up below. */ + if (count <= 1 + || GET_CODE (XVECEXP (op, 0, 0)) != SET + || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != REG + || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != MEM) + return 0; + + dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, 0))); + src_addr = XEXP (SET_SRC (XVECEXP (op, 0, 0)), 0); + + for (i = 1; i < count; i++) + { + rtx elt = XVECEXP (op, 0, i); + + if (GET_CODE (elt) != SET + || GET_CODE (SET_DEST (elt)) != REG + || GET_MODE (SET_DEST (elt)) != SImode + || REGNO (SET_DEST (elt)) != dest_regno + i + || GET_CODE (SET_SRC (elt)) != MEM + || GET_MODE (SET_SRC (elt)) != SImode + || GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS + || ! rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr) + || GET_CODE (XEXP (XEXP (SET_SRC (elt), 0), 1)) != CONST_INT + || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) != i * 4) + return 0; + } + + return 1; +}) + +;; Return 1 if OP is a store multiple operation, known to be a PARALLEL. +;; The second vector element is a CLOBBER. +(define_predicate "store_multiple_operation" + (match_code "parallel") +{ + int count = XVECLEN (op, 0) - 1; + unsigned int src_regno; + rtx dest_addr; + int i; + + /* Perform a quick check so we don't blow up below. */ + if (count <= 1 + || GET_CODE (XVECEXP (op, 0, 0)) != SET + || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != MEM + || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != REG) + return 0; + + src_regno = REGNO (SET_SRC (XVECEXP (op, 0, 0))); + dest_addr = XEXP (SET_DEST (XVECEXP (op, 0, 0)), 0); + + for (i = 1; i < count; i++) + { + rtx elt = XVECEXP (op, 0, i + 1); + + if (GET_CODE (elt) != SET + || GET_CODE (SET_SRC (elt)) != REG + || GET_MODE (SET_SRC (elt)) != SImode + || REGNO (SET_SRC (elt)) != src_regno + i + || GET_CODE (SET_DEST (elt)) != MEM + || GET_MODE (SET_DEST (elt)) != SImode + || GET_CODE (XEXP (SET_DEST (elt), 0)) != PLUS + || ! rtx_equal_p (XEXP (XEXP (SET_DEST (elt), 0), 0), dest_addr) + || GET_CODE (XEXP (XEXP (SET_DEST (elt), 0), 1)) != CONST_INT + || INTVAL (XEXP (XEXP (SET_DEST (elt), 0), 1)) != i * 4) + return 0; + } + + return 1; +}) + +;; Return 1 if OP is valid for a save_world call in prologue, known to be +;; a PARLLEL. +(define_predicate "save_world_operation" + (match_code "parallel") +{ + int index; + int i; + rtx elt; + int count = XVECLEN (op, 0); + + if (count != 54) + return 0; + + index = 0; + if (GET_CODE (XVECEXP (op, 0, index++)) != CLOBBER + || GET_CODE (XVECEXP (op, 0, index++)) != USE) + return 0; + + for (i=1; i <= 18; i++) + { + elt = XVECEXP (op, 0, index++); + if (GET_CODE (elt) != SET + || GET_CODE (SET_DEST (elt)) != MEM + || ! 
memory_operand (SET_DEST (elt), DFmode) + || GET_CODE (SET_SRC (elt)) != REG + || GET_MODE (SET_SRC (elt)) != DFmode) + return 0; + } + + for (i=1; i <= 12; i++) + { + elt = XVECEXP (op, 0, index++); + if (GET_CODE (elt) != SET + || GET_CODE (SET_DEST (elt)) != MEM + || GET_CODE (SET_SRC (elt)) != REG + || GET_MODE (SET_SRC (elt)) != V4SImode) + return 0; + } + + for (i=1; i <= 19; i++) + { + elt = XVECEXP (op, 0, index++); + if (GET_CODE (elt) != SET + || GET_CODE (SET_DEST (elt)) != MEM + || ! memory_operand (SET_DEST (elt), Pmode) + || GET_CODE (SET_SRC (elt)) != REG + || GET_MODE (SET_SRC (elt)) != Pmode) + return 0; + } + + elt = XVECEXP (op, 0, index++); + if (GET_CODE (elt) != SET + || GET_CODE (SET_DEST (elt)) != MEM + || ! memory_operand (SET_DEST (elt), Pmode) + || GET_CODE (SET_SRC (elt)) != REG + || REGNO (SET_SRC (elt)) != CR2_REGNO + || GET_MODE (SET_SRC (elt)) != Pmode) + return 0; + + if (GET_CODE (XVECEXP (op, 0, index++)) != SET + || GET_CODE (XVECEXP (op, 0, index++)) != SET) + return 0; + return 1; +}) + +;; Return 1 if OP is valid for a restore_world call in epilogue, known to be +;; a PARALLEL. +(define_predicate "restore_world_operation" + (match_code "parallel") +{ + int index; + int i; + rtx elt; + int count = XVECLEN (op, 0); + + if (count != 59) + return 0; + + index = 0; + if (GET_CODE (XVECEXP (op, 0, index++)) != RETURN + || GET_CODE (XVECEXP (op, 0, index++)) != USE + || GET_CODE (XVECEXP (op, 0, index++)) != USE + || GET_CODE (XVECEXP (op, 0, index++)) != CLOBBER) + return 0; + + elt = XVECEXP (op, 0, index++); + if (GET_CODE (elt) != SET + || GET_CODE (SET_SRC (elt)) != MEM + || ! memory_operand (SET_SRC (elt), Pmode) + || GET_CODE (SET_DEST (elt)) != REG + || REGNO (SET_DEST (elt)) != CR2_REGNO + || GET_MODE (SET_DEST (elt)) != Pmode) + return 0; + + for (i=1; i <= 19; i++) + { + elt = XVECEXP (op, 0, index++); + if (GET_CODE (elt) != SET + || GET_CODE (SET_SRC (elt)) != MEM + || ! memory_operand (SET_SRC (elt), Pmode) + || GET_CODE (SET_DEST (elt)) != REG + || GET_MODE (SET_DEST (elt)) != Pmode) + return 0; + } + + for (i=1; i <= 12; i++) + { + elt = XVECEXP (op, 0, index++); + if (GET_CODE (elt) != SET + || GET_CODE (SET_SRC (elt)) != MEM + || GET_CODE (SET_DEST (elt)) != REG + || GET_MODE (SET_DEST (elt)) != V4SImode) + return 0; + } + + for (i=1; i <= 18; i++) + { + elt = XVECEXP (op, 0, index++); + if (GET_CODE (elt) != SET + || GET_CODE (SET_SRC (elt)) != MEM + || ! memory_operand (SET_SRC (elt), DFmode) + || GET_CODE (SET_DEST (elt)) != REG + || GET_MODE (SET_DEST (elt)) != DFmode) + return 0; + } + + if (GET_CODE (XVECEXP (op, 0, index++)) != CLOBBER + || GET_CODE (XVECEXP (op, 0, index++)) != CLOBBER + || GET_CODE (XVECEXP (op, 0, index++)) != CLOBBER + || GET_CODE (XVECEXP (op, 0, index++)) != CLOBBER + || GET_CODE (XVECEXP (op, 0, index++)) != USE) + return 0; + return 1; +}) + +;; Return 1 if OP is valid for a vrsave call, known to be a PARALLEL. 
+(define_predicate "vrsave_operation" + (match_code "parallel") +{ + int count = XVECLEN (op, 0); + unsigned int dest_regno, src_regno; + int i; + + if (count <= 1 + || GET_CODE (XVECEXP (op, 0, 0)) != SET + || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != REG + || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != UNSPEC_VOLATILE + || XINT (SET_SRC (XVECEXP (op, 0, 0)), 1) != UNSPECV_SET_VRSAVE) + return 0; + + dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, 0))); + src_regno = REGNO (XVECEXP (SET_SRC (XVECEXP (op, 0, 0)), 0, 1)); + + if (dest_regno != VRSAVE_REGNO || src_regno != VRSAVE_REGNO) + return 0; + + for (i = 1; i < count; i++) + { + rtx elt = XVECEXP (op, 0, i); + + if (GET_CODE (elt) != CLOBBER + && GET_CODE (elt) != SET) + return 0; + } + + return 1; +}) + +;; Return 1 if OP is valid for mfcr insn, known to be a PARALLEL. +(define_predicate "mfcr_operation" + (match_code "parallel") +{ + int count = XVECLEN (op, 0); + int i; + + /* Perform a quick check so we don't blow up below. */ + if (count < 1 + || GET_CODE (XVECEXP (op, 0, 0)) != SET + || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != UNSPEC + || XVECLEN (SET_SRC (XVECEXP (op, 0, 0)), 0) != 2) + return 0; + + for (i = 0; i < count; i++) + { + rtx exp = XVECEXP (op, 0, i); + rtx unspec; + int maskval; + rtx src_reg; + + src_reg = XVECEXP (SET_SRC (exp), 0, 0); + + if (GET_CODE (src_reg) != REG + || GET_MODE (src_reg) != CCmode + || ! CR_REGNO_P (REGNO (src_reg))) + return 0; + + if (GET_CODE (exp) != SET + || GET_CODE (SET_DEST (exp)) != REG + || GET_MODE (SET_DEST (exp)) != SImode + || ! INT_REGNO_P (REGNO (SET_DEST (exp)))) + return 0; + unspec = SET_SRC (exp); + maskval = 1 << (MAX_CR_REGNO - REGNO (src_reg)); + + if (GET_CODE (unspec) != UNSPEC + || XINT (unspec, 1) != UNSPEC_MOVESI_FROM_CR + || XVECLEN (unspec, 0) != 2 + || XVECEXP (unspec, 0, 0) != src_reg + || GET_CODE (XVECEXP (unspec, 0, 1)) != CONST_INT + || INTVAL (XVECEXP (unspec, 0, 1)) != maskval) + return 0; + } + return 1; +}) + +;; Return 1 if OP is valid for mtcrf insn, known to be a PARALLEL. +(define_predicate "mtcrf_operation" + (match_code "parallel") +{ + int count = XVECLEN (op, 0); + int i; + rtx src_reg; + + /* Perform a quick check so we don't blow up below. */ + if (count < 1 + || GET_CODE (XVECEXP (op, 0, 0)) != SET + || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != UNSPEC + || XVECLEN (SET_SRC (XVECEXP (op, 0, 0)), 0) != 2) + return 0; + src_reg = XVECEXP (SET_SRC (XVECEXP (op, 0, 0)), 0, 0); + + if (GET_CODE (src_reg) != REG + || GET_MODE (src_reg) != SImode + || ! INT_REGNO_P (REGNO (src_reg))) + return 0; + + for (i = 0; i < count; i++) + { + rtx exp = XVECEXP (op, 0, i); + rtx unspec; + int maskval; + + if (GET_CODE (exp) != SET + || GET_CODE (SET_DEST (exp)) != REG + || GET_MODE (SET_DEST (exp)) != CCmode + || ! CR_REGNO_P (REGNO (SET_DEST (exp)))) + return 0; + unspec = SET_SRC (exp); + maskval = 1 << (MAX_CR_REGNO - REGNO (SET_DEST (exp))); + + if (GET_CODE (unspec) != UNSPEC + || XINT (unspec, 1) != UNSPEC_MOVESI_TO_CR + || XVECLEN (unspec, 0) != 2 + || XVECEXP (unspec, 0, 0) != src_reg + || GET_CODE (XVECEXP (unspec, 0, 1)) != CONST_INT + || INTVAL (XVECEXP (unspec, 0, 1)) != maskval) + return 0; + } + return 1; +}) + +;; Return 1 if OP is valid for crsave insn, known to be a PARALLEL. 
+(define_predicate "crsave_operation" + (match_code "parallel") +{ + int count = XVECLEN (op, 0); + int i; + + for (i = 1; i < count; i++) + { + rtx exp = XVECEXP (op, 0, i); + + if (GET_CODE (exp) != USE + || GET_CODE (XEXP (exp, 0)) != REG + || GET_MODE (XEXP (exp, 0)) != CCmode + || ! CR_REGNO_P (REGNO (XEXP (exp, 0)))) + return 0; + } + return 1; +}) + +;; Return 1 if OP is valid for lmw insn, known to be a PARALLEL. +(define_predicate "lmw_operation" + (match_code "parallel") +{ + int count = XVECLEN (op, 0); + unsigned int dest_regno; + rtx src_addr; + unsigned int base_regno; + HOST_WIDE_INT offset; + int i; + + /* Perform a quick check so we don't blow up below. */ + if (count <= 1 + || GET_CODE (XVECEXP (op, 0, 0)) != SET + || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != REG + || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != MEM) + return 0; + + dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, 0))); + src_addr = XEXP (SET_SRC (XVECEXP (op, 0, 0)), 0); + + if (dest_regno > 31 + || count != 32 - (int) dest_regno) + return 0; + + if (legitimate_indirect_address_p (src_addr, 0)) + { + offset = 0; + base_regno = REGNO (src_addr); + if (base_regno == 0) + return 0; + } + else if (rs6000_legitimate_offset_address_p (SImode, src_addr, false, false)) + { + offset = INTVAL (XEXP (src_addr, 1)); + base_regno = REGNO (XEXP (src_addr, 0)); + } + else + return 0; + + for (i = 0; i < count; i++) + { + rtx elt = XVECEXP (op, 0, i); + rtx newaddr; + rtx addr_reg; + HOST_WIDE_INT newoffset; + + if (GET_CODE (elt) != SET + || GET_CODE (SET_DEST (elt)) != REG + || GET_MODE (SET_DEST (elt)) != SImode + || REGNO (SET_DEST (elt)) != dest_regno + i + || GET_CODE (SET_SRC (elt)) != MEM + || GET_MODE (SET_SRC (elt)) != SImode) + return 0; + newaddr = XEXP (SET_SRC (elt), 0); + if (legitimate_indirect_address_p (newaddr, 0)) + { + newoffset = 0; + addr_reg = newaddr; + } + else if (rs6000_legitimate_offset_address_p (SImode, newaddr, false, false)) + { + addr_reg = XEXP (newaddr, 0); + newoffset = INTVAL (XEXP (newaddr, 1)); + } + else + return 0; + if (REGNO (addr_reg) != base_regno + || newoffset != offset + 4 * i) + return 0; + } + + return 1; +}) + +;; Return 1 if OP is valid for stmw insn, known to be a PARALLEL. +(define_predicate "stmw_operation" + (match_code "parallel") +{ + int count = XVECLEN (op, 0); + unsigned int src_regno; + rtx dest_addr; + unsigned int base_regno; + HOST_WIDE_INT offset; + int i; + + /* Perform a quick check so we don't blow up below. 
*/ + if (count <= 1 + || GET_CODE (XVECEXP (op, 0, 0)) != SET + || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != MEM + || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != REG) + return 0; + + src_regno = REGNO (SET_SRC (XVECEXP (op, 0, 0))); + dest_addr = XEXP (SET_DEST (XVECEXP (op, 0, 0)), 0); + + if (src_regno > 31 + || count != 32 - (int) src_regno) + return 0; + + if (legitimate_indirect_address_p (dest_addr, 0)) + { + offset = 0; + base_regno = REGNO (dest_addr); + if (base_regno == 0) + return 0; + } + else if (rs6000_legitimate_offset_address_p (SImode, dest_addr, false, false)) + { + offset = INTVAL (XEXP (dest_addr, 1)); + base_regno = REGNO (XEXP (dest_addr, 0)); + } + else + return 0; + + for (i = 0; i < count; i++) + { + rtx elt = XVECEXP (op, 0, i); + rtx newaddr; + rtx addr_reg; + HOST_WIDE_INT newoffset; + + if (GET_CODE (elt) != SET + || GET_CODE (SET_SRC (elt)) != REG + || GET_MODE (SET_SRC (elt)) != SImode + || REGNO (SET_SRC (elt)) != src_regno + i + || GET_CODE (SET_DEST (elt)) != MEM + || GET_MODE (SET_DEST (elt)) != SImode) + return 0; + newaddr = XEXP (SET_DEST (elt), 0); + if (legitimate_indirect_address_p (newaddr, 0)) + { + newoffset = 0; + addr_reg = newaddr; + } + else if (rs6000_legitimate_offset_address_p (SImode, newaddr, false, false)) + { + addr_reg = XEXP (newaddr, 0); + newoffset = INTVAL (XEXP (newaddr, 1)); + } + else + return 0; + if (REGNO (addr_reg) != base_regno + || newoffset != offset + 4 * i) + return 0; + } + + return 1; +}) + +;; Return 1 if OP is a stack tie operand. +(define_predicate "tie_operand" + (match_code "parallel") +{ + return (GET_CODE (XVECEXP (op, 0, 0)) == SET + && GET_CODE (XEXP (XVECEXP (op, 0, 0), 0)) == MEM + && GET_MODE (XEXP (XVECEXP (op, 0, 0), 0)) == BLKmode + && XEXP (XVECEXP (op, 0, 0), 1) == const0_rtx); +}) + +;; Match a small code model toc reference (or medium and large +;; model toc references before reload). +(define_predicate "small_toc_ref" + (match_code "unspec,plus") +{ + if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), mode)) + op = XEXP (op, 0); + + return GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_TOCREL; +}) + +;; Match the TOC memory operand that can be fused with an addis instruction. +;; This is used in matching a potential fused address before register +;; allocation. +(define_predicate "toc_fusion_mem_raw" + (match_code "mem") +{ + if (!TARGET_TOC_FUSION_INT || !can_create_pseudo_p ()) + return false; + + return small_toc_ref (XEXP (op, 0), Pmode); +}) + +;; Match the memory operand that has been fused with an addis instruction and +;; wrapped inside of an (unspec [...] UNSPEC_FUSION_ADDIS) wrapper. +(define_predicate "toc_fusion_mem_wrapped" + (match_code "mem") +{ + rtx addr; + + if (!TARGET_TOC_FUSION_INT) + return false; + + if (!MEM_P (op)) + return false; + + addr = XEXP (op, 0); + return (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS); +}) + +;; Match the first insn (addis) in fusing the combination of addis and loads to +;; GPR registers on power8. 
+(define_predicate "fusion_gpr_addis" + (match_code "const_int,high,plus") +{ + HOST_WIDE_INT value; + rtx int_const; + + if (GET_CODE (op) == HIGH) + return 1; + + if (CONST_INT_P (op)) + int_const = op; + + else if (GET_CODE (op) == PLUS + && base_reg_operand (XEXP (op, 0), Pmode) + && CONST_INT_P (XEXP (op, 1))) + int_const = XEXP (op, 1); + + else + return 0; + + value = INTVAL (int_const); + if ((value & (HOST_WIDE_INT)0xffff) != 0) + return 0; + + if ((value & (HOST_WIDE_INT)0xffff0000) == 0) + return 0; + + /* Power8 currently will only do the fusion if the top 11 bits of the addis + value are all 1's or 0's. Ignore this restriction if we are testing + advanced fusion. */ + if (TARGET_P9_FUSION) + return 1; + + return (IN_RANGE (value >> 16, -32, 31)); +}) + +;; Match the second insn (lbz, lhz, lwz, ld) in fusing the combination of addis +;; and loads to GPR registers on power8. +(define_predicate "fusion_gpr_mem_load" + (match_code "mem,sign_extend,zero_extend") +{ + rtx addr, base, offset; + + /* Handle sign/zero extend. */ + if (GET_CODE (op) == ZERO_EXTEND + || (TARGET_P8_FUSION_SIGN && GET_CODE (op) == SIGN_EXTEND)) + { + op = XEXP (op, 0); + mode = GET_MODE (op); + } + + if (!MEM_P (op)) + return 0; + + switch (mode) + { + case QImode: + case HImode: + case SImode: + break; + + case DImode: + if (!TARGET_POWERPC64) + return 0; + break; + + default: + return 0; + } + + addr = XEXP (op, 0); + if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) + return 0; + + base = XEXP (addr, 0); + if (!base_reg_operand (base, GET_MODE (base))) + return 0; + + offset = XEXP (addr, 1); + + if (GET_CODE (addr) == PLUS) + return satisfies_constraint_I (offset); + + else if (GET_CODE (addr) == LO_SUM) + { + if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64)) + return small_toc_ref (offset, GET_MODE (offset)); + + else if (TARGET_ELF && !TARGET_POWERPC64) + return CONSTANT_P (offset); + } + + return 0; +}) + +;; Match a GPR load (lbz, lhz, lwz, ld) that uses a combined address in the +;; memory field with both the addis and the memory offset. Sign extension +;; is not handled here, since lha and lwa are not fused. +;; With P9 fusion, also match a fpr/vector load and float_extend +(define_predicate "fusion_addis_mem_combo_load" + (match_code "mem,zero_extend,float_extend") +{ + rtx addr, base, offset; + + /* Handle zero/float extend. */ + if (GET_CODE (op) == ZERO_EXTEND || GET_CODE (op) == FLOAT_EXTEND) + { + op = XEXP (op, 0); + mode = GET_MODE (op); + } + + if (!MEM_P (op)) + return 0; + + switch (mode) + { + case QImode: + case HImode: + case SImode: + break; + + /* Do not fuse 64-bit DImode in 32-bit since it splits into two + separate instructions. */ + case DImode: + if (!TARGET_POWERPC64) + return 0; + break; + + /* ISA 2.08/power8 only had fusion of GPR loads. */ + case SFmode: + if (!TARGET_P9_FUSION) + return 0; + break; + + /* ISA 2.08/power8 only had fusion of GPR loads. Do not allow 64-bit + DFmode in 32-bit if -msoft-float since it splits into two separate + instructions. 
*/ + case DFmode: + if ((!TARGET_POWERPC64 && !TARGET_DF_FPR) || !TARGET_P9_FUSION) + return 0; + break; + + default: + return 0; + } + + addr = XEXP (op, 0); + if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) + return 0; + + base = XEXP (addr, 0); + if (!fusion_gpr_addis (base, GET_MODE (base))) + return 0; + + offset = XEXP (addr, 1); + if (GET_CODE (addr) == PLUS) + return satisfies_constraint_I (offset); + + else if (GET_CODE (addr) == LO_SUM) + { + if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64)) + return small_toc_ref (offset, GET_MODE (offset)); + + else if (TARGET_ELF && !TARGET_POWERPC64) + return CONSTANT_P (offset); + } + + return 0; +}) + +;; Like fusion_addis_mem_combo_load, but for stores +(define_predicate "fusion_addis_mem_combo_store" + (match_code "mem") +{ + rtx addr, base, offset; + + if (!MEM_P (op) || !TARGET_P9_FUSION) + return 0; + + switch (mode) + { + case QImode: + case HImode: + case SImode: + case SFmode: + break; + + /* Do not fuse 64-bit DImode in 32-bit since it splits into two + separate instructions. */ + case DImode: + if (!TARGET_POWERPC64) + return 0; + break; + + /* Do not allow 64-bit DFmode in 32-bit if -msoft-float since it splits + into two separate instructions. Do allow fusion if we have hardware + floating point. */ + case DFmode: + if (!TARGET_POWERPC64 && !TARGET_DF_FPR) + return 0; + break; + + default: + return 0; + } + + addr = XEXP (op, 0); + if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) + return 0; + + base = XEXP (addr, 0); + if (!fusion_gpr_addis (base, GET_MODE (base))) + return 0; + + offset = XEXP (addr, 1); + if (GET_CODE (addr) == PLUS) + return satisfies_constraint_I (offset); + + else if (GET_CODE (addr) == LO_SUM) + { + if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64)) + return small_toc_ref (offset, GET_MODE (offset)); + + else if (TARGET_ELF && !TARGET_POWERPC64) + return CONSTANT_P (offset); + } + + return 0; +}) + +;; Return true if the operand is a float_extend or zero extend of an +;; offsettable memory operand suitable for use in fusion +(define_predicate "fusion_offsettable_mem_operand" + (match_code "mem,zero_extend,float_extend") +{ + if (GET_CODE (op) == ZERO_EXTEND || GET_CODE (op) == FLOAT_EXTEND) + { + op = XEXP (op, 0); + mode = GET_MODE (op); + } + + if (!memory_operand (op, mode)) + return 0; + + return offsettable_nonstrict_memref_p (op); +}) diff --git a/gcc/config/powerpcspe/rs64.md b/gcc/config/powerpcspe/rs64.md new file mode 100644 index 000000000000..cdbcab1ddf7d --- /dev/null +++ b/gcc/config/powerpcspe/rs64.md @@ -0,0 +1,162 @@ +;; Scheduling description for IBM RS64 processors. +;; Copyright (C) 2003-2017 Free Software Foundation, Inc. +;; +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. 
+ +(define_automaton "rs64,rs64fp") +(define_cpu_unit "iu_rs64" "rs64") +(define_cpu_unit "mciu_rs64" "rs64") +(define_cpu_unit "fpu_rs64" "rs64fp") +(define_cpu_unit "lsu_rs64,bpu_rs64" "rs64") + +;; RS64a 64-bit IU, LSU, FPU, BPU + +(define_insn_reservation "rs64a-load" 2 + (and (eq_attr "type" "load") + (eq_attr "cpu" "rs64a")) + "lsu_rs64") + +(define_insn_reservation "rs64a-store" 2 + (and (eq_attr "type" "store,fpstore") + (eq_attr "cpu" "rs64a")) + "lsu_rs64") + +(define_insn_reservation "rs64a-fpload" 3 + (and (eq_attr "type" "fpload") + (eq_attr "cpu" "rs64a")) + "lsu_rs64") + +(define_insn_reservation "rs64a-llsc" 2 + (and (eq_attr "type" "load_l,store_c") + (eq_attr "cpu" "rs64a")) + "lsu_rs64") + +(define_insn_reservation "rs64a-integer" 1 + (and (ior (eq_attr "type" "integer,insert,trap,cntlz,isel") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "no"))) + (eq_attr "cpu" "rs64a")) + "iu_rs64") + +(define_insn_reservation "rs64a-two" 1 + (and (eq_attr "type" "two") + (eq_attr "cpu" "rs64a")) + "iu_rs64,iu_rs64") + +(define_insn_reservation "rs64a-three" 1 + (and (eq_attr "type" "three") + (eq_attr "cpu" "rs64a")) + "iu_rs64,iu_rs64,iu_rs64") + +(define_insn_reservation "rs64a-imul" 20 + (and (eq_attr "type" "mul") + (eq_attr "size" "32") + (eq_attr "cpu" "rs64a")) + "mciu_rs64*13") + +(define_insn_reservation "rs64a-imul2" 12 + (and (eq_attr "type" "mul") + (eq_attr "size" "16") + (eq_attr "cpu" "rs64a")) + "mciu_rs64*5") + +(define_insn_reservation "rs64a-imul3" 8 + (and (eq_attr "type" "mul") + (eq_attr "size" "8") + (eq_attr "cpu" "rs64a")) + "mciu_rs64*2") + +(define_insn_reservation "rs64a-lmul" 34 + (and (eq_attr "type" "mul") + (eq_attr "size" "64") + (eq_attr "cpu" "rs64a")) + "mciu_rs64*34") + +(define_insn_reservation "rs64a-idiv" 66 + (and (eq_attr "type" "div") + (eq_attr "size" "32") + (eq_attr "cpu" "rs64a")) + "mciu_rs64*66") + +(define_insn_reservation "rs64a-ldiv" 66 + (and (eq_attr "type" "div") + (eq_attr "size" "64") + (eq_attr "cpu" "rs64a")) + "mciu_rs64*66") + +(define_insn_reservation "rs64a-compare" 3 + (and (ior (eq_attr "type" "cmp") + (and (eq_attr "type" "add,logical,shift,exts") + (eq_attr "dot" "yes"))) + (eq_attr "cpu" "rs64a")) + "iu_rs64,nothing,bpu_rs64") + +(define_insn_reservation "rs64a-fpcompare" 5 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "rs64a")) + "mciu_rs64,fpu_rs64,bpu_rs64") + +(define_insn_reservation "rs64a-fp" 4 + (and (eq_attr "type" "fp,fpsimple,dmul") + (eq_attr "cpu" "rs64a")) + "mciu_rs64,fpu_rs64") + +(define_insn_reservation "rs64a-sdiv" 31 + (and (eq_attr "type" "sdiv,ddiv") + (eq_attr "cpu" "rs64a")) + "mciu_rs64,fpu_rs64*31") + +(define_insn_reservation "rs64a-sqrt" 49 + (and (eq_attr "type" "ssqrt,dsqrt") + (eq_attr "cpu" "rs64a")) + "mciu_rs64,fpu_rs64*49") + +(define_insn_reservation "rs64a-mfcr" 2 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "rs64a")) + "lsu_rs64") + +(define_insn_reservation "rs64a-mtcr" 3 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "rs64a")) + "lsu_rs64") + +(define_insn_reservation "rs64a-mtjmpr" 3 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "rs64a")) + "lsu_rs64") + +(define_insn_reservation "rs64a-mfjmpr" 2 + (and (eq_attr "type" "mfjmpr") + (eq_attr "cpu" "rs64a")) + "lsu_rs64") + +(define_insn_reservation "rs64a-jmpreg" 1 + (and (eq_attr "type" "jmpreg,branch,cr_logical,delayed_cr") + (eq_attr "cpu" "rs64a")) + "bpu_rs64") + +(define_insn_reservation "rs64a-isync" 6 + (and (eq_attr "type" "isync") + (eq_attr "cpu" "rs64a")) + "bpu_rs64") + 
+(define_insn_reservation "rs64a-sync" 1 + (and (eq_attr "type" "sync") + (eq_attr "cpu" "rs64a")) + "lsu_rs64") + diff --git a/gcc/config/powerpcspe/rtems.h b/gcc/config/powerpcspe/rtems.h new file mode 100644 index 000000000000..54a36de6eb45 --- /dev/null +++ b/gcc/config/powerpcspe/rtems.h @@ -0,0 +1,60 @@ +/* Definitions for rtems targeting a PowerPC using elf. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + Contributed by Joel Sherrill (joel@OARcorp.com). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* Specify predefined symbols in preprocessor. */ + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("PPC"); \ + builtin_define ("__rtems__"); \ + builtin_define ("__USE_INIT_FINI__"); \ + builtin_assert ("system=rtems"); \ + builtin_assert ("cpu=powerpc"); \ + builtin_assert ("machine=powerpc"); \ + TARGET_OS_SYSV_CPP_BUILTINS (); \ + } \ + while (0) + +#undef TARGET_LIBGCC_SDATA_SECTION +#define TARGET_LIBGCC_SDATA_SECTION ".sdata" + +#undef CPP_OS_DEFAULT_SPEC +#define CPP_OS_DEFAULT_SPEC "%(cpp_os_rtems)" + +#define CPP_OS_RTEMS_SPEC "\ +%{!mcpu*: %{!Dppc*: %{!Dmpc*: -Dmpc750} } }\ +%{mcpu=403: %{!Dppc*: %{!Dmpc*: -Dppc403} } } \ +%{mcpu=505: %{!Dppc*: %{!Dmpc*: -Dmpc505} } } \ +%{mcpu=601: %{!Dppc*: %{!Dmpc*: -Dppc601} } } \ +%{mcpu=602: %{!Dppc*: %{!Dmpc*: -Dppc602} } } \ +%{mcpu=603: %{!Dppc*: %{!Dmpc*: -Dppc603} } } \ +%{mcpu=603e: %{!Dppc*: %{!Dmpc*: -Dppc603e} } } \ +%{mcpu=604: %{!Dppc*: %{!Dmpc*: -Dmpc604} } } \ +%{mcpu=750: %{!Dppc*: %{!Dmpc*: -Dmpc750} } } \ +%{mcpu=821: %{!Dppc*: %{!Dmpc*: -Dmpc821} } } \ +%{mcpu=860: %{!Dppc*: %{!Dmpc*: -Dmpc860} } } \ +%{mcpu=8540: %{!Dppc*: %{!Dmpc*: -Dppc8540} } } \ +%{mcpu=e6500: -D__PPC_CPU_E6500__}" + +#undef SUBSUBTARGET_EXTRA_SPECS +#define SUBSUBTARGET_EXTRA_SPECS \ + { "cpp_os_rtems", CPP_OS_RTEMS_SPEC } diff --git a/gcc/config/powerpcspe/secureplt.h b/gcc/config/powerpcspe/secureplt.h new file mode 100644 index 000000000000..2d9d3dfcf579 --- /dev/null +++ b/gcc/config/powerpcspe/secureplt.h @@ -0,0 +1,21 @@ +/* Default to -msecure-plt. + Copyright (C) 2005-2017 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. 
*/ + +#define CC1_SECURE_PLT_DEFAULT_SPEC "-msecure-plt" +#define LINK_SECURE_PLT_DEFAULT_SPEC "--secure-plt" diff --git a/gcc/config/powerpcspe/si2vmx.h b/gcc/config/powerpcspe/si2vmx.h new file mode 100644 index 000000000000..6b8cca783c74 --- /dev/null +++ b/gcc/config/powerpcspe/si2vmx.h @@ -0,0 +1,2048 @@ +/* Cell BEA specific SPU intrinsics to PPU/VMX intrinsics + Copyright (C) 2007-2017 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _SI2VMX_H_ +#define _SI2VMX_H_ 1 + +#ifndef __SPU__ + +#include <stdlib.h> +#include <vec_types.h> + + +/* Specify a default halt action for spu_hcmpeq and spu_hcmpgt intrinsics. + * Users can override the action by defining it prior to including this + * header file. + */ +#ifndef SPU_HALT_ACTION +#define SPU_HALT_ACTION abort() +#endif + +/* Specify a default stop action for the spu_stop intrinsic. + * Users can override the action by defining it prior to including this + * header file. + */ +#ifndef SPU_STOP_ACTION +#define SPU_STOP_ACTION abort() +#endif + + +/* Specify a default action for unsupported intrinsic. + * Users can override the action by defining it prior to including this + * header file. 
+ */ +#ifndef SPU_UNSUPPORTED_ACTION +#define SPU_UNSUPPORTED_ACTION abort() +#endif + + +/* Casting intrinsics - from scalar to quadword + */ + +static __inline qword si_from_uchar(unsigned char c) { + union { + qword q; + unsigned char c[16]; + } x; + x.c[3] = c; + return (x.q); +} + +static __inline qword si_from_char(signed char c) { + union { + qword q; + signed char c[16]; + } x; + x.c[3] = c; + return (x.q); +} + +static __inline qword si_from_ushort(unsigned short s) { + union { + qword q; + unsigned short s[8]; + } x; + x.s[1] = s; + return (x.q); +} + +static __inline qword si_from_short(short s) { + union { + qword q; + short s[8]; + } x; + x.s[1] = s; + return (x.q); +} + + +static __inline qword si_from_uint(unsigned int i) { + union { + qword q; + unsigned int i[4]; + } x; + x.i[0] = i; + return (x.q); +} + +static __inline qword si_from_int(int i) { + union { + qword q; + int i[4]; + } x; + x.i[0] = i; + return (x.q); +} + +static __inline qword si_from_ullong(unsigned long long l) { + union { + qword q; + unsigned long long l[2]; + } x; + x.l[0] = l; + return (x.q); +} + +static __inline qword si_from_llong(long long l) { + union { + qword q; + long long l[2]; + } x; + x.l[0] = l; + return (x.q); +} + +static __inline qword si_from_float(float f) { + union { + qword q; + float f[4]; + } x; + x.f[0] = f; + return (x.q); +} + +static __inline qword si_from_double(double d) { + union { + qword q; + double d[2]; + } x; + x.d[0] = d; + return (x.q); +} + +static __inline qword si_from_ptr(void *ptr) { + union { + qword q; + void *p; + } x; + x.p = ptr; + return (x.q); +} + + +/* Casting intrinsics - from quadword to scalar + */ +static __inline unsigned char si_to_uchar(qword q) { + union { + qword q; + unsigned char c[16]; + } x; + x.q = q; + return (x.c[3]); +} + +static __inline signed char si_to_char(qword q) { + union { + qword q; + signed char c[16]; + } x; + x.q = q; + return (x.c[3]); +} + +static __inline unsigned short si_to_ushort(qword q) { + union { + qword q; + unsigned short s[8]; + } x; + x.q = q; + return (x.s[1]); +} + +static __inline short si_to_short(qword q) { + union { + qword q; + short s[8]; + } x; + x.q = q; + return (x.s[1]); +} + +static __inline unsigned int si_to_uint(qword q) { + union { + qword q; + unsigned int i[4]; + } x; + x.q = q; + return (x.i[0]); +} + +static __inline int si_to_int(qword q) { + union { + qword q; + int i[4]; + } x; + x.q = q; + return (x.i[0]); +} + +static __inline unsigned long long si_to_ullong(qword q) { + union { + qword q; + unsigned long long l[2]; + } x; + x.q = q; + return (x.l[0]); +} + +static __inline long long si_to_llong(qword q) { + union { + qword q; + long long l[2]; + } x; + x.q = q; + return (x.l[0]); +} + +static __inline float si_to_float(qword q) { + union { + qword q; + float f[4]; + } x; + x.q = q; + return (x.f[0]); +} + +static __inline double si_to_double(qword q) { + union { + qword q; + double d[2]; + } x; + x.q = q; + return (x.d[0]); +} + +static __inline void * si_to_ptr(qword q) { + union { + qword q; + void *p; + } x; + x.q = q; + return (x.p); +} + + +/* Absolute difference + */ +static __inline qword si_absdb(qword a, qword b) +{ + vec_uchar16 ac, bc, dc; + + ac = (vec_uchar16)(a); + bc = (vec_uchar16)(b); + dc = vec_sel(vec_sub(bc, ac), vec_sub(ac, bc), vec_cmpgt(ac, bc)); + + return ((qword)(dc)); +} + +/* Add intrinsics + */ +#define si_a(_a, _b) ((qword)(vec_add((vec_uint4)(_a), (vec_uint4)(_b)))) + +#define si_ah(_a, _b) ((qword)(vec_add((vec_ushort8)(_a), (vec_ushort8)(_b)))) + 
+static __inline qword si_ai(qword a, int b) +{ + return ((qword)(vec_add((vec_int4)(a), + vec_splat((vec_int4)(si_from_int(b)), 0)))); +} + + +static __inline qword si_ahi(qword a, short b) +{ + return ((qword)(vec_add((vec_short8)(a), + vec_splat((vec_short8)(si_from_short(b)), 1)))); +} + + +#define si_fa(_a, _b) ((qword)(vec_add((vec_float4)(_a), (vec_float4)(_b)))) + + +static __inline qword si_dfa(qword a, qword b) +{ + union { + vec_double2 v; + double d[2]; + } ad, bd, dd; + + ad.v = (vec_double2)(a); + bd.v = (vec_double2)(b); + dd.d[0] = ad.d[0] + bd.d[0]; + dd.d[1] = ad.d[1] + bd.d[1]; + + return ((qword)(dd.v)); +} + +/* Add word extended + */ +#define si_addx(_a, _b, _c) ((qword)(vec_add(vec_add((vec_uint4)(_a), (vec_uint4)(_b)), \ + vec_and((vec_uint4)(_c), vec_splat_u32(1))))) + + +/* Bit-wise AND + */ +#define si_and(_a, _b) ((qword)(vec_and((vec_uint4)(_a), (vec_uint4)(_b)))) + + +static __inline qword si_andbi(qword a, signed char b) +{ + return ((qword)(vec_and((vec_char16)(a), + vec_splat((vec_char16)(si_from_char(b)), 3)))); +} + +static __inline qword si_andhi(qword a, signed short b) +{ + return ((qword)(vec_and((vec_short8)(a), + vec_splat((vec_short8)(si_from_short(b)), 1)))); +} + + +static __inline qword si_andi(qword a, signed int b) +{ + return ((qword)(vec_and((vec_int4)(a), + vec_splat((vec_int4)(si_from_int(b)), 0)))); +} + + +/* Bit-wise AND with complement + */ +#define si_andc(_a, _b) ((qword)(vec_andc((vec_uchar16)(_a), (vec_uchar16)(_b)))) + + +/* Average byte vectors + */ +#define si_avgb(_a, _b) ((qword)(vec_avg((vec_uchar16)(_a), (vec_uchar16)(_b)))) + + +/* Branch indirect and set link on external data + */ +#define si_bisled(_func) /* not mappable */ +#define si_bisledd(_func) /* not mappable */ +#define si_bislede(_func) /* not mappable */ + + +/* Borrow generate + */ +#define si_bg(_a, _b) ((qword)(vec_subc((vec_uint4)(_b), (vec_uint4)(_a)))) + +#define si_bgx(_a, _b, _c) ((qword)(vec_and(vec_or(vec_cmpgt((vec_uint4)(_b), (vec_uint4)(_a)), \ + vec_and(vec_cmpeq((vec_uint4)(_b), (vec_uint4)(_a)), \ + (vec_uint4)(_c))), vec_splat_u32(1)))) + +/* Compare absolute equal + */ +static __inline qword si_fcmeq(qword a, qword b) +{ + vec_float4 msb = (vec_float4)((vec_uint4){0x80000000, 0x80000000, 0x80000000, 0x80000000}); + + return ((qword)(vec_cmpeq(vec_andc((vec_float4)(a), msb), + vec_andc((vec_float4)(b), msb)))); +} + +static __inline qword si_dfcmeq(qword a, qword b) +{ + vec_uint4 sign_mask= (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF }; + vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x00000000, 0x7FF00000, 0x00000000 }; + vec_uchar16 hihi_promote = (vec_uchar16) { 0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27}; + + vec_uint4 biteq; + vec_uint4 aabs; + vec_uint4 babs; + vec_uint4 a_gt; + vec_uint4 ahi_inf; + vec_uint4 anan; + vec_uint4 result; + + union { + vec_uchar16 v; + int i[4]; + } x; + + /* Shift 4 bytes */ + x.i[3] = 4 << 3; + + /* Mask out sign bits */ + aabs = vec_and((vec_uint4)a,sign_mask); + babs = vec_and((vec_uint4)b,sign_mask); + + /* A) Check for bit equality, store in high word */ + biteq = (vec_uint4) vec_cmpeq((vec_uint4)aabs,(vec_uint4)babs); + biteq = vec_and(biteq,(vec_uint4)vec_slo((vec_uchar16)biteq,x.v)); + + /* + B) Check if a is NaN, store in high word + + B1) If the high word is greater than max_exp (indicates a NaN) + B2) If the low word is greater than 0 + */ + a_gt = (vec_uint4)vec_cmpgt(aabs,nan_mask); + + /* B3) Check if the high word is equal to the inf exponent */ + ahi_inf = 
(vec_uint4)vec_cmpeq(aabs,nan_mask); + + /* anan = B1[hi] or (B2[lo] and B3[hi]) */ + anan = (vec_uint4)vec_or(a_gt,vec_and((vec_uint4)vec_slo((vec_uchar16)a_gt,x.v),ahi_inf)); + + /* result = A and not B */ + result = vec_andc(biteq, anan); + + /* Promote high words to 64 bits and return */ + return ((qword)(vec_perm((vec_uchar16)result, (vec_uchar16)result, hihi_promote))); +} + + +/* Compare absolute greater than + */ +static __inline qword si_fcmgt(qword a, qword b) +{ + vec_float4 msb = (vec_float4)((vec_uint4){0x80000000, 0x80000000, 0x80000000, 0x80000000}); + + return ((qword)(vec_cmpgt(vec_andc((vec_float4)(a), msb), + vec_andc((vec_float4)(b), msb)))); +} + +static __inline qword si_dfcmgt(qword a, qword b) +{ + vec_uchar16 splat_hi = (vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; + vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x0, 0x7FF00000, 0x0 }; + vec_uint4 sign_mask = (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF }; + + union { + vec_uchar16 v; + int i[4]; + } x; + + /* Shift 4 bytes */ + x.i[3] = 4 << 3; + + // absolute value of a,b + vec_uint4 aabs = vec_and((vec_uint4)a, sign_mask); + vec_uint4 babs = vec_and((vec_uint4)b, sign_mask); + + // check if a is nan + vec_uint4 a_inf = (vec_uint4)vec_cmpeq(aabs, nan_mask); + vec_uint4 a_nan = (vec_uint4)vec_cmpgt(aabs, nan_mask); + a_nan = vec_or(a_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)a_nan,x.v),a_inf)); + a_nan = (vec_uint4)vec_perm((vec_uchar16)a_nan, (vec_uchar16)a_nan, splat_hi); + + // check if b is nan + vec_uint4 b_inf = (vec_uint4)vec_cmpeq(babs, nan_mask); + vec_uint4 b_nan = (vec_uint4)vec_cmpgt(babs, nan_mask); + b_nan = vec_or(b_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)b_nan,x.v),b_inf)); + b_nan = (vec_uint4)vec_perm((vec_uchar16)b_nan, (vec_uchar16)b_nan, splat_hi); + + // A) Check if the exponents are different + vec_uint4 gt_hi = (vec_uint4)vec_cmpgt(aabs,babs); + + // B) Check if high word equal, and low word greater + vec_uint4 gt_lo = (vec_uint4)vec_cmpgt((vec_uint4)aabs, (vec_uint4)babs); + vec_uint4 eq = (vec_uint4)vec_cmpeq(aabs, babs); + vec_uint4 eqgt = vec_and(eq,vec_slo(gt_lo,x.v)); + + // If either A or B is true, return true (unless NaNs detected) + vec_uint4 r = vec_or(gt_hi, eqgt); + + // splat the high words of the comparison step + r = (vec_uint4)vec_perm((vec_uchar16)r,(vec_uchar16)r,splat_hi); + + // correct for NaNs in input + return ((qword)vec_andc(r,vec_or(a_nan,b_nan))); +} + + +/* Compare equal + */ +static __inline qword si_ceqb(qword a, qword b) +{ + return ((qword)(vec_cmpeq((vec_uchar16)(a), (vec_uchar16)(b)))); +} + +static __inline qword si_ceqh(qword a, qword b) +{ + return ((qword)(vec_cmpeq((vec_ushort8)(a), (vec_ushort8)(b)))); +} + +static __inline qword si_ceq(qword a, qword b) +{ + return ((qword)(vec_cmpeq((vec_uint4)(a), (vec_uint4)(b)))); +} + +static __inline qword si_fceq(qword a, qword b) +{ + return ((qword)(vec_cmpeq((vec_float4)(a), (vec_float4)(b)))); +} + +static __inline qword si_ceqbi(qword a, signed char b) +{ + return ((qword)(vec_cmpeq((vec_char16)(a), + vec_splat((vec_char16)(si_from_char(b)), 3)))); +} + +static __inline qword si_ceqhi(qword a, signed short b) +{ + return ((qword)(vec_cmpeq((vec_short8)(a), + vec_splat((vec_short8)(si_from_short(b)), 1)))); +} + +static __inline qword si_ceqi(qword a, signed int b) +{ + return ((qword)(vec_cmpeq((vec_int4)(a), + vec_splat((vec_int4)(si_from_int(b)), 0)))); +} + +static __inline qword si_dfceq(qword a, qword b) +{ + vec_uint4 sign_mask= (vec_uint4) { 0x7FFFFFFF, 
0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF }; + vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x00000000, 0x7FF00000, 0x00000000 }; + vec_uchar16 hihi_promote = (vec_uchar16) { 0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27}; + + vec_uint4 biteq; + vec_uint4 aabs; + vec_uint4 babs; + vec_uint4 a_gt; + vec_uint4 ahi_inf; + vec_uint4 anan; + vec_uint4 iszero; + vec_uint4 result; + + union { + vec_uchar16 v; + int i[4]; + } x; + + /* Shift 4 bytes */ + x.i[3] = 4 << 3; + + /* A) Check for bit equality, store in high word */ + biteq = (vec_uint4) vec_cmpeq((vec_uint4)a,(vec_uint4)b); + biteq = vec_and(biteq,(vec_uint4)vec_slo((vec_uchar16)biteq,x.v)); + + /* Mask out sign bits */ + aabs = vec_and((vec_uint4)a,sign_mask); + babs = vec_and((vec_uint4)b,sign_mask); + + /* + B) Check if a is NaN, store in high word + + B1) If the high word is greater than max_exp (indicates a NaN) + B2) If the low word is greater than 0 + */ + a_gt = (vec_uint4)vec_cmpgt(aabs,nan_mask); + + /* B3) Check if the high word is equal to the inf exponent */ + ahi_inf = (vec_uint4)vec_cmpeq(aabs,nan_mask); + + /* anan = B1[hi] or (B2[lo] and B3[hi]) */ + anan = (vec_uint4)vec_or(a_gt,vec_and((vec_uint4)vec_slo((vec_uchar16)a_gt,x.v),ahi_inf)); + + /* C) Check for 0 = -0 special case */ + iszero =(vec_uint4)vec_cmpeq((vec_uint4)vec_or(aabs,babs),(vec_uint4)vec_splat_u32(0)); + iszero = vec_and(iszero,(vec_uint4)vec_slo((vec_uchar16)iszero,x.v)); + + /* result = (A or C) and not B */ + result = vec_or(biteq,iszero); + result = vec_andc(result, anan); + + /* Promote high words to 64 bits and return */ + return ((qword)(vec_perm((vec_uchar16)result, (vec_uchar16)result, hihi_promote))); +} + + +/* Compare greater than + */ +static __inline qword si_cgtb(qword a, qword b) +{ + return ((qword)(vec_cmpgt((vec_char16)(a), (vec_char16)(b)))); +} + +static __inline qword si_cgth(qword a, qword b) +{ + return ((qword)(vec_cmpgt((vec_short8)(a), (vec_short8)(b)))); +} + +static __inline qword si_cgt(qword a, qword b) +{ + return ((qword)(vec_cmpgt((vec_int4)(a), (vec_int4)(b)))); +} + +static __inline qword si_clgtb(qword a, qword b) +{ + return ((qword)(vec_cmpgt((vec_uchar16)(a), (vec_uchar16)(b)))); +} + +static __inline qword si_clgth(qword a, qword b) +{ + return ((qword)(vec_cmpgt((vec_ushort8)(a), (vec_ushort8)(b)))); +} + +static __inline qword si_clgt(qword a, qword b) +{ + return ((qword)(vec_cmpgt((vec_uint4)(a), (vec_uint4)(b)))); +} + +static __inline qword si_fcgt(qword a, qword b) +{ + return ((qword)(vec_cmpgt((vec_float4)(a), (vec_float4)(b)))); +} + +static __inline qword si_dfcgt(qword a, qword b) +{ + vec_uchar16 splat_hi = (vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; + vec_uchar16 borrow_shuffle = (vec_uchar16) { 4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192 }; + vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x0, 0x7FF00000, 0x0 }; + vec_uint4 sign_mask = (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF }; + + union { + vec_uchar16 v; + int i[4]; + } x; + + /* Shift 4 bytes */ + x.i[3] = 4 << 3; + + // absolute value of a,b + vec_uint4 aabs = vec_and((vec_uint4)a, sign_mask); + vec_uint4 babs = vec_and((vec_uint4)b, sign_mask); + + // check if a is nan + vec_uint4 a_inf = (vec_uint4)vec_cmpeq(aabs, nan_mask); + vec_uint4 a_nan = (vec_uint4)vec_cmpgt(aabs, nan_mask); + a_nan = vec_or(a_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)a_nan,x.v),a_inf)); + a_nan = (vec_uint4)vec_perm((vec_uchar16)a_nan, (vec_uchar16)a_nan, splat_hi); + + // check if b is nan + vec_uint4 b_inf = 
(vec_uint4)vec_cmpeq(babs, nan_mask); + vec_uint4 b_nan = (vec_uint4)vec_cmpgt(babs, nan_mask); + b_nan = vec_or(b_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)b_nan,x.v),b_inf)); + b_nan = (vec_uint4)vec_perm((vec_uchar16)b_nan, (vec_uchar16)b_nan, splat_hi); + + // sign of a + vec_uint4 asel = (vec_uint4)vec_sra((vec_int4)(a), (vec_uint4)vec_splat(((vec_uint4)si_from_int(31)), 0)); + asel = (vec_uint4)vec_perm((vec_uchar16)asel,(vec_uchar16)asel,splat_hi); + + // sign of b + vec_uint4 bsel = (vec_uint4)vec_sra((vec_int4)(b), (vec_uint4)vec_splat(((vec_uint4)si_from_int(31)), 0)); + bsel = (vec_uint4)vec_perm((vec_uchar16)bsel,(vec_uchar16)bsel,splat_hi); + + // negative a + vec_uint4 abor = vec_subc((vec_uint4)vec_splat_u32(0), aabs); + vec_uchar16 pat = vec_sel(((vec_uchar16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}), vec_sr(borrow_shuffle, vec_splat_u8(3)), vec_sra(borrow_shuffle, vec_splat_u8(7))); + abor = (vec_uint4)(vec_perm(vec_perm((vec_uchar16)abor, (vec_uchar16)abor, borrow_shuffle),((vec_uchar16){0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0x80, 0x80, 0x80, 0x80}),pat)); + vec_uint4 aneg = vec_add(vec_add(vec_splat_u32(0), vec_nor(aabs, aabs)), vec_and(abor, vec_splat_u32(1))); + + // pick the one we want + vec_int4 aval = (vec_int4)vec_sel((vec_uchar16)aabs, (vec_uchar16)aneg, (vec_uchar16)asel); + + // negative b + vec_uint4 bbor = vec_subc((vec_uint4)vec_splat_u32(0), babs); + bbor = (vec_uint4)(vec_perm(vec_perm((vec_uchar16)bbor, (vec_uchar16)bbor, borrow_shuffle),((vec_uchar16){0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0x80, 0x80, 0x80, 0x80}),pat)); + vec_uint4 bneg = vec_add(vec_nor(babs, babs), vec_and(bbor, vec_splat_u32(1))); + + // pick the one we want + vec_int4 bval=(vec_int4)vec_sel((vec_uchar16)babs, (vec_uchar16)bneg, (vec_uchar16)bsel); + + // A) Check if the exponents are different + vec_uint4 gt_hi = (vec_uint4)vec_cmpgt(aval,bval); + + // B) Check if high word equal, and low word greater + vec_uint4 gt_lo = (vec_uint4)vec_cmpgt((vec_uint4)aval, (vec_uint4)bval); + vec_uint4 eq = (vec_uint4)vec_cmpeq(aval, bval); + vec_uint4 eqgt = vec_and(eq,vec_slo(gt_lo,x.v)); + + // If either A or B is true, return true (unless NaNs detected) + vec_uint4 r = vec_or(gt_hi, eqgt); + + // splat the high words of the comparison step + r = (vec_uint4)vec_perm((vec_uchar16)r,(vec_uchar16)r,splat_hi); + + // correct for NaNs in input + return ((qword)vec_andc(r,vec_or(a_nan,b_nan))); +} + +static __inline qword si_cgtbi(qword a, signed char b) +{ + return ((qword)(vec_cmpgt((vec_char16)(a), + vec_splat((vec_char16)(si_from_char(b)), 3)))); +} + +static __inline qword si_cgthi(qword a, signed short b) +{ + return ((qword)(vec_cmpgt((vec_short8)(a), + vec_splat((vec_short8)(si_from_short(b)), 1)))); +} + +static __inline qword si_cgti(qword a, signed int b) +{ + return ((qword)(vec_cmpgt((vec_int4)(a), + vec_splat((vec_int4)(si_from_int(b)), 0)))); +} + +static __inline qword si_clgtbi(qword a, unsigned char b) +{ + return ((qword)(vec_cmpgt((vec_uchar16)(a), + vec_splat((vec_uchar16)(si_from_uchar(b)), 3)))); +} + +static __inline qword si_clgthi(qword a, unsigned short b) +{ + return ((qword)(vec_cmpgt((vec_ushort8)(a), + vec_splat((vec_ushort8)(si_from_ushort(b)), 1)))); +} + +static __inline qword si_clgti(qword a, unsigned int b) +{ + return ((qword)(vec_cmpgt((vec_uint4)(a), + vec_splat((vec_uint4)(si_from_uint(b)), 0)))); +} + +static __inline qword si_dftsv(qword a, char b) +{ + vec_uchar16 splat_hi = (vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 
8,9,10,11 }; + vec_uint4 sign_mask = (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF }; + vec_uint4 result = (vec_uint4){0}; + vec_uint4 sign = (vec_uint4)vec_sra((vec_int4)(a), (vec_uint4)vec_splat(((vec_uint4)si_from_int(31)), 0)); + sign = (vec_uint4)vec_perm((vec_uchar16)sign,(vec_uchar16)sign,splat_hi); + vec_uint4 aabs = vec_and((vec_uint4)a,sign_mask); + + union { + vec_uchar16 v; + int i[4]; + } x; + + /* Shift 4 bytes */ + x.i[3] = 4 << 3; + + /* Nan or +inf or -inf */ + if (b & 0x70) + { + vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x0, 0x7FF00000, 0x0 }; + vec_uint4 a_inf = (vec_uint4)vec_cmpeq(aabs, nan_mask); + /* NaN */ + if (b & 0x40) + { + vec_uint4 a_nan = (vec_uint4)vec_cmpgt(aabs, nan_mask); + a_nan = vec_or(a_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)a_nan,x.v),a_inf)); + a_nan = (vec_uint4)vec_perm((vec_uchar16)a_nan, (vec_uchar16)a_nan, splat_hi); + result = vec_or(result, a_nan); + } + /* inf */ + if (b & 0x30) + { + a_inf = vec_and((vec_uint4)vec_slo((vec_uchar16)a_inf,x.v), a_inf); + a_inf = (vec_uint4)vec_perm((vec_uchar16)a_inf, (vec_uchar16)a_inf, splat_hi); + /* +inf */ + if (b & 0x20) + result = vec_or(vec_andc(a_inf, sign), result); + /* -inf */ + if (b & 0x10) + result = vec_or(vec_and(a_inf, sign), result); + } + } + /* 0 or denorm */ + if (b & 0xF) + { + vec_uint4 iszero =(vec_uint4)vec_cmpeq(aabs,(vec_uint4)vec_splat_u32(0)); + iszero = vec_and(iszero,(vec_uint4)vec_slo((vec_uchar16)iszero,x.v)); + /* denorm */ + if (b & 0x3) + { + vec_uint4 denorm_mask = (vec_uint4){0xFFFFF, 0xFFFFF, 0xFFFFF, 0xFFFFF}; + vec_uint4 isdenorm = vec_nor((vec_uint4)vec_cmpgt(aabs, denorm_mask), iszero); + isdenorm = (vec_uint4)vec_perm((vec_uchar16)isdenorm, (vec_uchar16)isdenorm, splat_hi); + /* +denorm */ + if (b & 0x2) + result = vec_or(vec_andc(isdenorm, sign), result); + /* -denorm */ + if (b & 0x1) + result = vec_or(vec_and(isdenorm, sign), result); + } + /* 0 */ + if (b & 0xC) + { + iszero = (vec_uint4)vec_perm((vec_uchar16)iszero, (vec_uchar16)iszero, splat_hi); + /* +0 */ + if (b & 0x8) + result = vec_or(vec_andc(iszero, sign), result); + /* -0 */ + if (b & 0x4) + result = vec_or(vec_and(iszero, sign), result); + } + } + return ((qword)result); +} + + +/* Carry generate + */ +#define si_cg(_a, _b) ((qword)(vec_addc((vec_uint4)(_a), (vec_uint4)(_b)))) + +#define si_cgx(_a, _b, _c) ((qword)(vec_or(vec_addc((vec_uint4)(_a), (vec_uint4)(_b)), \ + vec_addc(vec_add((vec_uint4)(_a), (vec_uint4)(_b)), \ + vec_and((vec_uint4)(_c), vec_splat_u32(1)))))) + + +/* Count ones for bytes + */ +static __inline qword si_cntb(qword a) +{ + vec_uchar16 nib_cnt = (vec_uchar16){0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4}; + vec_uchar16 four = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }; + vec_uchar16 av; + + av = (vec_uchar16)(a); + + return ((qword)(vec_add(vec_perm(nib_cnt, nib_cnt, av), + vec_perm(nib_cnt, nib_cnt, vec_sr (av, four))))); +} + +/* Count ones for bytes + */ +static __inline qword si_clz(qword a) +{ + vec_uchar16 av; + vec_uchar16 cnt_hi, cnt_lo, cnt, tmp1, tmp2, tmp3; + vec_uchar16 four = vec_splat_u8(4); + vec_uchar16 nib_cnt = (vec_uchar16){4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0}; + vec_uchar16 eight = vec_splat_u8(8); + vec_uchar16 sixteen = (vec_uchar16){16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16}; + vec_uchar16 twentyfour = (vec_uchar16){24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24}; + + av = (vec_uchar16)(a); + + cnt_hi = vec_perm(nib_cnt, nib_cnt, vec_sr(av, four)); + cnt_lo = vec_perm(nib_cnt, nib_cnt, av); + + cnt = 
vec_add(cnt_hi, vec_and(cnt_lo, vec_cmpeq(cnt_hi, four))); + + tmp1 = (vec_uchar16)vec_sl((vec_uint4)(cnt), (vec_uint4)(eight)); + tmp2 = (vec_uchar16)vec_sl((vec_uint4)(cnt), (vec_uint4)(sixteen)); + tmp3 = (vec_uchar16)vec_sl((vec_uint4)(cnt), (vec_uint4)(twentyfour)); + + cnt = vec_add(cnt, vec_and(tmp1, vec_cmpeq(cnt, eight))); + cnt = vec_add(cnt, vec_and(tmp2, vec_cmpeq(cnt, sixteen))); + cnt = vec_add(cnt, vec_and(tmp3, vec_cmpeq(cnt, twentyfour))); + + return (qword)((vec_sr((vec_uint4)(cnt), (vec_uint4)(twentyfour)))); +} + +/* Convert to float + */ +#define si_cuflt(_a, _b) ((qword)(vec_ctf((vec_uint4)(_a), _b))) +#define si_csflt(_a, _b) ((qword)(vec_ctf((vec_int4)(_a), _b))) + +/* Convert to signed int + */ +#define si_cflts(_a, _b) ((qword)(vec_cts((vec_float4)(_a), _b))) + +/* Convert to unsigned int + */ +#define si_cfltu(_a, _b) ((qword)(vec_ctu((vec_float4)(_a), _b))) + +/* Synchronize + */ +#define si_dsync() /* do nothing */ +#define si_sync() /* do nothing */ +#define si_syncc() /* do nothing */ + + +/* Equivalence + */ +static __inline qword si_eqv(qword a, qword b) +{ + vec_uchar16 d; + + d = vec_xor((vec_uchar16)(a), (vec_uchar16)(b)); + return ((qword)(vec_nor(d, d))); +} + +/* Extend + */ +static __inline qword si_xsbh(qword a) +{ + vec_char16 av; + + av = (vec_char16)(a); + return ((qword)(vec_unpackh(vec_perm(av, av, ((vec_uchar16){1, 3, 5, 7, 9,11,13,15, + 0, 0, 0, 0, 0, 0, 0, 0}))))); +} + +static __inline qword si_xshw(qword a) +{ + vec_short8 av; + + av = (vec_short8)(a); + return ((qword)(vec_unpackh(vec_perm(av, av, ((vec_uchar16){2, 3, 6, 7, + 10,11,14,15, + 0, 0, 0, 0, + 0, 0, 0, 0}))))); +} + +static __inline qword si_xswd(qword a) +{ + vec_int4 av; + + av = (vec_int4)(a); + return ((qword)(vec_perm(av, vec_sra(av, ((vec_uint4){31,31,31,31})), + ((vec_uchar16){20, 21, 22, 23, + 4, 5, 6, 7, + 28, 29, 30, 31, + 12, 13, 14, 15})))); +} + +static __inline qword si_fesd(qword a) +{ + union { + double d[2]; + vec_double2 vd; + } out; + union { + float f[4]; + vec_float4 vf; + } in; + + in.vf = (vec_float4)(a); + out.d[0] = (double)(in.f[0]); + out.d[1] = (double)(in.f[2]); + return ((qword)(out.vd)); +} + +/* Gather + */ +static __inline qword si_gbb(qword a) +{ + vec_uchar16 bits; + vec_uint4 bytes; + + bits = vec_sl(vec_and((vec_uchar16)(a), vec_splat_u8(1)), ((vec_uchar16){7, 6, 5, 4, 3, 2, 1, 0, + 7, 6, 5, 4, 3, 2, 1, 0})); + bytes = (vec_uint4)vec_sum2s((vec_int4)(vec_sum4s(bits, ((vec_uint4){0}))), ((vec_int4){0})); + + return ((qword)(vec_perm(bytes, bytes, ((vec_uchar16){0, 0, 7,15, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0})))); +} + + +static __inline qword si_gbh(qword a) +{ + vec_ushort8 bits; + vec_uint4 bytes; + + bits = vec_sl(vec_and((vec_ushort8)(a), vec_splat_u16(1)), ((vec_ushort8){7, 6, 5, 4, 3, 2, 1, 0})); + + bytes = (vec_uint4)vec_sums((vec_int4)(vec_sum4s((vec_short8)(bits), (vec_int4){0})), (vec_int4){0}); + + return ((qword)(vec_sld(bytes, bytes, 12))); +} + +static __inline qword si_gb(qword a) +{ + vec_uint4 bits; + vec_uint4 bytes; + + bits = vec_sl(vec_and((vec_uint4)(a), vec_splat_u32(1)), ((vec_uint4){3, 2, 1, 0})); + bytes = (vec_uint4)vec_sums((vec_int4)(bits), ((vec_int4){0})); + return ((qword)(vec_sld(bytes, bytes, 12))); +} + + +/* Compare and halt + */ +static __inline void si_heq(qword a, qword b) +{ + union { + vector unsigned int v; + unsigned int i[4]; + } aa, bb; + + aa.v = (vector unsigned int)(a); + bb.v = (vector unsigned int)(b); + + if (aa.i[0] == bb.i[0]) { SPU_HALT_ACTION; }; +} + +static __inline void 
si_heqi(qword a, unsigned int b) +{ + union { + vector unsigned int v; + unsigned int i[4]; + } aa; + + aa.v = (vector unsigned int)(a); + + if (aa.i[0] == b) { SPU_HALT_ACTION; }; +} + +static __inline void si_hgt(qword a, qword b) +{ + union { + vector signed int v; + signed int i[4]; + } aa, bb; + + aa.v = (vector signed int)(a); + bb.v = (vector signed int)(b); + + if (aa.i[0] > bb.i[0]) { SPU_HALT_ACTION; }; +} + +static __inline void si_hgti(qword a, signed int b) +{ + union { + vector signed int v; + signed int i[4]; + } aa; + + aa.v = (vector signed int)(a); + + if (aa.i[0] > b) { SPU_HALT_ACTION; }; +} + +static __inline void si_hlgt(qword a, qword b) +{ + union { + vector unsigned int v; + unsigned int i[4]; + } aa, bb; + + aa.v = (vector unsigned int)(a); + bb.v = (vector unsigned int)(b); + + if (aa.i[0] > bb.i[0]) { SPU_HALT_ACTION; }; +} + +static __inline void si_hlgti(qword a, unsigned int b) +{ + union { + vector unsigned int v; + unsigned int i[4]; + } aa; + + aa.v = (vector unsigned int)(a); + + if (aa.i[0] > b) { SPU_HALT_ACTION; }; +} + + +/* Multiply and Add + */ +static __inline qword si_mpya(qword a, qword b, qword c) +{ + return ((qword)(vec_msum(vec_and((vec_short8)(a), + ((vec_short8){0, -1, 0, -1, 0, -1, 0, -1})), + (vec_short8)(b), (vec_int4)(c)))); +} + +static __inline qword si_fma(qword a, qword b, qword c) +{ + return ((qword)(vec_madd((vec_float4)(a), (vec_float4)(b), (vec_float4)(c)))); +} + +static __inline qword si_dfma(qword a, qword b, qword c) +{ + union { + vec_double2 v; + double d[2]; + } aa, bb, cc, dd; + + aa.v = (vec_double2)(a); + bb.v = (vec_double2)(b); + cc.v = (vec_double2)(c); + dd.d[0] = aa.d[0] * bb.d[0] + cc.d[0]; + dd.d[1] = aa.d[1] * bb.d[1] + cc.d[1]; + return ((qword)(dd.v)); +} + +/* Form Mask + */ +#define si_fsmbi(_a) si_fsmb(si_from_int(_a)) + +static __inline qword si_fsmb(qword a) +{ + vec_char16 mask; + vec_ushort8 in; + + in = (vec_ushort8)(a); + mask = (vec_char16)(vec_perm(in, in, ((vec_uchar16){2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3}))); + return ((qword)(vec_sra(vec_sl(mask, ((vec_uchar16){0, 1, 2, 3, 4, 5, 6, 7, + 0, 1, 2, 3, 4, 5, 6, 7})), + vec_splat_u8(7)))); +} + + +static __inline qword si_fsmh(qword a) +{ + vec_uchar16 in; + vec_short8 mask; + + in = (vec_uchar16)(a); + mask = (vec_short8)(vec_splat(in, 3)); + return ((qword)(vec_sra(vec_sl(mask, ((vec_ushort8){0, 1, 2, 3, 4, 5, 6, 7})), + vec_splat_u16(15)))); +} + +static __inline qword si_fsm(qword a) +{ + vec_uchar16 in; + vec_int4 mask; + + in = (vec_uchar16)(a); + mask = (vec_int4)(vec_splat(in, 3)); + return ((qword)(vec_sra(vec_sl(mask, ((vec_uint4){28, 29, 30, 31})), + ((vec_uint4){31,31,31,31})))); +} + +/* Move from/to registers + */ +#define si_fscrrd() ((qword)((vec_uint4){0})) +#define si_fscrwr(_a) + +#define si_mfspr(_reg) ((qword)((vec_uint4){0})) +#define si_mtspr(_reg, _a) + +/* Multiply High High Add + */ +static __inline qword si_mpyhha(qword a, qword b, qword c) +{ + return ((qword)(vec_add(vec_mule((vec_short8)(a), (vec_short8)(b)), (vec_int4)(c)))); +} + +static __inline qword si_mpyhhau(qword a, qword b, qword c) +{ + return ((qword)(vec_add(vec_mule((vec_ushort8)(a), (vec_ushort8)(b)), (vec_uint4)(c)))); +} + +/* Multiply Subtract + */ +static __inline qword si_fms(qword a, qword b, qword c) +{ + return ((qword)(vec_madd((vec_float4)(a), (vec_float4)(b), + vec_sub(((vec_float4){0.0f}), (vec_float4)(c))))); +} + +static __inline qword si_dfms(qword a, qword b, qword c) +{ + union { + vec_double2 v; + double d[2]; + } aa, bb, cc, 
dd; + + aa.v = (vec_double2)(a); + bb.v = (vec_double2)(b); + cc.v = (vec_double2)(c); + dd.d[0] = aa.d[0] * bb.d[0] - cc.d[0]; + dd.d[1] = aa.d[1] * bb.d[1] - cc.d[1]; + return ((qword)(dd.v)); +} + +/* Multiply + */ +static __inline qword si_fm(qword a, qword b) +{ + return ((qword)(vec_madd((vec_float4)(a), (vec_float4)(b), ((vec_float4){0.0f})))); +} + +static __inline qword si_dfm(qword a, qword b) +{ + union { + vec_double2 v; + double d[2]; + } aa, bb, dd; + + aa.v = (vec_double2)(a); + bb.v = (vec_double2)(b); + dd.d[0] = aa.d[0] * bb.d[0]; + dd.d[1] = aa.d[1] * bb.d[1]; + return ((qword)(dd.v)); +} + +/* Multiply High + */ +static __inline qword si_mpyh(qword a, qword b) +{ + vec_uint4 sixteen = (vec_uint4){16, 16, 16, 16}; + + return ((qword)(vec_sl(vec_mule((vec_short8)(a), (vec_short8)(vec_sl((vec_uint4)(b), sixteen))), sixteen))); +} + + +/* Multiply High High + */ +static __inline qword si_mpyhh(qword a, qword b) +{ + return ((qword)(vec_mule((vec_short8)(a), (vec_short8)(b)))); +} + +static __inline qword si_mpyhhu(qword a, qword b) +{ + return ((qword)(vec_mule((vec_ushort8)(a), (vec_ushort8)(b)))); +} + +/* Multiply Odd + */ +static __inline qword si_mpy(qword a, qword b) +{ + return ((qword)(vec_mulo((vec_short8)(a), (vec_short8)(b)))); +} + +static __inline qword si_mpyu(qword a, qword b) +{ + return ((qword)(vec_mulo((vec_ushort8)(a), (vec_ushort8)(b)))); +} + +static __inline qword si_mpyi(qword a, short b) +{ + return ((qword)(vec_mulo((vec_short8)(a), + vec_splat((vec_short8)(si_from_short(b)), 1)))); +} + +static __inline qword si_mpyui(qword a, unsigned short b) +{ + return ((qword)(vec_mulo((vec_ushort8)(a), + vec_splat((vec_ushort8)(si_from_ushort(b)), 1)))); +} + +/* Multiply and Shift Right + */ +static __inline qword si_mpys(qword a, qword b) +{ + return ((qword)(vec_sra(vec_mulo((vec_short8)(a), (vec_short8)(b)), ((vec_uint4){16,16,16,16})))); +} + +/* Nand + */ +static __inline qword si_nand(qword a, qword b) +{ + vec_uchar16 d; + + d = vec_and((vec_uchar16)(a), (vec_uchar16)(b)); + return ((qword)(vec_nor(d, d))); +} + +/* Negative Multiply Add + */ +static __inline qword si_dfnma(qword a, qword b, qword c) +{ + union { + vec_double2 v; + double d[2]; + } aa, bb, cc, dd; + + aa.v = (vec_double2)(a); + bb.v = (vec_double2)(b); + cc.v = (vec_double2)(c); + dd.d[0] = -cc.d[0] - aa.d[0] * bb.d[0]; + dd.d[1] = -cc.d[1] - aa.d[1] * bb.d[1]; + return ((qword)(dd.v)); +} + +/* Negative Multiply and Subtract + */ +static __inline qword si_fnms(qword a, qword b, qword c) +{ + return ((qword)(vec_nmsub((vec_float4)(a), (vec_float4)(b), (vec_float4)(c)))); +} + +static __inline qword si_dfnms(qword a, qword b, qword c) +{ + union { + vec_double2 v; + double d[2]; + } aa, bb, cc, dd; + + aa.v = (vec_double2)(a); + bb.v = (vec_double2)(b); + cc.v = (vec_double2)(c); + dd.d[0] = cc.d[0] - aa.d[0] * bb.d[0]; + dd.d[1] = cc.d[1] - aa.d[1] * bb.d[1]; + return ((qword)(dd.v)); +} + +/* Nor + */ +static __inline qword si_nor(qword a, qword b) +{ + return ((qword)(vec_nor((vec_uchar16)(a), (vec_uchar16)(b)))); +} + +/* Or + */ +static __inline qword si_or(qword a, qword b) +{ + return ((qword)(vec_or((vec_uchar16)(a), (vec_uchar16)(b)))); +} + +static __inline qword si_orbi(qword a, unsigned char b) +{ + return ((qword)(vec_or((vec_uchar16)(a), + vec_splat((vec_uchar16)(si_from_uchar(b)), 3)))); +} + +static __inline qword si_orhi(qword a, unsigned short b) +{ + return ((qword)(vec_or((vec_ushort8)(a), + vec_splat((vec_ushort8)(si_from_ushort(b)), 1)))); +} + +static __inline 
qword si_ori(qword a, unsigned int b) +{ + return ((qword)(vec_or((vec_uint4)(a), + vec_splat((vec_uint4)(si_from_uint(b)), 0)))); +} + +/* Or Complement + */ +static __inline qword si_orc(qword a, qword b) +{ + return ((qword)(vec_or((vec_uchar16)(a), vec_nor((vec_uchar16)(b), (vec_uchar16)(b))))); +} + + +/* Or Across + */ +static __inline qword si_orx(qword a) +{ + vec_uchar16 tmp; + tmp = (vec_uchar16)(a); + tmp = vec_or(tmp, vec_sld(tmp, tmp, 8)); + tmp = vec_or(tmp, vec_sld(tmp, tmp, 4)); + return ((qword)(vec_and(tmp, ((vec_uchar16){0xFF,0xFF,0xFF,0xFF, 0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00})))); +} + + +/* Estimates + */ +static __inline qword si_frest(qword a) +{ + return ((qword)(vec_re((vec_float4)(a)))); +} + +static __inline qword si_frsqest(qword a) +{ + return ((qword)(vec_rsqrte((vec_float4)(a)))); +} + +#define si_fi(_a, _d) (_d) + +/* Channel Read and Write + */ +#define si_rdch(_channel) ((qword)(vec_splat_u8(0))) /* not mappable */ +#define si_rchcnt(_channel) ((qword)(vec_splat_u8(0))) /* not mappable */ +#define si_wrch(_channel, _a) /* not mappable */ + +/* Rotate Left + */ +static __inline qword si_roth(qword a, qword b) +{ + return ((qword)(vec_rl((vec_ushort8)(a), (vec_ushort8)(b)))); +} + +static __inline qword si_rot(qword a, qword b) +{ + return ((qword)(vec_rl((vec_uint4)(a), (vec_uint4)(b)))); +} + +static __inline qword si_rothi(qword a, int b) +{ + return ((qword)(vec_rl((vec_ushort8)(a), + vec_splat((vec_ushort8)(si_from_int(b)), 1)))); +} + +static __inline qword si_roti(qword a, int b) +{ + return ((qword)(vec_rl((vec_uint4)(a), + vec_splat((vec_uint4)(si_from_int(b)), 0)))); +} + +/* Rotate Left with Mask + */ +static __inline qword si_rothm(qword a, qword b) +{ + vec_ushort8 neg_b; + vec_ushort8 mask; + + neg_b = (vec_ushort8)vec_sub(vec_splat_s16(0), (vec_short8)(b)); + mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15)); + return ((qword)(vec_andc(vec_sr((vec_ushort8)(a), neg_b), mask))); +} + +static __inline qword si_rotm(qword a, qword b) +{ + vec_uint4 neg_b; + vec_uint4 mask; + + neg_b = (vec_uint4)vec_sub(vec_splat_s32(0), (vec_int4)(b)); + mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31})); + return ((qword)(vec_andc(vec_sr((vec_uint4)(a), neg_b), mask))); +} + +static __inline qword si_rothmi(qword a, int b) +{ + vec_ushort8 neg_b; + vec_ushort8 mask; + + neg_b = vec_splat((vec_ushort8)(si_from_int(-b)), 1); + mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15)); + return ((qword)(vec_andc(vec_sr((vec_ushort8)(a), neg_b), mask))); +} + +static __inline qword si_rotmi(qword a, int b) +{ + vec_uint4 neg_b; + vec_uint4 mask; + + neg_b = vec_splat((vec_uint4)(si_from_int(-b)), 0); + mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31})); + return ((qword)(vec_andc(vec_sr((vec_uint4)(a), neg_b), mask))); +} + + +/* Rotate Left Algebraic with Mask + */ +static __inline qword si_rotmah(qword a, qword b) +{ + vec_ushort8 neg_b; + vec_ushort8 mask; + + neg_b = (vec_ushort8)vec_sub(vec_splat_s16(0), (vec_short8)(b)); + mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15)); + return ((qword)(vec_sra((vec_short8)(a), (vec_ushort8)vec_or(neg_b, mask)))); +} + +static __inline qword si_rotma(qword a, qword b) +{ + vec_uint4 neg_b; + vec_uint4 mask; + + neg_b = (vec_uint4)vec_sub(vec_splat_s32(0), (vec_int4)(b)); + mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31})); + return 
((qword)(vec_sra((vec_int4)(a), (vec_uint4)vec_or(neg_b, mask)))); +} + + +static __inline qword si_rotmahi(qword a, int b) +{ + vec_ushort8 neg_b; + vec_ushort8 mask; + + neg_b = vec_splat((vec_ushort8)(si_from_int(-b)), 1); + mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15)); + return ((qword)(vec_sra((vec_short8)(a), (vec_ushort8)vec_or(neg_b, mask)))); +} + +static __inline qword si_rotmai(qword a, int b) +{ + vec_uint4 neg_b; + vec_uint4 mask; + + neg_b = vec_splat((vec_uint4)(si_from_int(-b)), 0); + mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31})); + return ((qword)(vec_sra((vec_int4)(a), (vec_uint4)vec_or(neg_b, mask)))); +} + + +/* Rotate Left Quadword by Bytes with Mask + */ +static __inline qword si_rotqmbyi(qword a, int count) +{ + union { + vec_uchar16 v; + int i[4]; + } x; + vec_uchar16 mask; + + count = 0 - count; + x.i[3] = count << 3; + mask = (count & 0x10) ? vec_splat_u8(0) : vec_splat_u8(-1); + + return ((qword)(vec_and(vec_sro((vec_uchar16)(a), x.v), mask))); +} + + +static __inline qword si_rotqmby(qword a, qword count) +{ + union { + vec_uchar16 v; + int i[4]; + } x; + int cnt; + vec_uchar16 mask; + + x.v = (vec_uchar16)(count); + x.i[0] = cnt = (0 - x.i[0]) << 3; + + x.v = vec_splat(x.v, 3); + mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1); + + return ((qword)(vec_and(vec_sro((vec_uchar16)(a), x.v), mask))); +} + + +/* Rotate Left Quadword by Bytes + */ +static __inline qword si_rotqbyi(qword a, int count) +{ + union { + vec_uchar16 v; + int i[4]; + } left, right; + + count <<= 3; + left.i[3] = count; + right.i[3] = 0 - count; + return ((qword)(vec_or(vec_slo((vec_uchar16)(a), left.v), vec_sro((vec_uchar16)(a), right.v)))); +} + +static __inline qword si_rotqby(qword a, qword count) +{ + vec_uchar16 left, right; + + left = vec_sl(vec_splat((vec_uchar16)(count), 3), vec_splat_u8(3)); + right = vec_sub(vec_splat_u8(0), left); + return ((qword)(vec_or(vec_slo((vec_uchar16)(a), left), vec_sro((vec_uchar16)(a), right)))); +} + +/* Rotate Left Quadword by Bytes Bit Count + */ +static __inline qword si_rotqbybi(qword a, qword count) +{ + vec_uchar16 left, right; + + left = vec_splat((vec_uchar16)(count), 3); + right = vec_sub(vec_splat_u8(7), left); + return ((qword)(vec_or(vec_slo((vec_uchar16)(a), left), vec_sro((vec_uchar16)(a), right)))); +} + + +/* Rotate Left Quadword by Bytes Bit Count + */ +static __inline qword si_rotqbii(qword a, int count) +{ + vec_uchar16 x, y; + vec_uchar16 result; + + x = vec_splat((vec_uchar16)(si_from_int(count & 7)), 3); + y = (vec_uchar16)(vec_sr((vec_uint4)vec_sro((vec_uchar16)(a), ((vec_uchar16)((vec_uint4){0,0,0,120}))), + (vec_uint4)vec_sub(vec_splat_u8(8), x))); + result = vec_or(vec_sll((qword)(a), x), y); + return ((qword)(result)); +} + +static __inline qword si_rotqbi(qword a, qword count) +{ + vec_uchar16 x, y; + vec_uchar16 result; + + x = vec_and(vec_splat((vec_uchar16)(count), 3), vec_splat_u8(7)); + y = (vec_uchar16)(vec_sr((vec_uint4)vec_sro((vec_uchar16)(a), ((vec_uchar16)((vec_uint4){0,0,0,120}))), + (vec_uint4)vec_sub(vec_splat_u8(8), x))); + + result = vec_or(vec_sll((qword)(a), x), y); + return ((qword)(result)); +} + + +/* Rotate Left Quadword and Mask by Bits + */ +static __inline qword si_rotqmbii(qword a, int count) +{ + return ((qword)(vec_srl((vec_uchar16)(a), vec_splat((vec_uchar16)(si_from_int(0 - count)), 3)))); +} + +static __inline qword si_rotqmbi(qword a, qword count) +{ + return ((qword)(vec_srl((vec_uchar16)(a), vec_sub(vec_splat_u8(0), 
vec_splat((vec_uchar16)(count), 3))))); +} + + +/* Rotate Left Quadword and Mask by Bytes with Bit Count + */ +static __inline qword si_rotqmbybi(qword a, qword count) +{ + union { + vec_uchar16 v; + int i[4]; + } x; + int cnt; + vec_uchar16 mask; + + x.v = (vec_uchar16)(count); + x.i[0] = cnt = 0 - (x.i[0] & ~7); + x.v = vec_splat(x.v, 3); + mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1); + + return ((qword)(vec_and(vec_sro((vec_uchar16)(a), x.v), mask))); +} + + + + +/* Round Double to Float + */ +static __inline qword si_frds(qword a) +{ + union { + vec_float4 v; + float f[4]; + } d; + union { + vec_double2 v; + double d[2]; + } in; + + in.v = (vec_double2)(a); + d.v = (vec_float4){0.0f}; + d.f[0] = (float)in.d[0]; + d.f[2] = (float)in.d[1]; + + return ((qword)(d.v)); +} + +/* Select Bits + */ +static __inline qword si_selb(qword a, qword b, qword c) +{ + return ((qword)(vec_sel((vec_uchar16)(a), (vec_uchar16)(b), (vec_uchar16)(c)))); +} + + +/* Shuffle Bytes + */ +static __inline qword si_shufb(qword a, qword b, qword pattern) +{ + vec_uchar16 pat; + + pat = vec_sel(((vec_uchar16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}), + vec_sr((vec_uchar16)(pattern), vec_splat_u8(3)), + vec_sra((vec_uchar16)(pattern), vec_splat_u8(7))); + return ((qword)(vec_perm(vec_perm(a, b, pattern), + ((vec_uchar16){0, 0, 0, 0, 0, 0, 0, 0, + 0xFF, 0xFF, 0xFF, 0xFF, 0x80, 0x80, 0x80, 0x80}), + pat))); +} + + +/* Shift Left + */ +static __inline qword si_shlh(qword a, qword b) +{ + vec_ushort8 mask; + + mask = (vec_ushort8)vec_sra(vec_sl((vec_ushort8)(b), vec_splat_u16(11)), vec_splat_u16(15)); + return ((qword)(vec_andc(vec_sl((vec_ushort8)(a), (vec_ushort8)(b)), mask))); +} + +static __inline qword si_shl(qword a, qword b) +{ + vec_uint4 mask; + + mask = (vec_uint4)vec_sra(vec_sl((vec_uint4)(b), ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31})); + return ((qword)(vec_andc(vec_sl((vec_uint4)(a), (vec_uint4)(b)), mask))); +} + + +static __inline qword si_shlhi(qword a, unsigned int b) +{ + vec_ushort8 mask; + vec_ushort8 bv; + + bv = vec_splat((vec_ushort8)(si_from_int(b)), 1); + mask = (vec_ushort8)vec_sra(vec_sl(bv, vec_splat_u16(11)), vec_splat_u16(15)); + return ((qword)(vec_andc(vec_sl((vec_ushort8)(a), bv), mask))); +} + +static __inline qword si_shli(qword a, unsigned int b) +{ + vec_uint4 bv; + vec_uint4 mask; + + bv = vec_splat((vec_uint4)(si_from_uint(b)), 0); + mask = (vec_uint4)vec_sra(vec_sl(bv, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31})); + return ((qword)(vec_andc(vec_sl((vec_uint4)(a), bv), mask))); +} + + +/* Shift Left Quadword + */ +static __inline qword si_shlqbii(qword a, unsigned int count) +{ + vec_uchar16 x; + + x = vec_splat((vec_uchar16)(si_from_uint(count)), 3); + return ((qword)(vec_sll((vec_uchar16)(a), x))); +} + +static __inline qword si_shlqbi(qword a, qword count) +{ + vec_uchar16 x; + + x = vec_splat((vec_uchar16)(count), 3); + return ((qword)(vec_sll((vec_uchar16)(a), x))); +} + + +/* Shift Left Quadword by Bytes + */ +static __inline qword si_shlqbyi(qword a, unsigned int count) +{ + union { + vec_uchar16 v; + int i[4]; + } x; + vec_uchar16 mask; + + x.i[3] = count << 3; + mask = (count & 0x10) ? 
vec_splat_u8(0) : vec_splat_u8(-1); + return ((qword)(vec_and(vec_slo((vec_uchar16)(a), x.v), mask))); +} + +static __inline qword si_shlqby(qword a, qword count) +{ + union { + vec_uchar16 v; + unsigned int i[4]; + } x; + unsigned int cnt; + vec_uchar16 mask; + + x.v = vec_sl(vec_splat((vec_uchar16)(count), 3), vec_splat_u8(3)); + cnt = x.i[0]; + mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1); + return ((qword)(vec_and(vec_slo((vec_uchar16)(a), x.v), mask))); +} + +/* Shift Left Quadword by Bytes with Bit Count + */ +static __inline qword si_shlqbybi(qword a, qword count) +{ + union { + vec_uchar16 v; + int i[4]; + } x; + unsigned int cnt; + vec_uchar16 mask; + + x.v = vec_splat((vec_uchar16)(count), 3); + cnt = x.i[0]; + mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1); + return ((qword)(vec_and(vec_slo((vec_uchar16)(a), x.v), mask))); +} + + +/* Stop and Signal + */ +#define si_stop(_type) SPU_STOP_ACTION +#define si_stopd(a, b, c) SPU_STOP_ACTION + + +/* Subtract + */ +static __inline qword si_sfh(qword a, qword b) +{ + return ((qword)(vec_sub((vec_ushort8)(b), (vec_ushort8)(a)))); +} + +static __inline qword si_sf(qword a, qword b) +{ + return ((qword)(vec_sub((vec_uint4)(b), (vec_uint4)(a)))); +} + +static __inline qword si_fs(qword a, qword b) +{ + return ((qword)(vec_sub((vec_float4)(a), (vec_float4)(b)))); +} + +static __inline qword si_dfs(qword a, qword b) +{ + union { + vec_double2 v; + double d[2]; + } aa, bb, dd; + + aa.v = (vec_double2)(a); + bb.v = (vec_double2)(b); + dd.d[0] = aa.d[0] - bb.d[0]; + dd.d[1] = aa.d[1] - bb.d[1]; + return ((qword)(dd.v)); +} + +static __inline qword si_sfhi(qword a, short b) +{ + return ((qword)(vec_sub(vec_splat((vec_short8)(si_from_short(b)), 1), + (vec_short8)(a)))); +} + +static __inline qword si_sfi(qword a, int b) +{ + return ((qword)(vec_sub(vec_splat((vec_int4)(si_from_int(b)), 0), + (vec_int4)(a)))); +} + +/* Subtract word extended + */ +#define si_sfx(_a, _b, _c) ((qword)(vec_add(vec_add((vec_uint4)(_b), \ + vec_nor((vec_uint4)(_a), (vec_uint4)(_a))), \ + vec_and((vec_uint4)(_c), vec_splat_u32(1))))) + + +/* Sum Bytes into Shorts + */ +static __inline qword si_sumb(qword a, qword b) +{ + vec_uint4 zero = (vec_uint4){0}; + vec_ushort8 sum_a, sum_b; + + sum_a = (vec_ushort8)vec_sum4s((vec_uchar16)(a), zero); + sum_b = (vec_ushort8)vec_sum4s((vec_uchar16)(b), zero); + + return ((qword)(vec_perm(sum_a, sum_b, ((vec_uchar16){18, 19, 2, 3, 22, 23, 6, 7, + 26, 27, 10, 11, 30, 31, 14, 15})))); +} + +/* Exclusive OR + */ +static __inline qword si_xor(qword a, qword b) +{ + return ((qword)(vec_xor((vec_uchar16)(a), (vec_uchar16)(b)))); +} + +static __inline qword si_xorbi(qword a, unsigned char b) +{ + return ((qword)(vec_xor((vec_uchar16)(a), + vec_splat((vec_uchar16)(si_from_uchar(b)), 3)))); +} + +static __inline qword si_xorhi(qword a, unsigned short b) +{ + return ((qword)(vec_xor((vec_ushort8)(a), + vec_splat((vec_ushort8)(si_from_ushort(b)), 1)))); +} + +static __inline qword si_xori(qword a, unsigned int b) +{ + return ((qword)(vec_xor((vec_uint4)(a), + vec_splat((vec_uint4)(si_from_uint(b)), 0)))); +} + + +/* Generate Controls for Sub-Quadword Insertion + */ +static __inline qword si_cbd(qword a, int imm) +{ + union { + vec_uint4 v; + unsigned char c[16]; + } shmask; + + shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F}); + shmask.c[(si_to_uint(a) + (unsigned int)(imm)) & 0xF] = 0x03; + return ((qword)(shmask.v)); +} + +static __inline qword si_cdd(qword a, int imm) +{ + union { + vec_uint4 v; + 
unsigned long long ll[2]; + } shmask; + + shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F}); + shmask.ll[((si_to_uint(a) + (unsigned int)(imm)) >> 3) & 0x1] = 0x0001020304050607ULL; + return ((qword)(shmask.v)); +} + +static __inline qword si_chd(qword a, int imm) +{ + union { + vec_uint4 v; + unsigned short s[8]; + } shmask; + + shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F}); + shmask.s[((si_to_uint(a) + (unsigned int)(imm)) >> 1) & 0x7] = 0x0203; + return ((qword)(shmask.v)); +} + +static __inline qword si_cwd(qword a, int imm) +{ + union { + vec_uint4 v; + unsigned int i[4]; + } shmask; + + shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F}); + shmask.i[((si_to_uint(a) + (unsigned int)(imm)) >> 2) & 0x3] = 0x00010203; + return ((qword)(shmask.v)); +} + +static __inline qword si_cbx(qword a, qword b) +{ + union { + vec_uint4 v; + unsigned char c[16]; + } shmask; + + shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F}); + shmask.c[si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) & 0xF] = 0x03; + return ((qword)(shmask.v)); +} + + +static __inline qword si_cdx(qword a, qword b) +{ + union { + vec_uint4 v; + unsigned long long ll[2]; + } shmask; + + shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F}); + shmask.ll[(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) >> 3) & 0x1] = 0x0001020304050607ULL; + return ((qword)(shmask.v)); +} + +static __inline qword si_chx(qword a, qword b) +{ + union { + vec_uint4 v; + unsigned short s[8]; + } shmask; + + shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F}); + shmask.s[(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) >> 1) & 0x7] = 0x0203; + return ((qword)(shmask.v)); +} + +static __inline qword si_cwx(qword a, qword b) +{ + union { + vec_uint4 v; + unsigned int i[4]; + } shmask; + + shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F}); + shmask.i[(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) >> 2) & 0x3] = 0x00010203; + return ((qword)(shmask.v)); +} + + +/* Constant Formation + */ +static __inline qword si_il(signed short imm) +{ + return ((qword)(vec_splat((vec_int4)(si_from_int((signed int)(imm))), 0))); +} + + +static __inline qword si_ila(unsigned int imm) +{ + return ((qword)(vec_splat((vec_uint4)(si_from_uint(imm)), 0))); +} + +static __inline qword si_ilh(signed short imm) +{ + return ((qword)(vec_splat((vec_short8)(si_from_short(imm)), 1))); +} + +static __inline qword si_ilhu(signed short imm) +{ + return ((qword)(vec_splat((vec_uint4)(si_from_uint((unsigned int)(imm) << 16)), 0))); +} + +static __inline qword si_iohl(qword a, unsigned short imm) +{ + return ((qword)(vec_or((vec_uint4)(a), vec_splat((vec_uint4)(si_from_uint((unsigned int)(imm))), 0)))); +} + +/* No Operation + */ +#define si_lnop() /* do nothing */ +#define si_nop() /* do nothing */ + + +/* Memory Load and Store + */ +static __inline qword si_lqa(unsigned int imm) +{ + return ((qword)(vec_ld(0, (vector unsigned char *)(imm)))); +} + +static __inline qword si_lqd(qword a, unsigned int imm) +{ + return ((qword)(vec_ld(si_to_uint(a) & ~0xF, (vector unsigned char *)(imm)))); +} + +static __inline qword si_lqr(unsigned int imm) +{ + return ((qword)(vec_ld(0, (vector unsigned char *)(imm)))); +} + +static __inline qword si_lqx(qword a, qword b) +{ + return ((qword)(vec_ld(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))), (vector unsigned char *)(0)))); +} + +static 
__inline void si_stqa(qword a, unsigned int imm) +{ + vec_st((vec_uchar16)(a), 0, (vector unsigned char *)(imm)); +} + +static __inline void si_stqd(qword a, qword b, unsigned int imm) +{ + vec_st((vec_uchar16)(a), si_to_uint(b) & ~0xF, (vector unsigned char *)(imm)); +} + +static __inline void si_stqr(qword a, unsigned int imm) +{ + vec_st((vec_uchar16)(a), 0, (vector unsigned char *)(imm)); +} + +static __inline void si_stqx(qword a, qword b, qword c) +{ + vec_st((vec_uchar16)(a), + si_to_uint((qword)(vec_add((vec_uint4)(b), (vec_uint4)(c)))), + (vector unsigned char *)(0)); +} + +#endif /* !__SPU__ */ +#endif /* !_SI2VMX_H_ */ + diff --git a/gcc/config/powerpcspe/singlefp.h b/gcc/config/powerpcspe/singlefp.h new file mode 100644 index 000000000000..3a5f87ba0fdb --- /dev/null +++ b/gcc/config/powerpcspe/singlefp.h @@ -0,0 +1,40 @@ +/* Definitions for PowerPC single-precision floating point unit + such as Xilinx PowerPC 405/440 APU. + + Copyright (C) 2008-2017 Free Software Foundation, Inc. + Contributed by Michael Eager (eager@eagercon.com) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + + +/* Undefine definitions from rs6000.h. */ +#undef TARGET_SINGLE_FLOAT +#undef TARGET_DOUBLE_FLOAT +#undef TARGET_SINGLE_FPU +#undef TARGET_SIMPLE_FPU +#undef UNITS_PER_FP_WORD + +/* FPU operations supported. + If TARGET_SINGLE_FPU set, processor supports single fp options. */ +#define TARGET_SINGLE_FLOAT (rs6000_single_float) +#define TARGET_DOUBLE_FLOAT (rs6000_double_float) +#define TARGET_SINGLE_FPU 1 +#define TARGET_SIMPLE_FPU (rs6000_simple_fpu) + +/* FP word width depends on single/double fp support. */ +#define UNITS_PER_FP_WORD ((TARGET_SOFT_FLOAT || TARGET_DOUBLE_FLOAT) ? 8 : 4) + diff --git a/gcc/config/powerpcspe/spe.h b/gcc/config/powerpcspe/spe.h new file mode 100644 index 000000000000..3d556c08ad88 --- /dev/null +++ b/gcc/config/powerpcspe/spe.h @@ -0,0 +1,1107 @@ +/* PowerPC E500 user include file. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez (aldyh@redhat.com). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _SPE_H +#define _SPE_H + +#define __vector __attribute__((vector_size(8))) + +typedef int int32_t; +typedef unsigned uint32_t; +typedef short int16_t; +typedef unsigned short uint16_t; +typedef long long int64_t; +typedef unsigned long long uint64_t; + +typedef short __vector __ev64_s16__; +typedef unsigned short __vector __ev64_u16__; +typedef int __vector __ev64_s32__; +typedef unsigned __vector __ev64_u32__; +typedef long long __vector __ev64_s64__; +typedef unsigned long long __vector __ev64_u64__; +typedef float __vector __ev64_fs__; + +#define __v2si __ev64_opaque__ +#define __v2sf __ev64_fs__ + +#define __ev_addw __builtin_spe_evaddw +#define __ev_addiw __builtin_spe_evaddiw +#define __ev_subfw(a,b) __builtin_spe_evsubfw ((b), (a)) +#define __ev_subw __builtin_spe_evsubfw +#define __ev_subifw(a,b) __builtin_spe_evsubifw ((b), (a)) +#define __ev_subiw __builtin_spe_evsubifw +#define __ev_abs __builtin_spe_evabs +#define __ev_neg __builtin_spe_evneg +#define __ev_extsb __builtin_spe_evextsb +#define __ev_extsh __builtin_spe_evextsh +#define __ev_and __builtin_spe_evand +#define __ev_or __builtin_spe_evor +#define __ev_xor __builtin_spe_evxor +#define __ev_nand __builtin_spe_evnand +#define __ev_nor __builtin_spe_evnor +#define __ev_eqv __builtin_spe_eveqv +#define __ev_andc __builtin_spe_evandc +#define __ev_orc __builtin_spe_evorc +#define __ev_rlw __builtin_spe_evrlw +#define __ev_rlwi __builtin_spe_evrlwi +#define __ev_slw __builtin_spe_evslw +#define __ev_slwi __builtin_spe_evslwi +#define __ev_srws __builtin_spe_evsrws +#define __ev_srwu __builtin_spe_evsrwu +#define __ev_srwis __builtin_spe_evsrwis +#define __ev_srwiu __builtin_spe_evsrwiu +#define __ev_cntlzw __builtin_spe_evcntlzw +#define __ev_cntlsw __builtin_spe_evcntlsw +#define __ev_rndw __builtin_spe_evrndw +#define __ev_mergehi __builtin_spe_evmergehi +#define __ev_mergelo __builtin_spe_evmergelo +#define __ev_mergelohi __builtin_spe_evmergelohi +#define __ev_mergehilo __builtin_spe_evmergehilo +#define __ev_splati __builtin_spe_evsplati +#define __ev_splatfi __builtin_spe_evsplatfi +#define __ev_divws __builtin_spe_evdivws +#define __ev_divwu __builtin_spe_evdivwu +#define __ev_mra __builtin_spe_evmra + +#define __brinc __builtin_spe_brinc + +/* Loads. */ + +#define __ev_lddx __builtin_spe_evlddx +#define __ev_ldwx __builtin_spe_evldwx +#define __ev_ldhx __builtin_spe_evldhx +#define __ev_lwhex __builtin_spe_evlwhex +#define __ev_lwhoux __builtin_spe_evlwhoux +#define __ev_lwhosx __builtin_spe_evlwhosx +#define __ev_lwwsplatx __builtin_spe_evlwwsplatx +#define __ev_lwhsplatx __builtin_spe_evlwhsplatx +#define __ev_lhhesplatx __builtin_spe_evlhhesplatx +#define __ev_lhhousplatx __builtin_spe_evlhhousplatx +#define __ev_lhhossplatx __builtin_spe_evlhhossplatx +#define __ev_ldd __builtin_spe_evldd +#define __ev_ldw __builtin_spe_evldw +#define __ev_ldh __builtin_spe_evldh +#define __ev_lwhe __builtin_spe_evlwhe +#define __ev_lwhou __builtin_spe_evlwhou +#define __ev_lwhos __builtin_spe_evlwhos +#define __ev_lwwsplat __builtin_spe_evlwwsplat +#define __ev_lwhsplat __builtin_spe_evlwhsplat +#define __ev_lhhesplat __builtin_spe_evlhhesplat +#define __ev_lhhousplat __builtin_spe_evlhhousplat +#define __ev_lhhossplat __builtin_spe_evlhhossplat + +/* Stores. 
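   Like the loads above, the store intrinsics below are thin wrappers around
   the evst* builtins: the first operand is the 64-bit vector value, followed
   by a base pointer and a small constant offset (see the SPE PIM for the
   exact offset ranges).  A rough usage sketch, with p and q standing in for
   suitably aligned uint64_t pointers (placeholder names, not part of this
   header):

     __ev64_opaque__ v = __ev_ldd (p, 0);    read one 64-bit doubleword
     __ev_stdd (v, q, 0);                    write it out again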
*/ + +#define __ev_stddx __builtin_spe_evstddx +#define __ev_stdwx __builtin_spe_evstdwx +#define __ev_stdhx __builtin_spe_evstdhx +#define __ev_stwwex __builtin_spe_evstwwex +#define __ev_stwwox __builtin_spe_evstwwox +#define __ev_stwhex __builtin_spe_evstwhex +#define __ev_stwhox __builtin_spe_evstwhox +#define __ev_stdd __builtin_spe_evstdd +#define __ev_stdw __builtin_spe_evstdw +#define __ev_stdh __builtin_spe_evstdh +#define __ev_stwwe __builtin_spe_evstwwe +#define __ev_stwwo __builtin_spe_evstwwo +#define __ev_stwhe __builtin_spe_evstwhe +#define __ev_stwho __builtin_spe_evstwho + +/* Fixed point complex. */ + +#define __ev_mhossf __builtin_spe_evmhossf +#define __ev_mhosmf __builtin_spe_evmhosmf +#define __ev_mhosmi __builtin_spe_evmhosmi +#define __ev_mhoumi __builtin_spe_evmhoumi +#define __ev_mhessf __builtin_spe_evmhessf +#define __ev_mhesmf __builtin_spe_evmhesmf +#define __ev_mhesmi __builtin_spe_evmhesmi +#define __ev_mheumi __builtin_spe_evmheumi +#define __ev_mhossfa __builtin_spe_evmhossfa +#define __ev_mhosmfa __builtin_spe_evmhosmfa +#define __ev_mhosmia __builtin_spe_evmhosmia +#define __ev_mhoumia __builtin_spe_evmhoumia +#define __ev_mhessfa __builtin_spe_evmhessfa +#define __ev_mhesmfa __builtin_spe_evmhesmfa +#define __ev_mhesmia __builtin_spe_evmhesmia +#define __ev_mheumia __builtin_spe_evmheumia + +#define __ev_mhoumf __ev_mhoumi +#define __ev_mheumf __ev_mheumi +#define __ev_mhoumfa __ev_mhoumia +#define __ev_mheumfa __ev_mheumia + +#define __ev_mhossfaaw __builtin_spe_evmhossfaaw +#define __ev_mhossiaaw __builtin_spe_evmhossiaaw +#define __ev_mhosmfaaw __builtin_spe_evmhosmfaaw +#define __ev_mhosmiaaw __builtin_spe_evmhosmiaaw +#define __ev_mhousiaaw __builtin_spe_evmhousiaaw +#define __ev_mhoumiaaw __builtin_spe_evmhoumiaaw +#define __ev_mhessfaaw __builtin_spe_evmhessfaaw +#define __ev_mhessiaaw __builtin_spe_evmhessiaaw +#define __ev_mhesmfaaw __builtin_spe_evmhesmfaaw +#define __ev_mhesmiaaw __builtin_spe_evmhesmiaaw +#define __ev_mheusiaaw __builtin_spe_evmheusiaaw +#define __ev_mheumiaaw __builtin_spe_evmheumiaaw + +#define __ev_mhousfaaw __ev_mhousiaaw +#define __ev_mhoumfaaw __ev_mhoumiaaw +#define __ev_mheusfaaw __ev_mheusiaaw +#define __ev_mheumfaaw __ev_mheumiaaw + +#define __ev_mhossfanw __builtin_spe_evmhossfanw +#define __ev_mhossianw __builtin_spe_evmhossianw +#define __ev_mhosmfanw __builtin_spe_evmhosmfanw +#define __ev_mhosmianw __builtin_spe_evmhosmianw +#define __ev_mhousianw __builtin_spe_evmhousianw +#define __ev_mhoumianw __builtin_spe_evmhoumianw +#define __ev_mhessfanw __builtin_spe_evmhessfanw +#define __ev_mhessianw __builtin_spe_evmhessianw +#define __ev_mhesmfanw __builtin_spe_evmhesmfanw +#define __ev_mhesmianw __builtin_spe_evmhesmianw +#define __ev_mheusianw __builtin_spe_evmheusianw +#define __ev_mheumianw __builtin_spe_evmheumianw + +#define __ev_mhousfanw __ev_mhousianw +#define __ev_mhoumfanw __ev_mhoumianw +#define __ev_mheusfanw __ev_mheusianw +#define __ev_mheumfanw __ev_mheumianw + +#define __ev_mhogsmfaa __builtin_spe_evmhogsmfaa +#define __ev_mhogsmiaa __builtin_spe_evmhogsmiaa +#define __ev_mhogumiaa __builtin_spe_evmhogumiaa +#define __ev_mhegsmfaa __builtin_spe_evmhegsmfaa +#define __ev_mhegsmiaa __builtin_spe_evmhegsmiaa +#define __ev_mhegumiaa __builtin_spe_evmhegumiaa + +#define __ev_mhogumfaa __ev_mhogumiaa +#define __ev_mhegumfaa __ev_mhegumiaa + +#define __ev_mhogsmfan __builtin_spe_evmhogsmfan +#define __ev_mhogsmian __builtin_spe_evmhogsmian +#define __ev_mhogumian __builtin_spe_evmhogumian +#define 
__ev_mhegsmfan __builtin_spe_evmhegsmfan +#define __ev_mhegsmian __builtin_spe_evmhegsmian +#define __ev_mhegumian __builtin_spe_evmhegumian + +#define __ev_mhogumfan __ev_mhogumian +#define __ev_mhegumfan __ev_mhegumian + +#define __ev_mwhssf __builtin_spe_evmwhssf +#define __ev_mwhsmf __builtin_spe_evmwhsmf +#define __ev_mwhsmi __builtin_spe_evmwhsmi +#define __ev_mwhumi __builtin_spe_evmwhumi +#define __ev_mwhssfa __builtin_spe_evmwhssfa +#define __ev_mwhsmfa __builtin_spe_evmwhsmfa +#define __ev_mwhsmia __builtin_spe_evmwhsmia +#define __ev_mwhumia __builtin_spe_evmwhumia + +#define __ev_mwhumf __ev_mwhumi +#define __ev_mwhumfa __ev_mwhumia + +#define __ev_mwlumi __builtin_spe_evmwlumi +#define __ev_mwlumia __builtin_spe_evmwlumia +#define __ev_mwlumiaaw __builtin_spe_evmwlumiaaw + +#define __ev_mwlssiaaw __builtin_spe_evmwlssiaaw +#define __ev_mwlsmiaaw __builtin_spe_evmwlsmiaaw +#define __ev_mwlusiaaw __builtin_spe_evmwlusiaaw +#define __ev_mwlusiaaw __builtin_spe_evmwlusiaaw + +#define __ev_mwlssianw __builtin_spe_evmwlssianw +#define __ev_mwlsmianw __builtin_spe_evmwlsmianw +#define __ev_mwlusianw __builtin_spe_evmwlusianw +#define __ev_mwlumianw __builtin_spe_evmwlumianw + +#define __ev_mwssf __builtin_spe_evmwssf +#define __ev_mwsmf __builtin_spe_evmwsmf +#define __ev_mwsmi __builtin_spe_evmwsmi +#define __ev_mwumi __builtin_spe_evmwumi +#define __ev_mwssfa __builtin_spe_evmwssfa +#define __ev_mwsmfa __builtin_spe_evmwsmfa +#define __ev_mwsmia __builtin_spe_evmwsmia +#define __ev_mwumia __builtin_spe_evmwumia + +#define __ev_mwumf __ev_mwumi +#define __ev_mwumfa __ev_mwumia + +#define __ev_mwssfaa __builtin_spe_evmwssfaa +#define __ev_mwsmfaa __builtin_spe_evmwsmfaa +#define __ev_mwsmiaa __builtin_spe_evmwsmiaa +#define __ev_mwumiaa __builtin_spe_evmwumiaa + +#define __ev_mwumfaa __ev_mwumiaa + +#define __ev_mwssfan __builtin_spe_evmwssfan +#define __ev_mwsmfan __builtin_spe_evmwsmfan +#define __ev_mwsmian __builtin_spe_evmwsmian +#define __ev_mwumian __builtin_spe_evmwumian + +#define __ev_mwumfan __ev_mwumian + +#define __ev_addssiaaw __builtin_spe_evaddssiaaw +#define __ev_addsmiaaw __builtin_spe_evaddsmiaaw +#define __ev_addusiaaw __builtin_spe_evaddusiaaw +#define __ev_addumiaaw __builtin_spe_evaddumiaaw + +#define __ev_addusfaaw __ev_addusiaaw +#define __ev_addumfaaw __ev_addumiaaw +#define __ev_addsmfaaw __ev_addsmiaaw +#define __ev_addssfaaw __ev_addssiaaw + +#define __ev_subfssiaaw __builtin_spe_evsubfssiaaw +#define __ev_subfsmiaaw __builtin_spe_evsubfsmiaaw +#define __ev_subfusiaaw __builtin_spe_evsubfusiaaw +#define __ev_subfumiaaw __builtin_spe_evsubfumiaaw + +#define __ev_subfusfaaw __ev_subfusiaaw +#define __ev_subfumfaaw __ev_subfumiaaw +#define __ev_subfsmfaaw __ev_subfsmiaaw +#define __ev_subfssfaaw __ev_subfssiaaw + +/* Floating Point SIMD Instructions */ + +#define __ev_fsabs __builtin_spe_evfsabs +#define __ev_fsnabs __builtin_spe_evfsnabs +#define __ev_fsneg __builtin_spe_evfsneg +#define __ev_fsadd __builtin_spe_evfsadd +#define __ev_fssub __builtin_spe_evfssub +#define __ev_fsmul __builtin_spe_evfsmul +#define __ev_fsdiv __builtin_spe_evfsdiv +#define __ev_fscfui __builtin_spe_evfscfui +#define __ev_fscfsi __builtin_spe_evfscfsi +#define __ev_fscfuf __builtin_spe_evfscfuf +#define __ev_fscfsf __builtin_spe_evfscfsf +#define __ev_fsctui __builtin_spe_evfsctui +#define __ev_fsctsi __builtin_spe_evfsctsi +#define __ev_fsctuf __builtin_spe_evfsctuf +#define __ev_fsctsf __builtin_spe_evfsctsf +#define __ev_fsctuiz __builtin_spe_evfsctuiz +#define __ev_fsctsiz 
__builtin_spe_evfsctsiz + +/* NOT SUPPORTED IN FIRST e500, support via two instructions: */ + +#define __ev_mwhusfaaw __ev_mwhusiaaw +#define __ev_mwhumfaaw __ev_mwhumiaaw +#define __ev_mwhusfanw __ev_mwhusianw +#define __ev_mwhumfanw __ev_mwhumianw +#define __ev_mwhgumfaa __ev_mwhgumiaa +#define __ev_mwhgumfan __ev_mwhgumian + +#define __ev_mwhgssfaa __internal_ev_mwhgssfaa +#define __ev_mwhgsmfaa __internal_ev_mwhgsmfaa +#define __ev_mwhgsmiaa __internal_ev_mwhgsmiaa +#define __ev_mwhgumiaa __internal_ev_mwhgumiaa +#define __ev_mwhgssfan __internal_ev_mwhgssfan +#define __ev_mwhgsmfan __internal_ev_mwhgsmfan +#define __ev_mwhgsmian __internal_ev_mwhgsmian +#define __ev_mwhgumian __internal_ev_mwhgumian +#define __ev_mwhssiaaw __internal_ev_mwhssiaaw +#define __ev_mwhssfaaw __internal_ev_mwhssfaaw +#define __ev_mwhsmfaaw __internal_ev_mwhsmfaaw +#define __ev_mwhsmiaaw __internal_ev_mwhsmiaaw +#define __ev_mwhusiaaw __internal_ev_mwhusiaaw +#define __ev_mwhumiaaw __internal_ev_mwhumiaaw +#define __ev_mwhssfanw __internal_ev_mwhssfanw +#define __ev_mwhssianw __internal_ev_mwhssianw +#define __ev_mwhsmfanw __internal_ev_mwhsmfanw +#define __ev_mwhsmianw __internal_ev_mwhsmianw +#define __ev_mwhusianw __internal_ev_mwhusianw +#define __ev_mwhumianw __internal_ev_mwhumianw + +static inline __ev64_opaque__ +__internal_ev_mwhssfaaw (__ev64_opaque__ a, __ev64_opaque__ b) +{ + __ev64_opaque__ t; + + t = __ev_mwhssf (a, b); + return __ev_addssiaaw (t); +} + +static inline __ev64_opaque__ +__internal_ev_mwhssiaaw (__ev64_opaque__ a, __ev64_opaque__ b) +{ + __ev64_opaque__ t; + + t = __ev_mwhsmi (a, b); + return __ev_addssiaaw (t); +} + +static inline __ev64_opaque__ +__internal_ev_mwhsmfaaw (__ev64_opaque__ a, __ev64_opaque__ b) +{ + __ev64_opaque__ t; + + t = __ev_mwhsmf (a, b); + return __ev_addsmiaaw (t); +} + +static inline __ev64_opaque__ +__internal_ev_mwhsmiaaw (__ev64_opaque__ a, __ev64_opaque__ b) +{ + __ev64_opaque__ t; + + t = __ev_mwhsmi (a, b); + return __ev_addsmiaaw (t); +} + +static inline __ev64_opaque__ +__internal_ev_mwhusiaaw (__ev64_opaque__ a, __ev64_opaque__ b) +{ + __ev64_opaque__ t; + + t = __ev_mwhumi (a, b); + return __ev_addusiaaw (t); +} + +static inline __ev64_opaque__ +__internal_ev_mwhumiaaw (__ev64_opaque__ a, __ev64_opaque__ b) +{ + __ev64_opaque__ t; + + t = __ev_mwhumi (a, b); + return __ev_addumiaaw (t); +} + +static inline __ev64_opaque__ +__internal_ev_mwhssfanw (__ev64_opaque__ a, __ev64_opaque__ b) +{ + __ev64_opaque__ t; + + t = __ev_mwhssf (a, b); + return __ev_subfssiaaw (t); +} + +static inline __ev64_opaque__ +__internal_ev_mwhssianw (__ev64_opaque__ a, __ev64_opaque__ b) +{ + __ev64_opaque__ t; + + t = __ev_mwhsmi (a, b); + return __ev_subfssiaaw (t); +} + +static inline __ev64_opaque__ +__internal_ev_mwhsmfanw (__ev64_opaque__ a, __ev64_opaque__ b) +{ + __ev64_opaque__ t; + + t = __ev_mwhsmf (a, b); + return __ev_subfsmiaaw (t); +} + +static inline __ev64_opaque__ +__internal_ev_mwhsmianw (__ev64_opaque__ a, __ev64_opaque__ b) +{ + __ev64_opaque__ t; + + t = __ev_mwhsmi (a, b); + return __ev_subfsmiaaw (t); +} + +static inline __ev64_opaque__ +__internal_ev_mwhusianw (__ev64_opaque__ a, __ev64_opaque__ b) +{ + __ev64_opaque__ t; + + t = __ev_mwhumi (a, b); + return __ev_subfusiaaw (t); +} + +static inline __ev64_opaque__ +__internal_ev_mwhumianw (__ev64_opaque__ a, __ev64_opaque__ b) +{ + __ev64_opaque__ t; + + t = __ev_mwhumi (a, b); + return __ev_subfumiaaw (t); +} + +static inline __ev64_opaque__ +__internal_ev_mwhgssfaa (__ev64_opaque__ a, 
__ev64_opaque__ b) +{ + __ev64_opaque__ t; + + t = __ev_mwhssf (a, b); + return __ev_mwsmiaa (t, ((__ev64_s32__){1, 1})); +} + +static inline __ev64_opaque__ +__internal_ev_mwhgsmfaa (__ev64_opaque__ a, __ev64_opaque__ b) +{ + __ev64_opaque__ t; + + t = __ev_mwhsmf (a, b); + return __ev_mwsmiaa (t, ((__ev64_s32__){1, 1})); +} + +static inline __ev64_opaque__ +__internal_ev_mwhgsmiaa (__ev64_opaque__ a, __ev64_opaque__ b) +{ + __ev64_opaque__ t; + + t = __ev_mwhsmi (a, b); + return __ev_mwsmiaa (t, ((__ev64_s32__){1, 1})); +} + +static inline __ev64_opaque__ +__internal_ev_mwhgumiaa (__ev64_opaque__ a, __ev64_opaque__ b) +{ + __ev64_opaque__ t; + + t = __ev_mwhumi (a, b); + return __ev_mwumiaa (t, ((__ev64_s32__){1, 1})); +} + +static inline __ev64_opaque__ +__internal_ev_mwhgssfan (__ev64_opaque__ a, __ev64_opaque__ b) +{ + __ev64_opaque__ t; + + t = __ev_mwhssf (a, b); + return __ev_mwsmian (t, ((__ev64_s32__){1, 1})); +} + +static inline __ev64_opaque__ +__internal_ev_mwhgsmfan (__ev64_opaque__ a, __ev64_opaque__ b) +{ + __ev64_opaque__ t; + + t = __ev_mwhsmf (a, b); + return __ev_mwsmian (t, ((__ev64_s32__){1, 1})); +} + +static inline __ev64_opaque__ +__internal_ev_mwhgsmian (__ev64_opaque__ a, __ev64_opaque__ b) +{ + __ev64_opaque__ t; + + t = __ev_mwhsmi (a, b); + return __ev_mwsmian (t, ((__ev64_s32__){1, 1})); +} + +static inline __ev64_opaque__ +__internal_ev_mwhgumian (__ev64_opaque__ a, __ev64_opaque__ b) +{ + __ev64_opaque__ t; + + t = __ev_mwhumi (a, b); + return __ev_mwumian (t, ((__ev64_s32__){1, 1})); +} + +/* END OF NOT SUPPORTED */ + +/* __ev_create* functions. */ + +#define __ev_create_ufix32_u32 __ev_create_u32 +#define __ev_create_sfix32_s32 __ev_create_s32 + +static inline __ev64_opaque__ +__ev_create_s16 (int16_t a, int16_t b, int16_t c, int16_t d) +{ + union + { + __ev64_opaque__ v; + int16_t i[4]; + } u; + + u.i[0] = a; + u.i[1] = b; + u.i[2] = c; + u.i[3] = d; + + return u.v; +} + +static inline __ev64_opaque__ +__ev_create_u16 (uint16_t a, uint16_t b, uint16_t c, uint16_t d) + +{ + union + { + __ev64_opaque__ v; + uint16_t i[4]; + } u; + + u.i[0] = a; + u.i[1] = b; + u.i[2] = c; + u.i[3] = d; + + return u.v; +} + +static inline __ev64_opaque__ +__ev_create_s32 (int32_t a, int32_t b) +{ + union + { + __ev64_opaque__ v; + int32_t i[2]; + } u; + + u.i[0] = a; + u.i[1] = b; + + return u.v; +} + +static inline __ev64_opaque__ +__ev_create_u32 (uint32_t a, uint32_t b) +{ + union + { + __ev64_opaque__ v; + uint32_t i[2]; + } u; + + u.i[0] = a; + u.i[1] = b; + + return u.v; +} + +static inline __ev64_opaque__ +__ev_create_fs (float a, float b) +{ + union + { + __ev64_opaque__ v; + float f[2]; + } u; + + u.f[0] = a; + u.f[1] = b; + + return u.v; +} + +static inline __ev64_opaque__ +__ev_create_sfix32_fs (float a, float b) +{ + __ev64_opaque__ ev; + + ev = (__ev64_opaque__) __ev_create_fs (a, b); + return (__ev64_opaque__) __builtin_spe_evfsctsf ((__v2sf) ev); +} + +static inline __ev64_opaque__ +__ev_create_ufix32_fs (float a, float b) +{ + __ev64_opaque__ ev; + + ev = (__ev64_opaque__) __ev_create_fs (a, b); + return (__ev64_opaque__) __builtin_spe_evfsctuf ((__v2sf) ev); +} + +static inline __ev64_opaque__ +__ev_create_s64 (int64_t a) +{ + union + { + __ev64_opaque__ v; + int64_t i; + } u; + + u.i = a; + return u.v; +} + +static inline __ev64_opaque__ +__ev_create_u64 (uint64_t a) +{ + union + { + __ev64_opaque__ v; + uint64_t i; + } u; + + u.i = a; + return u.v; +} + +static inline uint64_t +__ev_convert_u64 (__ev64_opaque__ a) +{ + return (uint64_t) a; +} + +static 
inline int64_t +__ev_convert_s64 (__ev64_opaque__ a) +{ + return (int64_t) a; +} + +/* __ev_get_* functions. */ + +#define __ev_get_upper_u32(a) __ev_get_u32_internal ((a), 0) +#define __ev_get_lower_u32(a) __ev_get_u32_internal ((a), 1) +#define __ev_get_upper_s32(a) __ev_get_s32_internal ((a), 0) +#define __ev_get_lower_s32(a) __ev_get_s32_internal ((a), 1) +#define __ev_get_upper_fs(a) __ev_get_fs_internal ((a), 0) +#define __ev_get_lower_fs(a) __ev_get_fs_internal ((a), 1) +#define __ev_get_upper_ufix32_u32 __ev_get_upper_u32 +#define __ev_get_lower_ufix32_u32 __ev_get_lower_u32 +#define __ev_get_upper_sfix32_s32 __ev_get_upper_s32 +#define __ev_get_lower_sfix32_s32 __ev_get_lower_s32 +#define __ev_get_upper_sfix32_fs(a) __ev_get_sfix32_fs ((a), 0) +#define __ev_get_lower_sfix32_fs(a) __ev_get_sfix32_fs ((a), 1) +#define __ev_get_upper_ufix32_fs(a) __ev_get_ufix32_fs ((a), 0) +#define __ev_get_lower_ufix32_fs(a) __ev_get_ufix32_fs ((a), 1) + +#define __ev_get_u32 __ev_get_u32_internal +#define __ev_get_s32 __ev_get_s32_internal +#define __ev_get_fs __ev_get_fs_internal +#define __ev_get_u16 __ev_get_u16_internal +#define __ev_get_s16 __ev_get_s16_internal + +#define __ev_get_ufix32_u32 __ev_get_u32 +#define __ev_get_sfix32_s32 __ev_get_s32 +#define __ev_get_ufix32_fs __ev_get_ufix32_fs_internal +#define __ev_get_sfix32_fs __ev_get_sfix32_fs_internal + +static inline uint32_t +__ev_get_u32_internal (__ev64_opaque__ a, uint32_t pos) +{ + union + { + __ev64_opaque__ v; + uint32_t i[2]; + } u; + + u.v = a; + return u.i[pos]; +} + +static inline int32_t +__ev_get_s32_internal (__ev64_opaque__ a, uint32_t pos) +{ + union + { + __ev64_opaque__ v; + int32_t i[2]; + } u; + + u.v = a; + return u.i[pos]; +} + +static inline float +__ev_get_fs_internal (__ev64_opaque__ a, uint32_t pos) +{ + union + { + __ev64_opaque__ v; + float f[2]; + } u; + + u.v = a; + return u.f[pos]; +} + +static inline float +__ev_get_sfix32_fs_internal (__ev64_opaque__ a, uint32_t pos) +{ + __ev64_fs__ v; + + v = __builtin_spe_evfscfsf ((__v2sf) a); + return __ev_get_fs_internal ((__ev64_opaque__) v, pos); +} + +static inline float +__ev_get_ufix32_fs_internal (__ev64_opaque__ a, uint32_t pos) +{ + __ev64_fs__ v; + + v = __builtin_spe_evfscfuf ((__v2sf) a); + return __ev_get_fs_internal ((__ev64_opaque__) v, pos); +} + +static inline uint16_t +__ev_get_u16_internal (__ev64_opaque__ a, uint32_t pos) +{ + union + { + __ev64_opaque__ v; + uint16_t i[4]; + } u; + + u.v = a; + return u.i[pos]; +} + +static inline int16_t +__ev_get_s16_internal (__ev64_opaque__ a, uint32_t pos) +{ + union + { + __ev64_opaque__ v; + int16_t i[4]; + } u; + + u.v = a; + return u.i[pos]; +} + +/* __ev_set_* functions. 
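   Each setter takes a vector by value, replaces one element, and returns the
   updated vector; nothing is modified in place, so the result must be
   assigned back.  A minimal sketch using the accessors defined in this
   header:

     __ev64_opaque__ v = __ev_create_u32 (1, 2);
     v = __ev_set_upper_u32 (v, 5);           v is now { 5, 2 }
     uint32_t hi = __ev_get_upper_u32 (v);    hi == 5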
*/ + +#define __ev_set_u32 __ev_set_u32_internal +#define __ev_set_s32 __ev_set_s32_internal +#define __ev_set_fs __ev_set_fs_internal +#define __ev_set_u16 __ev_set_u16_internal +#define __ev_set_s16 __ev_set_s16_internal + +#define __ev_set_ufix32_u32 __ev_set_u32 +#define __ev_set_sfix32_s32 __ev_set_s32 + +#define __ev_set_sfix32_fs __ev_set_sfix32_fs_internal +#define __ev_set_ufix32_fs __ev_set_ufix32_fs_internal + +#define __ev_set_upper_u32(a, b) __ev_set_u32 (a, b, 0) +#define __ev_set_lower_u32(a, b) __ev_set_u32 (a, b, 1) +#define __ev_set_upper_s32(a, b) __ev_set_s32 (a, b, 0) +#define __ev_set_lower_s32(a, b) __ev_set_s32 (a, b, 1) +#define __ev_set_upper_fs(a, b) __ev_set_fs (a, b, 0) +#define __ev_set_lower_fs(a, b) __ev_set_fs (a, b, 1) +#define __ev_set_upper_ufix32_u32 __ev_set_upper_u32 +#define __ev_set_lower_ufix32_u32 __ev_set_lower_u32 +#define __ev_set_upper_sfix32_s32 __ev_set_upper_s32 +#define __ev_set_lower_sfix32_s32 __ev_set_lower_s32 +#define __ev_set_upper_sfix32_fs(a, b) __ev_set_sfix32_fs (a, b, 0) +#define __ev_set_lower_sfix32_fs(a, b) __ev_set_sfix32_fs (a, b, 1) +#define __ev_set_upper_ufix32_fs(a, b) __ev_set_ufix32_fs (a, b, 0) +#define __ev_set_lower_ufix32_fs(a, b) __ev_set_ufix32_fs (a, b, 1) + +#define __ev_set_acc_vec64 __builtin_spe_evmra + +static inline __ev64_opaque__ +__ev_set_acc_u64 (uint64_t a) +{ + __ev64_opaque__ ev32; + ev32 = __ev_create_u64 (a); + __ev_mra (ev32); + return ev32; +} + +static inline __ev64_opaque__ +__ev_set_acc_s64 (int64_t a) +{ + __ev64_opaque__ ev32; + ev32 = __ev_create_s64 (a); + __ev_mra (ev32); + return ev32; +} + +static inline __ev64_opaque__ +__ev_set_u32_internal (__ev64_opaque__ a, uint32_t b, uint32_t pos) +{ + union + { + __ev64_opaque__ v; + uint32_t i[2]; + } u; + + u.v = a; + u.i[pos] = b; + return u.v; +} + +static inline __ev64_opaque__ +__ev_set_s32_internal (__ev64_opaque__ a, int32_t b, uint32_t pos) +{ + union + { + __ev64_opaque__ v; + int32_t i[2]; + } u; + + u.v = a; + u.i[pos] = b; + return u.v; +} + +static inline __ev64_opaque__ +__ev_set_fs_internal (__ev64_opaque__ a, float b, uint32_t pos) +{ + union + { + __ev64_opaque__ v; + float f[2]; + } u; + + u.v = a; + u.f[pos] = b; + return u.v; +} + +static inline __ev64_opaque__ +__ev_set_sfix32_fs_internal (__ev64_opaque__ a, float b, uint32_t pos) +{ + __ev64_opaque__ v; + float other; + + /* Get other half. */ + other = __ev_get_fs_internal (a, pos ^ 1); + + /* Make an sfix32 with 'b'. */ + v = __ev_create_sfix32_fs (b, b); + + /* Set other half to what it used to be. */ + return __ev_set_fs_internal (v, other, pos ^ 1); +} + +static inline __ev64_opaque__ +__ev_set_ufix32_fs_internal (__ev64_opaque__ a, float b, uint32_t pos) +{ + __ev64_opaque__ v; + float other; + + /* Get other half. */ + other = __ev_get_fs_internal (a, pos ^ 1); + + /* Make an ufix32 with 'b'. */ + v = __ev_create_ufix32_fs (b, b); + + /* Set other half to what it used to be. */ + return __ev_set_fs_internal (v, other, pos ^ 1); +} + +static inline __ev64_opaque__ +__ev_set_u16_internal (__ev64_opaque__ a, uint16_t b, uint32_t pos) +{ + union + { + __ev64_opaque__ v; + uint16_t i[4]; + } u; + + u.v = a; + u.i[pos] = b; + return u.v; +} + +static inline __ev64_opaque__ +__ev_set_s16_internal (__ev64_opaque__ a, int16_t b, uint32_t pos) +{ + union + { + __ev64_opaque__ v; + int16_t i[4]; + } u; + + u.v = a; + u.i[pos] = b; + return u.v; +} + +/* Predicates. 
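   Each predicate below applies an element-wise comparison and reduces it
   with the selected condition (all, any, upper, lower), yielding an ordinary
   truth value, while the __ev_select_* forms pick elements from two vectors
   according to the comparison.  A minimal sketch:

     __ev64_opaque__ a = __ev_create_s32 (3, -1);
     __ev64_opaque__ b = __ev_create_s32 (0, 0);
     int any_positive = __ev_any_gts (a, b);   nonzero: 3 > 0 in the upper half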
*/ + +#define __pred_all 0 +#define __pred_any 1 +#define __pred_upper 2 +#define __pred_lower 3 + +#define __ev_any_gts(a, b) __builtin_spe_evcmpgts (__pred_any, (a), (b)) +#define __ev_all_gts(a, b) __builtin_spe_evcmpgts (__pred_all, (a), (b)) +#define __ev_upper_gts(a, b) __builtin_spe_evcmpgts (__pred_upper, (a), (b)) +#define __ev_lower_gts(a, b) __builtin_spe_evcmpgts (__pred_lower, (a), (b)) +#define __ev_select_gts __builtin_spe_evsel_gts + +#define __ev_any_gtu(a, b) __builtin_spe_evcmpgtu (__pred_any, (a), (b)) +#define __ev_all_gtu(a, b) __builtin_spe_evcmpgtu (__pred_all, (a), (b)) +#define __ev_upper_gtu(a, b) __builtin_spe_evcmpgtu (__pred_upper, (a), (b)) +#define __ev_lower_gtu(a, b) __builtin_spe_evcmpgtu (__pred_lower, (a), (b)) +#define __ev_select_gtu __builtin_spe_evsel_gtu + +#define __ev_any_lts(a, b) __builtin_spe_evcmplts (__pred_any, (a), (b)) +#define __ev_all_lts(a, b) __builtin_spe_evcmplts (__pred_all, (a), (b)) +#define __ev_upper_lts(a, b) __builtin_spe_evcmplts (__pred_upper, (a), (b)) +#define __ev_lower_lts(a, b) __builtin_spe_evcmplts (__pred_lower, (a), (b)) +#define __ev_select_lts(a, b, c, d) ((__v2si) __builtin_spe_evsel_lts ((a), (b), (c), (d))) + +#define __ev_any_ltu(a, b) __builtin_spe_evcmpltu (__pred_any, (a), (b)) +#define __ev_all_ltu(a, b) __builtin_spe_evcmpltu (__pred_all, (a), (b)) +#define __ev_upper_ltu(a, b) __builtin_spe_evcmpltu (__pred_upper, (a), (b)) +#define __ev_lower_ltu(a, b) __builtin_spe_evcmpltu (__pred_lower, (a), (b)) +#define __ev_select_ltu __builtin_spe_evsel_ltu +#define __ev_any_eq(a, b) __builtin_spe_evcmpeq (__pred_any, (a), (b)) +#define __ev_all_eq(a, b) __builtin_spe_evcmpeq (__pred_all, (a), (b)) +#define __ev_upper_eq(a, b) __builtin_spe_evcmpeq (__pred_upper, (a), (b)) +#define __ev_lower_eq(a, b) __builtin_spe_evcmpeq (__pred_lower, (a), (b)) +#define __ev_select_eq __builtin_spe_evsel_eq + +#define __ev_any_fs_gt(a, b) __builtin_spe_evfscmpgt (__pred_any, (a), (b)) +#define __ev_all_fs_gt(a, b) __builtin_spe_evfscmpgt (__pred_all, (a), (b)) +#define __ev_upper_fs_gt(a, b) __builtin_spe_evfscmpgt (__pred_upper, (a), (b)) +#define __ev_lower_fs_gt(a, b) __builtin_spe_evfscmpgt (__pred_lower, (a), (b)) +#define __ev_select_fs_gt __builtin_spe_evsel_fsgt + +#define __ev_any_fs_lt(a, b) __builtin_spe_evfscmplt (__pred_any, (a), (b)) +#define __ev_all_fs_lt(a, b) __builtin_spe_evfscmplt (__pred_all, (a), (b)) +#define __ev_upper_fs_lt(a, b) __builtin_spe_evfscmplt (__pred_upper, (a), (b)) +#define __ev_lower_fs_lt(a, b) __builtin_spe_evfscmplt (__pred_lower, (a), (b)) +#define __ev_select_fs_lt __builtin_spe_evsel_fslt + +#define __ev_any_fs_eq(a, b) __builtin_spe_evfscmpeq (__pred_any, (a), (b)) +#define __ev_all_fs_eq(a, b) __builtin_spe_evfscmpeq (__pred_all, (a), (b)) +#define __ev_upper_fs_eq(a, b) __builtin_spe_evfscmpeq (__pred_upper, (a), (b)) +#define __ev_lower_fs_eq(a, b) __builtin_spe_evfscmpeq (__pred_lower, (a), (b)) +#define __ev_select_fs_eq __builtin_spe_evsel_fseq + +#define __ev_any_fs_tst_gt(a, b) __builtin_spe_evfststgt (__pred_any, (a), (b)) +#define __ev_all_fs_tst_gt(a, b) __builtin_spe_evfststgt (__pred_all, (a), (b)) +#define __ev_upper_fs_tst_gt(a, b) __builtin_spe_evfststgt (__pred_upper, (a), (b)) +#define __ev_lower_fs_tst_gt(a, b) __builtin_spe_evfststgt (__pred_lower, (a), (b)) +#define __ev_select_fs_tst_gt __builtin_spe_evsel_fststgt + +#define __ev_any_fs_tst_lt(a, b) __builtin_spe_evfststlt (__pred_any, (a), (b)) +#define __ev_all_fs_tst_lt(a, b) __builtin_spe_evfststlt 
(__pred_all, (a), (b)) +#define __ev_upper_fs_tst_lt(a, b) __builtin_spe_evfststlt (__pred_upper, (a), (b)) +#define __ev_lower_fs_tst_lt(a, b) __builtin_spe_evfststlt (__pred_lower, (a), (b)) +#define __ev_select_fs_tst_lt __builtin_spe_evsel_fststlt + +#define __ev_any_fs_tst_eq(a, b) __builtin_spe_evfststeq (__pred_any, (a), (b)) +#define __ev_all_fs_tst_eq(a, b) __builtin_spe_evfststeq (__pred_all, (a), (b)) +#define __ev_upper_fs_tst_eq(a, b) __builtin_spe_evfststeq (__pred_upper, (a), (b)) +#define __ev_lower_fs_tst_eq(a, b) __builtin_spe_evfststeq (__pred_lower, (a), (b)) +#define __ev_select_fs_tst_eq __builtin_spe_evsel_fststeq + +/* SPEFSCR accessor functions. */ + +#define __SPEFSCR_SOVH 0x80000000 +#define __SPEFSCR_OVH 0x40000000 +#define __SPEFSCR_FGH 0x20000000 +#define __SPEFSCR_FXH 0x10000000 +#define __SPEFSCR_FINVH 0x08000000 +#define __SPEFSCR_FDBZH 0x04000000 +#define __SPEFSCR_FUNFH 0x02000000 +#define __SPEFSCR_FOVFH 0x01000000 +/* 2 unused bits. */ +#define __SPEFSCR_FINXS 0x00200000 +#define __SPEFSCR_FINVS 0x00100000 +#define __SPEFSCR_FDBZS 0x00080000 +#define __SPEFSCR_FUNFS 0x00040000 +#define __SPEFSCR_FOVFS 0x00020000 +#define __SPEFSCR_MODE 0x00010000 +#define __SPEFSCR_SOV 0x00008000 +#define __SPEFSCR_OV 0x00004000 +#define __SPEFSCR_FG 0x00002000 +#define __SPEFSCR_FX 0x00001000 +#define __SPEFSCR_FINV 0x00000800 +#define __SPEFSCR_FDBZ 0x00000400 +#define __SPEFSCR_FUNF 0x00000200 +#define __SPEFSCR_FOVF 0x00000100 +/* 1 unused bit. */ +#define __SPEFSCR_FINXE 0x00000040 +#define __SPEFSCR_FINVE 0x00000020 +#define __SPEFSCR_FDBZE 0x00000010 +#define __SPEFSCR_FUNFE 0x00000008 +#define __SPEFSCR_FOVFE 0x00000004 +#define __SPEFSCR_FRMC 0x00000003 + +#define __ev_get_spefscr_sovh() (__builtin_spe_mfspefscr () & __SPEFSCR_SOVH) +#define __ev_get_spefscr_ovh() (__builtin_spe_mfspefscr () & __SPEFSCR_OVH) +#define __ev_get_spefscr_fgh() (__builtin_spe_mfspefscr () & __SPEFSCR_FGH) +#define __ev_get_spefscr_fxh() (__builtin_spe_mfspefscr () & __SPEFSCR_FXH) +#define __ev_get_spefscr_finvh() (__builtin_spe_mfspefscr () & __SPEFSCR_FINVH) +#define __ev_get_spefscr_fdbzh() (__builtin_spe_mfspefscr () & __SPEFSCR_FDBZH) +#define __ev_get_spefscr_funfh() (__builtin_spe_mfspefscr () & __SPEFSCR_FUNFH) +#define __ev_get_spefscr_fovfh() (__builtin_spe_mfspefscr () & __SPEFSCR_FOVFH) +#define __ev_get_spefscr_finxs() (__builtin_spe_mfspefscr () & __SPEFSCR_FINXS) +#define __ev_get_spefscr_finvs() (__builtin_spe_mfspefscr () & __SPEFSCR_FINVS) +#define __ev_get_spefscr_fdbzs() (__builtin_spe_mfspefscr () & __SPEFSCR_FDBZS) +#define __ev_get_spefscr_funfs() (__builtin_spe_mfspefscr () & __SPEFSCR_FUNFS) +#define __ev_get_spefscr_fovfs() (__builtin_spe_mfspefscr () & __SPEFSCR_FOVFS) +#define __ev_get_spefscr_mode() (__builtin_spe_mfspefscr () & __SPEFSCR_MODE) +#define __ev_get_spefscr_sov() (__builtin_spe_mfspefscr () & __SPEFSCR_SOV) +#define __ev_get_spefscr_ov() (__builtin_spe_mfspefscr () & __SPEFSCR_OV) +#define __ev_get_spefscr_fg() (__builtin_spe_mfspefscr () & __SPEFSCR_FG) +#define __ev_get_spefscr_fx() (__builtin_spe_mfspefscr () & __SPEFSCR_FX) +#define __ev_get_spefscr_finv() (__builtin_spe_mfspefscr () & __SPEFSCR_FINV) +#define __ev_get_spefscr_fdbz() (__builtin_spe_mfspefscr () & __SPEFSCR_FDBZ) +#define __ev_get_spefscr_funf() (__builtin_spe_mfspefscr () & __SPEFSCR_FUNF) +#define __ev_get_spefscr_fovf() (__builtin_spe_mfspefscr () & __SPEFSCR_FOVF) +#define __ev_get_spefscr_finxe() (__builtin_spe_mfspefscr () & __SPEFSCR_FINXE) +#define 
__ev_get_spefscr_finve() (__builtin_spe_mfspefscr () & __SPEFSCR_FINVE) +#define __ev_get_spefscr_fdbze() (__builtin_spe_mfspefscr () & __SPEFSCR_FDBZE) +#define __ev_get_spefscr_funfe() (__builtin_spe_mfspefscr () & __SPEFSCR_FUNFE) +#define __ev_get_spefscr_fovfe() (__builtin_spe_mfspefscr () & __SPEFSCR_FOVFE) +#define __ev_get_spefscr_frmc() (__builtin_spe_mfspefscr () & __SPEFSCR_FRMC) + +static inline void +__ev_clr_spefscr_field (int mask) +{ + int i; + + i = __builtin_spe_mfspefscr (); + i &= ~mask; + __builtin_spe_mtspefscr (i); +} + +#define __ev_clr_spefscr_sovh() __ev_clr_spefscr_field (__SPEFSCR_SOVH) +#define __ev_clr_spefscr_sov() __ev_clr_spefscr_field (__SPEFSCR_SOV) +#define __ev_clr_spefscr_finxs() __ev_clr_spefscr_field (__SPEFSCR_FINXS) +#define __ev_clr_spefscr_finvs() __ev_clr_spefscr_field (__SPEFSCR_FINVS) +#define __ev_clr_spefscr_fdbzs() __ev_clr_spefscr_field (__SPEFSCR_FDBZS) +#define __ev_clr_spefscr_funfs() __ev_clr_spefscr_field (__SPEFSCR_FUNFS) +#define __ev_clr_spefscr_fovfs() __ev_clr_spefscr_field (__SPEFSCR_FOVFS) + +/* Set rounding mode: + rnd = 0 (nearest) + rnd = 1 (zero) + rnd = 2 (+inf) + rnd = 3 (-inf). */ + +static inline void +__ev_set_spefscr_frmc (int rnd) +{ + int i; + + i = __builtin_spe_mfspefscr (); + i &= ~__SPEFSCR_FRMC; + i |= rnd; + __builtin_spe_mtspefscr (i); +} + +/* The SPE PIM says these are declared in <spe.h>, although they are + not provided by GCC: they must be taken from a separate + library. */ +extern short int atosfix16 (const char *); +extern int atosfix32 (const char *); +extern long long atosfix64 (const char *); + +extern unsigned short atoufix16 (const char *); +extern unsigned int atoufix32 (const char *); +extern unsigned long long atoufix64 (const char *); + +extern short int strtosfix16 (const char *, char **); +extern int strtosfix32 (const char *, char **); +extern long long strtosfix64 (const char *, char **); + +extern unsigned short int strtoufix16 (const char *, char **); +extern unsigned int strtoufix32 (const char *, char **); +extern unsigned long long strtoufix64 (const char *, char **); + +#endif /* _SPE_H */ diff --git a/gcc/config/powerpcspe/spe.md b/gcc/config/powerpcspe/spe.md new file mode 100644 index 000000000000..2351152dc249 --- /dev/null +++ b/gcc/config/powerpcspe/spe.md @@ -0,0 +1,3512 @@ +;; e500 SPE description +;; Copyright (C) 2002-2017 Free Software Foundation, Inc. +;; Contributed by Aldy Hernandez (aldy@quesejoda.com) + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_constants + [(CMPDFEQ_GPR 1006) + (TSTDFEQ_GPR 1007) + (CMPDFGT_GPR 1008) + (TSTDFGT_GPR 1009) + (CMPDFLT_GPR 1010) + (TSTDFLT_GPR 1011) + (CMPTFEQ_GPR 1012) + (TSTTFEQ_GPR 1013) + (CMPTFGT_GPR 1014) + (TSTTFGT_GPR 1015) + (CMPTFLT_GPR 1016) + (TSTTFLT_GPR 1017) + (E500_CR_IOR_COMPARE 1018) + ]) + +;; Modes using a 64-bit register. 
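;; Each mode iterator below lets one pattern template stand for a whole
;; family of insns, one per listed mode; occurrences of <mode> in such a
;; template are replaced with the iterator's current mode when the pattern
;; is instantiated.  For example, a hypothetical template named "foo_<mode>"
;; written over SPE64 would produce foo_df, foo_v4hi, foo_v2sf, foo_v1di
;; and foo_v2si variants.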
+(define_mode_iterator SPE64 [DF V4HI V2SF V1DI V2SI]) + +;; Likewise, but allow TFmode (two registers) as well. +(define_mode_iterator SPE64TF [DF V4HI V2SF V1DI V2SI TF]) + +;; DImode and TImode. +(define_mode_iterator DITI [DI TI]) + +(define_insn "*negsf2_gpr" + [(set (match_operand:SF 0 "gpc_reg_operand" "=r") + (neg:SF (match_operand:SF 1 "gpc_reg_operand" "r")))] + "TARGET_HARD_FLOAT && !TARGET_FPRS" + "efsneg %0,%1" + [(set_attr "type" "fpsimple")]) + +(define_insn "*abssf2_gpr" + [(set (match_operand:SF 0 "gpc_reg_operand" "=r") + (abs:SF (match_operand:SF 1 "gpc_reg_operand" "r")))] + "TARGET_HARD_FLOAT && !TARGET_FPRS" + "efsabs %0,%1" + [(set_attr "type" "fpsimple")]) + +(define_insn "*nabssf2_gpr" + [(set (match_operand:SF 0 "gpc_reg_operand" "=r") + (neg:SF (abs:SF (match_operand:SF 1 "gpc_reg_operand" "r"))))] + "TARGET_HARD_FLOAT && !TARGET_FPRS" + "efsnabs %0,%1" + [(set_attr "type" "fpsimple")]) + +(define_insn "*addsf3_gpr" + [(set (match_operand:SF 0 "gpc_reg_operand" "=r") + (plus:SF (match_operand:SF 1 "gpc_reg_operand" "%r") + (match_operand:SF 2 "gpc_reg_operand" "r")))] + "TARGET_HARD_FLOAT && !TARGET_FPRS" + "efsadd %0,%1,%2" + [(set_attr "type" "fp")]) + +(define_insn "*subsf3_gpr" + [(set (match_operand:SF 0 "gpc_reg_operand" "=r") + (minus:SF (match_operand:SF 1 "gpc_reg_operand" "r") + (match_operand:SF 2 "gpc_reg_operand" "r")))] + "TARGET_HARD_FLOAT && !TARGET_FPRS" + "efssub %0,%1,%2" + [(set_attr "type" "fp")]) + +(define_insn "*mulsf3_gpr" + [(set (match_operand:SF 0 "gpc_reg_operand" "=r") + (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%r") + (match_operand:SF 2 "gpc_reg_operand" "r")))] + "TARGET_HARD_FLOAT && !TARGET_FPRS" + "efsmul %0,%1,%2" + [(set_attr "type" "fp")]) + +(define_insn "*divsf3_gpr" + [(set (match_operand:SF 0 "gpc_reg_operand" "=r") + (div:SF (match_operand:SF 1 "gpc_reg_operand" "r") + (match_operand:SF 2 "gpc_reg_operand" "r")))] + "TARGET_HARD_FLOAT && !TARGET_FPRS" + "efsdiv %0,%1,%2" + [(set_attr "type" "vecfdiv")]) + +;; Floating point conversion instructions. 
+ +(define_insn "spe_fixuns_truncdfsi2" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (unsigned_fix:SI (match_operand:DF 1 "gpc_reg_operand" "r")))] + "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE" + "efdctuiz %0,%1" + [(set_attr "type" "fp")]) + +(define_insn "spe_extendsfdf2" + [(set (match_operand:DF 0 "gpc_reg_operand" "=r") + (float_extend:DF (match_operand:SF 1 "gpc_reg_operand" "r")))] + "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE" + "efdcfs %0,%1" + [(set_attr "type" "fp")]) + +(define_insn "spe_fixuns_truncsfsi2" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (unsigned_fix:SI (match_operand:SF 1 "gpc_reg_operand" "r")))] + "TARGET_HARD_FLOAT && !TARGET_FPRS" + "efsctuiz %0,%1" + [(set_attr "type" "fp")]) + +(define_insn "spe_fix_truncsfsi2" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (fix:SI (match_operand:SF 1 "gpc_reg_operand" "r")))] + "TARGET_HARD_FLOAT && !TARGET_FPRS" + "efsctsiz %0,%1" + [(set_attr "type" "fp")]) + +(define_insn "spe_fix_truncdfsi2" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (fix:SI (match_operand:DF 1 "gpc_reg_operand" "r")))] + "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE" + "efdctsiz %0,%1" + [(set_attr "type" "fp")]) + +(define_insn "spe_floatunssisf2" + [(set (match_operand:SF 0 "gpc_reg_operand" "=r") + (unsigned_float:SF (match_operand:SI 1 "gpc_reg_operand" "r")))] + "TARGET_HARD_FLOAT && !TARGET_FPRS" + "efscfui %0,%1" + [(set_attr "type" "fp")]) + +(define_insn "spe_floatunssidf2" + [(set (match_operand:DF 0 "gpc_reg_operand" "=r") + (unsigned_float:DF (match_operand:SI 1 "gpc_reg_operand" "r")))] + "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE" + "efdcfui %0,%1" + [(set_attr "type" "fp")]) + +(define_insn "spe_floatsisf2" + [(set (match_operand:SF 0 "gpc_reg_operand" "=r") + (float:SF (match_operand:SI 1 "gpc_reg_operand" "r")))] + "TARGET_HARD_FLOAT && !TARGET_FPRS" + "efscfsi %0,%1" + [(set_attr "type" "fp")]) + +(define_insn "spe_floatsidf2" + [(set (match_operand:DF 0 "gpc_reg_operand" "=r") + (float:DF (match_operand:SI 1 "gpc_reg_operand" "r")))] + "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE" + "efdcfsi %0,%1" + [(set_attr "type" "fp")]) + +;; SPE SIMD instructions + +(define_insn "absv2si2" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (abs:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r")))] + "TARGET_SPE" + "evabs %0,%1" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "spe_evandc" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (and:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r") + (not:V2SI (match_operand:V2SI 2 "gpc_reg_operand" "r"))))] + "TARGET_SPE" + "evandc %0,%1,%2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "andv2si3" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (and:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")))] + "TARGET_SPE" + "evand %0,%1,%2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +;; Vector compare instructions + +(define_insn "spe_evcmpeq" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 500))] + "TARGET_SPE" + "evcmpeq %0,%1,%2" + [(set_attr "type" "veccmp") + (set_attr "length" "4")]) + +(define_insn "spe_evcmpgts" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 501))] + "TARGET_SPE" + "evcmpgts %0,%1,%2" + [(set_attr 
"type" "veccmp") + (set_attr "length" "4")]) + +(define_insn "spe_evcmpgtu" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 502))] + "TARGET_SPE" + "evcmpgtu %0,%1,%2" + [(set_attr "type" "veccmp") + (set_attr "length" "4")]) + +(define_insn "spe_evcmplts" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 503))] + "TARGET_SPE" + "evcmplts %0,%1,%2" + [(set_attr "type" "veccmp") + (set_attr "length" "4")]) + +(define_insn "spe_evcmpltu" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 504))] + "TARGET_SPE" + "evcmpltu %0,%1,%2" + [(set_attr "type" "veccmp") + (set_attr "length" "4")]) + +;; Floating point vector compare instructions + +(define_insn "spe_evfscmpeq" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:V2SF 1 "gpc_reg_operand" "r") + (match_operand:V2SF 2 "gpc_reg_operand" "r")] 538)) + (clobber (reg:SI SPEFSCR_REGNO))] + "TARGET_SPE" + "evfscmpeq %0,%1,%2" + [(set_attr "type" "veccmp") + (set_attr "length" "4")]) + +(define_insn "spe_evfscmpgt" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:V2SF 1 "gpc_reg_operand" "r") + (match_operand:V2SF 2 "gpc_reg_operand" "r")] 539)) + (clobber (reg:SI SPEFSCR_REGNO))] + "TARGET_SPE" + "evfscmpgt %0,%1,%2" + [(set_attr "type" "veccmp") + (set_attr "length" "4")]) + +(define_insn "spe_evfscmplt" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:V2SF 1 "gpc_reg_operand" "r") + (match_operand:V2SF 2 "gpc_reg_operand" "r")] 540)) + (clobber (reg:SI SPEFSCR_REGNO))] + "TARGET_SPE" + "evfscmplt %0,%1,%2" + [(set_attr "type" "veccmp") + (set_attr "length" "4")]) + +(define_insn "spe_evfststeq" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:V2SF 1 "gpc_reg_operand" "r") + (match_operand:V2SF 2 "gpc_reg_operand" "r")] 541))] + "TARGET_SPE" + "evfststeq %0,%1,%2" + [(set_attr "type" "veccmp") + (set_attr "length" "4")]) + +(define_insn "spe_evfststgt" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:V2SF 1 "gpc_reg_operand" "r") + (match_operand:V2SF 2 "gpc_reg_operand" "r")] 542))] + "TARGET_SPE" + "evfststgt %0,%1,%2" + [(set_attr "type" "veccmp") + (set_attr "length" "4")]) + +(define_insn "spe_evfststlt" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:V2SF 1 "gpc_reg_operand" "r") + (match_operand:V2SF 2 "gpc_reg_operand" "r")] 543))] + "TARGET_SPE" + "evfststlt %0,%1,%2" + [(set_attr "type" "veccmp") + (set_attr "length" "4")]) + +;; End of vector compare instructions + +(define_insn "spe_evcntlsw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")] 505))] + "TARGET_SPE" + "evcntlsw %0,%1" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "spe_evcntlzw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")] 506))] + "TARGET_SPE" + "evcntlzw %0,%1" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "spe_eveqv" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (not:V2SI (xor:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 
"gpc_reg_operand" "r"))))] + "TARGET_SPE" + "eveqv %0,%1,%2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "spe_evextsb" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")] 507))] + "TARGET_SPE" + "evextsb %0,%1" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "spe_evextsh" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")] 508))] + "TARGET_SPE" + "evextsh %0,%1" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "spe_evlhhesplat" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand:QI 2 "immediate_operand" "i")))) + (unspec [(const_int 0)] 509)] + "TARGET_SPE && INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 31" + "evlhhesplat %0,%2*2(%1)" + [(set_attr "type" "vecload") + (set_attr "length" "4")]) + +(define_insn "spe_evlhhesplatx" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand:SI 2 "gpc_reg_operand" "r")))) + (unspec [(const_int 0)] 510)] + "TARGET_SPE" + "evlhhesplatx %0,%1,%2" + [(set_attr "type" "vecload") + (set_attr "length" "4")]) + +(define_insn "spe_evlhhossplat" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand:QI 2 "immediate_operand" "i")))) + (unspec [(const_int 0)] 511)] + "TARGET_SPE && INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 31" + "evlhhossplat %0,%2*2(%1)" + [(set_attr "type" "vecload") + (set_attr "length" "4")]) + +(define_insn "spe_evlhhossplatx" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand:SI 2 "gpc_reg_operand" "r")))) + (unspec [(const_int 0)] 512)] + "TARGET_SPE" + "evlhhossplatx %0,%1,%2" + [(set_attr "type" "vecload") + (set_attr "length" "4")]) + +(define_insn "spe_evlhhousplat" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand:QI 2 "immediate_operand" "i")))) + (unspec [(const_int 0)] 513)] + "TARGET_SPE && INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 31" + "evlhhousplat %0,%2*2(%1)" + [(set_attr "type" "vecload") + (set_attr "length" "4")]) + +(define_insn "spe_evlhhousplatx" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand:SI 2 "gpc_reg_operand" "r")))) + (unspec [(const_int 0)] 514)] + "TARGET_SPE" + "evlhhousplatx %0,%1,%2" + [(set_attr "type" "vecload") + (set_attr "length" "4")]) + +(define_insn "spe_evlwhsplat" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand:QI 2 "immediate_operand" "i")))) + (unspec [(const_int 0)] 515)] + "TARGET_SPE && INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 31" + "evlwhsplat %0,%2*4(%1)" + [(set_attr "type" "vecload") + (set_attr "length" "4")]) + +(define_insn "spe_evlwhsplatx" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand:SI 2 "gpc_reg_operand" "r")))) + (unspec [(const_int 0)] 516)] + "TARGET_SPE" + "evlwhsplatx %0,%1,%2" + [(set_attr "type" "vecload") + (set_attr "length" "4")]) + +(define_insn 
"spe_evlwwsplat" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand:QI 2 "immediate_operand" "i")))) + (unspec [(const_int 0)] 517)] + "TARGET_SPE && INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 31" + "evlwwsplat %0,%2*4(%1)" + [(set_attr "type" "vecload") + (set_attr "length" "4")]) + +(define_insn "spe_evlwwsplatx" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand:SI 2 "gpc_reg_operand" "r")))) + (unspec [(const_int 0)] 518)] + "TARGET_SPE" + "evlwwsplatx %0,%1,%2" + [(set_attr "type" "vecload") + (set_attr "length" "4")]) + +;; Integer vector permutation instructions. The pairs of digits in the +;; names of these instructions indicate the indices, in the memory vector +;; element ordering, of the vector elements permuted to the output vector +;; from the first and the second input vector respectively. + +(define_insn "vec_perm00_v2si" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (vec_select:V2SI + (vec_concat:V4SI + (match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")) + (parallel [(const_int 0) (const_int 2)])))] + "TARGET_SPE" +{ + if (WORDS_BIG_ENDIAN) + return "evmergehi %0,%1,%2"; + else + return "evmergelo %0,%2,%1"; +} + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "vec_perm01_v2si" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (vec_select:V2SI + (vec_concat:V4SI + (match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")) + (parallel [(const_int 0) (const_int 3)])))] + "TARGET_SPE" +{ + if (WORDS_BIG_ENDIAN) + return "evmergehilo %0,%1,%2"; + else + return "evmergehilo %0,%2,%1"; +} + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "vec_perm11_v2si" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (vec_select:V2SI + (vec_concat:V4SI + (match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")) + (parallel [(const_int 1) (const_int 3)])))] + "TARGET_SPE" +{ + if (WORDS_BIG_ENDIAN) + return "evmergelo %0,%1,%2"; + else + return "evmergehi %0,%2,%1"; +} + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "vec_perm10_v2si" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (vec_select:V2SI + (vec_concat:V4SI + (match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")) + (parallel [(const_int 1) (const_int 2)])))] + "TARGET_SPE" +{ + if (WORDS_BIG_ENDIAN) + return "evmergelohi %0,%1,%2"; + else + return "evmergelohi %0,%2,%1"; +} + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_expand "vec_perm_constv2si" + [(match_operand:V2SI 0 "gpc_reg_operand" "") + (match_operand:V2SI 1 "gpc_reg_operand" "") + (match_operand:V2SI 2 "gpc_reg_operand" "") + (match_operand:V2SI 3 "" "")] + "TARGET_SPE" +{ + if (rs6000_expand_vec_perm_const (operands)) + DONE; + else + FAIL; +}) + +(define_expand "spe_evmergehi" + [(match_operand:V2SI 0 "register_operand" "") + (match_operand:V2SI 1 "register_operand" "") + (match_operand:V2SI 2 "register_operand" "")] + "TARGET_SPE" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_vec_perm00_v2si (operands[0], operands[1], operands[2])); + else + emit_insn (gen_vec_perm11_v2si (operands[0], operands[2], operands[1])); + DONE; +}) + +(define_expand "spe_evmergehilo" + [(match_operand:V2SI 0 "register_operand" "") + 
(match_operand:V2SI 1 "register_operand" "") + (match_operand:V2SI 2 "register_operand" "")] + "TARGET_SPE" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_vec_perm01_v2si (operands[0], operands[1], operands[2])); + else + emit_insn (gen_vec_perm01_v2si (operands[0], operands[2], operands[1])); + DONE; +}) + +(define_expand "spe_evmergelo" + [(match_operand:V2SI 0 "register_operand" "") + (match_operand:V2SI 1 "register_operand" "") + (match_operand:V2SI 2 "register_operand" "")] + "TARGET_SPE" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_vec_perm11_v2si (operands[0], operands[1], operands[2])); + else + emit_insn (gen_vec_perm00_v2si (operands[0], operands[2], operands[1])); + DONE; +}) + +(define_expand "spe_evmergelohi" + [(match_operand:V2SI 0 "register_operand" "") + (match_operand:V2SI 1 "register_operand" "") + (match_operand:V2SI 2 "register_operand" "")] + "TARGET_SPE" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_vec_perm10_v2si (operands[0], operands[1], operands[2])); + else + emit_insn (gen_vec_perm10_v2si (operands[0], operands[2], operands[1])); + DONE; +}) + +;; End of integer vector permutation instructions. + +(define_insn "spe_evnand" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (not:V2SI (and:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r"))))] + "TARGET_SPE" + "evnand %0,%1,%2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "negv2si2" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (neg:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r")))] + "TARGET_SPE" + "evneg %0,%1" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "spe_evnor" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (not:V2SI (ior:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r"))))] + "TARGET_SPE" + "evnor %0,%1,%2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "spe_evorc" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (ior:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r") + (not:V2SI (match_operand:V2SI 2 "gpc_reg_operand" "r"))))] + "TARGET_SPE" + "evorc %0,%1,%2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "spe_evor" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (ior:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")))] + "TARGET_SPE" + "evor %0,%1,%2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "spe_evrlwi" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:QI 2 "immediate_operand" "i")] 519))] + "TARGET_SPE" + "evrlwi %0,%1,%2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "spe_evrlw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 520))] + "TARGET_SPE" + "evrlw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evrndw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")] 521))] + "TARGET_SPE" + "evrndw %0,%1" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "spe_evsel" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 
"gpc_reg_operand" "r") + (match_operand:CC 3 "cc_reg_operand" "y")] 522))] + "TARGET_SPE" + "evsel %0,%1,%2,%3" + [(set_attr "type" "veccmp") + (set_attr "length" "4")]) + +(define_insn "spe_evsel_fs" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r") + (unspec:V2SF [(match_operand:V2SF 1 "gpc_reg_operand" "r") + (match_operand:V2SF 2 "gpc_reg_operand" "r") + (match_operand:CC 3 "cc_reg_operand" "y")] 725))] + "TARGET_SPE" + "evsel %0,%1,%2,%3" + [(set_attr "type" "veccmp") + (set_attr "length" "4")]) + +(define_insn "spe_evslwi" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:QI 2 "immediate_operand" "i")] + 523))] + "TARGET_SPE" + "evslwi %0,%1,%2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "spe_evslw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 524))] + "TARGET_SPE" + "evslw %0,%1,%2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "spe_evsrwis" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:QI 2 "immediate_operand" "i")] + 525))] + "TARGET_SPE" + "evsrwis %0,%1,%2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "spe_evsrwiu" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:QI 2 "immediate_operand" "i")] + 526))] + "TARGET_SPE" + "evsrwiu %0,%1,%2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "spe_evsrws" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 527))] + "TARGET_SPE" + "evsrws %0,%1,%2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "spe_evsrwu" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 528))] + "TARGET_SPE" + "evsrwu %0,%1,%2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +;; vector xors + +(define_insn "xorv2si3" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (xor:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")))] + "TARGET_SPE" + "evxor %0,%1,%2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "xorv4hi3" + [(set (match_operand:V4HI 0 "gpc_reg_operand" "=r") + (xor:V4HI (match_operand:V4HI 1 "gpc_reg_operand" "r") + (match_operand:V4HI 2 "gpc_reg_operand" "r")))] + "TARGET_SPE" + "evxor %0,%1,%2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "xorv1di3" + [(set (match_operand:V1DI 0 "gpc_reg_operand" "=r") + (xor:V1DI (match_operand:V1DI 1 "gpc_reg_operand" "r") + (match_operand:V1DI 2 "gpc_reg_operand" "r")))] + "TARGET_SPE" + "evxor %0,%1,%2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +;; end of vector xors + +(define_insn "spe_evfsabs" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r") + (abs:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "r")))] + "TARGET_SPE" + "evfsabs %0,%1" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "spe_evfsadd" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r") + (plus:V2SF (match_operand:V2SF 1 "gpc_reg_operand" 
"r") + (match_operand:V2SF 2 "gpc_reg_operand" "r"))) + (clobber (reg:SI SPEFSCR_REGNO))] + "TARGET_SPE" + "evfsadd %0,%1,%2" + [(set_attr "type" "vecfloat") + (set_attr "length" "4")]) + +(define_insn "spe_evfscfsf" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r") + (unspec:V2SF [(match_operand:V2SF 1 "gpc_reg_operand" "r")] 529))] + "TARGET_SPE" + "evfscfsf %0,%1" + [(set_attr "type" "vecfloat") + (set_attr "length" "4")]) + +(define_insn "spe_evfscfsi" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r") + (float:V2SF (match_operand:V2SI 1 "gpc_reg_operand" "r")))] + "TARGET_SPE" + "evfscfsi %0,%1" + [(set_attr "type" "vecfloat") + (set_attr "length" "4")]) + +(define_insn "spe_evfscfuf" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r") + (unspec:V2SF [(match_operand:V2SF 1 "gpc_reg_operand" "r")] 530))] + "TARGET_SPE" + "evfscfuf %0,%1" + [(set_attr "type" "vecfloat") + (set_attr "length" "4")]) + +(define_insn "spe_evfscfui" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r") + (unspec:V2SF [(match_operand:V2SI 1 "gpc_reg_operand" "r")] 701))] + "TARGET_SPE" + "evfscfui %0,%1" + [(set_attr "type" "vecfloat") + (set_attr "length" "4")]) + +(define_insn "spe_evfsctsf" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r") + (unspec:V2SF [(match_operand:V2SF 1 "gpc_reg_operand" "r")] 531))] + "TARGET_SPE" + "evfsctsf %0,%1" + [(set_attr "type" "vecfloat") + (set_attr "length" "4")]) + +(define_insn "spe_evfsctsi" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SF 1 "gpc_reg_operand" "r")] 532))] + "TARGET_SPE" + "evfsctsi %0,%1" + [(set_attr "type" "vecfloat") + (set_attr "length" "4")]) + +(define_insn "spe_evfsctsiz" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SF 1 "gpc_reg_operand" "r")] 533))] + "TARGET_SPE" + "evfsctsiz %0,%1" + [(set_attr "type" "vecfloat") + (set_attr "length" "4")]) + +(define_insn "spe_evfsctuf" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r") + (unspec:V2SF [(match_operand:V2SF 1 "gpc_reg_operand" "r")] 534))] + "TARGET_SPE" + "evfsctuf %0,%1" + [(set_attr "type" "vecfloat") + (set_attr "length" "4")]) + +(define_insn "spe_evfsctui" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SF 1 "gpc_reg_operand" "r")] 535))] + "TARGET_SPE" + "evfsctui %0,%1" + [(set_attr "type" "vecfloat") + (set_attr "length" "4")]) + +(define_insn "spe_evfsctuiz" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SF 1 "gpc_reg_operand" "r")] 536))] + "TARGET_SPE" + "evfsctuiz %0,%1" + [(set_attr "type" "vecfloat") + (set_attr "length" "4")]) + +(define_insn "spe_evfsdiv" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r") + (div:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "r") + (match_operand:V2SF 2 "gpc_reg_operand" "r"))) + (clobber (reg:SI SPEFSCR_REGNO))] + "TARGET_SPE" + "evfsdiv %0,%1,%2" + [(set_attr "type" "vecfdiv") + (set_attr "length" "4")]) + +(define_insn "spe_evfsmul" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r") + (mult:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "r") + (match_operand:V2SF 2 "gpc_reg_operand" "r"))) + (clobber (reg:SI SPEFSCR_REGNO))] + "TARGET_SPE" + "evfsmul %0,%1,%2" + [(set_attr "type" "vecfloat") + (set_attr "length" "4")]) + +(define_insn "spe_evfsnabs" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r") + (unspec:V2SF [(match_operand:V2SF 1 "gpc_reg_operand" "r")] 537))] + "TARGET_SPE" + "evfsnabs %0,%1" + [(set_attr "type" "vecsimple") + (set_attr "length" 
"4")]) + +(define_insn "spe_evfsneg" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r") + (neg:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "r")))] + "TARGET_SPE" + "evfsneg %0,%1" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "spe_evfssub" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r") + (minus:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "r") + (match_operand:V2SF 2 "gpc_reg_operand" "r"))) + (clobber (reg:SI SPEFSCR_REGNO))] + "TARGET_SPE" + "evfssub %0,%1,%2" + [(set_attr "type" "vecfloat") + (set_attr "length" "4")]) + +;; SPE SIMD load instructions. + +;; Only the hardware engineer who designed the SPE understands the +;; plethora of load and store instructions ;-). We have no way of +;; differentiating between them with RTL so use an unspec of const_int 0 +;; to avoid identical RTL. + +(define_insn "spe_evldd" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand:QI 2 "immediate_operand" "i")))) + (unspec [(const_int 0)] 544)] + "TARGET_SPE && INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 31" + "evldd %0,%2*8(%1)" + [(set_attr "type" "vecload") + (set_attr "length" "4")]) + +(define_insn "spe_evlddx" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand:SI 2 "gpc_reg_operand" "r")))) + (unspec [(const_int 0)] 545)] + "TARGET_SPE" + "evlddx %0,%1,%2" + [(set_attr "type" "vecload") + (set_attr "length" "4")]) + +(define_insn "spe_evldh" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand:QI 2 "immediate_operand" "i")))) + (unspec [(const_int 0)] 546)] + "TARGET_SPE && INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 31" + "evldh %0,%2*8(%1)" + [(set_attr "type" "vecload") + (set_attr "length" "4")]) + +(define_insn "spe_evldhx" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand:SI 2 "gpc_reg_operand" "r")))) + (unspec [(const_int 0)] 547)] + "TARGET_SPE" + "evldhx %0,%1,%2" + [(set_attr "type" "vecload") + (set_attr "length" "4")]) + +(define_insn "spe_evldw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand:QI 2 "immediate_operand" "i")))) + (unspec [(const_int 0)] 548)] + "TARGET_SPE && INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 31" + "evldw %0,%2*8(%1)" + [(set_attr "type" "vecload") + (set_attr "length" "4")]) + +(define_insn "spe_evldwx" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand:SI 2 "gpc_reg_operand" "r")))) + (unspec [(const_int 0)] 549)] + "TARGET_SPE" + "evldwx %0,%1,%2" + [(set_attr "type" "vecload") + (set_attr "length" "4")]) + +(define_insn "spe_evlwhe" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand:QI 2 "immediate_operand" "i")))) + (unspec [(const_int 0)] 550)] + "TARGET_SPE && INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 31" + "evlwhe %0,%2*4(%1)" + [(set_attr "type" "vecload") + (set_attr "length" "4")]) + +(define_insn "spe_evlwhex" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand:SI 2 "gpc_reg_operand" "r")))) + (unspec 
[(const_int 0)] 551)] + "TARGET_SPE" + "evlwhex %0,%1,%2" + [(set_attr "type" "vecload") + (set_attr "length" "4")]) + +(define_insn "spe_evlwhos" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand:QI 2 "immediate_operand" "i")))) + (unspec [(const_int 0)] 552)] + "TARGET_SPE && INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 31" + "evlwhos %0,%2*4(%1)" + [(set_attr "type" "vecload") + (set_attr "length" "4")]) + +(define_insn "spe_evlwhosx" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand:SI 2 "gpc_reg_operand" "r")))) + (unspec [(const_int 0)] 553)] + "TARGET_SPE" + "evlwhosx %0,%1,%2" + [(set_attr "type" "vecload") + (set_attr "length" "4")]) + +(define_insn "spe_evlwhou" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand:QI 2 "immediate_operand" "i")))) + (unspec [(const_int 0)] 554)] + "TARGET_SPE && INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 31" + "evlwhou %0,%2*4(%1)" + [(set_attr "type" "vecload") + (set_attr "length" "4")]) + +(define_insn "spe_evlwhoux" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand:SI 2 "gpc_reg_operand" "r")))) + (unspec [(const_int 0)] 555)] + "TARGET_SPE" + "evlwhoux %0,%1,%2" + [(set_attr "type" "vecload") + (set_attr "length" "4")]) + +(define_insn "spe_brinc" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (unspec:SI [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "gpc_reg_operand" "r")] 556))] + "TARGET_SPE" + "brinc %0,%1,%2" + [(set_attr "type" "brinc") + (set_attr "length" "4")]) + +(define_insn "spe_evmhegsmfaa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 557)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhegsmfaa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhegsmfan" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 558)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhegsmfan %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhegsmiaa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 559)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhegsmiaa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhegsmian" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 560)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhegsmian %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhegumiaa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 
"gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 561)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhegumiaa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhegumian" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 562)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhegumian %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhesmfaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 563)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhesmfaaw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhesmfanw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 564)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhesmfanw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhesmfa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 565)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhesmfa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhesmf" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 566))] + "TARGET_SPE" + "evmhesmf %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhesmiaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 567)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhesmiaaw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhesmianw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 568)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhesmianw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhesmia" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 569)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhesmia %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhesmi" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 570))] + "TARGET_SPE" + "evmhesmi %0,%1,%2" + [(set_attr "type" 
"veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhessfaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 571)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhessfaaw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhessfanw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 572)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhessfanw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhessfa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 573)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhessfa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhessf" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 574)) + (clobber (reg:SI SPEFSCR_REGNO))] + "TARGET_SPE" + "evmhessf %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhessiaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 575)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhessiaaw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhessianw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 576)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhessianw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmheumiaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 577)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmheumiaaw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmheumianw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 578)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmheumianw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmheumia" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 579)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI 
[(const_int 0)] 0))] + "TARGET_SPE" + "evmheumia %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmheumi" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 580))] + "TARGET_SPE" + "evmheumi %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmheusiaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 581)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmheusiaaw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmheusianw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 582)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmheusianw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhogsmfaa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 583)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhogsmfaa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhogsmfan" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 584)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhogsmfan %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhogsmiaa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 585)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhogsmiaa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhogsmian" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 586)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhogsmian %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhogumiaa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 587)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhogumiaa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhogumian" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 588)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI 
[(const_int 0)] 0))] + "TARGET_SPE" + "evmhogumian %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhosmfaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 589)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhosmfaaw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhosmfanw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 590)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhosmfanw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhosmfa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 591))] + "TARGET_SPE" + "evmhosmfa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhosmf" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 592)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhosmf %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhosmiaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 593)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhosmiaaw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhosmianw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 594)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhosmianw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhosmia" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 595)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhosmia %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhosmi" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 596))] + "TARGET_SPE" + "evmhosmi %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhossfaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 597)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhossfaaw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhossfanw" + [(set (match_operand:V2SI 0 
"gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 598)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhossfanw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhossfa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 599)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhossfa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhossf" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 600)) + (clobber (reg:SI SPEFSCR_REGNO))] + "TARGET_SPE" + "evmhossf %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhossiaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 601)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhossiaaw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhossianw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 602)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhossianw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhoumiaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 603)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhoumiaaw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhoumianw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 604)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhoumianw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhoumia" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 605)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhoumia %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhoumi" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 606))] + "TARGET_SPE" + "evmhoumi %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhousiaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + 
(unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 607)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhousiaaw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmhousianw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 608)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmhousianw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmmlssfa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 609))] + "TARGET_SPE" + "evmmlssfa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmmlssf" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 610))] + "TARGET_SPE" + "evmmlssf %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhsmfa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 611)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhsmfa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhsmf" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 612))] + "TARGET_SPE" + "evmwhsmf %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhsmia" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 613)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhsmia %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhsmi" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 614))] + "TARGET_SPE" + "evmwhsmi %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhssfa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 615)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhssfa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhusian" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 626))] + "TARGET_SPE" + "evmwhusian %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhssf" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 
"gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 628)) + (clobber (reg:SI SPEFSCR_REGNO))] + "TARGET_SPE" + "evmwhssf %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhumia" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 629)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhumia %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhumi" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 630))] + "TARGET_SPE" + "evmwhumi %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwlsmiaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 635)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwlsmiaaw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwlsmianw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 636)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwlsmianw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwlssiaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 641)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwlssiaaw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwlssianw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 642)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwlssianw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwlumiaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 643)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwlumiaaw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwlumianw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 644)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwlumianw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwlumia" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 645)) + (set (reg:V2SI 
SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwlumia %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwlumi" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 646))] + "TARGET_SPE" + "evmwlumi %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwlusiaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 647)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwlusiaaw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwlusianw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 648)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwlusianw %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwsmfaa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 649)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwsmfaa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwsmfan" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 650)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwsmfan %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwsmfa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 651)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwsmfa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwsmf" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 652))] + "TARGET_SPE" + "evmwsmf %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwsmiaa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 653)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwsmiaa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwsmian" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 654)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwsmian %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) 
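These multiply patterns wrap the operation in an unspec with a literal code because the saturating/fractional semantics have no direct RTL representation; the "a"/"aa"/"an" forms additionally set SPE_ACC_REGNO (and the saturating forms clobber SPEFSCR_REGNO) so the accumulator and status side effects stay visible to the optimizers. In practice they are reached through the SPE built-ins rather than from open-coded RTL. A minimal sketch in C, assuming the __ev64_opaque__ type and __builtin_spe_* names conventionally exposed when SPE is enabled (exact names and signatures should be checked against powerpcspe-builtin.def):

/* Sketch only, not part of this patch: reaching the multiply and
   multiply-accumulate patterns above from C.  __ev64_opaque__ is the
   opaque 64-bit vector type provided under -mspe; the builtin names
   follow the __builtin_spe_<insn> convention and are assumptions.  */
__ev64_opaque__
mul_then_accumulate (__ev64_opaque__ a, __ev64_opaque__ b)
{
  /* Plain multiply: expands through "spe_evmwsmi" above, with no
     accumulator or SPEFSCR side effect.  */
  __ev64_opaque__ prod = __builtin_spe_evmwsmi (a, b);

  /* Accumulating multiply: the define_insn also sets SPE_ACC_REGNO,
     which is how the ACC update is kept visible to the optimizers.  */
  return __builtin_spe_evmwsmiaa (prod, b);
}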
+ +(define_insn "spe_evmwsmia" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 655)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwsmia %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwsmi" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 656))] + "TARGET_SPE" + "evmwsmi %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwssfaa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 657)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwssfaa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwssfan" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 658)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwssfan %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwssfa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 659)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwssfa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwssf" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 660)) + (clobber (reg:SI SPEFSCR_REGNO))] + "TARGET_SPE" + "evmwssf %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwumiaa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 661)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwumiaa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwumian" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 662)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwumian %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwumia" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 663)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwumia %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwumi" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + 
(match_operand:V2SI 2 "gpc_reg_operand" "r")] 664))] + "TARGET_SPE" + "evmwumi %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "addv2si3" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (plus:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")))] + "TARGET_SPE" + "evaddw %0,%1,%2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "spe_evaddusiaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 673)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evaddusiaaw %0,%1" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evaddumiaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 674)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evaddumiaaw %0,%1" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evaddssiaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 675)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evaddssiaaw %0,%1" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evaddsmiaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 676)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evaddsmiaaw %0,%1" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evaddiw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:QI 2 "immediate_operand" "i")] 677))] + "TARGET_SPE" + "evaddiw %0,%1,%2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn "spe_evsubifw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:QI 2 "immediate_operand" "i")] 678))] + "TARGET_SPE" + "evsubifw %0,%2,%1" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "subv2si3" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (minus:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")))] + "TARGET_SPE" + "evsubfw %0,%2,%1" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evsubfusiaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 679)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evsubfusiaaw %0,%1" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evsubfumiaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 680)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evsubfumiaaw %0,%1" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evsubfssiaaw" + 
[(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 681)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evsubfssiaaw %0,%1" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evsubfsmiaaw" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (reg:V2SI SPE_ACC_REGNO)] 682)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evsubfsmiaaw %0,%1" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmra" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (match_operand:V2SI 1 "gpc_reg_operand" "r")) + (set (reg:V2SI SPE_ACC_REGNO) + (unspec:V2SI [(match_dup 1)] 726))] + "TARGET_SPE" + "evmra %0,%1" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "divv2si3" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (div:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r"))) + (clobber (reg:SI SPEFSCR_REGNO))] + "TARGET_SPE" + "evdivws %0,%1,%2" + [(set_attr "type" "vecdiv") + (set_attr "length" "4")]) + +(define_insn "spe_evdivwu" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (udiv:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r"))) + (clobber (reg:SI SPEFSCR_REGNO))] + "TARGET_SPE" + "evdivwu %0,%1,%2" + [(set_attr "type" "vecdiv") + (set_attr "length" "4")]) + +(define_insn "spe_evsplatfi" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:QI 1 "immediate_operand" "i")] 684))] + "TARGET_SPE" + "evsplatfi %0,%1" + [(set_attr "type" "vecperm") + (set_attr "length" "4")]) + +(define_insn "spe_evsplati" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:QI 1 "immediate_operand" "i")] 685))] + "TARGET_SPE" + "evsplati %0,%1" + [(set_attr "type" "vecperm") + (set_attr "length" "4")]) + +(define_insn "spe_evstdd" + [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b") + (match_operand:QI 1 "immediate_operand" "i"))) + (match_operand:V2SI 2 "gpc_reg_operand" "r")) + (unspec [(const_int 0)] 686)] + "TARGET_SPE && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 31" + "evstdd %2,%1*8(%0)" + [(set_attr "type" "vecstore") + (set_attr "length" "4")]) + +(define_insn "spe_evstddx" + [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b") + (match_operand:SI 1 "gpc_reg_operand" "r"))) + (match_operand:V2SI 2 "gpc_reg_operand" "r")) + (unspec [(const_int 0)] 687)] + "TARGET_SPE" + "evstddx %2,%0,%1" + [(set_attr "type" "vecstore") + (set_attr "length" "4")]) + +(define_insn "spe_evstdh" + [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b") + (match_operand:QI 1 "immediate_operand" "i"))) + (match_operand:V2SI 2 "gpc_reg_operand" "r")) + (unspec [(const_int 0)] 688)] + "TARGET_SPE && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 31" + "evstdh %2,%1*8(%0)" + [(set_attr "type" "vecstore") + (set_attr "length" "4")]) + +(define_insn "spe_evstdhx" + [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b") + (match_operand:SI 1 "gpc_reg_operand" "r"))) + (match_operand:V2SI 2 "gpc_reg_operand" "r")) + (unspec [(const_int 0)] 689)] + "TARGET_SPE" + "evstdhx %2,%0,%1" + [(set_attr "type" "vecstore") + (set_attr "length" "4")]) + 
+(define_insn "spe_evstdw" + [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b") + (match_operand:QI 1 "immediate_operand" "i"))) + (match_operand:V2SI 2 "gpc_reg_operand" "r")) + (unspec [(const_int 0)] 690)] + "TARGET_SPE && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 31" + "evstdw %2,%1*8(%0)" + [(set_attr "type" "vecstore") + (set_attr "length" "4")]) + +(define_insn "spe_evstdwx" + [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b") + (match_operand:SI 1 "gpc_reg_operand" "r"))) + (match_operand:V2SI 2 "gpc_reg_operand" "r")) + (unspec [(const_int 0)] 691)] + "TARGET_SPE" + "evstdwx %2,%0,%1" + [(set_attr "type" "vecstore") + (set_attr "length" "4")]) + +(define_insn "spe_evstwhe" + [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b") + (match_operand:QI 1 "immediate_operand" "i"))) + (match_operand:V2SI 2 "gpc_reg_operand" "r")) + (unspec [(const_int 0)] 692)] + "TARGET_SPE && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 31" + "evstwhe %2,%1*4(%0)" + [(set_attr "type" "vecstore") + (set_attr "length" "4")]) + +(define_insn "spe_evstwhex" + [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b") + (match_operand:SI 1 "gpc_reg_operand" "r"))) + (match_operand:V2SI 2 "gpc_reg_operand" "r")) + (unspec [(const_int 0)] 693)] + "TARGET_SPE" + "evstwhex %2,%0,%1" + [(set_attr "type" "vecstore") + (set_attr "length" "4")]) + +(define_insn "spe_evstwho" + [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b") + (match_operand:QI 1 "immediate_operand" "i"))) + (match_operand:V2SI 2 "gpc_reg_operand" "r")) + (unspec [(const_int 0)] 694)] + "TARGET_SPE && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 31" + "evstwho %2,%1*4(%0)" + [(set_attr "type" "vecstore") + (set_attr "length" "4")]) + +(define_insn "spe_evstwhox" + [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b") + (match_operand:SI 1 "gpc_reg_operand" "r"))) + (match_operand:V2SI 2 "gpc_reg_operand" "r")) + (unspec [(const_int 0)] 695)] + "TARGET_SPE" + "evstwhox %2,%0,%1" + [(set_attr "type" "vecstore") + (set_attr "length" "4")]) + +(define_insn "spe_evstwwe" + [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b") + (match_operand:QI 1 "immediate_operand" "i"))) + (match_operand:V2SI 2 "gpc_reg_operand" "r")) + (unspec [(const_int 0)] 696)] + "TARGET_SPE && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 31" + "evstwwe %2,%1*4(%0)" + [(set_attr "type" "vecstore") + (set_attr "length" "4")]) + +(define_insn "spe_evstwwex" + [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b") + (match_operand:SI 1 "gpc_reg_operand" "r"))) + (match_operand:V2SI 2 "gpc_reg_operand" "r")) + (unspec [(const_int 0)] 697)] + "TARGET_SPE" + "evstwwex %2,%0,%1" + [(set_attr "type" "vecstore") + (set_attr "length" "4")]) + +(define_insn "spe_evstwwo" + [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b") + (match_operand:QI 1 "immediate_operand" "i"))) + (match_operand:V2SI 2 "gpc_reg_operand" "r")) + (unspec [(const_int 0)] 698)] + "TARGET_SPE && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 31" + "evstwwo %2,%1*4(%0)" + [(set_attr "type" "vecstore") + (set_attr "length" "4")]) + +(define_insn "spe_evstwwox" + [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b") + (match_operand:SI 1 "gpc_reg_operand" "r"))) + (match_operand:V2SI 2 "gpc_reg_operand" "r")) + (unspec [(const_int 0)] 699)] + "TARGET_SPE" + "evstwwox %2,%0,%1" + [(set_attr "type" "vecstore") + (set_attr "length" "4")]) 
+ +;; Double-precision floating point instructions. + +;; FIXME: Add o=r option. +(define_insn "*frob_<SPE64:mode>_<DITI:mode>" + [(set (match_operand:SPE64 0 "nonimmediate_operand" "=r,r") + (subreg:SPE64 (match_operand:DITI 1 "input_operand" "r,m") 0))] + "(TARGET_E500_DOUBLE && <SPE64:MODE>mode == DFmode) + || (TARGET_SPE && <SPE64:MODE>mode != DFmode)" +{ + switch (which_alternative) + { + default: + gcc_unreachable (); + case 0: + if (WORDS_BIG_ENDIAN) + return "evmergelo %0,%1,%L1"; + else + return "evmergelo %0,%L1,%1"; + case 1: + return "evldd%X1 %0,%y1"; + } +}) + +(define_insn "*frob_<SPE64:mode>_ti_8" + [(set (match_operand:SPE64 0 "nonimmediate_operand" "=r") + (subreg:SPE64 (match_operand:TI 1 "input_operand" "r") 8))] + "(TARGET_E500_DOUBLE && <SPE64:MODE>mode == DFmode) + || (TARGET_SPE && <SPE64:MODE>mode != DFmode)" +{ + if (WORDS_BIG_ENDIAN) + return "evmergelo %0,%Y1,%Z1"; + else + return "evmergelo %0,%Z1,%Y1"; +}) + +(define_insn "*frob_tf_ti" + [(set (match_operand:TF 0 "gpc_reg_operand" "=r") + (subreg:TF (match_operand:TI 1 "gpc_reg_operand" "r") 0))] + "TARGET_E500_DOUBLE" +{ + if (WORDS_BIG_ENDIAN) + return "evmergelo %0,%1,%L1\;evmergelo %L0,%Y1,%Z1"; + else + return "evmergelo %L0,%Z1,%Y1\;evmergelo %0,%L1,%1"; +} + [(set_attr "length" "8")]) + +(define_insn "*frob_<mode>_di_2" + [(set (subreg:DI (match_operand:SPE64TF 0 "nonimmediate_operand" "+&r,r") 0) + (match_operand:DI 1 "input_operand" "r,m"))] + "(TARGET_E500_DOUBLE && (<MODE>mode == DFmode || <MODE>mode == TFmode)) + || (TARGET_SPE && <MODE>mode != DFmode && <MODE>mode != TFmode)" +{ + switch (which_alternative) + { + default: + gcc_unreachable (); + case 0: + if (WORDS_BIG_ENDIAN) + return "evmergelo %0,%1,%L1"; + else + return "evmergelo %0,%L1,%1"; + case 1: + return "evldd%X1 %0,%y1"; + } +}) + +(define_insn "*frob_tf_di_8_2" + [(set (subreg:DI (match_operand:TF 0 "nonimmediate_operand" "+&r,r") 8) + (match_operand:DI 1 "input_operand" "r,m"))] + "TARGET_E500_DOUBLE" +{ + switch (which_alternative) + { + default: + gcc_unreachable (); + case 0: + if (WORDS_BIG_ENDIAN) + return "evmergelo %L0,%1,%L1"; + else + return "evmergelo %L0,%L1,%1"; + case 1: + return "evldd%X1 %L0,%y1"; + } +}) + +(define_insn "*frob_di_<mode>" + [(set (match_operand:DI 0 "nonimmediate_operand" "=&r") + (subreg:DI (match_operand:SPE64TF 1 "input_operand" "r") 0))] + "(TARGET_E500_DOUBLE && (<MODE>mode == DFmode || <MODE>mode == TFmode)) + || (TARGET_SPE && <MODE>mode != DFmode && <MODE>mode != TFmode)" +{ + if (WORDS_BIG_ENDIAN) + return "evmergehi %0,%1,%1\;mr %L0,%1"; + else + return "evmergehi %L0,%1,%1\;mr %0,%1"; +} + [(set_attr "length" "8")]) + +(define_insn "*frob_ti_tf" + [(set (match_operand:TI 0 "nonimmediate_operand" "=&r") + (subreg:TI (match_operand:TF 1 "input_operand" "r") 0))] + "TARGET_E500_DOUBLE" +{ + if (WORDS_BIG_ENDIAN) + return "evmergehi %0,%1,%1\;mr %L0,%1\;evmergehi %Y0,%L1,%L1\;mr %Z0,%L1"; + else + return "evmergehi %Z0,%L1,%L1\;mr %Y0,%L1\;evmergehi %L0,%1,%1\;mr %0,%1"; +} + [(set_attr "length" "16")]) + +(define_insn "*frob_<DITI:mode>_<SPE64:mode>_2" + [(set (subreg:SPE64 (match_operand:DITI 0 "register_operand" "+&r,r") 0) + (match_operand:SPE64 1 "input_operand" "r,m"))] + "(TARGET_E500_DOUBLE && <SPE64:MODE>mode == DFmode) + || (TARGET_SPE && <SPE64:MODE>mode != DFmode)" + "* +{ + switch (which_alternative) + { + default: + gcc_unreachable (); + case 0: + if (WORDS_BIG_ENDIAN) + return \"evmergehi %0,%1,%1\;mr %L0,%1\"; + else + return \"evmergehi %L0,%1,%1\;mr %0,%1\"; + case 1: + /* If 
the address is not offsettable we need to load the whole + doubleword into a 64-bit register and then copy the high word + to form the correct output layout. */ + if (!offsettable_nonstrict_memref_p (operands[1])) + { + if (WORDS_BIG_ENDIAN) + return \"evldd%X1 %L0,%y1\;evmergehi %0,%L0,%L0\"; + else + return \"evldd%X1 %0,%y1\;evmergehi %L0,%0,%0\"; + } + /* If the low-address word is used in the address, we must load + it last. Otherwise, load it first. Note that we cannot have + auto-increment in that case since the address register is + known to be dead. */ + if (refers_to_regno_p (REGNO (operands[0]), operands[1])) + { + if (WORDS_BIG_ENDIAN) + return \"lwz %L0,%L1\;lwz %0,%1\"; + else + return \"lwz %0,%1\;lwz %L0,%L1\"; + } + else + { + if (WORDS_BIG_ENDIAN) + return \"lwz%U1%X1 %0,%1\;lwz %L0,%L1\"; + else + return \"lwz%U1%X1 %L0,%L1\;lwz %0,%1\"; + } + } +}" + [(set_attr "length" "8,8")]) + +; As the above, but TImode at offset 8. +(define_insn "*frob_ti_<mode>_8_2" + [(set (subreg:SPE64 (match_operand:TI 0 "register_operand" "+&r,r") 8) + (match_operand:SPE64 1 "input_operand" "r,m"))] + "(TARGET_E500_DOUBLE && <MODE>mode == DFmode) + || (TARGET_SPE && <MODE>mode != DFmode)" + "* +{ + switch (which_alternative) + { + default: + gcc_unreachable (); + case 0: + if (WORDS_BIG_ENDIAN) + return \"evmergehi %Y0,%1,%1\;mr %Z0,%1\"; + else + return \"evmergehi %Z0,%1,%1\;mr %Y0,%1\"; + case 1: + if (!offsettable_nonstrict_memref_p (operands[1])) + { + if (WORDS_BIG_ENDIAN) + return \"evldd%X1 %Z0,%y1\;evmergehi %Y0,%Z0,%Z0\"; + else + return \"evldd%X1 %Y0,%y1\;evmergehi %Z0,%Y0,%Y0\"; + } + if (refers_to_regno_p (REGNO (operands[0]), operands[1])) + { + if (WORDS_BIG_ENDIAN) + return \"lwz %Z0,%L1\;lwz %Y0,%1\"; + else + return \"lwz %Y0,%1\;lwz %Z0,%L1\"; + } + else + { + if (WORDS_BIG_ENDIAN) + return \"lwz%U1%X1 %Y0,%1\;lwz %Z0,%L1\"; + else + return \"lwz%U1%X1 %Z0,%L1\;lwz %Y0,%1\"; + } + } +}" + [(set_attr "length" "8,8")]) + +(define_insn "*frob_ti_tf_2" + [(set (subreg:TF (match_operand:TI 0 "gpc_reg_operand" "=&r") 0) + (match_operand:TF 1 "input_operand" "r"))] + "TARGET_E500_DOUBLE" +{ + if (WORDS_BIG_ENDIAN) + return "evmergehi %0,%1,%1\;mr %L0,%1\;evmergehi %Y0,%L1,%L1\;mr %Z0,%L1"; + else + return "evmergehi %Z0,%L1,%L1\;mr %Y0,%L1\;evmergehi %L0,%1,%1\;mr %0,%1"; +} + [(set_attr "length" "16")]) + +(define_insn "mov_si<mode>_e500_subreg0_be" + [(set (subreg:SI (match_operand:SPE64TF 0 "register_operand" "+r,&r") 0) + (match_operand:SI 1 "input_operand" "r,m"))] + "WORDS_BIG_ENDIAN + && ((TARGET_E500_DOUBLE && (<MODE>mode == DFmode || <MODE>mode == TFmode)) + || (TARGET_SPE && <MODE>mode != DFmode && <MODE>mode != TFmode))" + "@ + evmergelo %0,%1,%0 + evmergelohi %0,%0,%0\;lwz%U1%X1 %0,%1\;evmergelohi %0,%0,%0" + [(set_attr "length" "4,12")]) + +(define_insn "*mov_si<mode>_e500_subreg0_le" + [(set (subreg:SI (match_operand:SPE64TF 0 "register_operand" "+r,r") 0) + (match_operand:SI 1 "input_operand" "r,m"))] + "!WORDS_BIG_ENDIAN + && ((TARGET_E500_DOUBLE && (<MODE>mode == DFmode || <MODE>mode == TFmode)) + || (TARGET_SPE && <MODE>mode != DFmode && <MODE>mode != TFmode))" + "@ + mr %0,%1 + lwz%U1%X1 %0,%1") + +(define_insn_and_split "*mov_si<mode>_e500_subreg0_elf_low_be" + [(set (subreg:SI (match_operand:SPE64TF 0 "register_operand" "+r") 0) + (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand 2 "" "")))] + "WORDS_BIG_ENDIAN + && (((TARGET_E500_DOUBLE && (<MODE>mode == DFmode || <MODE>mode == TFmode)) + || (TARGET_SPE && <MODE>mode != DFmode && 
<MODE>mode != TFmode)) + && TARGET_ELF && !TARGET_64BIT && can_create_pseudo_p ())" + "#" + "&& 1" + [(pc)] +{ + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_elf_low (tmp, operands[1], operands[2])); + emit_insn (gen_mov_si<mode>_e500_subreg0_be (operands[0], tmp)); + DONE; +} + [(set_attr "length" "8")]) + +(define_insn "*mov_si<mode>_e500_subreg0_elf_low_le" + [(set (subreg:SI (match_operand:SPE64TF 0 "register_operand" "+r") 0) + (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand 2 "" "")))] + "!WORDS_BIG_ENDIAN + && (((TARGET_E500_DOUBLE && (<MODE>mode == DFmode || <MODE>mode == TFmode)) + || (TARGET_SPE && <MODE>mode != DFmode && <MODE>mode != TFmode)) + && TARGET_ELF && !TARGET_64BIT)" + "addi %0,%1,%K2") + +;; ??? Could use evstwwe for memory stores in some cases, depending on +;; the offset. +(define_insn "*mov_si<mode>_e500_subreg0_2_be" + [(set (match_operand:SI 0 "rs6000_nonimmediate_operand" "=r,m") + (subreg:SI (match_operand:SPE64TF 1 "register_operand" "+r,&r") 0))] + "WORDS_BIG_ENDIAN + && ((TARGET_E500_DOUBLE && (<MODE>mode == DFmode || <MODE>mode == TFmode)) + || (TARGET_SPE && <MODE>mode != DFmode && <MODE>mode != TFmode))" + "@ + evmergelohi %0,%1,%1 + evmergelohi %1,%1,%1\;stw%U0%X0 %1,%0" + [(set_attr "length" "4,8")]) + +(define_insn "*mov_si<mode>_e500_subreg0_2_le" + [(set (match_operand:SI 0 "rs6000_nonimmediate_operand" "=r,m") + (subreg:SI (match_operand:SPE64TF 1 "register_operand" "r,r") 0))] + "!WORDS_BIG_ENDIAN + && ((TARGET_E500_DOUBLE && (<MODE>mode == DFmode || <MODE>mode == TFmode)) + || (TARGET_SPE && <MODE>mode != DFmode && <MODE>mode != TFmode))" + "@ + mr %0,%1 + stw%U0%X0 %1,%0") + +(define_insn "*mov_si<mode>_e500_subreg4_be" + [(set (subreg:SI (match_operand:SPE64TF 0 "register_operand" "+r,r") 4) + (match_operand:SI 1 "input_operand" "r,m"))] + "WORDS_BIG_ENDIAN + && ((TARGET_E500_DOUBLE && (<MODE>mode == DFmode || <MODE>mode == TFmode)) + || (TARGET_SPE && <MODE>mode != DFmode && <MODE>mode != TFmode))" + "@ + mr %0,%1 + lwz%U1%X1 %0,%1") + +(define_insn "mov_si<mode>_e500_subreg4_le" + [(set (subreg:SI (match_operand:SPE64TF 0 "register_operand" "+r,&r") 4) + (match_operand:SI 1 "input_operand" "r,m"))] + "!WORDS_BIG_ENDIAN + && ((TARGET_E500_DOUBLE && (<MODE>mode == DFmode || <MODE>mode == TFmode)) + || (TARGET_SPE && <MODE>mode != DFmode && <MODE>mode != TFmode))" + "@ + evmergelo %0,%1,%0 + evmergelohi %0,%0,%0\;lwz%U1%X1 %0,%1\;evmergelohi %0,%0,%0" + [(set_attr "length" "4,12")]) + +(define_insn "*mov_si<mode>_e500_subreg4_elf_low_be" + [(set (subreg:SI (match_operand:SPE64TF 0 "register_operand" "+r") 4) + (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand 2 "" "")))] + "WORDS_BIG_ENDIAN + && ((TARGET_E500_DOUBLE && (<MODE>mode == DFmode || <MODE>mode == TFmode)) + || (TARGET_SPE && <MODE>mode != DFmode && <MODE>mode != TFmode)) + && TARGET_ELF && !TARGET_64BIT" + "addi %0,%1,%K2") + +(define_insn_and_split "*mov_si<mode>_e500_subreg4_elf_low_le" + [(set (subreg:SI (match_operand:SPE64TF 0 "register_operand" "+r") 4) + (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b") + (match_operand 2 "" "")))] + "!WORDS_BIG_ENDIAN + && (((TARGET_E500_DOUBLE && (<MODE>mode == DFmode || <MODE>mode == TFmode)) + || (TARGET_SPE && <MODE>mode != DFmode && <MODE>mode != TFmode)) + && TARGET_ELF && !TARGET_64BIT && can_create_pseudo_p ())" + "#" + "&& 1" + [(pc)] +{ + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_elf_low (tmp, operands[1], operands[2])); + emit_insn (gen_mov_si<mode>_e500_subreg4_le 
(operands[0], tmp)); + DONE; +} + [(set_attr "length" "8")]) + +(define_insn "*mov_si<mode>_e500_subreg4_2_be" + [(set (match_operand:SI 0 "rs6000_nonimmediate_operand" "=r,m") + (subreg:SI (match_operand:SPE64TF 1 "register_operand" "r,r") 4))] + "WORDS_BIG_ENDIAN + && ((TARGET_E500_DOUBLE && (<MODE>mode == DFmode || <MODE>mode == TFmode)) + || (TARGET_SPE && <MODE>mode != DFmode && <MODE>mode != TFmode))" + "@ + mr %0,%1 + stw%U0%X0 %1,%0") + +(define_insn "*mov_si<mode>_e500_subreg4_2_le" + [(set (match_operand:SI 0 "rs6000_nonimmediate_operand" "=r,m") + (subreg:SI (match_operand:SPE64TF 1 "register_operand" "+r,&r") 4))] + "!WORDS_BIG_ENDIAN + && ((TARGET_E500_DOUBLE && (<MODE>mode == DFmode || <MODE>mode == TFmode)) + || (TARGET_SPE && <MODE>mode != DFmode && <MODE>mode != TFmode))" + "@ + evmergelohi %0,%1,%1 + evmergelohi %1,%1,%1\;stw%U0%X0 %1,%0" + [(set_attr "length" "4,8")]) + +(define_insn "*mov_sitf_e500_subreg8_be" + [(set (subreg:SI (match_operand:TF 0 "register_operand" "+r,&r") 8) + (match_operand:SI 1 "input_operand" "r,m"))] + "WORDS_BIG_ENDIAN && TARGET_E500_DOUBLE" + "@ + evmergelo %L0,%1,%L0 + evmergelohi %L0,%L0,%L0\;lwz%U1%X1 %L0,%1\;evmergelohi %L0,%L0,%L0" + [(set_attr "length" "4,12")]) + +(define_insn "*mov_sitf_e500_subreg8_le" + [(set (subreg:SI (match_operand:TF 0 "register_operand" "+r,r") 8) + (match_operand:SI 1 "input_operand" "r,m"))] + "!WORDS_BIG_ENDIAN && TARGET_E500_DOUBLE" + "@ + mr %L0,%1 + lwz%U1%X1 %L0,%1") + +(define_insn "*mov_sitf_e500_subreg8_2_be" + [(set (match_operand:SI 0 "rs6000_nonimmediate_operand" "=r,m") + (subreg:SI (match_operand:TF 1 "register_operand" "+r,&r") 8))] + "WORDS_BIG_ENDIAN && TARGET_E500_DOUBLE" + "@ + evmergelohi %0,%L1,%L1 + evmergelohi %L1,%L1,%L1\;stw%U0%X0 %L1,%0" + [(set_attr "length" "4,8")]) + +(define_insn "*mov_sitf_e500_subreg8_2_le" + [(set (match_operand:SI 0 "rs6000_nonimmediate_operand" "=r,m") + (subreg:SI (match_operand:TF 1 "register_operand" "r,r") 8))] + "!WORDS_BIG_ENDIAN && TARGET_E500_DOUBLE" + "@ + mr %0,%L1 + stw%U0%X0 %L1,%0") + +(define_insn "*mov_sitf_e500_subreg12_be" + [(set (subreg:SI (match_operand:TF 0 "register_operand" "+r,r") 12) + (match_operand:SI 1 "input_operand" "r,m"))] + "WORDS_BIG_ENDIAN && TARGET_E500_DOUBLE" + "@ + mr %L0,%1 + lwz%U1%X1 %L0,%1") + +(define_insn "*mov_sitf_e500_subreg12_le" + [(set (subreg:SI (match_operand:TF 0 "register_operand" "+r,&r") 12) + (match_operand:SI 1 "input_operand" "r,m"))] + "!WORDS_BIG_ENDIAN && TARGET_E500_DOUBLE" + "@ + evmergelo %L0,%1,%L0 + evmergelohi %L0,%L0,%L0\;lwz%U1%X1 %L0,%1\;evmergelohi %L0,%L0,%L0" + [(set_attr "length" "4,12")]) + +(define_insn "*mov_sitf_e500_subreg12_2_be" + [(set (match_operand:SI 0 "rs6000_nonimmediate_operand" "=r,m") + (subreg:SI (match_operand:TF 1 "register_operand" "r,r") 12))] + "WORDS_BIG_ENDIAN && TARGET_E500_DOUBLE" + "@ + mr %0,%L1 + stw%U0%X0 %L1,%0") + +(define_insn "*mov_sitf_e500_subreg12_2_le" + [(set (match_operand:SI 0 "rs6000_nonimmediate_operand" "=r,m") + (subreg:SI (match_operand:TF 1 "register_operand" "+r,&r") 12))] + "!WORDS_BIG_ENDIAN && TARGET_E500_DOUBLE" + "@ + evmergelohi %0,%L1,%L1 + evmergelohi %L1,%L1,%L1\;stw%U0%X0 %L1,%0" + [(set_attr "length" "4,8")]) + +;; FIXME: Allow r=CONST0. 
+(define_insn "*movdf_e500_double" + [(set (match_operand:DF 0 "rs6000_nonimmediate_operand" "=r,r,m") + (match_operand:DF 1 "input_operand" "r,m,r"))] + "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE + && (gpc_reg_operand (operands[0], DFmode) + || gpc_reg_operand (operands[1], DFmode))" + "* + { + switch (which_alternative) + { + case 0: + return \"evor %0,%1,%1\"; + case 1: + return \"evldd%X1 %0,%y1\"; + case 2: + return \"evstdd%X0 %1,%y0\"; + default: + gcc_unreachable (); + } + }" + [(set_attr "type" "*,vecload,vecstore") + (set_attr "length" "*,*,*")]) + +(define_insn "spe_truncdfsf2" + [(set (match_operand:SF 0 "gpc_reg_operand" "=r") + (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "r")))] + "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE" + "efscfd %0,%1") + +(define_insn "spe_absdf2" + [(set (match_operand:DF 0 "gpc_reg_operand" "=r") + (abs:DF (match_operand:DF 1 "gpc_reg_operand" "r")))] + "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE" + "efdabs %0,%1") + +(define_insn "spe_nabsdf2" + [(set (match_operand:DF 0 "gpc_reg_operand" "=r") + (neg:DF (abs:DF (match_operand:DF 1 "gpc_reg_operand" "r"))))] + "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE" + "efdnabs %0,%1") + +(define_insn "spe_negdf2" + [(set (match_operand:DF 0 "gpc_reg_operand" "=r") + (neg:DF (match_operand:DF 1 "gpc_reg_operand" "r")))] + "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE" + "efdneg %0,%1") + +(define_insn "spe_adddf3" + [(set (match_operand:DF 0 "gpc_reg_operand" "=r") + (plus:DF (match_operand:DF 1 "gpc_reg_operand" "r") + (match_operand:DF 2 "gpc_reg_operand" "r")))] + "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE" + "efdadd %0,%1,%2") + +(define_insn "spe_subdf3" + [(set (match_operand:DF 0 "gpc_reg_operand" "=r") + (minus:DF (match_operand:DF 1 "gpc_reg_operand" "r") + (match_operand:DF 2 "gpc_reg_operand" "r")))] + "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE" + "efdsub %0,%1,%2") + +(define_insn "spe_muldf3" + [(set (match_operand:DF 0 "gpc_reg_operand" "=r") + (mult:DF (match_operand:DF 1 "gpc_reg_operand" "r") + (match_operand:DF 2 "gpc_reg_operand" "r")))] + "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE" + "efdmul %0,%1,%2") + +(define_insn "spe_divdf3" + [(set (match_operand:DF 0 "gpc_reg_operand" "=r") + (div:DF (match_operand:DF 1 "gpc_reg_operand" "r") + (match_operand:DF 2 "gpc_reg_operand" "r")))] + "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE" + "efddiv %0,%1,%2") + +;; Double-precision floating point instructions for IBM long double. 
+ +(define_insn_and_split "spe_trunctfdf2_internal1" + [(set (match_operand:DF 0 "gpc_reg_operand" "=r,?r") + (float_truncate:DF (match_operand:TF 1 "gpc_reg_operand" "0,r")))] + "!TARGET_IEEEQUAD + && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128" + "@ + # + evor %0,%1,%1" + "&& reload_completed && REGNO (operands[0]) == REGNO (operands[1])" + [(const_int 0)] +{ + emit_note (NOTE_INSN_DELETED); + DONE; +}) + +(define_insn_and_split "spe_trunctfsf2" + [(set (match_operand:SF 0 "gpc_reg_operand" "=r") + (float_truncate:SF (match_operand:TF 1 "gpc_reg_operand" "r"))) + (clobber (match_scratch:DF 2 "=r"))] + "!TARGET_IEEEQUAD + && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128" + "#" + "&& reload_completed" + [(set (match_dup 2) + (float_truncate:DF (match_dup 1))) + (set (match_dup 0) + (float_truncate:SF (match_dup 2)))] + "") + +(define_insn "spe_extenddftf2" + [(set (match_operand:TF 0 "rs6000_nonimmediate_operand" "=r,?r,r,o") + (float_extend:TF (match_operand:DF 1 "input_operand" "0,r,m,r"))) + (clobber (match_scratch:DF 2 "=X,X,X,&r"))] + "!TARGET_IEEEQUAD + && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128" + "@ + evxor %L0,%L0,%L0 + evor %0,%1,%1\;evxor %L0,%L0,%L0 + evldd%X1 %0,%y1\;evxor %L0,%L0,%L0 + evstdd%X0 %1,%y0\;evxor %2,%2,%2\;evstdd %2,%Y0" + [(set_attr "length" "4,8,8,12")]) + +(define_expand "spe_fix_trunctfsi2" + [(parallel [(set (match_operand:SI 0 "gpc_reg_operand" "") + (fix:SI (match_operand:TF 1 "gpc_reg_operand" ""))) + (clobber (match_dup 2)) + (clobber (match_dup 3)) + (clobber (match_dup 4))])] + "!TARGET_IEEEQUAD + && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128" +{ + operands[2] = gen_reg_rtx (DFmode); + operands[3] = gen_reg_rtx (SImode); + operands[4] = gen_reg_rtx (SImode); +}) + +; Like fix_trunc_helper, add with rounding towards 0. +(define_insn "spe_fix_trunctfsi2_internal" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (fix:SI (match_operand:TF 1 "gpc_reg_operand" "r"))) + (clobber (match_operand:DF 2 "gpc_reg_operand" "=r")) + (clobber (match_operand:SI 3 "gpc_reg_operand" "=&r")) + (clobber (match_operand:SI 4 "gpc_reg_operand" "=&r"))] + "!TARGET_IEEEQUAD + && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128" + "mfspefscr %3\;rlwinm %4,%3,0,0,29\;ori %4,%4,1\;efdadd %2,%1,%L1\;mtspefscr %3\;efdctsiz %0, %2" + [(set_attr "length" "24")]) + +(define_insn "spe_negtf2_internal" + [(set (match_operand:TF 0 "gpc_reg_operand" "=r") + (neg:TF (match_operand:TF 1 "gpc_reg_operand" "r")))] + "!TARGET_IEEEQUAD + && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128" + "* +{ + if (REGNO (operands[0]) == REGNO (operands[1]) + 1) + return \"efdneg %L0,%L1\;efdneg %0,%1\"; + else + return \"efdneg %0,%1\;efdneg %L0,%L1\"; +}" + [(set_attr "length" "8")]) + +(define_expand "spe_abstf2_cmp" + [(set (match_operand:TF 0 "gpc_reg_operand" "=f") + (match_operand:TF 1 "gpc_reg_operand" "f")) + (set (match_dup 3) (match_dup 5)) + (set (match_dup 5) (abs:DF (match_dup 5))) + (set (match_dup 4) (unspec:CCFP [(compare:CCFP (match_dup 3) + (match_dup 5))] CMPDFEQ_GPR)) + (set (pc) (if_then_else (eq (match_dup 4) (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc))) + (set (match_dup 6) (neg:DF (match_dup 6)))] + "!TARGET_IEEEQUAD + && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128" + " +{ + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode); + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? 
GET_MODE_SIZE (DFmode) : 0; + operands[3] = gen_reg_rtx (DFmode); + operands[4] = gen_reg_rtx (CCFPmode); + operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word); + operands[6] = simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word); +}") + +(define_expand "spe_abstf2_tst" + [(set (match_operand:TF 0 "gpc_reg_operand" "=f") + (match_operand:TF 1 "gpc_reg_operand" "f")) + (set (match_dup 3) (match_dup 5)) + (set (match_dup 5) (abs:DF (match_dup 5))) + (set (match_dup 4) (unspec:CCFP [(compare:CCFP (match_dup 3) + (match_dup 5))] TSTDFEQ_GPR)) + (set (pc) (if_then_else (eq (match_dup 4) (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc))) + (set (match_dup 6) (neg:DF (match_dup 6)))] + "!TARGET_IEEEQUAD + && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128" + " +{ + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode); + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0; + operands[3] = gen_reg_rtx (DFmode); + operands[4] = gen_reg_rtx (CCFPmode); + operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word); + operands[6] = simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word); +}") + +;; Vector move instructions. + +(define_expand "movv2si" + [(set (match_operand:V2SI 0 "nonimmediate_operand" "") + (match_operand:V2SI 1 "any_operand" ""))] + "TARGET_SPE" + "{ rs6000_emit_move (operands[0], operands[1], V2SImode); DONE; }") + +(define_insn "*movv2si_internal" + [(set (match_operand:V2SI 0 "nonimmediate_operand" "=m,r,r,r") + (match_operand:V2SI 1 "input_operand" "r,m,r,W"))] + "TARGET_SPE + && (gpc_reg_operand (operands[0], V2SImode) + || gpc_reg_operand (operands[1], V2SImode))" + "* +{ + switch (which_alternative) + { + case 0: return \"evstdd%X0 %1,%y0\"; + case 1: return \"evldd%X1 %0,%y1\"; + case 2: return \"evor %0,%1,%1\"; + case 3: return output_vec_const_move (operands); + default: gcc_unreachable (); + } +}" + [(set_attr "type" "vecload,vecstore,*,*") + (set_attr "length" "*,*,*,12")]) + +(define_split + [(set (match_operand:V2SI 0 "register_operand" "") + (match_operand:V2SI 1 "zero_constant" ""))] + "TARGET_SPE && reload_completed" + [(set (match_dup 0) + (xor:V2SI (match_dup 0) (match_dup 0)))] + "") + +(define_expand "movv1di" + [(set (match_operand:V1DI 0 "nonimmediate_operand" "") + (match_operand:V1DI 1 "any_operand" ""))] + "TARGET_SPE" + "{ rs6000_emit_move (operands[0], operands[1], V1DImode); DONE; }") + +(define_insn "*movv1di_internal" + [(set (match_operand:V1DI 0 "nonimmediate_operand" "=m,r,r,r") + (match_operand:V1DI 1 "input_operand" "r,m,r,W"))] + "TARGET_SPE + && (gpc_reg_operand (operands[0], V1DImode) + || gpc_reg_operand (operands[1], V1DImode))" + "@ + evstdd%X0 %1,%y0 + evldd%X1 %0,%y1 + evor %0,%1,%1 + evxor %0,%0,%0" + [(set_attr "type" "vecload,vecstore,*,*") + (set_attr "length" "*,*,*,*")]) + +(define_expand "movv4hi" + [(set (match_operand:V4HI 0 "nonimmediate_operand" "") + (match_operand:V4HI 1 "any_operand" ""))] + "TARGET_SPE" + "{ rs6000_emit_move (operands[0], operands[1], V4HImode); DONE; }") + +(define_insn "*movv4hi_internal" + [(set (match_operand:V4HI 0 "nonimmediate_operand" "=m,r,r,r") + (match_operand:V4HI 1 "input_operand" "r,m,r,W"))] + "TARGET_SPE + && (gpc_reg_operand (operands[0], V4HImode) + || gpc_reg_operand (operands[1], V4HImode))" + "@ + evstdd%X0 %1,%y0 + evldd%X1 %0,%y1 + evor %0,%1,%1 + evxor %0,%0,%0" + [(set_attr "type" "vecload")]) + +(define_expand "movv2sf" + [(set (match_operand:V2SF 0 
"nonimmediate_operand" "") + (match_operand:V2SF 1 "any_operand" ""))] + "TARGET_SPE || TARGET_PAIRED_FLOAT" + "{ rs6000_emit_move (operands[0], operands[1], V2SFmode); DONE; }") + +(define_insn "*movv2sf_internal" + [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,r,r,r") + (match_operand:V2SF 1 "input_operand" "r,m,r,W"))] + "TARGET_SPE + && (gpc_reg_operand (operands[0], V2SFmode) + || gpc_reg_operand (operands[1], V2SFmode))" + "@ + evstdd%X0 %1,%y0 + evldd%X1 %0,%y1 + evor %0,%1,%1 + evxor %0,%0,%0" + [(set_attr "type" "vecload,vecstore,*,*") + (set_attr "length" "*,*,*,*")]) + +;; End of vector move instructions. + +(define_insn "spe_evmwhssfaa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 702)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhssfaa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhssmaa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 703)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhssmaa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhsmfaa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 704)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhsmfaa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhsmiaa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 705)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhsmiaa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhusiaa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 706)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhusiaa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhumiaa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 707)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhumiaa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhssfan" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 708)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhssfan %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhssian" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 
709)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhssian %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhsmfan" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 710)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhsmfan %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhsmian" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 711)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhsmian %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhumian" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 713)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhumian %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhgssfaa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 714)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhgssfaa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhgsmfaa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 715)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhgsmfaa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhgsmiaa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 716)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhgsmiaa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhgumiaa" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 717)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhgumiaa %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhgssfan" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 718)) + (clobber (reg:SI SPEFSCR_REGNO)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhgssfan %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhgsmfan" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 719)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhgsmfan %0,%1,%2" + 
[(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhgsmian" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 720)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhgsmian %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_evmwhgumian" + [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") + (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")] 721)) + (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))] + "TARGET_SPE" + "evmwhgumian %0,%1,%2" + [(set_attr "type" "veccomplex") + (set_attr "length" "4")]) + +(define_insn "spe_mtspefscr" + [(set (reg:SI SPEFSCR_REGNO) + (unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] + 722))] + "TARGET_SPE" + "mtspefscr %0" + [(set_attr "type" "vecsimple")]) + +(define_insn "spe_mfspefscr" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(reg:SI SPEFSCR_REGNO)] 723))] + "TARGET_SPE" + "mfspefscr %0" + [(set_attr "type" "vecsimple")]) + +;; Flip the GT bit. +(define_insn "e500_flip_gt_bit" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (unspec:CCFP + [(match_operand:CCFP 1 "cc_reg_operand" "y")] 999))] + "!TARGET_FPRS && TARGET_HARD_FLOAT" + "* +{ + return output_e500_flip_gt_bit (operands[0], operands[1]); +}" + [(set_attr "type" "cr_logical")]) + +;; MPC8540 single-precision FP instructions on GPRs. +;; We have 2 variants for each. One for IEEE compliant math and one +;; for non IEEE compliant math. + +(define_insn "cmpsfeq_gpr" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (unspec:CCFP + [(compare:CCFP (match_operand:SF 1 "gpc_reg_operand" "r") + (match_operand:SF 2 "gpc_reg_operand" "r"))] + 1000))] + "TARGET_HARD_FLOAT && !TARGET_FPRS + && !(flag_finite_math_only && !flag_trapping_math)" + "efscmpeq %0,%1,%2" + [(set_attr "type" "veccmp")]) + +(define_insn "tstsfeq_gpr" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (unspec:CCFP + [(compare:CCFP (match_operand:SF 1 "gpc_reg_operand" "r") + (match_operand:SF 2 "gpc_reg_operand" "r"))] + 1001))] + "TARGET_HARD_FLOAT && !TARGET_FPRS + && flag_finite_math_only && !flag_trapping_math" + "efststeq %0,%1,%2" + [(set_attr "type" "veccmpsimple")]) + +(define_insn "cmpsfgt_gpr" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (unspec:CCFP + [(compare:CCFP (match_operand:SF 1 "gpc_reg_operand" "r") + (match_operand:SF 2 "gpc_reg_operand" "r"))] + 1002))] + "TARGET_HARD_FLOAT && !TARGET_FPRS + && !(flag_finite_math_only && !flag_trapping_math)" + "efscmpgt %0,%1,%2" + [(set_attr "type" "veccmp")]) + +(define_insn "tstsfgt_gpr" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (unspec:CCFP + [(compare:CCFP (match_operand:SF 1 "gpc_reg_operand" "r") + (match_operand:SF 2 "gpc_reg_operand" "r"))] + 1003))] + "TARGET_HARD_FLOAT && !TARGET_FPRS + && flag_finite_math_only && !flag_trapping_math" + "efststgt %0,%1,%2" + [(set_attr "type" "veccmpsimple")]) + +(define_insn "cmpsflt_gpr" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (unspec:CCFP + [(compare:CCFP (match_operand:SF 1 "gpc_reg_operand" "r") + (match_operand:SF 2 "gpc_reg_operand" "r"))] + 1004))] + "TARGET_HARD_FLOAT && !TARGET_FPRS + && !(flag_finite_math_only && !flag_trapping_math)" + "efscmplt %0,%1,%2" + [(set_attr "type" "veccmp")]) + +(define_insn "tstsflt_gpr" + [(set 
(match_operand:CCFP 0 "cc_reg_operand" "=y") + (unspec:CCFP + [(compare:CCFP (match_operand:SF 1 "gpc_reg_operand" "r") + (match_operand:SF 2 "gpc_reg_operand" "r"))] + 1005))] + "TARGET_HARD_FLOAT && !TARGET_FPRS + && flag_finite_math_only && !flag_trapping_math" + "efststlt %0,%1,%2" + [(set_attr "type" "veccmpsimple")]) + +;; Same thing, but for double-precision. + +(define_insn "cmpdfeq_gpr" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (unspec:CCFP + [(compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "r") + (match_operand:DF 2 "gpc_reg_operand" "r"))] + CMPDFEQ_GPR))] + "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE + && !(flag_finite_math_only && !flag_trapping_math)" + "efdcmpeq %0,%1,%2" + [(set_attr "type" "veccmp")]) + +(define_insn "tstdfeq_gpr" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (unspec:CCFP + [(compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "r") + (match_operand:DF 2 "gpc_reg_operand" "r"))] + TSTDFEQ_GPR))] + "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE + && flag_finite_math_only && !flag_trapping_math" + "efdtsteq %0,%1,%2" + [(set_attr "type" "veccmpsimple")]) + +(define_insn "cmpdfgt_gpr" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (unspec:CCFP + [(compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "r") + (match_operand:DF 2 "gpc_reg_operand" "r"))] + CMPDFGT_GPR))] + "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE + && !(flag_finite_math_only && !flag_trapping_math)" + "efdcmpgt %0,%1,%2" + [(set_attr "type" "veccmp")]) + +(define_insn "tstdfgt_gpr" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (unspec:CCFP + [(compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "r") + (match_operand:DF 2 "gpc_reg_operand" "r"))] + TSTDFGT_GPR))] + "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE + && flag_finite_math_only && !flag_trapping_math" + "efdtstgt %0,%1,%2" + [(set_attr "type" "veccmpsimple")]) + +(define_insn "cmpdflt_gpr" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (unspec:CCFP + [(compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "r") + (match_operand:DF 2 "gpc_reg_operand" "r"))] + CMPDFLT_GPR))] + "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE + && !(flag_finite_math_only && !flag_trapping_math)" + "efdcmplt %0,%1,%2" + [(set_attr "type" "veccmp")]) + +(define_insn "tstdflt_gpr" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (unspec:CCFP + [(compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "r") + (match_operand:DF 2 "gpc_reg_operand" "r"))] + TSTDFLT_GPR))] + "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE + && flag_finite_math_only && !flag_trapping_math" + "efdtstlt %0,%1,%2" + [(set_attr "type" "veccmpsimple")]) + +;; Same thing, but for IBM long double. 
+ +(define_insn "cmptfeq_gpr" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (unspec:CCFP + [(compare:CCFP (match_operand:TF 1 "gpc_reg_operand" "r") + (match_operand:TF 2 "gpc_reg_operand" "r"))] + CMPTFEQ_GPR))] + "!TARGET_IEEEQUAD + && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128 + && !(flag_finite_math_only && !flag_trapping_math)" + "efdcmpeq %0,%1,%2\;bng %0,$+8\;efdcmpeq %0,%L1,%L2" + [(set_attr "type" "veccmp") + (set_attr "length" "12")]) + +(define_insn "tsttfeq_gpr" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (unspec:CCFP + [(compare:CCFP (match_operand:TF 1 "gpc_reg_operand" "r") + (match_operand:TF 2 "gpc_reg_operand" "r"))] + TSTTFEQ_GPR))] + "!TARGET_IEEEQUAD + && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128 + && flag_finite_math_only && !flag_trapping_math" + "efdtsteq %0,%1,%2\;bng %0,$+8\;efdtsteq %0,%L1,%L2" + [(set_attr "type" "veccmpsimple") + (set_attr "length" "12")]) + +(define_insn "cmptfgt_gpr" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (unspec:CCFP + [(compare:CCFP (match_operand:TF 1 "gpc_reg_operand" "r") + (match_operand:TF 2 "gpc_reg_operand" "r"))] + CMPTFGT_GPR))] + "!TARGET_IEEEQUAD + && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128 + && !(flag_finite_math_only && !flag_trapping_math)" + "efdcmpgt %0,%1,%2\;bgt %0,$+16\;efdcmpeq %0,%1,%2\;bng %0,$+8\;efdcmpgt %0,%L1,%L2" + [(set_attr "type" "veccmp") + (set_attr "length" "20")]) + +(define_insn "tsttfgt_gpr" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (unspec:CCFP + [(compare:CCFP (match_operand:TF 1 "gpc_reg_operand" "r") + (match_operand:TF 2 "gpc_reg_operand" "r"))] + TSTTFGT_GPR))] + "!TARGET_IEEEQUAD + && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128 + && flag_finite_math_only && !flag_trapping_math" + "efdtstgt %0,%1,%2\;bgt %0,$+16\;efdtsteq %0,%1,%2\;bng %0,$+8\;efdtstgt %0,%L1,%L2" + [(set_attr "type" "veccmpsimple") + (set_attr "length" "20")]) + +(define_insn "cmptflt_gpr" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (unspec:CCFP + [(compare:CCFP (match_operand:TF 1 "gpc_reg_operand" "r") + (match_operand:TF 2 "gpc_reg_operand" "r"))] + CMPTFLT_GPR))] + "!TARGET_IEEEQUAD + && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128 + && !(flag_finite_math_only && !flag_trapping_math)" + "efdcmplt %0,%1,%2\;bgt %0,$+16\;efdcmpeq %0,%1,%2\;bng %0,$+8\;efdcmplt %0,%L1,%L2" + [(set_attr "type" "veccmp") + (set_attr "length" "20")]) + +(define_insn "tsttflt_gpr" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (unspec:CCFP + [(compare:CCFP (match_operand:TF 1 "gpc_reg_operand" "r") + (match_operand:TF 2 "gpc_reg_operand" "r"))] + TSTTFLT_GPR))] + "!TARGET_IEEEQUAD + && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128 + && flag_finite_math_only && !flag_trapping_math" + "efdtstlt %0,%1,%2\;bgt %0,$+16\;efdtsteq %0,%1,%2\;bng %0,$+8\;efdtstlt %0,%L1,%L2" + [(set_attr "type" "veccmpsimple") + (set_attr "length" "20")]) + +;; Like cceq_ior_compare, but compare the GT bits. +(define_insn "e500_cr_ior_compare" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (unspec:CCFP [(match_operand 1 "cc_reg_operand" "y") + (match_operand 2 "cc_reg_operand" "y")] + E500_CR_IOR_COMPARE))] + "TARGET_HARD_FLOAT && !TARGET_FPRS" + "cror 4*%0+gt,4*%1+gt,4*%2+gt" + [(set_attr "type" "cr_logical")]) + +;; Out-of-line prologues and epilogues. 
+(define_insn "*save_gpregs_spe" + [(match_parallel 0 "any_parallel_operand" + [(clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (use (reg:P 11)) + (set (match_operand:V2SI 2 "memory_operand" "=m") + (match_operand:V2SI 3 "gpc_reg_operand" "r"))])] + "TARGET_SPE_ABI" + "bl %z1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*restore_gpregs_spe" + [(match_parallel 0 "any_parallel_operand" + [(clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (use (reg:P 11)) + (set (match_operand:V2SI 2 "gpc_reg_operand" "=r") + (match_operand:V2SI 3 "memory_operand" "m"))])] + "TARGET_SPE_ABI" + "bl %z1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "*return_and_restore_gpregs_spe" + [(match_parallel 0 "any_parallel_operand" + [(return) + (clobber (reg:P LR_REGNO)) + (use (match_operand:P 1 "symbol_ref_operand" "s")) + (use (reg:P 11)) + (set (match_operand:V2SI 2 "gpc_reg_operand" "=r") + (match_operand:V2SI 3 "memory_operand" "m"))])] + "TARGET_SPE_ABI" + "b %z1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) diff --git a/gcc/config/powerpcspe/spu2vmx.h b/gcc/config/powerpcspe/spu2vmx.h new file mode 100644 index 000000000000..1f4b4ed91bed --- /dev/null +++ b/gcc/config/powerpcspe/spu2vmx.h @@ -0,0 +1,2415 @@ +/* Cell SPU 2 VMX intrinsics header + Copyright (C) 2007-2017 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _SPU2VMX_H_ +#define _SPU2VMX_H_ 1 + +#ifdef __cplusplus + +#ifndef __SPU__ + +#include <si2vmx.h> + +/* spu_absd (absolute difference) + * ======== + */ +static __inline vec_uchar16 spu_absd(vec_uchar16 a, vec_uchar16 b) +{ + return ((vec_uchar16)(si_absdb((qword)(a), (qword)(b)))); + +} + + +/* spu_add + * ======= + */ +static __inline vec_uint4 spu_add(vec_uint4 a, vec_uint4 b) +{ + return ((vec_uint4)(si_a((qword)(a), (qword)(b)))); +} + +static __inline vec_int4 spu_add(vec_int4 a, vec_int4 b) +{ + return ((vec_int4)(si_a((qword)(a), (qword)(b)))); +} + +static __inline vec_ushort8 spu_add(vec_ushort8 a, vec_ushort8 b) +{ + return ((vec_ushort8)(si_ah((qword)(a), (qword)(b)))); +} + +static __inline vec_short8 spu_add(vec_short8 a, vec_short8 b) +{ + return ((vec_short8)(si_ah((qword)(a), (qword)(b)))); +} + +static __inline vec_uint4 spu_add(vec_uint4 a, unsigned int b) +{ + return ((vec_uint4)(si_ai((qword)(a), (int)(b)))); +} + +static __inline vec_int4 spu_add(vec_int4 a, int b) +{ + return ((vec_int4)(si_ai((qword)(a), b))); +} + +static __inline vec_ushort8 spu_add(vec_ushort8 a, unsigned short b) +{ + return ((vec_ushort8)(si_ahi((qword)(a), (short)(b)))); +} + +static __inline vec_short8 spu_add(vec_short8 a, short b) +{ + return ((vec_short8)(si_ahi((qword)(a), b))); +} + +static __inline vec_float4 spu_add(vec_float4 a, vec_float4 b) +{ + return ((vec_float4)(si_fa((qword)(a), (qword)(b)))); +} + +static __inline vec_double2 spu_add(vec_double2 a, vec_double2 b) +{ + return ((vec_double2)(si_dfa((qword)(a), (qword)(b)))); +} + + +/* spu_addx + * ======== + */ +static __inline vec_uint4 spu_addx(vec_uint4 a, vec_uint4 b, vec_uint4 c) +{ + return ((vec_uint4)(si_addx((qword)(a), (qword)(b), (qword)(c)))); +} + +static __inline vec_int4 spu_addx(vec_int4 a, vec_int4 b, vec_int4 c) +{ + return ((vec_int4)(si_addx((qword)(a), (qword)(b), (qword)(c)))); +} + + +/* spu_and + * ======= + */ +static __inline vec_uchar16 spu_and(vec_uchar16 a, vec_uchar16 b) +{ + return ((vec_uchar16)(si_and((qword)(a), (qword)(b)))); +} + +static __inline vec_char16 spu_and(vec_char16 a, vec_char16 b) +{ + return ((vec_char16)(si_and((qword)(a), (qword)(b)))); +} + +static __inline vec_ushort8 spu_and(vec_ushort8 a, vec_ushort8 b) +{ + return ((vec_ushort8)(si_and((qword)(a), (qword)(b)))); +} + +static __inline vec_short8 spu_and(vec_short8 a, vec_short8 b) +{ + return ((vec_short8)(si_and((qword)(a), (qword)(b)))); +} + +static __inline vec_uint4 spu_and(vec_uint4 a, vec_uint4 b) +{ + return ((vec_uint4)(si_and((qword)(a), (qword)(b)))); +} + +static __inline vec_int4 spu_and(vec_int4 a, vec_int4 b) +{ + return ((vec_int4)(si_and((qword)(a), (qword)(b)))); +} + +static __inline vec_float4 spu_and(vec_float4 a, vec_float4 b) +{ + return ((vec_float4)(si_and((qword)(a), (qword)(b)))); +} + +static __inline vec_ullong2 spu_and(vec_ullong2 a, vec_ullong2 b) +{ + return ((vec_ullong2)(si_and((qword)(a), (qword)(b)))); +} + +static __inline vec_llong2 spu_and(vec_llong2 a, vec_llong2 b) +{ + return ((vec_llong2)(si_and((qword)(a), (qword)(b)))); +} + +static __inline vec_double2 spu_and(vec_double2 a, vec_double2 b) +{ + return ((vec_double2)(si_and((qword)(a), (qword)(b)))); +} + +static __inline vec_uchar16 spu_and(vec_uchar16 a, unsigned char b) +{ + return ((vec_uchar16)(si_andbi((qword)(a), (signed char)(b)))); +} + + +static __inline vec_char16 spu_and(vec_char16 a, signed char b) +{ + return ((vec_char16)(si_andbi((qword)(a), b))); +} + +static __inline vec_ushort8 
spu_and(vec_ushort8 a, unsigned short b) +{ + return ((vec_ushort8)(si_andhi((qword)(a), (signed short)(b)))); +} + +static __inline vec_short8 spu_and(vec_short8 a, signed short b) +{ + return ((vec_short8)(si_andhi((qword)(a), b))); +} + +static __inline vec_uint4 spu_and(vec_uint4 a, unsigned int b) +{ + return ((vec_uint4)(si_andi((qword)(a), (signed int)(b)))); +} + +static __inline vec_int4 spu_and(vec_int4 a, signed int b) +{ + return ((vec_int4)(si_andi((qword)(a), b))); +} + + +/* spu_andc + * ======== + */ +#define spu_andc(_a, _b) vec_andc(_a, _b) + + +/* spu_avg + * ======= + */ +#define spu_avg(_a, _b) vec_avg(_a, _b) + + +/* spu_bisled + * spu_bisled_d + * spu_bisled_e + * ============ + */ +#define spu_bisled(_func) /* not mappable */ +#define spu_bisled_d(_func) /* not mappable */ +#define spu_bisled_e(_func) /* not mappable */ + +/* spu_cmpabseq + * ============ + */ +static __inline vec_uint4 spu_cmpabseq(vec_float4 a, vec_float4 b) +{ + return ((vec_uint4)(si_fcmeq((qword)(a), (qword)(b)))); + +} + +static __inline vec_ullong2 spu_cmpabseq(vec_double2 a, vec_double2 b) +{ + return ((vec_ullong2)(si_dfcmeq((qword)(a), (qword)(b)))); +} + + +/* spu_cmpabsgt + * ============ + */ +static __inline vec_uint4 spu_cmpabsgt(vec_float4 a, vec_float4 b) +{ + return ((vec_uint4)(si_fcmgt((qword)(a), (qword)(b)))); +} + +static __inline vec_ullong2 spu_cmpabsgt(vec_double2 a, vec_double2 b) +{ + return ((vec_ullong2)(si_dfcmgt((qword)(a), (qword)(b)))); +} + + +/* spu_cmpeq + * ======== + */ +static __inline vec_uchar16 spu_cmpeq(vec_uchar16 a, vec_uchar16 b) +{ + return ((vec_uchar16)(si_ceqb((qword)(a), (qword)(b)))); +} + +static __inline vec_uchar16 spu_cmpeq(vec_char16 a, vec_char16 b) +{ + return ((vec_uchar16)(si_ceqb((qword)(a), (qword)(b)))); +} + +static __inline vec_ushort8 spu_cmpeq(vec_ushort8 a, vec_ushort8 b) +{ + return ((vec_ushort8)(si_ceqh((qword)(a), (qword)(b)))); +} + +static __inline vec_ushort8 spu_cmpeq(vec_short8 a, vec_short8 b) +{ + return ((vec_ushort8)(si_ceqh((qword)(a), (qword)(b)))); +} + +static __inline vec_uint4 spu_cmpeq(vec_uint4 a, vec_uint4 b) +{ + return ((vec_uint4)(si_ceq((qword)(a), (qword)(b)))); +} + +static __inline vec_uint4 spu_cmpeq(vec_int4 a, vec_int4 b) +{ + return ((vec_uint4)(si_ceq((qword)(a), (qword)(b)))); +} + +static __inline vec_uint4 spu_cmpeq(vec_float4 a, vec_float4 b) +{ + return ((vec_uint4)(si_fceq((qword)(a), (qword)(b)))); +} + +static __inline vec_uchar16 spu_cmpeq(vec_uchar16 a, unsigned char b) +{ + return ((vec_uchar16)(si_ceqbi((qword)(a), (signed char)(b)))); +} + +static __inline vec_uchar16 spu_cmpeq(vec_char16 a, signed char b) +{ + return ((vec_uchar16)(si_ceqbi((qword)(a), b))); +} + +static __inline vec_ushort8 spu_cmpeq(vec_ushort8 a, unsigned short b) +{ + return ((vec_ushort8)(si_ceqhi((qword)(a), (signed short)(b)))); +} + +static __inline vec_ushort8 spu_cmpeq(vec_short8 a, signed short b) +{ + return ((vec_ushort8)(si_ceqhi((qword)(a), b))); +} + +static __inline vec_uint4 spu_cmpeq(vec_uint4 a, unsigned int b) +{ + return ((vec_uint4)(si_ceqi((qword)(a), (signed int)(b)))); +} + +static __inline vec_uint4 spu_cmpeq(vec_int4 a, signed int b) +{ + return ((vec_uint4)(si_ceqi((qword)(a), b))); +} + +static __inline vec_ullong2 spu_cmpeq(vec_double2 a, vec_double2 b) +{ + return ((vec_ullong2)(si_dfceq((qword)(a), (qword)(b)))); +} + + +/* spu_cmpgt + * ======== + */ +static __inline vec_uchar16 spu_cmpgt(vec_uchar16 a, vec_uchar16 b) +{ + return ((vec_uchar16)(si_clgtb((qword)(a), (qword)(b)))); +} 
+ +static __inline vec_uchar16 spu_cmpgt(vec_char16 a, vec_char16 b) +{ + return ((vec_uchar16)(si_cgtb((qword)(a), (qword)(b)))); +} + +static __inline vec_ushort8 spu_cmpgt(vec_ushort8 a, vec_ushort8 b) +{ + return ((vec_ushort8)(si_clgth((qword)(a), (qword)(b)))); +} + +static __inline vec_ushort8 spu_cmpgt(vec_short8 a, vec_short8 b) +{ + return ((vec_ushort8)(si_cgth((qword)(a), (qword)(b)))); +} + +static __inline vec_uint4 spu_cmpgt(vec_uint4 a, vec_uint4 b) +{ + return ((vec_uint4)(si_clgt((qword)(a), (qword)(b)))); +} + +static __inline vec_uint4 spu_cmpgt(vec_int4 a, vec_int4 b) +{ + return ((vec_uint4)(si_cgt((qword)(a), (qword)(b)))); +} + +static __inline vec_uint4 spu_cmpgt(vec_float4 a, vec_float4 b) +{ + return ((vec_uint4)(si_fcgt((qword)(a), (qword)(b)))); +} + +static __inline vec_uchar16 spu_cmpgt(vec_uchar16 a, unsigned char b) +{ + return ((vec_uchar16)(si_clgtbi((qword)(a), b))); +} + +static __inline vec_uchar16 spu_cmpgt(vec_char16 a, signed char b) +{ + return ((vec_uchar16)(si_cgtbi((qword)(a), b))); +} + +static __inline vec_ushort8 spu_cmpgt(vec_ushort8 a, unsigned short b) +{ + return ((vec_ushort8)(si_clgthi((qword)(a), b))); +} + +static __inline vec_ushort8 spu_cmpgt(vec_short8 a, signed short b) +{ + return ((vec_ushort8)(si_cgthi((qword)(a), b))); +} + +static __inline vec_uint4 spu_cmpgt(vec_uint4 a, unsigned int b) +{ + return ((vec_uint4)(si_clgti((qword)(a), b))); +} + +static __inline vec_uint4 spu_cmpgt(vec_int4 a, signed int b) +{ + return ((vec_uint4)(si_cgti((qword)(a), b))); +} + +static __inline vec_ullong2 spu_cmpgt(vec_double2 a, vec_double2 b) +{ + return ((vec_ullong2)(si_dfcgt((qword)(a), (qword)(b)))); +} + + +/* spu_cntb + * ======== + */ +static __inline vec_uchar16 spu_cntb(vec_uchar16 a) +{ + return ((vec_uchar16)(si_cntb((qword)(a)))); +} + + +static __inline vec_uchar16 spu_cntb(vec_char16 a) +{ + return ((vec_uchar16)(si_cntb((qword)(a)))); +} + +/* spu_cntlz + * ========= + */ +static __inline vec_uint4 spu_cntlz(vec_uint4 a) +{ + return ((vec_uint4)(si_clz((qword)(a)))); +} + +static __inline vec_uint4 spu_cntlz(vec_int4 a) +{ + return ((vec_uint4)(si_clz((qword)(a)))); +} + +static __inline vec_uint4 spu_cntlz(vec_float4 a) +{ + return ((vec_uint4)(si_clz((qword)(a)))); +} + +/* spu_testsv + * ========== + */ +static __inline vec_ullong2 spu_testsv(vec_double2 a, char b) +{ + return ((vec_ullong2)(si_dftsv((qword)(a), b))); +} + +/* spu_convtf + * ========== + */ +#define spu_convtf(_a, _b) (vec_ctf(_a, _b)) + +/* spu_convts + * ========== + */ +#define spu_convts(_a, _b) (vec_cts(_a, _b)) + +/* spu_convtu + * ========== + */ +#define spu_convtu(_a, _b) (vec_ctu(_a, _b)) + + +/* spu_dsync + * ======== + */ +#define spu_dsync() + +/* spu_eqv + * ======= + */ +static __inline vec_uchar16 spu_eqv(vec_uchar16 a, vec_uchar16 b) +{ + return ((vec_uchar16)(si_eqv((qword)(a), (qword)(b)))); +} + +static __inline vec_char16 spu_eqv(vec_char16 a, vec_char16 b) +{ + return ((vec_char16)(si_eqv((qword)(a), (qword)(b)))); +} + +static __inline vec_ushort8 spu_eqv(vec_ushort8 a, vec_ushort8 b) +{ + return ((vec_ushort8)(si_eqv((qword)(a), (qword)(b)))); +} + +static __inline vec_short8 spu_eqv(vec_short8 a, vec_short8 b) +{ + return ((vec_short8)(si_eqv((qword)(a), (qword)(b)))); +} + +static __inline vec_uint4 spu_eqv(vec_uint4 a, vec_uint4 b) +{ + return ((vec_uint4)(si_eqv((qword)(a), (qword)(b)))); +} + +static __inline vec_int4 spu_eqv(vec_int4 a, vec_int4 b) +{ + return ((vec_int4)(si_eqv((qword)(a), (qword)(b)))); +} + +static __inline 
vec_float4 spu_eqv(vec_float4 a, vec_float4 b) +{ + return ((vec_float4)(si_eqv((qword)(a), (qword)(b)))); +} + +static __inline vec_ullong2 spu_eqv(vec_ullong2 a, vec_ullong2 b) +{ + return ((vec_ullong2)(si_eqv((qword)(a), (qword)(b)))); +} + +static __inline vec_llong2 spu_eqv(vec_llong2 a, vec_llong2 b) +{ + return ((vec_llong2)(si_eqv((qword)(a), (qword)(b)))); +} + +static __inline vec_double2 spu_eqv(vec_double2 a, vec_double2 b) +{ + return ((vec_double2)(si_eqv((qword)(a), (qword)(b)))); +} + +/* spu_extend + * ======== + */ +static __inline vec_short8 spu_extend(vec_char16 a) +{ + return ((vec_short8)(si_xsbh((qword)(a)))); +} + + +static __inline vec_int4 spu_extend(vec_short8 a) +{ + return ((vec_int4)(si_xshw((qword)(a)))); +} + +static __inline vec_llong2 spu_extend(vec_int4 a) +{ + return ((vec_llong2)(si_xswd((qword)(a)))); +} + + +static __inline vec_double2 spu_extend(vec_float4 a) +{ + return ((vec_double2)(si_fesd((qword)(a)))); +} + + +/* spu_extract + * ======== + */ +static __inline unsigned char spu_extract(vec_uchar16 a, int element) +{ + union { + vec_uchar16 v; + unsigned char c[16]; + } in; + + in.v = a; + return (in.c[element & 15]); +} + +static __inline signed char spu_extract(vec_char16 a, int element) +{ + union { + vec_char16 v; + signed char c[16]; + } in; + + in.v = a; + return (in.c[element & 15]); +} + +static __inline unsigned short spu_extract(vec_ushort8 a, int element) +{ + union { + vec_ushort8 v; + unsigned short s[8]; + } in; + + in.v = a; + return (in.s[element & 7]); +} + +static __inline signed short spu_extract(vec_short8 a, int element) +{ + union { + vec_short8 v; + signed short s[8]; + } in; + + in.v = a; + return (in.s[element & 7]); +} + +static __inline unsigned int spu_extract(vec_uint4 a, int element) +{ + union { + vec_uint4 v; + unsigned int i[4]; + } in; + + in.v = a; + return (in.i[element & 3]); +} + +static __inline signed int spu_extract(vec_int4 a, int element) +{ + union { + vec_int4 v; + signed int i[4]; + } in; + + in.v = a; + return (in.i[element & 3]); +} + +static __inline float spu_extract(vec_float4 a, int element) +{ + union { + vec_float4 v; + float f[4]; + } in; + + in.v = a; + return (in.f[element & 3]); +} + +static __inline unsigned long long spu_extract(vec_ullong2 a, int element) +{ + union { + vec_ullong2 v; + unsigned long long l[2]; + } in; + + in.v = a; + return (in.l[element & 1]); +} + +static __inline signed long long spu_extract(vec_llong2 a, int element) +{ + union { + vec_llong2 v; + signed long long l[2]; + } in; + + in.v = a; + return (in.l[element & 1]); +} + +static __inline double spu_extract(vec_double2 a, int element) +{ + union { + vec_double2 v; + double d[2]; + } in; + + in.v = a; + return (in.d[element & 1]); +} + +/* spu_gather + * ======== + */ +static __inline vec_uint4 spu_gather(vec_uchar16 a) +{ + return ((vec_uint4)(si_gbb((qword)(a)))); +} + + +static __inline vec_uint4 spu_gather(vec_char16 a) +{ + return ((vec_uint4)(si_gbb((qword)(a)))); +} + +static __inline vec_uint4 spu_gather(vec_ushort8 a) +{ + return ((vec_uint4)(si_gbh((qword)(a)))); +} + +static __inline vec_uint4 spu_gather(vec_short8 a) +{ + return ((vec_uint4)(si_gbh((qword)(a)))); +} + + +static __inline vec_uint4 spu_gather(vec_uint4 a) +{ + return ((vec_uint4)(si_gb((qword)(a)))); +} + +static __inline vec_uint4 spu_gather(vec_int4 a) +{ + return ((vec_uint4)(si_gb((qword)(a)))); +} + +static __inline vec_uint4 spu_gather(vec_float4 a) +{ + return ((vec_uint4)(si_gb((qword)(a)))); +} + +/* spu_genb + * ======== + 
*/ +static __inline vec_uint4 spu_genb(vec_uint4 a, vec_uint4 b) +{ + return ((vec_uint4)(si_bg((qword)(b), (qword)(a)))); +} + +static __inline vec_int4 spu_genb(vec_int4 a, vec_int4 b) +{ + return ((vec_int4)(si_bg((qword)(b), (qword)(a)))); +} + +/* spu_genbx + * ========= + */ +static __inline vec_uint4 spu_genbx(vec_uint4 a, vec_uint4 b, vec_uint4 c) +{ + return ((vec_uint4)(si_bgx((qword)(b), (qword)(a), (qword)(c)))); +} + +static __inline vec_int4 spu_genbx(vec_int4 a, vec_int4 b, vec_int4 c) +{ + return ((vec_int4)(si_bgx((qword)(b), (qword)(a), (qword)(c)))); +} + + +/* spu_genc + * ======== + */ +static __inline vec_uint4 spu_genc(vec_uint4 a, vec_uint4 b) +{ + return ((vec_uint4)(si_cg((qword)(a), (qword)(b)))); +} + +static __inline vec_int4 spu_genc(vec_int4 a, vec_int4 b) +{ + return ((vec_int4)(si_cg((qword)(a), (qword)(b)))); +} + +/* spu_gencx + * ========= + */ +static __inline vec_uint4 spu_gencx(vec_uint4 a, vec_uint4 b, vec_uint4 c) +{ + return ((vec_uint4)(si_cgx((qword)(a), (qword)(b), (qword)(c)))); +} + +static __inline vec_int4 spu_gencx(vec_int4 a, vec_int4 b, vec_int4 c) +{ + return ((vec_int4)(si_cgx((qword)(a), (qword)(b), (qword)(c)))); +} + + +/* spu_hcmpeq + * ======== + */ +#define spu_hcmpeq(_a, _b) if (_a == _b) { SPU_HALT_ACTION; }; + + +/* spu_hcmpgt + * ======== + */ +#define spu_hcmpgt(_a, _b) if (_a > _b) { SPU_HALT_ACTION; }; + + +/* spu_idisable + * ============ + */ +#define spu_idisable() SPU_UNSUPPORTED_ACTION + + +/* spu_ienable + * =========== + */ +#define spu_ienable() SPU_UNSUPPORTED_ACTION + + +/* spu_insert + * ======== + */ +static __inline vec_uchar16 spu_insert(unsigned char a, vec_uchar16 b, int element) +{ + union { + vec_uchar16 v; + unsigned char c[16]; + } in; + + in.v = b; + in.c[element & 15] = a; + return (in.v); +} + +static __inline vec_char16 spu_insert(signed char a, vec_char16 b, int element) +{ + return ((vec_char16)spu_insert((unsigned char)(a), (vec_uchar16)(b), element)); +} + +static __inline vec_ushort8 spu_insert(unsigned short a, vec_ushort8 b, int element) +{ + union { + vec_ushort8 v; + unsigned short s[8]; + } in; + + in.v = b; + in.s[element & 7] = a; + return (in.v); +} + +static __inline vec_short8 spu_insert(signed short a, vec_short8 b, int element) +{ + return ((vec_short8)spu_insert((unsigned short)(a), (vec_ushort8)(b), element)); +} + +static __inline vec_uint4 spu_insert(unsigned int a, vec_uint4 b, int element) +{ + union { + vec_uint4 v; + unsigned int i[4]; + } in; + + in.v = b; + in.i[element & 3] = a; + return (in.v); +} + +static __inline vec_int4 spu_insert(signed int a, vec_int4 b, int element) +{ + return ((vec_int4)spu_insert((unsigned int)(a), (vec_uint4)(b), element)); +} + +static __inline vec_float4 spu_insert(float a, vec_float4 b, int element) +{ + union { + vec_float4 v; + float f[4]; + } in; + + in.v = b; + in.f[element & 3] = a; + return (in.v); +} + +static __inline vec_ullong2 spu_insert(unsigned long long a, vec_ullong2 b, int element) +{ + union { + vec_ullong2 v; + unsigned long long l[2]; + } in; + + in.v = b; + in.l[element & 1] = a; + return (in.v); +} + +static __inline vec_llong2 spu_insert(signed long long a, vec_llong2 b, int element) +{ + return ((vec_llong2)spu_insert((unsigned long long)(a), (vec_ullong2)(b), element)); +} + +static __inline vec_double2 spu_insert(double a, vec_double2 b, int element) +{ + union { + vec_double2 v; + double d[2]; + } in; + + in.v = b; + in.d[element & 1] = a; + return (in.v); +} + + +/* spu_madd + * ======== + */ +static __inline 
vec_int4 spu_madd(vec_short8 a, vec_short8 b, vec_int4 c) +{ + return ((vec_int4)(si_mpya((qword)(a), (qword)(b), (qword)(c)))); +} + +static __inline vec_float4 spu_madd(vec_float4 a, vec_float4 b, vec_float4 c) +{ + return ((vec_float4)(si_fma((qword)(a), (qword)(b), (qword)(c)))); +} + +static __inline vec_double2 spu_madd(vec_double2 a, vec_double2 b, vec_double2 c) +{ + return ((vec_double2)(si_dfma((qword)(a), (qword)(b), (qword)(c)))); +} + + +/* spu_maskb + * ======== + */ +#define spu_maskb(_a) (vec_uchar16)(si_fsmb(si_from_int((int)(_a)))) + +/* spu_maskh + * ======== + */ +#define spu_maskh(_a) (vec_ushort8)(si_fsmh(si_from_int((int)(_a)))) + + +/* spu_maskw + * ======== + */ +#define spu_maskw(_a) (vec_uint4)(si_fsm(si_from_int((int)(_a)))) + + +/* spu_mfcdma32 + * ======== + */ +#define spu_mfcdma32(_ls, _ea, _size, _tagid, _cmd) + + +/* spu_mfcdma64 + * ======== + */ +#define spu_mfcdma64(_ls, _eahi, _ealow, _size, _tagid, _cmd) + +/* spu_mfcstat + * ======== + */ +#define spu_mfcstat(_type) 0xFFFFFFFF + + + +/* spu_mffpscr + * =========== + */ +#define spu_mffpscr() (vec_uint4)(si_fscrrd()) + + +/* spu_mfspr + * ======== + */ + +#define spu_mfspr(_reg) si_to_uint(si_mfspr(_reg)) + + + +/* spu_mhhadd + * ========== + */ +static __inline vec_int4 spu_mhhadd(vec_short8 a, vec_short8 b, vec_int4 c) +{ + return ((vec_int4)(si_mpyhha((qword)(a), (qword)(b), (qword)(c)))); +} + + +static __inline vec_uint4 spu_mhhadd(vec_ushort8 a, vec_ushort8 b, vec_uint4 c) +{ + return ((vec_uint4)(si_mpyhhau((qword)(a), (qword)(b), (qword)(c)))); +} + + +/* spu_msub + * ======== + */ +static __inline vec_float4 spu_msub(vec_float4 a, vec_float4 b, vec_float4 c) +{ + return ((vec_float4)(si_fms((qword)(a), (qword)(b), (qword)(c)))); +} + +static __inline vec_double2 spu_msub(vec_double2 a, vec_double2 b, vec_double2 c) +{ + return ((vec_double2)(si_dfms((qword)(a), (qword)(b), (qword)(c)))); +} + + +/* spu_mtfpscr + * =========== + */ +#define spu_mtfpscr(_a) + + +/* spu_mtspr + * ======== + */ +#define spu_mtspr(_reg, _a) + + +/* spu_mul + * ======== + */ +static __inline vec_float4 spu_mul(vec_float4 a, vec_float4 b) +{ + return ((vec_float4)(si_fm((qword)(a), (qword)(b)))); +} + +static __inline vec_double2 spu_mul(vec_double2 a, vec_double2 b) +{ + return ((vec_double2)(si_dfm((qword)(a), (qword)(b)))); +} + + +/* spu_mulh + * ======== + */ +static __inline vec_int4 spu_mulh(vec_short8 a, vec_short8 b) +{ + return ((vec_int4)(si_mpyh((qword)(a), (qword)(b)))); +} + +/* spu_mule + * ========= + */ +#define spu_mule(_a, _b) vec_mule(_a, _b) + + + +/* spu_mulo + * ======== + */ +static __inline vec_int4 spu_mulo(vec_short8 a, vec_short8 b) +{ + return ((vec_int4)(si_mpy((qword)(a), (qword)(b)))); +} + + +static __inline vec_uint4 spu_mulo(vec_ushort8 a, vec_ushort8 b) +{ + return ((vec_uint4)(si_mpyu((qword)(a), (qword)(b)))); +} + + +static __inline vec_int4 spu_mulo(vec_short8 a, short b) +{ + return ((vec_int4)(si_mpyi((qword)(a), b))); +} + +static __inline vec_uint4 spu_mulo(vec_ushort8 a, unsigned short b) +{ + return ((vec_uint4)(si_mpyui((qword)(a), b))); +} + + +/* spu_mulsr + * ========= + */ +static __inline vec_int4 spu_mulsr(vec_short8 a, vec_short8 b) +{ + return ((vec_int4)(si_mpys((qword)(a), (qword)(b)))); +} + + +/* spu_nand + * ======== + */ +static __inline vec_uchar16 spu_nand(vec_uchar16 a, vec_uchar16 b) +{ + return ((vec_uchar16)(si_nand((qword)(a), (qword)(b)))); +} + +static __inline vec_char16 spu_nand(vec_char16 a, vec_char16 b) +{ + return 
((vec_char16)(si_nand((qword)(a), (qword)(b)))); +} + +static __inline vec_ushort8 spu_nand(vec_ushort8 a, vec_ushort8 b) +{ + return ((vec_ushort8)(si_nand((qword)(a), (qword)(b)))); +} + +static __inline vec_short8 spu_nand(vec_short8 a, vec_short8 b) +{ + return ((vec_short8)(si_nand((qword)(a), (qword)(b)))); +} + +static __inline vec_uint4 spu_nand(vec_uint4 a, vec_uint4 b) +{ + return ((vec_uint4)(si_nand((qword)(a), (qword)(b)))); +} + +static __inline vec_int4 spu_nand(vec_int4 a, vec_int4 b) +{ + return ((vec_int4)(si_nand((qword)(a), (qword)(b)))); +} + +static __inline vec_float4 spu_nand(vec_float4 a, vec_float4 b) +{ + return ((vec_float4)(si_nand((qword)(a), (qword)(b)))); +} + +static __inline vec_ullong2 spu_nand(vec_ullong2 a, vec_ullong2 b) +{ + return ((vec_ullong2)(si_nand((qword)(a), (qword)(b)))); +} + +static __inline vec_llong2 spu_nand(vec_llong2 a, vec_llong2 b) +{ + return ((vec_llong2)(si_nand((qword)(a), (qword)(b)))); +} + +static __inline vec_double2 spu_nand(vec_double2 a, vec_double2 b) +{ + return ((vec_double2)(si_nand((qword)(a), (qword)(b)))); +} + + +/* spu_nmadd + * ========= + */ +static __inline vec_double2 spu_nmadd(vec_double2 a, vec_double2 b, vec_double2 c) +{ + return ((vec_double2)(si_dfnma((qword)(a), (qword)(b), (qword)(c)))); +} + + +/* spu_nmsub + * ========= + */ +static __inline vec_float4 spu_nmsub(vec_float4 a, vec_float4 b, vec_float4 c) +{ + return ((vec_float4)(si_fnms((qword)(a), (qword)(b), (qword)(c)))); +} + +static __inline vec_double2 spu_nmsub(vec_double2 a, vec_double2 b, vec_double2 c) +{ + return ((vec_double2)(si_dfnms((qword)(a), (qword)(b), (qword)(c)))); +} + + +/* spu_nor + * ======= + */ +#define spu_nor(_a, _b) vec_nor(_a, _b) + + +/* spu_or + * ====== + */ +static __inline vec_uchar16 spu_or(vec_uchar16 a, vec_uchar16 b) +{ + return ((vec_uchar16)(si_or((qword)(a), (qword)(b)))); +} + +static __inline vec_char16 spu_or(vec_char16 a, vec_char16 b) +{ + return ((vec_char16)(si_or((qword)(a), (qword)(b)))); +} + +static __inline vec_ushort8 spu_or(vec_ushort8 a, vec_ushort8 b) +{ + return ((vec_ushort8)(si_or((qword)(a), (qword)(b)))); +} + +static __inline vec_short8 spu_or(vec_short8 a, vec_short8 b) +{ + return ((vec_short8)(si_or((qword)(a), (qword)(b)))); +} + +static __inline vec_uint4 spu_or(vec_uint4 a, vec_uint4 b) +{ + return ((vec_uint4)(si_or((qword)(a), (qword)(b)))); +} + +static __inline vec_int4 spu_or(vec_int4 a, vec_int4 b) +{ + return ((vec_int4)(si_or((qword)(a), (qword)(b)))); +} + +static __inline vec_float4 spu_or(vec_float4 a, vec_float4 b) +{ + return ((vec_float4)(si_or((qword)(a), (qword)(b)))); +} + +static __inline vec_ullong2 spu_or(vec_ullong2 a, vec_ullong2 b) +{ + return ((vec_ullong2)(si_or((qword)(a), (qword)(b)))); +} + +static __inline vec_llong2 spu_or(vec_llong2 a, vec_llong2 b) +{ + return ((vec_llong2)(si_or((qword)(a), (qword)(b)))); +} + +static __inline vec_double2 spu_or(vec_double2 a, vec_double2 b) +{ + return ((vec_double2)(si_or((qword)(a), (qword)(b)))); +} + + +static __inline vec_uchar16 spu_or(vec_uchar16 a, unsigned char b) +{ + return ((vec_uchar16)(si_orbi((qword)(a), b))); +} + +static __inline vec_char16 spu_or(vec_char16 a, signed char b) +{ + return ((vec_char16)(si_orbi((qword)(a), (unsigned char)(b)))); +} + +static __inline vec_ushort8 spu_or(vec_ushort8 a, unsigned short b) +{ + return ((vec_ushort8)(si_orhi((qword)(a), b))); +} + +static __inline vec_short8 spu_or(vec_short8 a, signed short b) +{ + return ((vec_short8)(si_orhi((qword)(a), (unsigned 
short)(b)))); +} + +static __inline vec_uint4 spu_or(vec_uint4 a, unsigned int b) +{ + return ((vec_uint4)(si_ori((qword)(a), b))); +} + +static __inline vec_int4 spu_or(vec_int4 a, signed int b) +{ + return ((vec_int4)(si_ori((qword)(a), (unsigned int)(b)))); +} + + +/* spu_orc + * ======= + */ +#define spu_orc(_a, _b) vec_or(_a, vec_nor(_b, _b)) + + +/* spu_orx + * ======= + */ +static __inline vec_uint4 spu_orx(vec_uint4 a) +{ + return ((vec_uint4)(si_orx((qword)(a)))); +} + +static __inline vec_int4 spu_orx(vec_int4 a) +{ + return ((vec_int4)(si_orx((qword)(a)))); +} + + +/* spu_promote + * =========== + */ +static __inline vec_uchar16 spu_promote(unsigned char a, int element) +{ + union { + vec_uchar16 v; + unsigned char c[16]; + } in; + + in.c[element & 15] = a; + return (in.v); +} + +static __inline vec_char16 spu_promote(signed char a, int element) +{ + union { + vec_char16 v; + signed char c[16]; + } in; + + in.c[element & 15] = a; + return (in.v); +} + +static __inline vec_ushort8 spu_promote(unsigned short a, int element) +{ + union { + vec_ushort8 v; + unsigned short s[8]; + } in; + + in.s[element & 7] = a; + return (in.v); +} + +static __inline vec_short8 spu_promote(signed short a, int element) +{ + union { + vec_short8 v; + signed short s[8]; + } in; + + in.s[element & 7] = a; + return (in.v); +} + +static __inline vec_uint4 spu_promote(unsigned int a, int element) +{ + union { + vec_uint4 v; + unsigned int i[4]; + } in; + + in.i[element & 3] = a; + return (in.v); +} + +static __inline vec_int4 spu_promote(signed int a, int element) +{ + union { + vec_int4 v; + signed int i[4]; + } in; + + in.i[element & 3] = a; + return (in.v); +} + +static __inline vec_float4 spu_promote(float a, int element) +{ + union { + vec_float4 v; + float f[4]; + } in; + + in.f[element & 3] = a; + return (in.v); +} + +static __inline vec_ullong2 spu_promote(unsigned long long a, int element) +{ + union { + vec_ullong2 v; + unsigned long long l[2]; + } in; + + in.l[element & 1] = a; + return (in.v); +} + +static __inline vec_llong2 spu_promote(signed long long a, int element) +{ + union { + vec_llong2 v; + signed long long l[2]; + } in; + + in.l[element & 1] = a; + return (in.v); +} + +static __inline vec_double2 spu_promote(double a, int element) +{ + union { + vec_double2 v; + double d[2]; + } in; + + in.d[element & 1] = a; + return (in.v); +} + +/* spu_re + * ====== + */ +#define spu_re(_a) vec_re(_a) + + +/* spu_readch + * ========== + */ +#define spu_readch(_channel) 0 /* not mappable */ + + +/* spu_readchcnt + * ============= + */ +#define spu_readchcnt(_channel) 0 /* not mappable */ + + +/* spu_readchqw + * ============ + */ +#define spu_readchqw(_channel) __extension__ ({ vec_uint4 result = { 0, 0, 0, 0 }; result; }) + +/* spu_rl + * ====== + */ +static __inline vec_ushort8 spu_rl(vec_ushort8 a, vec_short8 b) +{ + return ((vec_ushort8)(si_roth((qword)(a), (qword)(b)))); +} + +static __inline vec_short8 spu_rl(vec_short8 a, vec_short8 b) +{ + return ((vec_short8)(si_roth((qword)(a), (qword)(b)))); +} + +static __inline vec_uint4 spu_rl(vec_uint4 a, vec_int4 b) +{ + return ((vec_uint4)(si_rot((qword)(a), (qword)(b)))); +} + +static __inline vec_int4 spu_rl(vec_int4 a, vec_int4 b) +{ + return ((vec_int4)(si_rot((qword)(a), (qword)(b)))); +} + +static __inline vec_ushort8 spu_rl(vec_ushort8 a, int b) +{ + return ((vec_ushort8)(si_rothi((qword)(a), b))); +} + +static __inline vec_short8 spu_rl(vec_short8 a, int b) +{ + return ((vec_short8)(si_rothi((qword)(a), b))); +} + +static __inline vec_uint4 
spu_rl(vec_uint4 a, int b) +{ + return ((vec_uint4)(si_roti((qword)(a), b))); +} + +static __inline vec_int4 spu_rl(vec_int4 a, int b) +{ + return ((vec_int4)(si_roti((qword)(a), b))); +} + + +/* spu_rlmask + * ========== + */ +static __inline vec_ushort8 spu_rlmask(vec_ushort8 a, vec_short8 b) +{ + return ((vec_ushort8)(si_rothm((qword)(a), (qword)(b)))); +} + +static __inline vec_short8 spu_rlmask(vec_short8 a, vec_short8 b) +{ + return ((vec_short8)(si_rothm((qword)(a), (qword)(b)))); +} + +static __inline vec_uint4 spu_rlmask(vec_uint4 a, vec_int4 b) +{ + return ((vec_uint4)(si_rotm((qword)(a), (qword)(b)))); +} + +static __inline vec_int4 spu_rlmask(vec_int4 a, vec_int4 b) +{ + return ((vec_int4)(si_rotm((qword)(a), (qword)(b)))); +} + +static __inline vec_ushort8 spu_rlmask(vec_ushort8 a, int b) +{ + return ((vec_ushort8)(si_rothmi((qword)(a), b))); +} + +static __inline vec_short8 spu_rlmask(vec_short8 a, int b) +{ + return ((vec_short8)(si_rothmi((qword)(a), b))); +} + + +static __inline vec_uint4 spu_rlmask(vec_uint4 a, int b) +{ + return ((vec_uint4)(si_rotmi((qword)(a), b))); +} + +static __inline vec_int4 spu_rlmask(vec_int4 a, int b) +{ + return ((vec_int4)(si_rotmi((qword)(a), b))); +} + +/* spu_rlmaska + * =========== + */ +static __inline vec_short8 spu_rlmaska(vec_short8 a, vec_short8 b) +{ + return ((vec_short8)(si_rotmah((qword)(a), (qword)(b)))); +} + +static __inline vec_ushort8 spu_rlmaska(vec_ushort8 a, vec_short8 b) +{ + return ((vec_ushort8)(si_rotmah((qword)(a), (qword)(b)))); +} + + +static __inline vec_int4 spu_rlmaska(vec_int4 a, vec_int4 b) +{ + return ((vec_int4)(si_rotma((qword)(a), (qword)(b)))); +} + +static __inline vec_uint4 spu_rlmaska(vec_uint4 a, vec_int4 b) +{ + return ((vec_uint4)(si_rotma((qword)(a), (qword)(b)))); +} + +static __inline vec_ushort8 spu_rlmaska(vec_ushort8 a, int b) +{ + return ((vec_ushort8)(si_rotmahi((qword)(a), b))); +} + +static __inline vec_short8 spu_rlmaska(vec_short8 a, int b) +{ + return ((vec_short8)(si_rotmahi((qword)(a), b))); +} + +static __inline vec_uint4 spu_rlmaska(vec_uint4 a, int b) +{ + return ((vec_uint4)(si_rotmai((qword)(a), b))); +} + +static __inline vec_int4 spu_rlmaska(vec_int4 a, int b) +{ + return ((vec_int4)(si_rotmai((qword)(a), b))); +} + + +/* spu_rlmaskqw + * ============ + */ +static __inline vec_uchar16 spu_rlmaskqw(vec_uchar16 a, int count) +{ + return ((vec_uchar16)(si_rotqmbi((qword)(a), si_from_int(count)))); +} + +static __inline vec_char16 spu_rlmaskqw(vec_char16 a, int count) +{ + return ((vec_char16)(si_rotqmbi((qword)(a), si_from_int(count)))); +} + +static __inline vec_ushort8 spu_rlmaskqw(vec_ushort8 a, int count) +{ + return ((vec_ushort8)(si_rotqmbi((qword)(a), si_from_int(count)))); +} + +static __inline vec_short8 spu_rlmaskqw(vec_short8 a, int count) +{ + return ((vec_short8)(si_rotqmbi((qword)(a), si_from_int(count)))); +} + +static __inline vec_uint4 spu_rlmaskqw(vec_uint4 a, int count) +{ + return ((vec_uint4)(si_rotqmbi((qword)(a), si_from_int(count)))); +} + +static __inline vec_int4 spu_rlmaskqw(vec_int4 a, int count) +{ + return ((vec_int4)(si_rotqmbi((qword)(a), si_from_int(count)))); +} + +static __inline vec_float4 spu_rlmaskqw(vec_float4 a, int count) +{ + return ((vec_float4)(si_rotqmbi((qword)(a), si_from_int(count)))); +} + +static __inline vec_ullong2 spu_rlmaskqw(vec_ullong2 a, int count) +{ + return ((vec_ullong2)(si_rotqmbi((qword)(a), si_from_int(count)))); +} + +static __inline vec_llong2 spu_rlmaskqw(vec_llong2 a, int count) +{ + return 
((vec_llong2)(si_rotqmbi((qword)(a), si_from_int(count)))); +} + +static __inline vec_double2 spu_rlmaskqw(vec_double2 a, int count) +{ + return ((vec_double2)(si_rotqmbi((qword)(a), si_from_int(count)))); +} + +/* spu_rlmaskqwbyte + * ================ + */ +static __inline vec_uchar16 spu_rlmaskqwbyte(vec_uchar16 a, int count) +{ + return ((vec_uchar16)(si_rotqmby((qword)(a), si_from_int(count)))); +} + +static __inline vec_char16 spu_rlmaskqwbyte(vec_char16 a, int count) +{ + return ((vec_char16)(si_rotqmby((qword)(a), si_from_int(count)))); +} + +static __inline vec_ushort8 spu_rlmaskqwbyte(vec_ushort8 a, int count) +{ + return ((vec_ushort8)(si_rotqmby((qword)(a), si_from_int(count)))); +} + +static __inline vec_short8 spu_rlmaskqwbyte(vec_short8 a, int count) +{ + return ((vec_short8)(si_rotqmby((qword)(a), si_from_int(count)))); +} + +static __inline vec_uint4 spu_rlmaskqwbyte(vec_uint4 a, int count) +{ + return ((vec_uint4)(si_rotqmby((qword)(a), si_from_int(count)))); +} + +static __inline vec_int4 spu_rlmaskqwbyte(vec_int4 a, int count) +{ + return ((vec_int4)(si_rotqmby((qword)(a), si_from_int(count)))); +} + +static __inline vec_float4 spu_rlmaskqwbyte(vec_float4 a, int count) +{ + return ((vec_float4)(si_rotqmby((qword)(a), si_from_int(count)))); +} + +static __inline vec_ullong2 spu_rlmaskqwbyte(vec_ullong2 a, int count) +{ + return ((vec_ullong2)(si_rotqmby((qword)(a), si_from_int(count)))); +} + +static __inline vec_llong2 spu_rlmaskqwbyte(vec_llong2 a, int count) +{ + return ((vec_llong2)(si_rotqmby((qword)(a), si_from_int(count)))); +} + +static __inline vec_double2 spu_rlmaskqwbyte(vec_double2 a, int count) +{ + return ((vec_double2)(si_rotqmby((qword)(a), si_from_int(count)))); +} + +/* spu_rlmaskqwbytebc + * ================== + */ +static __inline vec_uchar16 spu_rlmaskqwbytebc(vec_uchar16 a, int count) +{ + return ((vec_uchar16)(si_rotqmbybi((qword)(a), si_from_int(count)))); +} + +static __inline vec_char16 spu_rlmaskqwbytebc(vec_char16 a, int count) +{ + return ((vec_char16)(si_rotqmbybi((qword)(a), si_from_int(count)))); +} + +static __inline vec_ushort8 spu_rlmaskqwbytebc(vec_ushort8 a, int count) +{ + return ((vec_ushort8)(si_rotqmbybi((qword)(a), si_from_int(count)))); +} + +static __inline vec_short8 spu_rlmaskqwbytebc(vec_short8 a, int count) +{ + return ((vec_short8)(si_rotqmbybi((qword)(a), si_from_int(count)))); +} + +static __inline vec_uint4 spu_rlmaskqwbytebc(vec_uint4 a, int count) +{ + return ((vec_uint4)(si_rotqmbybi((qword)(a), si_from_int(count)))); +} + +static __inline vec_int4 spu_rlmaskqwbytebc(vec_int4 a, int count) +{ + return ((vec_int4)(si_rotqmbybi((qword)(a), si_from_int(count)))); +} + +static __inline vec_float4 spu_rlmaskqwbytebc(vec_float4 a, int count) +{ + return ((vec_float4)(si_rotqmbybi((qword)(a), si_from_int(count)))); +} + +static __inline vec_ullong2 spu_rlmaskqwbytebc(vec_ullong2 a, int count) +{ + return ((vec_ullong2)(si_rotqmbybi((qword)(a), si_from_int(count)))); +} + +static __inline vec_llong2 spu_rlmaskqwbytebc(vec_llong2 a, int count) +{ + return ((vec_llong2)(si_rotqmbybi((qword)(a), si_from_int(count)))); +} + +static __inline vec_double2 spu_rlmaskqwbytebc(vec_double2 a, int count) +{ + return ((vec_double2)(si_rotqmbybi((qword)(a), si_from_int(count)))); +} + + +/* spu_rlqwbyte + * ============ + */ +static __inline vec_uchar16 spu_rlqwbyte(vec_uchar16 a, int count) +{ + return ((vec_uchar16)(si_rotqby((qword)(a), si_from_int(count)))); +} + +static __inline vec_char16 spu_rlqwbyte(vec_char16 a, int count) +{ + 
return ((vec_char16)(si_rotqby((qword)(a), si_from_int(count)))); +} + +static __inline vec_ushort8 spu_rlqwbyte(vec_ushort8 a, int count) +{ + return ((vec_ushort8)(si_rotqby((qword)(a), si_from_int(count)))); +} + +static __inline vec_short8 spu_rlqwbyte(vec_short8 a, int count) +{ + return ((vec_short8)(si_rotqby((qword)(a), si_from_int(count)))); +} + +static __inline vec_uint4 spu_rlqwbyte(vec_uint4 a, int count) +{ + return ((vec_uint4)(si_rotqby((qword)(a), si_from_int(count)))); +} + +static __inline vec_int4 spu_rlqwbyte(vec_int4 a, int count) +{ + return ((vec_int4)(si_rotqby((qword)(a), si_from_int(count)))); +} + +static __inline vec_float4 spu_rlqwbyte(vec_float4 a, int count) +{ + return ((vec_float4)(si_rotqby((qword)(a), si_from_int(count)))); +} + +static __inline vec_ullong2 spu_rlqwbyte(vec_ullong2 a, int count) +{ + return ((vec_ullong2)(si_rotqby((qword)(a), si_from_int(count)))); +} + +static __inline vec_llong2 spu_rlqwbyte(vec_llong2 a, int count) +{ + return ((vec_llong2)(si_rotqby((qword)(a), si_from_int(count)))); +} + +static __inline vec_double2 spu_rlqwbyte(vec_double2 a, int count) +{ + return ((vec_double2)(si_rotqby((qword)(a), si_from_int(count)))); +} + + +/* spu_rlqwbytebc + * ============== + */ +static __inline vec_uchar16 spu_rlqwbytebc(vec_uchar16 a, int count) +{ + return ((vec_uchar16)(si_rotqbybi((qword)(a), si_from_int(count)))); +} + +static __inline vec_char16 spu_rlqwbytebc(vec_char16 a, int count) +{ + return ((vec_char16)(si_rotqbybi((qword)(a), si_from_int(count)))); +} + +static __inline vec_ushort8 spu_rlqwbytebc(vec_ushort8 a, int count) +{ + return ((vec_ushort8)(si_rotqbybi((qword)(a), si_from_int(count)))); +} + +static __inline vec_short8 spu_rlqwbytebc(vec_short8 a, int count) +{ + return ((vec_short8)(si_rotqbybi((qword)(a), si_from_int(count)))); +} + +static __inline vec_uint4 spu_rlqwbytebc(vec_uint4 a, int count) +{ + return ((vec_uint4)(si_rotqbybi((qword)(a), si_from_int(count)))); +} + +static __inline vec_int4 spu_rlqwbytebc(vec_int4 a, int count) +{ + return ((vec_int4)(si_rotqbybi((qword)(a), si_from_int(count)))); +} + +static __inline vec_float4 spu_rlqwbytebc(vec_float4 a, int count) +{ + return ((vec_float4)(si_rotqbybi((qword)(a), si_from_int(count)))); +} + +static __inline vec_ullong2 spu_rlqwbytebc(vec_ullong2 a, int count) +{ + return ((vec_ullong2)(si_rotqbybi((qword)(a), si_from_int(count)))); +} + +static __inline vec_llong2 spu_rlqwbytebc(vec_llong2 a, int count) +{ + return ((vec_llong2)(si_rotqbybi((qword)(a), si_from_int(count)))); +} + +static __inline vec_double2 spu_rlqwbytebc(vec_double2 a, int count) +{ + return ((vec_double2)(si_rotqbybi((qword)(a), si_from_int(count)))); +} + +/* spu_rlqw + * ======== + */ +static __inline vec_uchar16 spu_rlqw(vec_uchar16 a, int count) +{ + return ((vec_uchar16)(si_rotqbi((qword)(a), si_from_int(count)))); +} + +static __inline vec_char16 spu_rlqw(vec_char16 a, int count) +{ + return ((vec_char16)(si_rotqbi((qword)(a), si_from_int(count)))); +} + +static __inline vec_ushort8 spu_rlqw(vec_ushort8 a, int count) +{ + return ((vec_ushort8)(si_rotqbi((qword)(a), si_from_int(count)))); +} + +static __inline vec_short8 spu_rlqw(vec_short8 a, int count) +{ + return ((vec_short8)(si_rotqbi((qword)(a), si_from_int(count)))); +} + +static __inline vec_uint4 spu_rlqw(vec_uint4 a, int count) +{ + return ((vec_uint4)(si_rotqbi((qword)(a), si_from_int(count)))); +} + +static __inline vec_int4 spu_rlqw(vec_int4 a, int count) +{ + return ((vec_int4)(si_rotqbi((qword)(a), 
si_from_int(count)))); +} + +static __inline vec_float4 spu_rlqw(vec_float4 a, int count) +{ + return ((vec_float4)(si_rotqbi((qword)(a), si_from_int(count)))); +} + +static __inline vec_ullong2 spu_rlqw(vec_ullong2 a, int count) +{ + return ((vec_ullong2)(si_rotqbi((qword)(a), si_from_int(count)))); +} + +static __inline vec_llong2 spu_rlqw(vec_llong2 a, int count) +{ + return ((vec_llong2)(si_rotqbi((qword)(a), si_from_int(count)))); +} + +static __inline vec_double2 spu_rlqw(vec_double2 a, int count) +{ + return ((vec_double2)(si_rotqbi((qword)(a), si_from_int(count)))); +} + +/* spu_roundtf + * =========== + */ +static __inline vec_float4 spu_roundtf(vec_double2 a) +{ + return ((vec_float4)(si_frds((qword)(a)))); +} + + +/* spu_rsqrte + * ========== + */ +#define spu_rsqrte(_a) vec_rsqrte(_a) + + +/* spu_sel + * ======= + */ +static __inline vec_uchar16 spu_sel(vec_uchar16 a, vec_uchar16 b, vec_uchar16 pattern) +{ + return ((vec_uchar16)(si_selb((qword)(a), (qword)(b), (qword)(pattern)))); +} + +static __inline vec_char16 spu_sel(vec_char16 a, vec_char16 b, vec_uchar16 pattern) +{ + return ((vec_char16)(si_selb((qword)(a), (qword)(b), (qword)(pattern)))); +} + +static __inline vec_ushort8 spu_sel(vec_ushort8 a, vec_ushort8 b, vec_ushort8 pattern) +{ + return ((vec_ushort8)(si_selb((qword)(a), (qword)(b), (qword)(pattern)))); +} + +static __inline vec_short8 spu_sel(vec_short8 a, vec_short8 b, vec_ushort8 pattern) +{ + return ((vec_short8)(si_selb((qword)(a), (qword)(b), (qword)(pattern)))); +} + +static __inline vec_uint4 spu_sel(vec_uint4 a, vec_uint4 b, vec_uint4 pattern) +{ + return ((vec_uint4)(si_selb((qword)(a), (qword)(b), (qword)(pattern)))); +} + +static __inline vec_int4 spu_sel(vec_int4 a, vec_int4 b, vec_uint4 pattern) +{ + return ((vec_int4)(si_selb((qword)(a), (qword)(b), (qword)(pattern)))); +} + +static __inline vec_float4 spu_sel(vec_float4 a, vec_float4 b, vec_uint4 pattern) +{ + return ((vec_float4)(si_selb((qword)(a), (qword)(b), (qword)(pattern)))); +} + +static __inline vec_ullong2 spu_sel(vec_ullong2 a, vec_ullong2 b, vec_ullong2 pattern) +{ + return ((vec_ullong2)(si_selb((qword)(a), (qword)(b), (qword)(pattern)))); +} + +static __inline vec_llong2 spu_sel(vec_llong2 a, vec_llong2 b, vec_ullong2 pattern) +{ + return ((vec_llong2)(si_selb((qword)(a), (qword)(b), (qword)(pattern)))); +} + +static __inline vec_double2 spu_sel(vec_double2 a, vec_double2 b, vec_ullong2 pattern) +{ + return ((vec_double2)(si_selb((qword)(a), (qword)(b), (qword)(pattern)))); +} + + + +/* spu_shuffle + * =========== + */ +static __inline vec_uchar16 spu_shuffle(vec_uchar16 a, vec_uchar16 b, vec_uchar16 pattern) +{ + return ((vec_uchar16)(si_shufb((qword)(a), (qword)(b), (qword)(pattern)))); +} + +static __inline vec_char16 spu_shuffle(vec_char16 a, vec_char16 b, vec_uchar16 pattern) +{ + return ((vec_char16)(si_shufb((qword)(a), (qword)(b), (qword)(pattern)))); +} + +static __inline vec_ushort8 spu_shuffle(vec_ushort8 a, vec_ushort8 b, vec_uchar16 pattern) +{ + return ((vec_ushort8)(si_shufb((qword)(a), (qword)(b), (qword)(pattern)))); +} + +static __inline vec_short8 spu_shuffle(vec_short8 a, vec_short8 b, vec_uchar16 pattern) +{ + return ((vec_short8)(si_shufb((qword)(a), (qword)(b), (qword)(pattern)))); +} + +static __inline vec_uint4 spu_shuffle(vec_uint4 a, vec_uint4 b, vec_uchar16 pattern) +{ + return ((vec_uint4)(si_shufb((qword)(a), (qword)(b), (qword)(pattern)))); +} + +static __inline vec_int4 spu_shuffle(vec_int4 a, vec_int4 b, vec_uchar16 pattern) +{ + return 
((vec_int4)(si_shufb((qword)(a), (qword)(b), (qword)(pattern)))); +} + +static __inline vec_float4 spu_shuffle(vec_float4 a, vec_float4 b, vec_uchar16 pattern) +{ + return ((vec_float4)(si_shufb((qword)(a), (qword)(b), (qword)(pattern)))); +} + +static __inline vec_ullong2 spu_shuffle(vec_ullong2 a, vec_ullong2 b, vec_uchar16 pattern) +{ + return ((vec_ullong2)(si_shufb((qword)(a), (qword)(b), (qword)(pattern)))); +} + +static __inline vec_llong2 spu_shuffle(vec_llong2 a, vec_llong2 b, vec_uchar16 pattern) +{ + return ((vec_llong2)(si_shufb((qword)(a), (qword)(b), (qword)(pattern)))); +} + +static __inline vec_double2 spu_shuffle(vec_double2 a, vec_double2 b, vec_uchar16 pattern) +{ + return ((vec_double2)(si_shufb((qword)(a), (qword)(b), (qword)(pattern)))); +} + + +/* spu_sl + * ====== + */ +static __inline vec_ushort8 spu_sl(vec_ushort8 a, vec_ushort8 b) +{ + return ((vec_ushort8)(si_shlh((qword)(a), (qword)(b)))); +} + +static __inline vec_short8 spu_sl(vec_short8 a, vec_ushort8 b) +{ + return ((vec_short8)(si_shlh((qword)(a), (qword)(b)))); +} + +static __inline vec_uint4 spu_sl(vec_uint4 a, vec_uint4 b) +{ + return ((vec_uint4)(si_shl((qword)(a), (qword)(b)))); +} + +static __inline vec_int4 spu_sl(vec_int4 a, vec_uint4 b) +{ + return ((vec_int4)(si_shl((qword)(a), (qword)(b)))); +} + +static __inline vec_ushort8 spu_sl(vec_ushort8 a, unsigned int b) +{ + return ((vec_ushort8)(si_shlhi((qword)(a), b))); +} + +static __inline vec_short8 spu_sl(vec_short8 a, unsigned int b) +{ + return ((vec_short8)(si_shlhi((qword)(a), b))); +} + +static __inline vec_uint4 spu_sl(vec_uint4 a, unsigned int b) +{ + return ((vec_uint4)(si_shli((qword)(a), b))); +} + +static __inline vec_int4 spu_sl(vec_int4 a, unsigned int b) +{ + return ((vec_int4)(si_shli((qword)(a), b))); +} + + +/* spu_slqw + * ======== + */ +static __inline vec_uchar16 spu_slqw(vec_uchar16 a, unsigned int count) +{ + return ((vec_uchar16)(si_shlqbi((qword)(a), si_from_uint(count)))); +} + +static __inline vec_char16 spu_slqw(vec_char16 a, unsigned int count) +{ + return ((vec_char16)(si_shlqbi((qword)(a), si_from_uint(count)))); +} + +static __inline vec_ushort8 spu_slqw(vec_ushort8 a, unsigned int count) +{ + return ((vec_ushort8)(si_shlqbi((qword)(a), si_from_uint(count)))); +} + +static __inline vec_short8 spu_slqw(vec_short8 a, unsigned int count) +{ + return ((vec_short8)(si_shlqbi((qword)(a), si_from_uint(count)))); +} + +static __inline vec_uint4 spu_slqw(vec_uint4 a, unsigned int count) +{ + return ((vec_uint4)(si_shlqbi((qword)(a), si_from_uint(count)))); +} + +static __inline vec_int4 spu_slqw(vec_int4 a, unsigned int count) +{ + return ((vec_int4)(si_shlqbi((qword)(a), si_from_uint(count)))); +} + +static __inline vec_float4 spu_slqw(vec_float4 a, unsigned int count) +{ + return ((vec_float4)(si_shlqbi((qword)(a), si_from_uint(count)))); +} + +static __inline vec_ullong2 spu_slqw(vec_ullong2 a, unsigned int count) +{ + return ((vec_ullong2)(si_shlqbi((qword)(a), si_from_uint(count)))); +} + +static __inline vec_llong2 spu_slqw(vec_llong2 a, unsigned int count) +{ + return ((vec_llong2)(si_shlqbi((qword)(a), si_from_uint(count)))); +} + +static __inline vec_double2 spu_slqw(vec_double2 a, unsigned int count) +{ + return ((vec_double2)(si_shlqbi((qword)(a), si_from_uint(count)))); +} + +/* spu_slqwbyte + * ============ + */ +static __inline vec_uchar16 spu_slqwbyte(vec_uchar16 a, unsigned int count) +{ + return ((vec_uchar16)(si_shlqby((qword)(a), si_from_uint(count)))); +} + +static __inline vec_char16 
spu_slqwbyte(vec_char16 a, unsigned int count) +{ + return ((vec_char16)(si_shlqby((qword)(a), si_from_uint(count)))); +} + +static __inline vec_ushort8 spu_slqwbyte(vec_ushort8 a, unsigned int count) +{ + return ((vec_ushort8)(si_shlqby((qword)(a), si_from_uint(count)))); +} + +static __inline vec_short8 spu_slqwbyte(vec_short8 a, unsigned int count) +{ + return ((vec_short8)(si_shlqby((qword)(a), si_from_uint(count)))); +} + +static __inline vec_uint4 spu_slqwbyte(vec_uint4 a, unsigned int count) +{ + return ((vec_uint4)(si_shlqby((qword)(a), si_from_uint(count)))); +} + +static __inline vec_int4 spu_slqwbyte(vec_int4 a, unsigned int count) +{ + return ((vec_int4)(si_shlqby((qword)(a), si_from_uint(count)))); +} + +static __inline vec_float4 spu_slqwbyte(vec_float4 a, unsigned int count) +{ + return ((vec_float4)(si_shlqby((qword)(a), si_from_uint(count)))); +} + +static __inline vec_ullong2 spu_slqwbyte(vec_ullong2 a, unsigned int count) +{ + return ((vec_ullong2)(si_shlqby((qword)(a), si_from_uint(count)))); +} + +static __inline vec_llong2 spu_slqwbyte(vec_llong2 a, unsigned int count) +{ + return ((vec_llong2)(si_shlqby((qword)(a), si_from_uint(count)))); +} + +static __inline vec_double2 spu_slqwbyte(vec_double2 a, unsigned int count) +{ + return ((vec_double2)(si_shlqby((qword)(a), si_from_uint(count)))); +} + +/* spu_slqwbytebc + * ============== + */ +static __inline vec_uchar16 spu_slqwbytebc(vec_uchar16 a, unsigned int count) +{ + return ((vec_uchar16)(si_shlqbybi((qword)(a), si_from_uint(count)))); +} + +static __inline vec_char16 spu_slqwbytebc(vec_char16 a, unsigned int count) +{ + return ((vec_char16)(si_shlqbybi((qword)(a), si_from_uint(count)))); +} + +static __inline vec_ushort8 spu_slqwbytebc(vec_ushort8 a, unsigned int count) +{ + return ((vec_ushort8)(si_shlqbybi((qword)(a), si_from_uint(count)))); +} + +static __inline vec_short8 spu_slqwbytebc(vec_short8 a, unsigned int count) +{ + return ((vec_short8)(si_shlqbybi((qword)(a), si_from_uint(count)))); +} + +static __inline vec_uint4 spu_slqwbytebc(vec_uint4 a, unsigned int count) +{ + return ((vec_uint4)(si_shlqbybi((qword)(a), si_from_uint(count)))); +} + +static __inline vec_int4 spu_slqwbytebc(vec_int4 a, unsigned int count) +{ + return ((vec_int4)(si_shlqbybi((qword)(a), si_from_uint(count)))); +} + +static __inline vec_float4 spu_slqwbytebc(vec_float4 a, unsigned int count) +{ + return ((vec_float4)(si_shlqbybi((qword)(a), si_from_uint(count)))); +} + +static __inline vec_ullong2 spu_slqwbytebc(vec_ullong2 a, unsigned int count) +{ + return ((vec_ullong2)(si_shlqbybi((qword)(a), si_from_uint(count)))); +} + +static __inline vec_llong2 spu_slqwbytebc(vec_llong2 a, unsigned int count) +{ + return ((vec_llong2)(si_shlqbybi((qword)(a), si_from_uint(count)))); +} + +static __inline vec_double2 spu_slqwbytebc(vec_double2 a, unsigned int count) +{ + return ((vec_double2)(si_shlqbybi((qword)(a), si_from_uint(count)))); +} + +/* spu_splats + * ========== + */ +static __inline vec_uchar16 spu_splats(unsigned char a) +{ + union { + vec_uchar16 v; + unsigned char c[16]; + } in; + + in.c[0] = a; + return (vec_splat(in.v, 0)); +} + +static __inline vec_char16 spu_splats(signed char a) +{ + return ((vec_char16)spu_splats((unsigned char)(a))); +} + +static __inline vec_ushort8 spu_splats(unsigned short a) +{ + union { + vec_ushort8 v; + unsigned short s[8]; + } in; + + in.s[0] = a; + return (vec_splat(in.v, 0)); +} + +static __inline vec_short8 spu_splats(signed short a) +{ + return ((vec_short8)spu_splats((unsigned short)(a))); 
+} + +static __inline vec_uint4 spu_splats(unsigned int a) +{ + union { + vec_uint4 v; + unsigned int i[4]; + } in; + + in.i[0] = a; + return (vec_splat(in.v, 0)); +} + +static __inline vec_int4 spu_splats(signed int a) +{ + return ((vec_int4)spu_splats((unsigned int)(a))); +} + +static __inline vec_float4 spu_splats(float a) +{ + union { + vec_float4 v; + float f[4]; + } in; + + in.f[0] = a; + return (vec_splat(in.v, 0)); +} + +static __inline vec_ullong2 spu_splats(unsigned long long a) +{ + union { + vec_ullong2 v; + unsigned long long l[2]; + } in; + + in.l[0] = a; + in.l[1] = a; + return (in.v); +} + +static __inline vec_llong2 spu_splats(signed long long a) +{ + return ((vec_llong2)spu_splats((unsigned long long)(a))); +} + +static __inline vec_double2 spu_splats(double a) +{ + union { + vec_double2 v; + double d[2]; + } in; + + in.d[0] = a; + in.d[1] = a; + return (in.v); +} + + +/* spu_stop + * ======== + */ +#define spu_stop(_type) si_stop(_type) + + +/* spu_sub + * ======= + */ +static __inline vec_ushort8 spu_sub(vec_ushort8 a, vec_ushort8 b) +{ + return ((vec_ushort8)(si_sfh((qword)(b), (qword)(a)))); +} + +static __inline vec_short8 spu_sub(vec_short8 a, vec_short8 b) +{ + return ((vec_short8)(si_sfh((qword)(b), (qword)(a)))); +} + +static __inline vec_uint4 spu_sub(vec_uint4 a, vec_uint4 b) +{ + return ((vec_uint4)(si_sf((qword)(b), (qword)(a)))); +} + +static __inline vec_int4 spu_sub(vec_int4 a, vec_int4 b) +{ + return ((vec_int4)(si_sf((qword)(b), (qword)(a)))); +} + +static __inline vec_float4 spu_sub(vec_float4 a, vec_float4 b) +{ + return ((vec_float4)(si_fs((qword)(a), (qword)(b)))); +} + +static __inline vec_double2 spu_sub(vec_double2 a, vec_double2 b) +{ + return ((vec_double2)(si_dfs((qword)(a), (qword)(b)))); +} + +static __inline vec_uint4 spu_sub(unsigned int a, vec_uint4 b) +{ + return ((vec_uint4)(si_sfi((qword)b, (int)a))); +} + +static __inline vec_int4 spu_sub(signed int a, vec_int4 b) +{ + return ((vec_int4)(si_sfi((qword)b, (int)a))); +} + +static __inline vec_ushort8 spu_sub(unsigned short a, vec_ushort8 b) +{ + return ((vec_ushort8)(si_sfhi((qword)b, (short)a))); +} + +static __inline vec_short8 spu_sub(signed short a, vec_short8 b) +{ + return ((vec_short8)(si_sfhi((qword)b, (short)a))); +} + +/* spu_subx + * ======== + */ +static __inline vec_uint4 spu_subx(vec_uint4 a, vec_uint4 b, vec_uint4 c) +{ + return ((vec_uint4)(si_sfx((qword)(b), (qword)(a), (qword)(c)))); +} + +static __inline vec_int4 spu_subx(vec_int4 a, vec_int4 b, vec_int4 c) +{ + return ((vec_int4)(si_sfx((qword)(b), (qword)(a), (qword)(c)))); +} + +/* spu_sumb + * ======== + */ +static __inline vec_ushort8 spu_sumb(vec_uchar16 a, vec_uchar16 b) +{ + return ((vec_ushort8)(si_sumb((qword)(a), (qword)(b)))); +} + + +/* spu_sync + * spu_sync_c + * ======== + */ +#define spu_sync() /* do nothing */ + +#define spu_sync_c() /* do nothing */ + + +/* spu_writech + * =========== + */ +#define spu_writech(_channel, _a) /* not mappable */ + +/* spu_writechqw + * ============= + */ +#define spu_writechqw(_channel, _a) /* not mappable */ + + +/* spu_xor + * ======= + */ +static __inline vec_uchar16 spu_xor(vec_uchar16 a, vec_uchar16 b) +{ + return ((vec_uchar16)(si_xor((qword)(a), (qword)(b)))); +} + +static __inline vec_char16 spu_xor(vec_char16 a, vec_char16 b) +{ + return ((vec_char16)(si_xor((qword)(a), (qword)(b)))); +} + +static __inline vec_ushort8 spu_xor(vec_ushort8 a, vec_ushort8 b) +{ + return ((vec_ushort8)(si_xor((qword)(a), (qword)(b)))); +} + +static __inline vec_short8 
spu_xor(vec_short8 a, vec_short8 b) +{ + return ((vec_short8)(si_xor((qword)(a), (qword)(b)))); +} + +static __inline vec_uint4 spu_xor(vec_uint4 a, vec_uint4 b) +{ + return ((vec_uint4)(si_xor((qword)(a), (qword)(b)))); +} + +static __inline vec_int4 spu_xor(vec_int4 a, vec_int4 b) +{ + return ((vec_int4)(si_xor((qword)(a), (qword)(b)))); +} + +static __inline vec_float4 spu_xor(vec_float4 a, vec_float4 b) +{ + return ((vec_float4)(si_xor((qword)(a), (qword)(b)))); +} + +static __inline vec_ullong2 spu_xor(vec_ullong2 a, vec_ullong2 b) +{ + return ((vec_ullong2)(si_xor((qword)(a), (qword)(b)))); +} + +static __inline vec_llong2 spu_xor(vec_llong2 a, vec_llong2 b) +{ + return ((vec_llong2)(si_xor((qword)(a), (qword)(b)))); +} + +static __inline vec_double2 spu_xor(vec_double2 a, vec_double2 b) +{ + return ((vec_double2)(si_xor((qword)(a), (qword)(b)))); +} + +static __inline vec_uchar16 spu_xor(vec_uchar16 a, unsigned char b) +{ + return ((vec_uchar16)(si_xorbi((qword)(a), b))); +} + +static __inline vec_char16 spu_xor(vec_char16 a, signed char b) +{ + return ((vec_char16)(si_xorbi((qword)(a), (unsigned char)(b)))); +} + +static __inline vec_ushort8 spu_xor(vec_ushort8 a, unsigned short b) +{ + return ((vec_ushort8)(si_xorhi((qword)(a), b))); +} + +static __inline vec_short8 spu_xor(vec_short8 a, signed short b) +{ + return ((vec_short8)(si_xorhi((qword)(a), (unsigned short)(b)))); +} + +static __inline vec_uint4 spu_xor(vec_uint4 a, unsigned int b) +{ + return ((vec_uint4)(si_xori((qword)(a), b))); +} + +static __inline vec_int4 spu_xor(vec_int4 a, signed int b) +{ + return ((vec_int4)(si_xori((qword)(a), (unsigned int)(b)))); +} + +#endif /* !__SPU__ */ +#endif /* __cplusplus */ +#endif /* !_SPU2VMX_H_ */ diff --git a/gcc/config/powerpcspe/sync.md b/gcc/config/powerpcspe/sync.md new file mode 100644 index 000000000000..2a1828e3e7ae --- /dev/null +++ b/gcc/config/powerpcspe/sync.md @@ -0,0 +1,484 @@ +;; Machine description for PowerPC synchronization instructions. +;; Copyright (C) 2005-2017 Free Software Foundation, Inc. +;; Contributed by Geoffrey Keating. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. 
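+
+;; The patterns below implement the memory-ordering and atomic-access
+;; primitives that the GNU/C11 __atomic built-ins expand to.  For example,
+;; for a plain 32-bit object x, a sequentially consistent load such as
+;;   int v = __atomic_load_n (&x, __ATOMIC_SEQ_CST);
+;; becomes a hwsync, an ordinary load of x, and a loadsync fence
+;; (cmpw/bne-/isync), while a relaxed load omits both fences.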
+ +(define_mode_attr larx [(QI "lbarx") + (HI "lharx") + (SI "lwarx") + (DI "ldarx") + (TI "lqarx")]) + +(define_mode_attr stcx [(QI "stbcx.") + (HI "sthcx.") + (SI "stwcx.") + (DI "stdcx.") + (TI "stqcx.")]) + +(define_code_iterator FETCHOP [plus minus ior xor and]) +(define_code_attr fetchop_name + [(plus "add") (minus "sub") (ior "or") (xor "xor") (and "and")]) +(define_code_attr fetchop_pred + [(plus "add_operand") (minus "int_reg_operand") + (ior "logical_operand") (xor "logical_operand") (and "and_operand")]) + +(define_expand "mem_thread_fence" + [(match_operand:SI 0 "const_int_operand" "")] ;; model + "" +{ + enum memmodel model = memmodel_base (INTVAL (operands[0])); + switch (model) + { + case MEMMODEL_RELAXED: + break; + case MEMMODEL_CONSUME: + case MEMMODEL_ACQUIRE: + case MEMMODEL_RELEASE: + case MEMMODEL_ACQ_REL: + emit_insn (gen_lwsync ()); + break; + case MEMMODEL_SEQ_CST: + emit_insn (gen_hwsync ()); + break; + default: + gcc_unreachable (); + } + DONE; +}) + +(define_expand "hwsync" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_SYNC))] + "" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*hwsync" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_SYNC))] + "" + "sync" + [(set_attr "type" "sync")]) + +(define_expand "lwsync" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_LWSYNC))] + "" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*lwsync" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_LWSYNC))] + "" +{ + /* Some AIX assemblers don't accept lwsync, so we use a .long. */ + if (TARGET_NO_LWSYNC) + return "sync"; + else if (TARGET_LWSYNC_INSTRUCTION) + return "lwsync"; + else + return ".long 0x7c2004ac"; +} + [(set_attr "type" "sync")]) + +(define_insn "isync" + [(unspec_volatile:BLK [(const_int 0)] UNSPECV_ISYNC)] + "" + "isync" + [(set_attr "type" "isync")]) + +;; Types that we should provide atomic instructions for. +(define_mode_iterator AINT [QI + HI + SI + (DI "TARGET_POWERPC64") + (TI "TARGET_SYNC_TI")]) + +;; The control dependency used for load dependency described +;; in B.2.3 of the Power ISA 2.06B. 
+(define_insn "loadsync_<mode>" + [(unspec_volatile:BLK [(match_operand:AINT 0 "register_operand" "r")] + UNSPECV_ISYNC) + (clobber (match_scratch:CC 1 "=y"))] + "" + "cmpw %1,%0,%0\;bne- %1,$+4\;isync" + [(set_attr "type" "isync") + (set_attr "length" "12")]) + +(define_insn "load_quadpti" + [(set (match_operand:PTI 0 "quad_int_reg_operand" "=&r") + (unspec:PTI + [(match_operand:TI 1 "quad_memory_operand" "wQ")] UNSPEC_LSQ))] + "TARGET_SYNC_TI + && !reg_mentioned_p (operands[0], operands[1])" + "lq %0,%1" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +(define_expand "atomic_load<mode>" + [(set (match_operand:AINT 0 "register_operand" "") ;; output + (match_operand:AINT 1 "memory_operand" "")) ;; memory + (use (match_operand:SI 2 "const_int_operand" ""))] ;; model + "" +{ + if (<MODE>mode == TImode && !TARGET_SYNC_TI) + FAIL; + + enum memmodel model = memmodel_base (INTVAL (operands[2])); + + if (is_mm_seq_cst (model)) + emit_insn (gen_hwsync ()); + + if (<MODE>mode != TImode) + emit_move_insn (operands[0], operands[1]); + else + { + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx pti_reg = gen_reg_rtx (PTImode); + + if (!quad_address_p (XEXP (op1, 0), TImode, false)) + { + rtx old_addr = XEXP (op1, 0); + rtx new_addr = force_reg (Pmode, old_addr); + operands[1] = op1 = replace_equiv_address (op1, new_addr); + } + + emit_insn (gen_load_quadpti (pti_reg, op1)); + + if (WORDS_BIG_ENDIAN) + emit_move_insn (op0, gen_lowpart (TImode, pti_reg)); + else + { + emit_move_insn (gen_lowpart (DImode, op0), gen_highpart (DImode, pti_reg)); + emit_move_insn (gen_highpart (DImode, op0), gen_lowpart (DImode, pti_reg)); + } + } + + switch (model) + { + case MEMMODEL_RELAXED: + break; + case MEMMODEL_CONSUME: + case MEMMODEL_ACQUIRE: + case MEMMODEL_SEQ_CST: + emit_insn (gen_loadsync_<mode> (operands[0])); + break; + default: + gcc_unreachable (); + } + DONE; +}) + +(define_insn "store_quadpti" + [(set (match_operand:PTI 0 "quad_memory_operand" "=wQ") + (unspec:PTI + [(match_operand:PTI 1 "quad_int_reg_operand" "r")] UNSPEC_LSQ))] + "TARGET_SYNC_TI" + "stq %1,%0" + [(set_attr "type" "store") + (set_attr "length" "4")]) + +(define_expand "atomic_store<mode>" + [(set (match_operand:AINT 0 "memory_operand" "") ;; memory + (match_operand:AINT 1 "register_operand" "")) ;; input + (use (match_operand:SI 2 "const_int_operand" ""))] ;; model + "" +{ + if (<MODE>mode == TImode && !TARGET_SYNC_TI) + FAIL; + + enum memmodel model = memmodel_base (INTVAL (operands[2])); + switch (model) + { + case MEMMODEL_RELAXED: + break; + case MEMMODEL_RELEASE: + emit_insn (gen_lwsync ()); + break; + case MEMMODEL_SEQ_CST: + emit_insn (gen_hwsync ()); + break; + default: + gcc_unreachable (); + } + if (<MODE>mode != TImode) + emit_move_insn (operands[0], operands[1]); + else + { + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx pti_reg = gen_reg_rtx (PTImode); + + if (!quad_address_p (XEXP (op0, 0), TImode, false)) + { + rtx old_addr = XEXP (op0, 0); + rtx new_addr = force_reg (Pmode, old_addr); + operands[0] = op0 = replace_equiv_address (op0, new_addr); + } + + if (WORDS_BIG_ENDIAN) + emit_move_insn (pti_reg, gen_lowpart (PTImode, op1)); + else + { + emit_move_insn (gen_lowpart (DImode, pti_reg), gen_highpart (DImode, op1)); + emit_move_insn (gen_highpart (DImode, pti_reg), gen_lowpart (DImode, op1)); + } + + emit_insn (gen_store_quadpti (gen_lowpart (PTImode, op0), pti_reg)); + } + + DONE; +}) + +;; Any supported integer mode that has atomic l<x>arx/st<x>cx. 
instrucitons +;; other than the quad memory operations, which have special restrictions. +;; Byte/halfword atomic instructions were added in ISA 2.06B, but were phased +;; in and did not show up until power8. TImode atomic lqarx/stqcx. require +;; special handling due to even/odd register requirements. +(define_mode_iterator ATOMIC [(QI "TARGET_SYNC_HI_QI") + (HI "TARGET_SYNC_HI_QI") + SI + (DI "TARGET_POWERPC64")]) + +(define_insn "load_locked<mode>" + [(set (match_operand:ATOMIC 0 "int_reg_operand" "=r") + (unspec_volatile:ATOMIC + [(match_operand:ATOMIC 1 "memory_operand" "Z")] UNSPECV_LL))] + "" + "<larx> %0,%y1" + [(set_attr "type" "load_l")]) + +(define_insn "load_locked<QHI:mode>_si" + [(set (match_operand:SI 0 "int_reg_operand" "=r") + (unspec_volatile:SI + [(match_operand:QHI 1 "memory_operand" "Z")] UNSPECV_LL))] + "TARGET_SYNC_HI_QI" + "<QHI:larx> %0,%y1" + [(set_attr "type" "load_l")]) + +;; Use PTImode to get even/odd register pairs. +;; Use a temporary register to force getting an even register for the +;; lqarx/stqcrx. instructions. Normal optimizations will eliminate this extra +;; copy on big endian systems. + +;; On little endian systems where non-atomic quad word load/store instructions +;; are not used, the address can be register+offset, so make sure the address +;; is indexed or indirect before register allocation. + +(define_expand "load_lockedti" + [(use (match_operand:TI 0 "quad_int_reg_operand" "")) + (use (match_operand:TI 1 "memory_operand" ""))] + "TARGET_SYNC_TI" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx pti = gen_reg_rtx (PTImode); + + if (!indexed_or_indirect_operand (op1, TImode)) + { + rtx old_addr = XEXP (op1, 0); + rtx new_addr = force_reg (Pmode, old_addr); + operands[1] = op1 = change_address (op1, TImode, new_addr); + } + + emit_insn (gen_load_lockedpti (pti, op1)); + if (WORDS_BIG_ENDIAN) + emit_move_insn (op0, gen_lowpart (TImode, pti)); + else + { + emit_move_insn (gen_lowpart (DImode, op0), gen_highpart (DImode, pti)); + emit_move_insn (gen_highpart (DImode, op0), gen_lowpart (DImode, pti)); + } + DONE; +}) + +(define_insn "load_lockedpti" + [(set (match_operand:PTI 0 "quad_int_reg_operand" "=&r") + (unspec_volatile:PTI + [(match_operand:TI 1 "indexed_or_indirect_operand" "Z")] UNSPECV_LL))] + "TARGET_SYNC_TI + && !reg_mentioned_p (operands[0], operands[1]) + && quad_int_reg_operand (operands[0], PTImode)" + "lqarx %0,%y1" + [(set_attr "type" "load_l")]) + +(define_insn "store_conditional<mode>" + [(set (match_operand:CC 0 "cc_reg_operand" "=x") + (unspec_volatile:CC [(const_int 0)] UNSPECV_SC)) + (set (match_operand:ATOMIC 1 "memory_operand" "=Z") + (match_operand:ATOMIC 2 "int_reg_operand" "r"))] + "" + "<stcx> %2,%y1" + [(set_attr "type" "store_c")]) + +;; Use a temporary register to force getting an even register for the +;; lqarx/stqcrx. instructions. Normal optimizations will eliminate this extra +;; copy on big endian systems. + +;; On little endian systems where non-atomic quad word load/store instructions +;; are not used, the address can be register+offset, so make sure the address +;; is indexed or indirect before register allocation. 
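
The TImode/PTImode patterns (load_lockedti above and store_conditionalti below) carry the 128-bit cases used by rs6000_expand_atomic_compare_and_swap. A minimal C sketch of code that reaches them follows; it assumes a target where TARGET_SYNC_TI holds (a power8-class -mcpu, which is an assumption for illustration), since otherwise GCC emits an out-of-line libatomic call instead.

/* Illustrative sketch only.  With TARGET_SYNC_TI the 16-byte CAS below is
   built from lqarx/stqcx. on an even/odd register pair; without it, the
   compiler falls back to a libatomic helper.  */

static __int128 slot;

int try_swap (__int128 expected, __int128 desired)
{
  return __atomic_compare_exchange_n (&slot, &expected, desired,
                                      0 /* strong */, __ATOMIC_SEQ_CST,
                                      __ATOMIC_SEQ_CST);
}
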
+ +(define_expand "store_conditionalti" + [(use (match_operand:CC 0 "cc_reg_operand" "")) + (use (match_operand:TI 1 "memory_operand" "")) + (use (match_operand:TI 2 "quad_int_reg_operand" ""))] + "TARGET_SYNC_TI" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx addr = XEXP (op1, 0); + rtx pti_mem; + rtx pti_reg; + + if (!indexed_or_indirect_operand (op1, TImode)) + { + rtx new_addr = force_reg (Pmode, addr); + operands[1] = op1 = change_address (op1, TImode, new_addr); + addr = new_addr; + } + + pti_mem = change_address (op1, PTImode, addr); + pti_reg = gen_reg_rtx (PTImode); + + if (WORDS_BIG_ENDIAN) + emit_move_insn (pti_reg, gen_lowpart (PTImode, op2)); + else + { + emit_move_insn (gen_lowpart (DImode, pti_reg), gen_highpart (DImode, op2)); + emit_move_insn (gen_highpart (DImode, pti_reg), gen_lowpart (DImode, op2)); + } + + emit_insn (gen_store_conditionalpti (op0, pti_mem, pti_reg)); + DONE; +}) + +(define_insn "store_conditionalpti" + [(set (match_operand:CC 0 "cc_reg_operand" "=x") + (unspec_volatile:CC [(const_int 0)] UNSPECV_SC)) + (set (match_operand:PTI 1 "indexed_or_indirect_operand" "=Z") + (match_operand:PTI 2 "quad_int_reg_operand" "r"))] + "TARGET_SYNC_TI && quad_int_reg_operand (operands[2], PTImode)" + "stqcx. %2,%y1" + [(set_attr "type" "store_c")]) + +(define_expand "atomic_compare_and_swap<mode>" + [(match_operand:SI 0 "int_reg_operand" "") ;; bool out + (match_operand:AINT 1 "int_reg_operand" "") ;; val out + (match_operand:AINT 2 "memory_operand" "") ;; memory + (match_operand:AINT 3 "reg_or_short_operand" "") ;; expected + (match_operand:AINT 4 "int_reg_operand" "") ;; desired + (match_operand:SI 5 "const_int_operand" "") ;; is_weak + (match_operand:SI 6 "const_int_operand" "") ;; model succ + (match_operand:SI 7 "const_int_operand" "")] ;; model fail + "" +{ + rs6000_expand_atomic_compare_and_swap (operands); + DONE; +}) + +(define_expand "atomic_exchange<mode>" + [(match_operand:AINT 0 "int_reg_operand" "") ;; output + (match_operand:AINT 1 "memory_operand" "") ;; memory + (match_operand:AINT 2 "int_reg_operand" "") ;; input + (match_operand:SI 3 "const_int_operand" "")] ;; model + "" +{ + rs6000_expand_atomic_exchange (operands); + DONE; +}) + +(define_expand "atomic_<fetchop_name><mode>" + [(match_operand:AINT 0 "memory_operand" "") ;; memory + (FETCHOP:AINT (match_dup 0) + (match_operand:AINT 1 "<fetchop_pred>" "")) ;; operand + (match_operand:SI 2 "const_int_operand" "")] ;; model + "" +{ + rs6000_expand_atomic_op (<CODE>, operands[0], operands[1], + NULL_RTX, NULL_RTX, operands[2]); + DONE; +}) + +(define_expand "atomic_nand<mode>" + [(match_operand:AINT 0 "memory_operand" "") ;; memory + (match_operand:AINT 1 "int_reg_operand" "") ;; operand + (match_operand:SI 2 "const_int_operand" "")] ;; model + "" +{ + rs6000_expand_atomic_op (NOT, operands[0], operands[1], + NULL_RTX, NULL_RTX, operands[2]); + DONE; +}) + +(define_expand "atomic_fetch_<fetchop_name><mode>" + [(match_operand:AINT 0 "int_reg_operand" "") ;; output + (match_operand:AINT 1 "memory_operand" "") ;; memory + (FETCHOP:AINT (match_dup 1) + (match_operand:AINT 2 "<fetchop_pred>" "")) ;; operand + (match_operand:SI 3 "const_int_operand" "")] ;; model + "" +{ + rs6000_expand_atomic_op (<CODE>, operands[1], operands[2], + operands[0], NULL_RTX, operands[3]); + DONE; +}) + +(define_expand "atomic_fetch_nand<mode>" + [(match_operand:AINT 0 "int_reg_operand" "") ;; output + (match_operand:AINT 1 "memory_operand" "") ;; memory + (match_operand:AINT 2 "int_reg_operand" 
"") ;; operand + (match_operand:SI 3 "const_int_operand" "")] ;; model + "" +{ + rs6000_expand_atomic_op (NOT, operands[1], operands[2], + operands[0], NULL_RTX, operands[3]); + DONE; +}) + +(define_expand "atomic_<fetchop_name>_fetch<mode>" + [(match_operand:AINT 0 "int_reg_operand" "") ;; output + (match_operand:AINT 1 "memory_operand" "") ;; memory + (FETCHOP:AINT (match_dup 1) + (match_operand:AINT 2 "<fetchop_pred>" "")) ;; operand + (match_operand:SI 3 "const_int_operand" "")] ;; model + "" +{ + rs6000_expand_atomic_op (<CODE>, operands[1], operands[2], + NULL_RTX, operands[0], operands[3]); + DONE; +}) + +(define_expand "atomic_nand_fetch<mode>" + [(match_operand:AINT 0 "int_reg_operand" "") ;; output + (match_operand:AINT 1 "memory_operand" "") ;; memory + (match_operand:AINT 2 "int_reg_operand" "") ;; operand + (match_operand:SI 3 "const_int_operand" "")] ;; model + "" +{ + rs6000_expand_atomic_op (NOT, operands[1], operands[2], + NULL_RTX, operands[0], operands[3]); + DONE; +}) diff --git a/gcc/config/powerpcspe/sysv4.h b/gcc/config/powerpcspe/sysv4.h new file mode 100644 index 000000000000..de386291a510 --- /dev/null +++ b/gcc/config/powerpcspe/sysv4.h @@ -0,0 +1,1048 @@ +/* Target definitions for GNU compiler for PowerPC running System V.4 + Copyright (C) 1995-2017 Free Software Foundation, Inc. + Contributed by Cygnus Support. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Header files should be C++ aware in general. */ +#undef NO_IMPLICIT_EXTERN_C +#define NO_IMPLICIT_EXTERN_C + +/* Yes! We are ELF. */ +#define TARGET_OBJECT_FORMAT OBJECT_ELF + +/* Default ABI to compile code for. */ +#define DEFAULT_ABI rs6000_current_abi + +/* Default ABI to use. */ +#define RS6000_ABI_NAME "sysv" + +/* Override rs6000.h definition. */ +#undef ASM_DEFAULT_SPEC +#define ASM_DEFAULT_SPEC "-mppc" + +#define TARGET_TOC (TARGET_64BIT \ + || (TARGET_MINIMAL_TOC \ + && flag_pic > 1) \ + || DEFAULT_ABI != ABI_V4) + +#define TARGET_BITFIELD_TYPE (! TARGET_NO_BITFIELD_TYPE) +#define TARGET_BIG_ENDIAN (! TARGET_LITTLE_ENDIAN) +#define TARGET_PROTOTYPE target_prototype +#define TARGET_NO_PROTOTYPE (! TARGET_PROTOTYPE) +#define TARGET_NO_TOC (! TARGET_TOC) +#define TARGET_NO_EABI (! TARGET_EABI) +#define TARGET_REGNAMES rs6000_regnames + +#ifdef HAVE_AS_REL16 +#undef TARGET_SECURE_PLT +#define TARGET_SECURE_PLT secure_plt +#endif + +#define SDATA_DEFAULT_SIZE 8 + +/* The macro SUBTARGET_OVERRIDE_OPTIONS is provided for subtargets, to + get control in TARGET_OPTION_OVERRIDE. 
*/ + +#define SUBTARGET_OVERRIDE_OPTIONS \ +do { \ + if (!global_options_set.x_g_switch_value) \ + g_switch_value = SDATA_DEFAULT_SIZE; \ + \ + if (rs6000_abi_name == NULL) \ + rs6000_abi_name = RS6000_ABI_NAME; \ + \ + if (!strcmp (rs6000_abi_name, "sysv")) \ + rs6000_current_abi = ABI_V4; \ + else if (!strcmp (rs6000_abi_name, "sysv-noeabi")) \ + { \ + rs6000_current_abi = ABI_V4; \ + rs6000_isa_flags &= ~ OPTION_MASK_EABI; \ + } \ + else if (!strcmp (rs6000_abi_name, "sysv-eabi") \ + || !strcmp (rs6000_abi_name, "eabi")) \ + { \ + rs6000_current_abi = ABI_V4; \ + rs6000_isa_flags |= OPTION_MASK_EABI; \ + } \ + else if (!strcmp (rs6000_abi_name, "aixdesc")) \ + rs6000_current_abi = ABI_AIX; \ + else if (!strcmp (rs6000_abi_name, "freebsd") \ + || !strcmp (rs6000_abi_name, "linux")) \ + { \ + if (TARGET_64BIT) \ + rs6000_current_abi = ABI_AIX; \ + else \ + rs6000_current_abi = ABI_V4; \ + } \ + else if (!strcmp (rs6000_abi_name, "netbsd")) \ + rs6000_current_abi = ABI_V4; \ + else if (!strcmp (rs6000_abi_name, "openbsd")) \ + rs6000_current_abi = ABI_V4; \ + else if (!strcmp (rs6000_abi_name, "i960-old")) \ + { \ + rs6000_current_abi = ABI_V4; \ + rs6000_isa_flags |= (OPTION_MASK_LITTLE_ENDIAN | OPTION_MASK_EABI); \ + rs6000_isa_flags &= ~OPTION_MASK_STRICT_ALIGN; \ + TARGET_NO_BITFIELD_WORD = 1; \ + } \ + else \ + { \ + rs6000_current_abi = ABI_V4; \ + error ("bad value for -mcall-%s", rs6000_abi_name); \ + } \ + \ + if (rs6000_sdata_name) \ + { \ + if (!strcmp (rs6000_sdata_name, "none")) \ + rs6000_sdata = SDATA_NONE; \ + else if (!strcmp (rs6000_sdata_name, "data")) \ + rs6000_sdata = SDATA_DATA; \ + else if (!strcmp (rs6000_sdata_name, "default")) \ + rs6000_sdata = (TARGET_EABI) ? SDATA_EABI : SDATA_SYSV; \ + else if (!strcmp (rs6000_sdata_name, "sysv")) \ + rs6000_sdata = SDATA_SYSV; \ + else if (!strcmp (rs6000_sdata_name, "eabi")) \ + rs6000_sdata = SDATA_EABI; \ + else \ + error ("bad value for -msdata=%s", rs6000_sdata_name); \ + } \ + else if (DEFAULT_ABI == ABI_V4) \ + { \ + rs6000_sdata = SDATA_DATA; \ + rs6000_sdata_name = "data"; \ + } \ + else \ + { \ + rs6000_sdata = SDATA_NONE; \ + rs6000_sdata_name = "none"; \ + } \ + \ + if (TARGET_RELOCATABLE && \ + (rs6000_sdata == SDATA_EABI || rs6000_sdata == SDATA_SYSV)) \ + { \ + rs6000_sdata = SDATA_DATA; \ + error ("-mrelocatable and -msdata=%s are incompatible", \ + rs6000_sdata_name); \ + } \ + \ + else if (flag_pic && DEFAULT_ABI == ABI_V4 \ + && (rs6000_sdata == SDATA_EABI \ + || rs6000_sdata == SDATA_SYSV)) \ + { \ + rs6000_sdata = SDATA_DATA; \ + error ("-f%s and -msdata=%s are incompatible", \ + (flag_pic > 1) ? 
"PIC" : "pic", \ + rs6000_sdata_name); \ + } \ + \ + if ((rs6000_sdata != SDATA_NONE && DEFAULT_ABI != ABI_V4) \ + || (rs6000_sdata == SDATA_EABI && !TARGET_EABI)) \ + { \ + rs6000_sdata = SDATA_NONE; \ + error ("-msdata=%s and -mcall-%s are incompatible", \ + rs6000_sdata_name, rs6000_abi_name); \ + } \ + \ + targetm.have_srodata_section = rs6000_sdata == SDATA_EABI; \ + \ + if (TARGET_RELOCATABLE && !TARGET_MINIMAL_TOC) \ + { \ + rs6000_isa_flags |= OPTION_MASK_MINIMAL_TOC; \ + error ("-mrelocatable and -mno-minimal-toc are incompatible"); \ + } \ + \ + if (TARGET_RELOCATABLE && rs6000_current_abi != ABI_V4) \ + { \ + rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE; \ + error ("-mrelocatable and -mcall-%s are incompatible", \ + rs6000_abi_name); \ + } \ + \ + if (!TARGET_64BIT && flag_pic > 1 && rs6000_current_abi != ABI_V4) \ + { \ + flag_pic = 0; \ + error ("-fPIC and -mcall-%s are incompatible", \ + rs6000_abi_name); \ + } \ + \ + if (TARGET_SECURE_PLT != secure_plt) \ + { \ + error ("-msecure-plt not supported by your assembler"); \ + } \ + \ + if (flag_pic > 1 && DEFAULT_ABI == ABI_V4) \ + { \ + /* Note: flag_pic should not change any option flags that would \ + be invalid with or pessimise -fno-PIC code. LTO turns off \ + flag_pic when linking/recompiling a fixed position executable. \ + However, if the objects were originally compiled with -fPIC, \ + then other target options forced on here by -fPIC are restored \ + when recompiling those objects without -fPIC. In particular \ + TARGET_RELOCATABLE must not be enabled here by flag_pic. */ \ + rs6000_isa_flags |= OPTION_MASK_MINIMAL_TOC; \ + TARGET_NO_FP_IN_TOC = 1; \ + } \ + \ + if (TARGET_RELOCATABLE) \ + { \ + if (!flag_pic) \ + flag_pic = 2; \ + TARGET_NO_FP_IN_TOC = 1; \ + } \ +} while (0) + +#ifndef RS6000_BI_ARCH +# define SUBSUBTARGET_OVERRIDE_OPTIONS \ +do { \ + if ((TARGET_DEFAULT ^ rs6000_isa_flags) & OPTION_MASK_64BIT) \ + error ("-m%s not supported in this configuration", \ + (rs6000_isa_flags & OPTION_MASK_64BIT) ? "64" : "32"); \ +} while (0) +#endif + +/* Override rs6000.h definition. */ +#undef TARGET_DEFAULT +#define TARGET_DEFAULT 0 + +/* Override rs6000.h definition. */ +#undef PROCESSOR_DEFAULT +#define PROCESSOR_DEFAULT PROCESSOR_PPC750 + +#define FIXED_R2 1 +/* System V.4 uses register 13 as a pointer to the small data area, + so it is not available to the normal user. */ +#define FIXED_R13 1 + +/* Override default big endianism definitions in rs6000.h. */ +#undef BYTES_BIG_ENDIAN +#undef WORDS_BIG_ENDIAN +#define BYTES_BIG_ENDIAN (TARGET_BIG_ENDIAN) +#define WORDS_BIG_ENDIAN (TARGET_BIG_ENDIAN) + +/* Put jump tables in read-only memory, rather than in .text. */ +#define JUMP_TABLES_IN_TEXT_SECTION 0 + +/* Prefix and suffix to use to saving floating point. */ +#define SAVE_FP_PREFIX "_savefpr_" +#define SAVE_FP_SUFFIX "" + +/* Prefix and suffix to use to restoring floating point. */ +#define RESTORE_FP_PREFIX "_restfpr_" +#define RESTORE_FP_SUFFIX "" + +/* Type used for size_t, as a string used in a declaration. */ +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" + +/* Type used for ptrdiff_t, as a string used in a declaration. */ +#define PTRDIFF_TYPE "int" + +#undef WCHAR_TYPE +#define WCHAR_TYPE "long int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +/* Make int foo : 8 not cause structures to be aligned to an int boundary. */ +/* Override elfos.h definition. 
*/ +#undef PCC_BITFIELD_TYPE_MATTERS +#define PCC_BITFIELD_TYPE_MATTERS (TARGET_BITFIELD_TYPE) + +#undef BITFIELD_NBYTES_LIMITED +#define BITFIELD_NBYTES_LIMITED (TARGET_NO_BITFIELD_WORD) + +/* Define this macro to be the value 1 if instructions will fail to + work if given data not on the nominal alignment. If instructions + will merely go slower in that case, define this macro as 0. */ +#undef STRICT_ALIGNMENT +#define STRICT_ALIGNMENT (TARGET_STRICT_ALIGN) + +/* Define this macro if you wish to preserve a certain alignment for + the stack pointer, greater than what the hardware enforces. The + definition is a C expression for the desired alignment (measured + in bits). This macro must evaluate to a value equal to or larger + than STACK_BOUNDARY. + For the SYSV ABI and variants the alignment of the stack pointer + is usually controlled manually in rs6000.c. However, to maintain + alignment across alloca () in all circumstances, + PREFERRED_STACK_BOUNDARY needs to be set as well. + This has the additional advantage of allowing a bigger maximum + alignment of user objects on the stack. */ + +#undef PREFERRED_STACK_BOUNDARY +#define PREFERRED_STACK_BOUNDARY 128 + +/* Real stack boundary as mandated by the appropriate ABI. */ +#define ABI_STACK_BOUNDARY \ + ((TARGET_EABI && !TARGET_ALTIVEC && !TARGET_ALTIVEC_ABI) ? 64 : 128) + +/* An expression for the alignment of a structure field FIELD if the + alignment computed in the usual way is COMPUTED. */ +#define ADJUST_FIELD_ALIGN(FIELD, TYPE, COMPUTED) \ + (rs6000_special_adjust_field_align_p ((TYPE), (COMPUTED)) \ + ? 128 : COMPUTED) + +#undef BIGGEST_FIELD_ALIGNMENT + +/* Use ELF style section commands. */ + +#define TEXT_SECTION_ASM_OP "\t.section\t\".text\"" + +#define DATA_SECTION_ASM_OP "\t.section\t\".data\"" + +#define BSS_SECTION_ASM_OP "\t.section\t\".bss\"" + +/* Override elfos.h definition. */ +#undef INIT_SECTION_ASM_OP +#define INIT_SECTION_ASM_OP "\t.section\t\".init\",\"ax\"" + +/* Override elfos.h definition. */ +#undef FINI_SECTION_ASM_OP +#define FINI_SECTION_ASM_OP "\t.section\t\".fini\",\"ax\"" + +#define TOC_SECTION_ASM_OP "\t.section\t\".got\",\"aw\"" + +/* Put PC relative got entries in .got2. */ +#define MINIMAL_TOC_SECTION_ASM_OP \ + (flag_pic ? "\t.section\t\".got2\",\"aw\"" : "\t.section\t\".got1\",\"aw\"") + +#define SDATA_SECTION_ASM_OP "\t.section\t\".sdata\",\"aw\"" +#define SDATA2_SECTION_ASM_OP "\t.section\t\".sdata2\",\"a\"" +#define SBSS_SECTION_ASM_OP "\t.section\t\".sbss\",\"aw\",@nobits" + +/* Override default elf definitions. */ +#define TARGET_ASM_INIT_SECTIONS rs6000_elf_asm_init_sections +#undef TARGET_ASM_RELOC_RW_MASK +#define TARGET_ASM_RELOC_RW_MASK rs6000_elf_reloc_rw_mask +#undef TARGET_ASM_SELECT_RTX_SECTION +#define TARGET_ASM_SELECT_RTX_SECTION rs6000_elf_select_rtx_section + +/* Return nonzero if this entry is to be written into the constant pool + in a special way. We do so if this is a SYMBOL_REF, LABEL_REF or a CONST + containing one of them. If -mfp-in-toc (the default), we also do + this for floating-point constants. We actually can only do this + if the FP formats of the target and host machines are the same, but + we can't check that since not every file that uses these target macros + includes real.h. + + Unlike AIX, we don't key off of -mminimal-toc, but instead do not + allow floating point constants in the TOC if -mrelocatable. 
*/ + +#undef ASM_OUTPUT_SPECIAL_POOL_ENTRY_P +#define ASM_OUTPUT_SPECIAL_POOL_ENTRY_P(X, MODE) \ + (TARGET_TOC \ + && (GET_CODE (X) == SYMBOL_REF \ + || (GET_CODE (X) == CONST && GET_CODE (XEXP (X, 0)) == PLUS \ + && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF) \ + || GET_CODE (X) == LABEL_REF \ + || (GET_CODE (X) == CONST_INT \ + && GET_MODE_BITSIZE (MODE) <= GET_MODE_BITSIZE (Pmode)) \ + || (!TARGET_NO_FP_IN_TOC \ + && GET_CODE (X) == CONST_DOUBLE \ + && SCALAR_FLOAT_MODE_P (GET_MODE (X)) \ + && BITS_PER_WORD == HOST_BITS_PER_INT))) + +/* These macros generate the special .type and .size directives which + are used to set the corresponding fields of the linker symbol table + entries in an ELF object file under SVR4. These macros also output + the starting labels for the relevant functions/objects. */ + +/* Write the extra assembler code needed to declare a function properly. + Some svr4 assemblers need to also have something extra said about the + function's return value. We allow for that here. */ + +/* Override elfos.h definition. */ +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + rs6000_elf_declare_function_name ((FILE), (NAME), (DECL)) + +/* The USER_LABEL_PREFIX stuff is affected by the -fleading-underscore + flag. The LOCAL_LABEL_PREFIX variable is used by dbxelf.h. */ + +#define LOCAL_LABEL_PREFIX "." +#define USER_LABEL_PREFIX "" + +#define ASM_OUTPUT_INTERNAL_LABEL_PREFIX(FILE,PREFIX) \ + asm_fprintf (FILE, "%L%s", PREFIX) + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.globl " + +/* This says how to output assembler code to declare an + uninitialized internal linkage data object. Under SVR4, + the linker seems to want the alignment of data objects + to depend on their types. We do exactly that here. */ + +#define LOCAL_ASM_OP "\t.local\t" + +#define LCOMM_ASM_OP "\t.lcomm\t" + +/* Describe how to emit uninitialized local items. */ +#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \ +do { \ + if ((DECL) && rs6000_elf_in_small_data_p (DECL)) \ + { \ + switch_to_section (sbss_section); \ + ASM_OUTPUT_ALIGN (FILE, exact_log2 (ALIGN / BITS_PER_UNIT)); \ + ASM_OUTPUT_LABEL (FILE, NAME); \ + ASM_OUTPUT_SKIP (FILE, SIZE); \ + if (!flag_inhibit_size_directive && (SIZE) > 0) \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, SIZE); \ + } \ + else \ + { \ + fprintf (FILE, "%s", LCOMM_ASM_OP); \ + assemble_name ((FILE), (NAME)); \ + fprintf ((FILE), "," HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", \ + (SIZE), (ALIGN) / BITS_PER_UNIT); \ + } \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \ +} while (0) + +/* Describe how to emit uninitialized external linkage items. */ +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ +do { \ + ASM_OUTPUT_ALIGNED_DECL_LOCAL (FILE, DECL, NAME, SIZE, ALIGN); \ +} while (0) + +#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN +/* To support -falign-* switches we need to use .p2align so + that alignment directives in code sections will be padded + with no-op instructions, rather than zeroes. */ +#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \ + if ((LOG) != 0) \ + { \ + if ((MAX_SKIP) == 0) \ + fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ + else \ + fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \ + } +#endif + +/* This is how to output code to push a register on the stack. + It need not be very fast code. + + On the rs6000, we must keep the backchain up to date. 
In order + to simplify things, always allocate 16 bytes for a push (System V + wants to keep stack aligned to a 16 byte boundary). */ + +#define ASM_OUTPUT_REG_PUSH(FILE, REGNO) \ +do { \ + if (DEFAULT_ABI == ABI_V4) \ + asm_fprintf (FILE, \ + "\tstwu %s,-16(%s)\n\tstw %s,12(%s)\n", \ + reg_names[1], reg_names[1], reg_names[REGNO], \ + reg_names[1]); \ +} while (0) + +/* This is how to output an insn to pop a register from the stack. + It need not be very fast code. */ + +#define ASM_OUTPUT_REG_POP(FILE, REGNO) \ +do { \ + if (DEFAULT_ABI == ABI_V4) \ + asm_fprintf (FILE, \ + "\tlwz %s,12(%s)\n\taddi %s,%s,16\n", \ + reg_names[REGNO], reg_names[1], reg_names[1], \ + reg_names[1]); \ +} while (0) + +extern int fixuplabelno; + +/* Handle constructors specially for -mrelocatable. */ +#define TARGET_ASM_CONSTRUCTOR rs6000_elf_asm_out_constructor +#define TARGET_ASM_DESTRUCTOR rs6000_elf_asm_out_destructor + +/* This is the end of what might become sysv4.h. */ + +/* Use DWARF 2 debugging information by default. */ +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + +/* Historically we have also supported stabs debugging. */ +#define DBX_DEBUGGING_INFO 1 + +#define TARGET_ENCODE_SECTION_INFO rs6000_elf_encode_section_info +#define TARGET_IN_SMALL_DATA_P rs6000_elf_in_small_data_p + +/* The ELF version doesn't encode [DS] or whatever at the end of symbols. */ + +#define RS6000_OUTPUT_BASENAME(FILE, NAME) \ + assemble_name (FILE, NAME) + +/* We have to output the stabs for the function name *first*, before + outputting its label. */ + +#define DBX_FUNCTION_FIRST + +/* This is the end of what might become sysv4dbx.h. */ + +#define TARGET_OS_SYSV_CPP_BUILTINS() \ + do \ + { \ + if (rs6000_isa_flags_explicit \ + & OPTION_MASK_RELOCATABLE) \ + builtin_define ("_RELOCATABLE"); \ + } \ + while (0) + +#ifndef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("PPC"); \ + builtin_define_std ("unix"); \ + builtin_define ("__svr4__"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=svr4"); \ + builtin_assert ("cpu=powerpc"); \ + builtin_assert ("machine=powerpc"); \ + TARGET_OS_SYSV_CPP_BUILTINS (); \ + } \ + while (0) +#endif + +/* Select one of BIG_OPT, LITTLE_OPT or DEFAULT_OPT depending + on various -mbig, -mlittle and -mcall- options. */ +#define ENDIAN_SELECT(BIG_OPT, LITTLE_OPT, DEFAULT_OPT) \ +"%{mlittle|mlittle-endian:" LITTLE_OPT ";" \ + "mbig|mbig-endian:" BIG_OPT ";" \ + "mcall-i960-old:" LITTLE_OPT ";" \ + ":" DEFAULT_OPT "}" + +#define DEFAULT_ASM_ENDIAN " -mbig" + +#undef ASM_SPEC +#define ASM_SPEC "%(asm_cpu) \ +%{,assembler|,assembler-with-cpp: %{mregnames} %{mno-regnames}} \ +%{mrelocatable} %{mrelocatable-lib} %{" FPIE_OR_FPIC_SPEC ":-K PIC} \ +%{memb|msdata=eabi: -memb}" \ +ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN) + +#ifndef CC1_SECURE_PLT_DEFAULT_SPEC +#define CC1_SECURE_PLT_DEFAULT_SPEC "" +#endif +#ifndef LINK_SECURE_PLT_DEFAULT_SPEC +#define LINK_SECURE_PLT_DEFAULT_SPEC "" +#endif + +/* Pass -G xxx to the compiler. 
*/ +#undef CC1_SPEC +#define CC1_SPEC "%{G*} %(cc1_cpu)" \ +"%{meabi: %{!mcall-*: -mcall-sysv }} \ +%{!meabi: %{!mno-eabi: \ + %{mrelocatable: -meabi } \ + %{mcall-freebsd: -mno-eabi } \ + %{mcall-i960-old: -meabi } \ + %{mcall-linux: -mno-eabi } \ + %{mcall-netbsd: -mno-eabi } \ + %{mcall-openbsd: -mno-eabi }}} \ +%{msdata: -msdata=default} \ +%{mno-sdata: -msdata=none} \ +%{!mbss-plt: %{!msecure-plt: %(cc1_secure_plt_default)}} \ +%{profile: -p}" + +/* Default starting address if specified. */ +#define LINK_START_SPEC "\ +%{mads : %(link_start_ads) ; \ + myellowknife : %(link_start_yellowknife) ; \ + mmvme : %(link_start_mvme) ; \ + msim : %(link_start_sim) ; \ + mcall-freebsd: %(link_start_freebsd) ; \ + mcall-linux : %(link_start_linux) ; \ + mcall-netbsd : %(link_start_netbsd) ; \ + mcall-openbsd: %(link_start_openbsd) ; \ + : %(link_start_default) }" + +#define LINK_START_DEFAULT_SPEC "" +#define LINK_SECURE_PLT_SPEC LINK_SECURE_PLT_DEFAULT_SPEC + +#undef LINK_SPEC +#define LINK_SPEC "\ +%{h*} %{v:-V} %{!msdata=none:%{G*}} %{msdata=none:-G0} \ +%{R*} \ +%(link_shlib) \ +%{!T*: %(link_start) } \ +%{!static: %{!mbss-plt: %(link_secure_plt)}} \ +%(link_os)" + +/* Shared libraries are not default. */ +#define LINK_SHLIB_SPEC "\ +%{!mshlib: %{!shared: %{!symbolic: -dn -Bstatic}}} \ +%{static: } \ +%{shared:-G -dy -z text } \ +%{symbolic:-Bsymbolic -G -dy -z text }" + +/* Any specific OS flags. */ +#define LINK_OS_SPEC "\ +%{mads : %(link_os_ads) ; \ + myellowknife : %(link_os_yellowknife) ; \ + mmvme : %(link_os_mvme) ; \ + msim : %(link_os_sim) ; \ + mcall-freebsd: %(link_os_freebsd) ; \ + mcall-linux : %(link_os_linux) ; \ + mcall-netbsd : %(link_os_netbsd) ; \ + mcall-openbsd: %(link_os_openbsd) ; \ + : %(link_os_default) }" + +#define LINK_OS_DEFAULT_SPEC "" + +#define DRIVER_SELF_SPECS "%{mfpu=none: %<mfpu=* \ + %<msingle-float %<mdouble-float}" + +/* Override rs6000.h definition. 
*/ +#undef CPP_SPEC +#define CPP_SPEC "%{posix: -D_POSIX_SOURCE} \ +%{mads : %(cpp_os_ads) ; \ + myellowknife : %(cpp_os_yellowknife) ; \ + mmvme : %(cpp_os_mvme) ; \ + msim : %(cpp_os_sim) ; \ + mcall-freebsd: %(cpp_os_freebsd) ; \ + mcall-linux : %(cpp_os_linux) ; \ + mcall-netbsd : %(cpp_os_netbsd) ; \ + mcall-openbsd: %(cpp_os_openbsd) ; \ + : %(cpp_os_default) }" + +#define CPP_OS_DEFAULT_SPEC "" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "\ +%{mads : %(startfile_ads) ; \ + myellowknife : %(startfile_yellowknife) ; \ + mmvme : %(startfile_mvme) ; \ + msim : %(startfile_sim) ; \ + mcall-freebsd: %(startfile_freebsd) ; \ + mcall-linux : %(startfile_linux) ; \ + mcall-netbsd : %(startfile_netbsd) ; \ + mcall-openbsd: %(startfile_openbsd) ; \ + : %(startfile_default) }" + +#define STARTFILE_DEFAULT_SPEC "ecrti.o%s crtbegin.o%s" + +#undef LIB_SPEC +#define LIB_SPEC "\ +%{mads : %(lib_ads) ; \ + myellowknife : %(lib_yellowknife) ; \ + mmvme : %(lib_mvme) ; \ + msim : %(lib_sim) ; \ + mcall-freebsd: %(lib_freebsd) ; \ + mcall-linux : %(lib_linux) ; \ + mcall-netbsd : %(lib_netbsd) ; \ + mcall-openbsd: %(lib_openbsd) ; \ + : %(lib_default) }" + +#define LIB_DEFAULT_SPEC "-lc" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "\ +%{mads : %(endfile_ads) ; \ + myellowknife : %(endfile_yellowknife) ; \ + mmvme : %(endfile_mvme) ; \ + msim : %(endfile_sim) ; \ + mcall-freebsd: %(endfile_freebsd) ; \ + mcall-linux : %(endfile_linux) ; \ + mcall-netbsd : %(endfile_netbsd) ; \ + mcall-openbsd: %(endfile_openbsd) ; \ + : %(crtsavres_default) %(endfile_default) }" + +#define CRTSAVRES_DEFAULT_SPEC "" + +#define ENDFILE_DEFAULT_SPEC "crtend.o%s ecrtn.o%s" + +/* Motorola ADS support. */ +#define LIB_ADS_SPEC "--start-group -lads -lc --end-group" + +#define STARTFILE_ADS_SPEC "ecrti.o%s crt0.o%s crtbegin.o%s" + +#define ENDFILE_ADS_SPEC "crtend.o%s ecrtn.o%s" + +#define LINK_START_ADS_SPEC "-T ads.ld%s" + +#define LINK_OS_ADS_SPEC "" + +#define CPP_OS_ADS_SPEC "" + +/* Motorola Yellowknife support. */ +#define LIB_YELLOWKNIFE_SPEC "--start-group -lyk -lc --end-group" + +#define STARTFILE_YELLOWKNIFE_SPEC "ecrti.o%s crt0.o%s crtbegin.o%s" + +#define ENDFILE_YELLOWKNIFE_SPEC "crtend.o%s ecrtn.o%s" + +#define LINK_START_YELLOWKNIFE_SPEC "-T yellowknife.ld%s" + +#define LINK_OS_YELLOWKNIFE_SPEC "" + +#define CPP_OS_YELLOWKNIFE_SPEC "" + +/* Motorola MVME support. */ +#define LIB_MVME_SPEC "--start-group -lmvme -lc --end-group" + +#define STARTFILE_MVME_SPEC "ecrti.o%s crt0.o%s crtbegin.o%s" + +#define ENDFILE_MVME_SPEC "crtend.o%s ecrtn.o%s" + +#define LINK_START_MVME_SPEC "-Ttext 0x40000" + +#define LINK_OS_MVME_SPEC "" + +#define CPP_OS_MVME_SPEC "" + +/* PowerPC simulator based on netbsd system calls support. */ +#define LIB_SIM_SPEC "--start-group -lsim -lc --end-group" + +#define STARTFILE_SIM_SPEC "ecrti.o%s sim-crt0.o%s crtbegin.o%s" + +#define ENDFILE_SIM_SPEC "crtend.o%s ecrtn.o%s" + +#define LINK_START_SIM_SPEC "" + +#define LINK_OS_SIM_SPEC "-m elf32ppcsim" + +#define CPP_OS_SIM_SPEC "" + +/* FreeBSD support. 
*/ + +#define CPP_OS_FREEBSD_SPEC "\ + -D__PPC__ -D__ppc__ -D__PowerPC__ -D__powerpc__ \ + -Acpu=powerpc -Amachine=powerpc" + +#define STARTFILE_FREEBSD_SPEC FBSD_STARTFILE_SPEC +#define ENDFILE_FREEBSD_SPEC FBSD_ENDFILE_SPEC +#define LIB_FREEBSD_SPEC FBSD_LIB_SPEC +#define LINK_START_FREEBSD_SPEC "" + +#define LINK_OS_FREEBSD_SPEC "\ + %{p:%nconsider using '-pg' instead of '-p' with gprof(1)} \ + %{v:-V} \ + %{assert*} %{R*} %{rpath*} %{defsym*} \ + %{shared:-Bshareable %{h*} %{soname*}} \ + %{!shared: \ + %{!static: \ + %{rdynamic: -export-dynamic} \ + -dynamic-linker %(fbsd_dynamic_linker) } \ + %{static:-Bstatic}} \ + %{symbolic:-Bsymbolic}" + +/* GNU/Linux support. */ +#define LIB_LINUX_SPEC "%{mnewlib: --start-group -llinux -lc --end-group } \ +%{!mnewlib: %{pthread:-lpthread} %{shared:-lc} \ +%{!shared: %{profile:-lc_p} %{!profile:-lc}}}" + +#if ENABLE_OFFLOADING == 1 +#define CRTOFFLOADBEGIN "%{fopenacc|fopenmp:crtoffloadbegin%O%s}" +#define CRTOFFLOADEND "%{fopenacc|fopenmp:crtoffloadend%O%s}" +#else +#define CRTOFFLOADBEGIN "" +#define CRTOFFLOADEND "" +#endif + +#ifdef HAVE_LD_PIE +#define STARTFILE_LINUX_SPEC "\ +%{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} \ +%{mnewlib:ecrti.o%s;:crti.o%s} \ +%{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s} \ +" CRTOFFLOADBEGIN +#else +#define STARTFILE_LINUX_SPEC "\ +%{!shared: %{pg|p|profile:gcrt1.o%s;:crt1.o%s}} \ +%{mnewlib:ecrti.o%s;:crti.o%s} \ +%{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s} \ +" CRTOFFLOADBEGIN +#endif + +#define ENDFILE_LINUX_SPEC "\ +%{shared|pie:crtendS.o%s;:crtend.o%s} \ +%{mnewlib:ecrtn.o%s;:crtn.o%s} \ +" CRTOFFLOADEND + +#define LINK_START_LINUX_SPEC "" + +#define MUSL_DYNAMIC_LINKER_E ENDIAN_SELECT("","le","") + +#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1" +#define UCLIBC_DYNAMIC_LINKER "/lib/ld-uClibc.so.0" +#define MUSL_DYNAMIC_LINKER \ + "/lib/ld-musl-powerpc" MUSL_DYNAMIC_LINKER_E "%{msoft-float:-sf}.so.1" +#if DEFAULT_LIBC == LIBC_UCLIBC +#define CHOOSE_DYNAMIC_LINKER(G, U, M) \ + "%{mglibc:" G ";:%{mmusl:" M ";:" U "}}" +#elif DEFAULT_LIBC == LIBC_MUSL +#define CHOOSE_DYNAMIC_LINKER(G, U, M) \ + "%{mglibc:" G ";:%{muclibc:" U ";:" M "}}" +#elif !defined (DEFAULT_LIBC) || DEFAULT_LIBC == LIBC_GLIBC +#define CHOOSE_DYNAMIC_LINKER(G, U, M) \ + "%{muclibc:" U ";:%{mmusl:" M ";:" G "}}" +#else +#error "Unsupported DEFAULT_LIBC" +#endif +#define GNU_USER_DYNAMIC_LINKER \ + CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER, UCLIBC_DYNAMIC_LINKER, \ + MUSL_DYNAMIC_LINKER) + +#define LINK_OS_LINUX_SPEC "-m elf32ppclinux %{!shared: %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " GNU_USER_DYNAMIC_LINKER "}}" + +#if defined(HAVE_LD_EH_FRAME_HDR) +# define LINK_EH_SPEC "%{!static:--eh-frame-hdr} " +#endif + +#define CPP_OS_LINUX_SPEC "-D__unix__ -D__gnu_linux__ -D__linux__ \ +%{!undef: \ + %{!ansi: \ + %{!std=*:-Dunix -D__unix -Dlinux -D__linux} \ + %{std=gnu*:-Dunix -D__unix -Dlinux -D__linux}}} \ +-Asystem=linux -Asystem=unix -Asystem=posix %{pthread:-D_REENTRANT}" + +/* NetBSD support. 
*/ +#define LIB_NETBSD_SPEC "\ +-lc" + +#define STARTFILE_NETBSD_SPEC "\ +ncrti.o%s crt0.o%s \ +%{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}" + +#define ENDFILE_NETBSD_SPEC "\ +%{!shared:crtend.o%s} %{shared:crtendS.o%s} \ +ncrtn.o%s" + +#define LINK_START_NETBSD_SPEC "\ +" + +#define LINK_OS_NETBSD_SPEC "\ +%{!shared: %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker /usr/libexec/ld.elf_so}}" + +#define CPP_OS_NETBSD_SPEC "\ +-D__powerpc__ -D__NetBSD__ -D__KPRINTF_ATTRIBUTE__" + +/* OpenBSD support. */ +#ifndef LIB_OPENBSD_SPEC +#define LIB_OPENBSD_SPEC "%{!shared:%{pthread:-lpthread%{p:_p}%{!p:%{pg:_p}}}} %{!shared:-lc%{p:_p}%{!p:%{pg:_p}}}" +#endif + +#ifndef STARTFILE_OPENBSD_SPEC +#define STARTFILE_OPENBSD_SPEC "\ +%{!shared: %{pg:gcrt0.o%s} %{!pg:%{p:gcrt0.o%s} %{!p:crt0.o%s}}} \ +%{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}" +#endif + +#ifndef ENDFILE_OPENBSD_SPEC +#define ENDFILE_OPENBSD_SPEC "\ +%{!shared:crtend.o%s} %{shared:crtendS.o%s}" +#endif + +#ifndef LINK_START_OPENBSD_SPEC +#define LINK_START_OPENBSD_SPEC "-Ttext 0x400074" +#endif + +#ifndef LINK_OS_OPENBSD_SPEC +#define LINK_OS_OPENBSD_SPEC "" +#endif + +#ifndef CPP_OS_OPENBSD_SPEC +#define CPP_OS_OPENBSD_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_POSIX_THREADS}" +#endif + +/* Define any extra SPECS that the compiler needs to generate. */ +/* Override rs6000.h definition. */ +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "crtsavres_default", CRTSAVRES_DEFAULT_SPEC }, \ + { "lib_ads", LIB_ADS_SPEC }, \ + { "lib_yellowknife", LIB_YELLOWKNIFE_SPEC }, \ + { "lib_mvme", LIB_MVME_SPEC }, \ + { "lib_sim", LIB_SIM_SPEC }, \ + { "lib_freebsd", LIB_FREEBSD_SPEC }, \ + { "lib_linux", LIB_LINUX_SPEC }, \ + { "lib_netbsd", LIB_NETBSD_SPEC }, \ + { "lib_openbsd", LIB_OPENBSD_SPEC }, \ + { "lib_default", LIB_DEFAULT_SPEC }, \ + { "startfile_ads", STARTFILE_ADS_SPEC }, \ + { "startfile_yellowknife", STARTFILE_YELLOWKNIFE_SPEC }, \ + { "startfile_mvme", STARTFILE_MVME_SPEC }, \ + { "startfile_sim", STARTFILE_SIM_SPEC }, \ + { "startfile_freebsd", STARTFILE_FREEBSD_SPEC }, \ + { "startfile_linux", STARTFILE_LINUX_SPEC }, \ + { "startfile_netbsd", STARTFILE_NETBSD_SPEC }, \ + { "startfile_openbsd", STARTFILE_OPENBSD_SPEC }, \ + { "startfile_default", STARTFILE_DEFAULT_SPEC }, \ + { "endfile_ads", ENDFILE_ADS_SPEC }, \ + { "endfile_yellowknife", ENDFILE_YELLOWKNIFE_SPEC }, \ + { "endfile_mvme", ENDFILE_MVME_SPEC }, \ + { "endfile_sim", ENDFILE_SIM_SPEC }, \ + { "endfile_freebsd", ENDFILE_FREEBSD_SPEC }, \ + { "endfile_linux", ENDFILE_LINUX_SPEC }, \ + { "endfile_netbsd", ENDFILE_NETBSD_SPEC }, \ + { "endfile_openbsd", ENDFILE_OPENBSD_SPEC }, \ + { "endfile_default", ENDFILE_DEFAULT_SPEC }, \ + { "link_shlib", LINK_SHLIB_SPEC }, \ + { "link_start", LINK_START_SPEC }, \ + { "link_start_ads", LINK_START_ADS_SPEC }, \ + { "link_start_yellowknife", LINK_START_YELLOWKNIFE_SPEC }, \ + { "link_start_mvme", LINK_START_MVME_SPEC }, \ + { "link_start_sim", LINK_START_SIM_SPEC }, \ + { "link_start_freebsd", LINK_START_FREEBSD_SPEC }, \ + { "link_start_linux", LINK_START_LINUX_SPEC }, \ + { "link_start_netbsd", LINK_START_NETBSD_SPEC }, \ + { "link_start_openbsd", LINK_START_OPENBSD_SPEC }, \ + { "link_start_default", LINK_START_DEFAULT_SPEC }, \ + { "link_os", LINK_OS_SPEC }, \ + { "link_os_ads", LINK_OS_ADS_SPEC }, \ + { "link_os_yellowknife", LINK_OS_YELLOWKNIFE_SPEC }, \ + { "link_os_mvme", LINK_OS_MVME_SPEC }, \ + { "link_os_sim", LINK_OS_SIM_SPEC }, \ + { "link_os_freebsd", LINK_OS_FREEBSD_SPEC }, \ + { 
"link_os_linux", LINK_OS_LINUX_SPEC }, \ + { "link_os_netbsd", LINK_OS_NETBSD_SPEC }, \ + { "link_os_openbsd", LINK_OS_OPENBSD_SPEC }, \ + { "link_os_default", LINK_OS_DEFAULT_SPEC }, \ + { "cc1_secure_plt_default", CC1_SECURE_PLT_DEFAULT_SPEC }, \ + { "link_secure_plt", LINK_SECURE_PLT_SPEC }, \ + { "cpp_os_ads", CPP_OS_ADS_SPEC }, \ + { "cpp_os_yellowknife", CPP_OS_YELLOWKNIFE_SPEC }, \ + { "cpp_os_mvme", CPP_OS_MVME_SPEC }, \ + { "cpp_os_sim", CPP_OS_SIM_SPEC }, \ + { "cpp_os_freebsd", CPP_OS_FREEBSD_SPEC }, \ + { "cpp_os_linux", CPP_OS_LINUX_SPEC }, \ + { "cpp_os_netbsd", CPP_OS_NETBSD_SPEC }, \ + { "cpp_os_openbsd", CPP_OS_OPENBSD_SPEC }, \ + { "cpp_os_default", CPP_OS_DEFAULT_SPEC }, \ + { "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER }, \ + SUBSUBTARGET_EXTRA_SPECS + +#define SUBSUBTARGET_EXTRA_SPECS + +/* Define this macro as a C expression for the initializer of an + array of string to tell the driver program which options are + defaults for this target and thus do not need to be handled + specially when using `MULTILIB_OPTIONS'. + + Do not define this macro if `MULTILIB_OPTIONS' is not defined in + the target makefile fragment or if none of the options listed in + `MULTILIB_OPTIONS' are set by default. *Note Target Fragment::. */ + +#define MULTILIB_DEFAULTS { "mbig", "mcall-sysv" } + +/* Define this macro if the code for function profiling should come + before the function prologue. Normally, the profiling code comes + after. */ +#define PROFILE_BEFORE_PROLOGUE 1 + +/* Function name to call to do profiling. */ +#define RS6000_MCOUNT "_mcount" + +/* Select a format to encode pointers in exception handling data. CODE + is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is + true if the symbol may be affected by dynamic relocations. */ +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \ + (flag_pic \ + ? (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel \ + | DW_EH_PE_sdata4) \ + : DW_EH_PE_absptr) + +#define DOUBLE_INT_ASM_OP "\t.quad\t" + +/* Generate entries in .fixup for relocatable addresses. */ +#define RELOCATABLE_NEEDS_FIXUP 1 + +#define TARGET_ASM_FILE_END rs6000_elf_file_end + +#undef TARGET_ASAN_SHADOW_OFFSET +#define TARGET_ASAN_SHADOW_OFFSET rs6000_asan_shadow_offset + +/* This target uses the sysv4.opt file. */ +#define TARGET_USES_SYSV4_OPT 1 + +/* Include order changes for musl, same as in generic linux.h. 
*/ +#if DEFAULT_LIBC == LIBC_MUSL +#define INCLUDE_DEFAULTS_MUSL_GPP \ + { GPLUSPLUS_INCLUDE_DIR, "G++", 1, 1, \ + GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 0 }, \ + { GPLUSPLUS_TOOL_INCLUDE_DIR, "G++", 1, 1, \ + GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 1 }, \ + { GPLUSPLUS_BACKWARD_INCLUDE_DIR, "G++", 1, 1, \ + GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 0 }, + +#ifdef LOCAL_INCLUDE_DIR +#define INCLUDE_DEFAULTS_MUSL_LOCAL \ + { LOCAL_INCLUDE_DIR, 0, 0, 1, 1, 2 }, \ + { LOCAL_INCLUDE_DIR, 0, 0, 1, 1, 0 }, +#else +#define INCLUDE_DEFAULTS_MUSL_LOCAL +#endif + +#ifdef PREFIX_INCLUDE_DIR +#define INCLUDE_DEFAULTS_MUSL_PREFIX \ + { PREFIX_INCLUDE_DIR, 0, 0, 1, 0, 0}, +#else +#define INCLUDE_DEFAULTS_MUSL_PREFIX +#endif + +#ifdef CROSS_INCLUDE_DIR +#define INCLUDE_DEFAULTS_MUSL_CROSS \ + { CROSS_INCLUDE_DIR, "GCC", 0, 0, 0, 0}, +#else +#define INCLUDE_DEFAULTS_MUSL_CROSS +#endif + +#ifdef TOOL_INCLUDE_DIR +#define INCLUDE_DEFAULTS_MUSL_TOOL \ + { TOOL_INCLUDE_DIR, "BINUTILS", 0, 1, 0, 0}, +#else +#define INCLUDE_DEFAULTS_MUSL_TOOL +#endif + +#ifdef NATIVE_SYSTEM_HEADER_DIR +#define INCLUDE_DEFAULTS_MUSL_NATIVE \ + { NATIVE_SYSTEM_HEADER_DIR, 0, 0, 0, 1, 2 }, \ + { NATIVE_SYSTEM_HEADER_DIR, 0, 0, 0, 1, 0 }, +#else +#define INCLUDE_DEFAULTS_MUSL_NATIVE +#endif + +#if defined (CROSS_DIRECTORY_STRUCTURE) && !defined (TARGET_SYSTEM_ROOT) +# undef INCLUDE_DEFAULTS_MUSL_LOCAL +# define INCLUDE_DEFAULTS_MUSL_LOCAL +# undef INCLUDE_DEFAULTS_MUSL_NATIVE +# define INCLUDE_DEFAULTS_MUSL_NATIVE +#else +# undef INCLUDE_DEFAULTS_MUSL_CROSS +# define INCLUDE_DEFAULTS_MUSL_CROSS +#endif + +#undef INCLUDE_DEFAULTS +#define INCLUDE_DEFAULTS \ + { \ + INCLUDE_DEFAULTS_MUSL_GPP \ + INCLUDE_DEFAULTS_MUSL_LOCAL \ + INCLUDE_DEFAULTS_MUSL_PREFIX \ + INCLUDE_DEFAULTS_MUSL_CROSS \ + INCLUDE_DEFAULTS_MUSL_TOOL \ + INCLUDE_DEFAULTS_MUSL_NATIVE \ + { GCC_INCLUDE_DIR, "GCC", 0, 1, 0, 0 }, \ + { 0, 0, 0, 0, 0, 0 } \ + } +#endif diff --git a/gcc/config/powerpcspe/sysv4.opt b/gcc/config/powerpcspe/sysv4.opt new file mode 100644 index 000000000000..423300b8148f --- /dev/null +++ b/gcc/config/powerpcspe/sysv4.opt @@ -0,0 +1,161 @@ +; SYSV4 options for PPC port. +; +; Copyright (C) 2005-2017 Free Software Foundation, Inc. +; Contributed by Aldy Hernandez <aldy@quesejoda.com>. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. + +mcall- +Target RejectNegative Joined Var(rs6000_abi_name) +Select ABI calling convention. + +msdata= +Target RejectNegative Joined Var(rs6000_sdata_name) +Select method for sdata handling. + +mtls-size= +Target RejectNegative Joined Var(rs6000_tls_size) Enum(rs6000_tls_size) +Specify bit size of immediate TLS offsets. 
+ +Enum +Name(rs6000_tls_size) Type(int) + +EnumValue +Enum(rs6000_tls_size) String(16) Value(16) + +EnumValue +Enum(rs6000_tls_size) String(32) Value(32) + +EnumValue +Enum(rs6000_tls_size) String(64) Value(64) + +mbit-align +Target Report Var(TARGET_NO_BITFIELD_TYPE) Save +Align to the base type of the bit-field. + +mstrict-align +Target Report Mask(STRICT_ALIGN) Var(rs6000_isa_flags) +Align to the base type of the bit-field. +Don't assume that unaligned accesses are handled by the system. + +mrelocatable +Target Report Mask(RELOCATABLE) Var(rs6000_isa_flags) +Produce code relocatable at runtime. + +mrelocatable-lib +Target +Produce code relocatable at runtime. + +mlittle-endian +Target Report RejectNegative Mask(LITTLE_ENDIAN) Var(rs6000_isa_flags) +Produce little endian code. + +mlittle +Target Report RejectNegative Mask(LITTLE_ENDIAN) Var(rs6000_isa_flags) +Produce little endian code. + +mbig-endian +Target Report RejectNegative InverseMask(LITTLE_ENDIAN) Var(rs6000_isa_flags) +Produce big endian code. + +mbig +Target Report RejectNegative InverseMask(LITTLE_ENDIAN) Var(rs6000_isa_flags) +Produce big endian code. + +;; FIXME: This does nothing. What should be done? +mno-toc +Target RejectNegative +no description yet. + +mtoc +Target RejectNegative +no description yet. + +mprototype +Target Var(target_prototype) Save +Assume all variable arg functions are prototyped. + +;; FIXME: Does nothing. +mno-traceback +Target RejectNegative +no description yet. + +meabi +Target Report Mask(EABI) Var(rs6000_isa_flags) +Use EABI. + +mbit-word +Target Report Var(TARGET_NO_BITFIELD_WORD) Save +Allow bit-fields to cross word boundaries. + +mregnames +Target Var(rs6000_regnames) Save +Use alternate register names. + +;; This option does nothing and only exists because the compiler +;; driver passes all -m* options through. +msdata +Target +Use default method for sdata handling. + +msim +Target RejectNegative +Link with libsim.a, libc.a and sim-crt0.o. + +mads +Target RejectNegative +Link with libads.a, libc.a and crt0.o. + +myellowknife +Target RejectNegative +Link with libyk.a, libc.a and crt0.o. + +mmvme +Target RejectNegative +Link with libmvme.a, libc.a and crt0.o. + +memb +Target RejectNegative +Set the PPC_EMB bit in the ELF flags header. + +mshlib +Target RejectNegative +no description yet. + +m64 +Target Report RejectNegative Negative(m32) Mask(64BIT) Var(rs6000_isa_flags) +Generate 64-bit code. + +m32 +Target Report RejectNegative Negative(m64) InverseMask(64BIT) Var(rs6000_isa_flags) +Generate 32-bit code. + +mnewlib +Target RejectNegative +no description yet. + +msecure-plt +Target Report RejectNegative Var(secure_plt, 1) Save +Generate code to use a non-exec PLT and GOT. + +mbss-plt +Target Report RejectNegative Var(secure_plt, 0) Save +Generate code for old exec BSS PLT. + +mgnu-attribute +Target Report Var(rs6000_gnu_attr) Init(1) Save +Emit .gnu_attribute tags. diff --git a/gcc/config/powerpcspe/sysv4le.h b/gcc/config/powerpcspe/sysv4le.h new file mode 100644 index 000000000000..6aa32fcf019f --- /dev/null +++ b/gcc/config/powerpcspe/sysv4le.h @@ -0,0 +1,35 @@ +/* Target definitions for GCC for a little endian PowerPC + running System V.4 + Copyright (C) 1995-2017 Free Software Foundation, Inc. + Contributed by Cygnus Support. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. 
+ + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT MASK_LITTLE_ENDIAN + +#undef DEFAULT_ASM_ENDIAN +#define DEFAULT_ASM_ENDIAN " -mlittle" + +#undef MULTILIB_DEFAULTS +#define MULTILIB_DEFAULTS { "mlittle", "mcall-sysv" } + +/* Little-endian PowerPC64 Linux uses the ELF v2 ABI by default. */ +#define LINUX64_DEFAULT_ABI_ELFv2 + +#undef MUSL_DYNAMIC_LINKER_E +#define MUSL_DYNAMIC_LINKER_E ENDIAN_SELECT("","le","le") diff --git a/gcc/config/powerpcspe/t-aix43 b/gcc/config/powerpcspe/t-aix43 new file mode 100644 index 000000000000..043928064782 --- /dev/null +++ b/gcc/config/powerpcspe/t-aix43 @@ -0,0 +1,39 @@ +# Copyright (C) 1998-2017 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# Build the libraries for pthread and all of the +# different processor models + +MULTILIB_OPTIONS = pthread \ + mcpu=common/mcpu=powerpc/maix64 + +MULTILIB_DIRNAMES = pthread \ + common powerpc ppc64 + +MULTILIB_MATCHES = mcpu?powerpc=mcpu?power3 \ + mcpu?powerpc=mcpu?power4 \ + mcpu?powerpc=mcpu?powerpc \ + mcpu?powerpc=mcpu?rs64a \ + mcpu?powerpc=mcpu?601 \ + mcpu?powerpc=mcpu?602 \ + mcpu?powerpc=mcpu?603 \ + mcpu?powerpc=mcpu?603e \ + mcpu?powerpc=mcpu?604 \ + mcpu?powerpc=mcpu?604e \ + mcpu?powerpc=mcpu?620 \ + mcpu?powerpc=mcpu?630 diff --git a/gcc/config/powerpcspe/t-aix52 b/gcc/config/powerpcspe/t-aix52 new file mode 100644 index 000000000000..b01ea0193300 --- /dev/null +++ b/gcc/config/powerpcspe/t-aix52 @@ -0,0 +1,26 @@ +# Copyright (C) 2002-2017 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. 
+ +# Build the libraries for pthread and all of the +# different processor models + +MULTILIB_OPTIONS = pthread maix64 + +MULTILIB_DIRNAMES = pthread ppc64 + +MULTILIB_MATCHES = diff --git a/gcc/config/powerpcspe/t-darwin64 b/gcc/config/powerpcspe/t-darwin64 new file mode 100644 index 000000000000..b0a04c7d89d9 --- /dev/null +++ b/gcc/config/powerpcspe/t-darwin64 @@ -0,0 +1,2 @@ +MULTILIB_OPTIONS = m32 +MULTILIB_DIRNAMES = ppc diff --git a/gcc/config/powerpcspe/t-darwin8 b/gcc/config/powerpcspe/t-darwin8 new file mode 100644 index 000000000000..2f3bb32f821a --- /dev/null +++ b/gcc/config/powerpcspe/t-darwin8 @@ -0,0 +1,3 @@ +# 64-bit libraries can only be built in Darwin 8.x or later. +MULTILIB_OPTIONS = m64 +MULTILIB_DIRNAMES = ppc64 diff --git a/gcc/config/powerpcspe/t-fprules b/gcc/config/powerpcspe/t-fprules new file mode 100644 index 000000000000..6c0ba5f4f6f7 --- /dev/null +++ b/gcc/config/powerpcspe/t-fprules @@ -0,0 +1,20 @@ +# Copyright (C) 2002-2017 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +SOFT_FLOAT_CPUS = e300c2 401 403 405 440 464 476 ec603e 801 821 823 860 +MULTILIB_MATCHES_FLOAT = $(foreach cpu, $(SOFT_FLOAT_CPUS), msoft-float=mcpu?$(cpu)) diff --git a/gcc/config/powerpcspe/t-freebsd64 b/gcc/config/powerpcspe/t-freebsd64 new file mode 100644 index 000000000000..604b300efde0 --- /dev/null +++ b/gcc/config/powerpcspe/t-freebsd64 @@ -0,0 +1,29 @@ +#rs6000/t-freebsd64 + +# Copyright (C) 2012-2017 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# On FreeBSD the 32-bit libraries are found under /usr/lib32. +# Set MULTILIB_OSDIRNAMES according to this. + +MULTILIB_OPTIONS = m32 +MULTILIB_DIRNAMES = 32 +MULTILIB_EXTRA_OPTS = fPIC mstrict-align +MULTILIB_EXCEPTIONS = +MULTILIB_OSDIRNAMES = ../lib32 + diff --git a/gcc/config/powerpcspe/t-linux b/gcc/config/powerpcspe/t-linux new file mode 100644 index 000000000000..1906338ea0b9 --- /dev/null +++ b/gcc/config/powerpcspe/t-linux @@ -0,0 +1,23 @@ +# do not define the multiarch name if configured for a soft-float cpu +# or soft-float. 
+ifeq (,$(filter $(with_cpu),$(SOFT_FLOAT_CPUS))$(findstring soft,$(with_float))) +ifneq (,$(findstring powerpc64,$(target))) +MULTILIB_OSDIRNAMES := .=../lib64$(call if_multiarch,:powerpc64-linux-gnu) +else +ifneq (,$(findstring spe,$(target))) +MULTIARCH_DIRNAME := powerpc-linux-gnuspe$(if $(findstring 8548,$(with_cpu)),,v1) +else +MULTIARCH_DIRNAME := powerpc-linux-gnu +endif +endif +ifneq (,$(findstring powerpcle,$(target))) +MULTIARCH_DIRNAME := $(subst -linux,le-linux,$(MULTIARCH_DIRNAME)) +endif +ifneq (,$(findstring powerpc64le,$(target))) +MULTILIB_OSDIRNAMES := $(subst -linux,le-linux,$(MULTILIB_OSDIRNAMES)) +endif +endif + +powerpcspe-linux.o: $(srcdir)/config/powerpcspe/powerpcspe-linux.c + $(COMPILE) $< + $(POSTCOMPILE) diff --git a/gcc/config/powerpcspe/t-linux64 b/gcc/config/powerpcspe/t-linux64 new file mode 100644 index 000000000000..466e9931e945 --- /dev/null +++ b/gcc/config/powerpcspe/t-linux64 @@ -0,0 +1,36 @@ +#rs6000/t-linux64 + +# Copyright (C) 2002-2017 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# On Debian, Ubuntu and other derivative distributions, the 32bit libraries +# are found in /lib32 and /usr/lib32, /lib64 and /usr/lib64 are symlinks to +# /lib and /usr/lib, while other distributions install libraries into /lib64 +# and /usr/lib64. The LSB does not enforce the use of /lib64 and /usr/lib64, +# it doesn't tell anything about the 32bit libraries on those systems. Set +# MULTILIB_OSDIRNAMES according to what is found on the target. 
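+# Concretely, the m64 entry below always maps to ../lib64, while the
+# m32 entry uses $(wildcard) at build time to check whether
+# $(SYSTEM_HEADER_DIR)/../../usr/lib32 exists and picks ../lib32 or
+# ../lib accordingly; if_multiarch appends the Debian-style multiarch
+# names powerpc64-linux-gnu and powerpc-linux-gnu.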
+ +MULTILIB_OPTIONS := m64/m32 +MULTILIB_DIRNAMES := 64 32 +MULTILIB_EXTRA_OPTS := +MULTILIB_OSDIRNAMES := m64=../lib64$(call if_multiarch,:powerpc64-linux-gnu) +MULTILIB_OSDIRNAMES += m32=$(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call if_multiarch,:powerpc-linux-gnu) + +powerpcspe-linux.o: $(srcdir)/config/powerpcspe/powerpcspe-linux.c + $(COMPILE) $< + $(POSTCOMPILE) diff --git a/gcc/config/powerpcspe/t-linux64bele b/gcc/config/powerpcspe/t-linux64bele new file mode 100644 index 000000000000..97c1ee6fb4d7 --- /dev/null +++ b/gcc/config/powerpcspe/t-linux64bele @@ -0,0 +1,7 @@ +#rs6000/t-linux64end + +MULTILIB_OPTIONS += mlittle +MULTILIB_DIRNAMES += le +MULTILIB_OSDIRNAMES += $(subst =,.mlittle=,$(subst lible32,lib32le,$(subst lible64,lib64le,$(subst lib,lible,$(subst -linux,le-linux,$(MULTILIB_OSDIRNAMES)))))) +MULTILIB_OSDIRNAMES += $(subst $(if $(findstring 64,$(target)),m64,m32).,,$(filter $(if $(findstring 64,$(target)),m64,m32).mlittle%,$(MULTILIB_OSDIRNAMES))) +MULTILIB_MATCHES := ${MULTILIB_MATCHES_ENDIAN} diff --git a/gcc/config/powerpcspe/t-linux64le b/gcc/config/powerpcspe/t-linux64le new file mode 100644 index 000000000000..0cf38e1523a3 --- /dev/null +++ b/gcc/config/powerpcspe/t-linux64le @@ -0,0 +1,3 @@ +#rs6000/t-linux64le + +MULTILIB_OSDIRNAMES := $(subst -linux,le-linux,$(MULTILIB_OSDIRNAMES)) diff --git a/gcc/config/powerpcspe/t-linux64lebe b/gcc/config/powerpcspe/t-linux64lebe new file mode 100644 index 000000000000..2e63bdb9fc95 --- /dev/null +++ b/gcc/config/powerpcspe/t-linux64lebe @@ -0,0 +1,7 @@ +#rs6000/t-linux64leend + +MULTILIB_OPTIONS += mbig +MULTILIB_DIRNAMES += be +MULTILIB_OSDIRNAMES += $(subst =,.mbig=,$(subst libbe32,lib32be,$(subst libbe64,lib64be,$(subst lib,libbe,$(subst le-linux,-linux,$(MULTILIB_OSDIRNAMES)))))) +MULTILIB_OSDIRNAMES += $(subst $(if $(findstring 64,$(target)),m64,m32).,,$(filter $(if $(findstring 64,$(target)),m64,m32).mbig%,$(MULTILIB_OSDIRNAMES))) +MULTILIB_MATCHES := ${MULTILIB_MATCHES_ENDIAN} diff --git a/gcc/config/powerpcspe/t-lynx b/gcc/config/powerpcspe/t-lynx new file mode 100644 index 000000000000..aac08f5aa24a --- /dev/null +++ b/gcc/config/powerpcspe/t-lynx @@ -0,0 +1,29 @@ +# Copyright (C) 2004-2017 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +MULTILIB_OPTIONS += msoft-float +MULTILIB_DIRNAMES += soft-float + +MULTILIB_OPTIONS += maltivec +MULTILIB_DIRNAMES += altivec + +MULTILIB_EXCEPTIONS = *msoft-float/*maltivec* + +Local Variables: +mode: makefile +End: diff --git a/gcc/config/powerpcspe/t-netbsd b/gcc/config/powerpcspe/t-netbsd new file mode 100644 index 000000000000..a3e941d22dc5 --- /dev/null +++ b/gcc/config/powerpcspe/t-netbsd @@ -0,0 +1,36 @@ +# Support for NetBSD PowerPC ELF targets (SVR4 ABI). +# +# Copyright (C) 2002-2017 Free Software Foundation, Inc. +# +# This file is part of GCC. 
+# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# Switch synonyms +MULTILIB_MATCHES_FLOAT = msoft-float=mcpu?401 \ + msoft-float=mcpu?403 \ + msoft-float=mcpu?405 \ + msoft-float=mcpu?ec603e \ + msoft-float=mcpu?801 \ + msoft-float=mcpu?821 \ + msoft-float=mcpu?823 \ + msoft-float=mcpu?860 + +MULTILIB_OPTIONS = msoft-float +MULTILIB_DIRNAMES = soft-float +MULTILIB_EXTRA_OPTS = fPIC mstrict-align +MULTILIB_EXCEPTIONS = + +MULTILIB_MATCHES = ${MULTILIB_MATCHES_FLOAT} diff --git a/gcc/config/powerpcspe/t-powerpcspe b/gcc/config/powerpcspe/t-powerpcspe new file mode 100644 index 000000000000..fba99508757b --- /dev/null +++ b/gcc/config/powerpcspe/t-powerpcspe @@ -0,0 +1,69 @@ +# General rules that all rs6000/ targets must have. +# +# Copyright (C) 1995-2017 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +TM_H += $(srcdir)/config/powerpcspe/powerpcspe-builtin.def +TM_H += $(srcdir)/config/powerpcspe/powerpcspe-cpus.def +PASSES_EXTRA += $(srcdir)/config/powerpcspe/powerpcspe-passes.def + +powerpcspe-c.o: $(srcdir)/config/powerpcspe/powerpcspe-c.c + $(COMPILE) $< + $(POSTCOMPILE) + +$(srcdir)/config/powerpcspe/powerpcspe-tables.opt: $(srcdir)/config/powerpcspe/genopt.sh \ + $(srcdir)/config/powerpcspe/powerpcspe-cpus.def + $(SHELL) $(srcdir)/config/powerpcspe/genopt.sh $(srcdir)/config/powerpcspe > \ + $(srcdir)/config/powerpcspe/powerpcspe-tables.opt + +# The rs6000 backend doesn't cause warnings in these files. 
+insn-conditions.o-warn = + +MD_INCLUDES = $(srcdir)/config/powerpcspe/rs64.md \ + $(srcdir)/config/powerpcspe/mpc.md \ + $(srcdir)/config/powerpcspe/40x.md \ + $(srcdir)/config/powerpcspe/440.md \ + $(srcdir)/config/powerpcspe/601.md \ + $(srcdir)/config/powerpcspe/603.md \ + $(srcdir)/config/powerpcspe/6xx.md \ + $(srcdir)/config/powerpcspe/7xx.md \ + $(srcdir)/config/powerpcspe/7450.md \ + $(srcdir)/config/powerpcspe/8540.md \ + $(srcdir)/config/powerpcspe/e300c2c3.md \ + $(srcdir)/config/powerpcspe/e500mc.md \ + $(srcdir)/config/powerpcspe/power4.md \ + $(srcdir)/config/powerpcspe/power5.md \ + $(srcdir)/config/powerpcspe/power6.md \ + $(srcdir)/config/powerpcspe/power7.md \ + $(srcdir)/config/powerpcspe/power8.md \ + $(srcdir)/config/powerpcspe/power9.md \ + $(srcdir)/config/powerpcspe/cell.md \ + $(srcdir)/config/powerpcspe/xfpu.md \ + $(srcdir)/config/powerpcspe/a2.md \ + $(srcdir)/config/powerpcspe/predicates.md \ + $(srcdir)/config/powerpcspe/constraints.md \ + $(srcdir)/config/powerpcspe/darwin.md \ + $(srcdir)/config/powerpcspe/sync.md \ + $(srcdir)/config/powerpcspe/vector.md \ + $(srcdir)/config/powerpcspe/vsx.md \ + $(srcdir)/config/powerpcspe/altivec.md \ + $(srcdir)/config/powerpcspe/crypto.md \ + $(srcdir)/config/powerpcspe/htm.md \ + $(srcdir)/config/powerpcspe/spe.md \ + $(srcdir)/config/powerpcspe/dfp.md \ + $(srcdir)/config/powerpcspe/paired.md diff --git a/gcc/config/powerpcspe/t-ppccomm b/gcc/config/powerpcspe/t-ppccomm new file mode 100644 index 000000000000..fa87d0f3a9ca --- /dev/null +++ b/gcc/config/powerpcspe/t-ppccomm @@ -0,0 +1,23 @@ +# Common support for PowerPC ELF targets (both EABI and SVR4). +# +# Copyright (C) 1996-2017 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# Switch synonyms +MULTILIB_MATCHES_ENDIAN = mlittle=mlittle-endian mbig=mbig-endian +MULTILIB_MATCHES_SYSV = mcall-sysv=mcall-sysv-eabi mcall-sysv=mcall-sysv-noeabi mcall-sysv=mcall-linux mcall-sysv=mcall-netbsd diff --git a/gcc/config/powerpcspe/t-ppcendian b/gcc/config/powerpcspe/t-ppcendian new file mode 100644 index 000000000000..3c1daf365033 --- /dev/null +++ b/gcc/config/powerpcspe/t-ppcendian @@ -0,0 +1,30 @@ +# Multilibs for powerpc embedded ELF targets with altivec. +# +# Copyright (C) 2002-2017 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. 
If not see +# <http://www.gnu.org/licenses/>. + +MULTILIB_OPTIONS = msoft-float \ + mlittle/mbig + +MULTILIB_DIRNAMES = nof \ + le be + + +MULTILIB_MATCHES = ${MULTILIB_MATCHES_FLOAT} \ + ${MULTILIB_MATCHES_ENDIAN} \ + ${MULTILIB_MATCHES_SYSV} diff --git a/gcc/config/powerpcspe/t-ppcgas b/gcc/config/powerpcspe/t-ppcgas new file mode 100644 index 000000000000..6b00fc7e3d33 --- /dev/null +++ b/gcc/config/powerpcspe/t-ppcgas @@ -0,0 +1,32 @@ +# Multilibs for powerpc embedded ELF targets. +# +# Copyright (C) 1995-2017 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +MULTILIB_OPTIONS = msoft-float \ + mlittle/mbig \ + fleading-underscore + +MULTILIB_DIRNAMES = nof \ + le be \ + und + +MULTILIB_EXTRA_OPTS = mrelocatable-lib mno-eabi mstrict-align + +MULTILIB_MATCHES = ${MULTILIB_MATCHES_FLOAT} \ + ${MULTILIB_MATCHES_ENDIAN} diff --git a/gcc/config/powerpcspe/t-ppcos b/gcc/config/powerpcspe/t-ppcos new file mode 100644 index 000000000000..819863bead4f --- /dev/null +++ b/gcc/config/powerpcspe/t-ppcos @@ -0,0 +1,8 @@ +# Multilibs for a powerpc hosted ELF target (linux, SVR4) + +MULTILIB_OPTIONS = msoft-float +MULTILIB_DIRNAMES = nof +MULTILIB_EXTRA_OPTS = fPIC mstrict-align +MULTILIB_EXCEPTIONS = + +MULTILIB_MATCHES = ${MULTILIB_MATCHES_FLOAT} diff --git a/gcc/config/powerpcspe/t-rtems b/gcc/config/powerpcspe/t-rtems new file mode 100644 index 000000000000..7c7637d017fd --- /dev/null +++ b/gcc/config/powerpcspe/t-rtems @@ -0,0 +1,79 @@ +# Multilibs for powerpc RTEMS targets. +# +# Copyright (C) 2004-2017 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. 
+ +MULTILIB_OPTIONS = +MULTILIB_DIRNAMES = +MULTILIB_MATCHES = +MULTILIB_EXCEPTIONS = +MULTILIB_REQUIRED = + +MULTILIB_OPTIONS += mcpu=403/mcpu=505/mcpu=603e/mcpu=604/mcpu=860/mcpu=7400/mcpu=8540/mcpu=e6500 +MULTILIB_DIRNAMES += m403 m505 m603e m604 m860 m7400 m8540 me6500 + +MULTILIB_OPTIONS += m32 +MULTILIB_DIRNAMES += m32 + +MULTILIB_OPTIONS += msoft-float/mfloat-gprs=double +MULTILIB_DIRNAMES += nof gprsdouble + +MULTILIB_OPTIONS += mno-spe/mno-altivec +MULTILIB_DIRNAMES += nospe noaltivec + +MULTILIB_MATCHES += ${MULTILIB_MATCHES_ENDIAN} +MULTILIB_MATCHES += ${MULTILIB_MATCHES_SYSV} +# Map 405 to 403 +MULTILIB_MATCHES += mcpu?403=mcpu?405 +# Map 602, 603e, 603 to 603e +MULTILIB_MATCHES += mcpu?603e=mcpu?602 +MULTILIB_MATCHES += mcpu?603e=mcpu?603 +# Map 801, 821, 823 to 860 +MULTILIB_MATCHES += mcpu?860=mcpu?801 +MULTILIB_MATCHES += mcpu?860=mcpu?821 +MULTILIB_MATCHES += mcpu?860=mcpu?823 +# Map 7450 to 7400 +MULTILIB_MATCHES += mcpu?7400=mcpu?7450 + +# Map 750 to . +MULTILIB_MATCHES += mcpu?750= + +# Map 8548 to 8540 +MULTILIB_MATCHES += mcpu?8540=mcpu?8548 + +# Map -mcpu=8540 -mfloat-gprs=single to -mcpu=8540 +# (mfloat-gprs=single is implicit default) +MULTILIB_MATCHES += mcpu?8540=mcpu?8540/mfloat-gprs?single + +# Enumeration of multilibs + +MULTILIB_REQUIRED += msoft-float +MULTILIB_REQUIRED += mcpu=403 +MULTILIB_REQUIRED += mcpu=505 +MULTILIB_REQUIRED += mcpu=603e +MULTILIB_REQUIRED += mcpu=603e/msoft-float +MULTILIB_REQUIRED += mcpu=604 +MULTILIB_REQUIRED += mcpu=604/msoft-float +MULTILIB_REQUIRED += mcpu=7400 +MULTILIB_REQUIRED += mcpu=7400/msoft-float +MULTILIB_REQUIRED += mcpu=8540 +MULTILIB_REQUIRED += mcpu=8540/msoft-float/mno-spe +MULTILIB_REQUIRED += mcpu=8540/mfloat-gprs=double +MULTILIB_REQUIRED += mcpu=860 +MULTILIB_REQUIRED += mcpu=e6500/m32 +MULTILIB_REQUIRED += mcpu=e6500/m32/msoft-float/mno-altivec diff --git a/gcc/config/powerpcspe/t-spe b/gcc/config/powerpcspe/t-spe new file mode 100644 index 000000000000..fe5de534be28 --- /dev/null +++ b/gcc/config/powerpcspe/t-spe @@ -0,0 +1,72 @@ +# Multilibs for e500 +# +# Copyright (C) 2003-2017 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# What we really want are these variants: +# -mcpu=7400 +# -mcpu=7400 -maltivec -mabi=altivec +# -mcpu=7400 -msoft-float +# -msoft-float +# -mno-spe -mabi=no-spe +# -mno-spe -mabi=no-spe -mno-isel +# so we'll need to create exceptions later below. 
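+# How the pruning works: MULTILIB_OPTIONS on its own would build every
+# combination of the option groups below, and each MULTILIB_EXCEPTIONS
+# entry is a glob (option names joined by '/', with '*' matching
+# anything) naming a combination to drop -- for instance a bare
+# maltivec, or mno-spe together with mno-isel.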
+ +MULTILIB_OPTIONS = mcpu=7400 \ + maltivec \ + mabi=altivec \ + msoft-float \ + mno-spe \ + mabi=no-spe \ + mno-isel \ + mlittle + +MULTILIB_DIRNAMES = mpc7400 altivec abi-altivec \ + nof no-spe no-abi-spe no-isel le + +MULTILIB_EXCEPTIONS = maltivec mabi=altivec mno-spe mabi=no-spe mno-isel \ + maltivec/mabi=altivec \ + mcpu=7400/maltivec \ + mcpu=7400/mabi=altivec \ + *mcpu=7400/*mno-spe* \ + *mcpu=7400/*mabi=no-spe* \ + *mcpu=7400/*mno-isel* \ + *maltivec/*msoft-float* \ + *maltivec/*mno-spe* \ + *maltivec/*mabi=no-spe* \ + *maltivec/*mno-isel* \ + *mabi=altivec/*msoft-float* \ + *mabi=altivec/*mno-spe* \ + *mabi=altivec/*mabi=no-spe* \ + *mabi=altivec/*mno-isel* \ + *msoft-float/*mno-spe* \ + *msoft-float/*mabi=no-spe* \ + *msoft-float/*mno-isel* \ + mno-spe/mno-isel \ + mabi=no-spe/mno-isel \ + mno-isel/mlittle \ + mabi=no-spe/mno-isel/mlittle \ + mno-spe/mlittle \ + mabi=spe/mlittle \ + mcpu=7400/mabi=altivec/mlittle \ + mcpu=7400/maltivec/mlittle \ + mabi=no-spe/mlittle \ + mno-spe/mno-isel/mlittle \ + mabi=altivec/mlittle \ + maltivec/mlittle \ + maltivec/mabi=altivec/mlittle diff --git a/gcc/config/powerpcspe/t-vxworks b/gcc/config/powerpcspe/t-vxworks new file mode 100644 index 000000000000..fe2549f93e8e --- /dev/null +++ b/gcc/config/powerpcspe/t-vxworks @@ -0,0 +1,25 @@ +# Multilibs for VxWorks. +# +# Copyright (C) 2002-2017 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# The base multilib is -mhard-float. +MULTILIB_OPTIONS = mrtp fPIC msoft-float +MULTILIB_DIRNAMES = +MULTILIB_MATCHES = fPIC=fpic +MULTILIB_EXCEPTIONS = fPIC* diff --git a/gcc/config/powerpcspe/t-vxworksae b/gcc/config/powerpcspe/t-vxworksae new file mode 100644 index 000000000000..5f682627ebc6 --- /dev/null +++ b/gcc/config/powerpcspe/t-vxworksae @@ -0,0 +1,5 @@ +# Multilibs for VxWorks AE. + +MULTILIB_OPTIONS = mvthreads msoft-float +MULTILIB_MATCHES = +MULTILIB_EXCEPTIONS = diff --git a/gcc/config/powerpcspe/t-vxworksmils b/gcc/config/powerpcspe/t-vxworksmils new file mode 100644 index 000000000000..788069ed6dde --- /dev/null +++ b/gcc/config/powerpcspe/t-vxworksmils @@ -0,0 +1,10 @@ +# Multilibs for VxWorks MILS. + +MULTILIB_OPTIONS = msoft-float +MULTILIB_MATCHES = +MULTILIB_EXCEPTIONS = + +# Mils provides headers for the vthreads environment only, so we force +# that option on all the variants: + +TCFLAGS += -mvthreads diff --git a/gcc/config/powerpcspe/t-xilinx b/gcc/config/powerpcspe/t-xilinx new file mode 100644 index 000000000000..1bc31a82cb0e --- /dev/null +++ b/gcc/config/powerpcspe/t-xilinx @@ -0,0 +1,28 @@ +# Multilibs for Xilinx powerpc embedded ELF targets. +# +# Copyright (C) 2009-2017 Free Software Foundation, Inc. +# Contributed by Michael Eager, eager@eagercon.com +# +# This file is part of GCC. 
+# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# Switch synonyms +MULTILIB_MATCHES = mfpu?sp_lite=msingle-float mfpu?dp_lite=mdouble-float mfpu?dp_lite=mhard-float mfpu?sp_lite=mfpu?sp_full mfpu?dp_lite=mfpu?dp_full + +MULTILIB_OPTIONS = mfpu=sp_lite/mfpu=dp_lite + +MULTILIB_DIRNAMES = single double + diff --git a/gcc/config/powerpcspe/titan.md b/gcc/config/powerpcspe/titan.md new file mode 100644 index 000000000000..5f725ee9fd1a --- /dev/null +++ b/gcc/config/powerpcspe/titan.md @@ -0,0 +1,168 @@ +;; Pipeline description for the AppliedMicro Titan core. +;; Copyright (C) 2010-2017 Free Software Foundation, Inc. +;; Contributed by Theobroma Systems Design und Consulting GmbH +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; AppliedMicro Titan core complex + +(define_automaton "titan_core,titan_fpu,titan_fxu,titan_bpu,titan_lsu") +(define_cpu_unit "titan_issue_0,titan_issue_1" "titan_core") + +;; Some useful abbreviations. +(define_reservation "titan_issue" "titan_issue_0|titan_issue_1") + +;; === FXU scheduling === + +(define_cpu_unit "titan_fxu_sh,titan_fxu_wb" "titan_fxu") + +;; The 1-cycle adder executes add, addi, subf, neg, compare and trap +;; instructions. It provides its own, dedicated result-bus, so we +;; don't need the titan_fxu_wb reservation to complete. +(define_insn_reservation "titan_fxu_adder" 1 + (and (ior (eq_attr "type" "cmp,trap") + (and (eq_attr "type" "add,logical") + (eq_attr "dot" "yes"))) + (eq_attr "cpu" "titan")) + "titan_issue,titan_fxu_sh") + +(define_insn_reservation "titan_imul" 5 + (and (eq_attr "type" "mul") + (eq_attr "cpu" "titan")) + "titan_issue,titan_fxu_sh,nothing*5,titan_fxu_wb") + +(define_insn_reservation "titan_mulhw" 4 + (and (eq_attr "type" "halfmul") + (eq_attr "cpu" "titan")) + "titan_issue,titan_fxu_sh,nothing*4,titan_fxu_wb") + +(define_bypass 2 "titan_mulhw" "titan_mulhw") + +(define_insn_reservation "titan_fxu_shift_and_rotate" 2 + (and (eq_attr "type" "insert,shift,cntlz") + (eq_attr "cpu" "titan")) + "titan_issue,titan_fxu_sh,nothing*2,titan_fxu_wb") + +;; We model the divider for the worst-case (i.e. a full 32-bit +;; divide). To model the bypass for byte-wise completion, a +;; define_bypass with a guard-function could be used... 
however, this +;; would be an optimization of doubtful value, as a large number of +;; divides will operate on 32-bit variables. + +;; To avoid an unmanagably large automata (generating the automata +;; would require well over 2GB in memory), we don't model the shared +;; result bus on this one. The divider-pipeline is thus modeled +;; through its latency and initial disptach bottlenecks (i.e. issue +;; slots and fxu scheduler availability) +(define_insn_reservation "titan_fxu_div" 34 + (and (eq_attr "type" "div") + (eq_attr "cpu" "titan")) + "titan_issue,titan_fxu_sh") + +(define_insn_reservation "titan_fxu_alu" 1 + (and (ior (eq_attr "type" "integer,exts") + (and (eq_attr "type" "add,logical") + (eq_attr "dot" "no"))) + (eq_attr "cpu" "titan")) + "titan_issue,titan_fxu_sh,nothing,titan_fxu_wb") + +;; === BPU scheduling === + +(define_cpu_unit "titan_bpu_sh" "titan_bpu") + +(define_insn_reservation "titan_bpu" 2 + (and (eq_attr "type" "branch,jmpreg,cr_logical,delayed_cr") + (eq_attr "cpu" "titan")) + "titan_issue,titan_bpu_sh") + +;; === LSU scheduling === + +(define_cpu_unit "titan_lsu_sh" "titan_lsu") + +;; Loads. +(define_insn_reservation "titan_lsu_load" 3 + (and (eq_attr "type" "load,load_l,sync") + (eq_attr "cpu" "titan")) + "titan_issue,titan_lsu_sh") + +(define_insn_reservation "titan_lsu_fpload" 12 + (and (eq_attr "type" "fpload") + (eq_attr "cpu" "titan")) + "titan_issue,titan_lsu_sh") + +;; Note that the isync is not clearly placed within any execution +;; unit. We've made the assumption that it will be running out of the +;; LSU, as msync is also executed within the LSU. +(define_insn_reservation "titan_lsu_sync" 20 + (and (eq_attr "type" "sync") + (eq_attr "cpu" "titan")) + "titan_issue,titan_lsu_sh*20") + +;; Stores. +(define_insn_reservation "titan_lsu_store" 12 + (and (eq_attr "type" "store,store_c") + (eq_attr "cpu" "titan")) + "titan_issue,titan_lsu_sh") + +(define_insn_reservation "titan_lsu_fpstore" 12 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "titan")) + "titan_issue,titan_lsu_sh") + +;; === FPU scheduling === + +;; In order to keep the automaton for the Titan FPU efficient and +;; maintainable, we've kept in as concise as possible and created a +;; mapping for the main "choke points" only instead of modelling the +;; overall flow of instructions through the FP-pipeline(s). + +;; The key elements modelled are: +;; * each FP-instruction takes up one of the two issue slots +;; * the FPU runs at half the core frequency +;; * divides are not pipelined (but execute in a separate unit) +;; * the FPU has a shared result bus for all its units + +(define_cpu_unit "titan_fp0,titan_fpdiv,titan_fpwb" "titan_fpu") + +(define_insn_reservation "titan_fp_div_double" 72 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "titan")) + "titan_issue,titan_fpdiv*72,titan_fpwb") + +(define_insn_reservation "titan_fp_div_single" 46 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "titan")) + "titan_issue,titan_fpdiv*46,titan_fpwb") + +(define_insn_reservation "titan_fp_single" 12 + (and (eq_attr "fp_type" "fp_addsub_s,fp_mul_s,fp_maddsub_s") + (eq_attr "cpu" "titan")) + "titan_issue,titan_fp0*2,nothing*10,titan_fpwb") + +;; Make sure the "titan_fp" rule stays last, as it's a catch all for +;; double-precision and unclassified (e.g. 
fsel) FP-instructions +(define_insn_reservation "titan_fp" 10 + (and (eq_attr "type" "fpcompare,fp,fpsimple,dmul") + (eq_attr "cpu" "titan")) + "titan_issue,titan_fp0*2,nothing*8,titan_fpwb") + +;; Please note, that the non-pipelined FP-instructions "mcrfs", +;; "mtfsb0[.]", "mtfsb1[.]", "mtfsf[.]", "mtfsfi[.]" are not +;; accessible from regular language constructs (i.e. they are not used +;; by the code generator, except for special purpose sequences defined +;; in rs6000.md), no special provisions are made for these. + diff --git a/gcc/config/powerpcspe/vec_types.h b/gcc/config/powerpcspe/vec_types.h new file mode 100644 index 000000000000..abf38ad309f4 --- /dev/null +++ b/gcc/config/powerpcspe/vec_types.h @@ -0,0 +1,52 @@ +/* Cell single token vector types + Copyright (C) 2007-2017 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Single token vector data types for the PowerPC SIMD/Vector Multi-media + eXtension */ + +#ifndef _VEC_TYPES_H_ +#define _VEC_TYPES_H_ 1 + +#define qword __vector unsigned char + +#define vec_uchar16 __vector unsigned char +#define vec_char16 __vector signed char +#define vec_bchar16 __vector bool char + +#define vec_ushort8 __vector unsigned short +#define vec_short8 __vector signed short +#define vec_bshort8 __vector bool short + +#define vec_pixel8 __vector pixel + +#define vec_uint4 __vector unsigned int +#define vec_int4 __vector signed int +#define vec_bint4 __vector bool int + +#define vec_float4 __vector float + +#define vec_ullong2 __vector bool char +#define vec_llong2 __vector bool short + +#define vec_double2 __vector bool int + +#endif /* _VEC_TYPES_H_ */ diff --git a/gcc/config/powerpcspe/vector.md b/gcc/config/powerpcspe/vector.md new file mode 100644 index 000000000000..e6489a861cdd --- /dev/null +++ b/gcc/config/powerpcspe/vector.md @@ -0,0 +1,1407 @@ +;; Expander definitions for vector support between altivec & vsx. No +;; instructions are in this file, this file provides the generic vector +;; expander, and the actual vector instructions will be in altivec.md and +;; vsx.md + +;; Copyright (C) 2009-2017 Free Software Foundation, Inc. +;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com> + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + + +;; Vector int modes +(define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI]) + +;; Vector int modes for parity +(define_mode_iterator VEC_IP [V8HI + V4SI + V2DI + V1TI + (TI "TARGET_VSX_TIMODE")]) + +;; Vector float modes +(define_mode_iterator VEC_F [V4SF V2DF]) + +;; Vector arithmetic modes +(define_mode_iterator VEC_A [V16QI V8HI V4SI V2DI V4SF V2DF]) + +;; Vector modes that need alginment via permutes +(define_mode_iterator VEC_K [V16QI V8HI V4SI V4SF]) + +;; Vector logical modes +(define_mode_iterator VEC_L [V16QI V8HI V4SI V2DI V4SF V2DF V1TI TI KF TF]) + +;; Vector modes for moves. Don't do TImode or TFmode here, since their +;; moves are handled elsewhere. +(define_mode_iterator VEC_M [V16QI V8HI V4SI V2DI V4SF V2DF V1TI KF]) + +;; Vector modes for types that don't need a realignment under VSX +(define_mode_iterator VEC_N [V4SI V4SF V2DI V2DF V1TI KF TF]) + +;; Vector comparison modes +(define_mode_iterator VEC_C [V16QI V8HI V4SI V2DI V4SF V2DF]) + +;; Vector init/extract modes +(define_mode_iterator VEC_E [V16QI V8HI V4SI V2DI V4SF V2DF]) + +;; Vector modes for 64-bit base types +(define_mode_iterator VEC_64 [V2DI V2DF]) + +;; Vector integer modes +(define_mode_iterator VI [V4SI V8HI V16QI]) + +;; Base type from vector mode +(define_mode_attr VEC_base [(V16QI "QI") + (V8HI "HI") + (V4SI "SI") + (V2DI "DI") + (V4SF "SF") + (V2DF "DF") + (V1TI "TI") + (TI "TI")]) + +;; Same size integer type for floating point data +(define_mode_attr VEC_int [(V4SF "v4si") + (V2DF "v2di")]) + +(define_mode_attr VEC_INT [(V4SF "V4SI") + (V2DF "V2DI")]) + +;; constants for unspec +(define_c_enum "unspec" [UNSPEC_PREDICATE + UNSPEC_REDUC + UNSPEC_NEZ_P]) + +;; Vector reduction code iterators +(define_code_iterator VEC_reduc [plus smin smax]) + +(define_code_attr VEC_reduc_name [(plus "plus") + (smin "smin") + (smax "smax")]) + +(define_code_attr VEC_reduc_rtx [(plus "add") + (smin "smin") + (smax "smax")]) + + +;; Vector move instructions. Little-endian VSX loads and stores require +;; special handling to circumvent "element endianness." +(define_expand "mov<mode>" + [(set (match_operand:VEC_M 0 "nonimmediate_operand" "") + (match_operand:VEC_M 1 "any_operand" ""))] + "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + if (can_create_pseudo_p ()) + { + if (CONSTANT_P (operands[1])) + { + if (FLOAT128_VECTOR_P (<MODE>mode)) + { + if (!easy_fp_constant (operands[1], <MODE>mode)) + operands[1] = force_const_mem (<MODE>mode, operands[1]); + } + else if (!easy_vector_constant (operands[1], <MODE>mode)) + operands[1] = force_const_mem (<MODE>mode, operands[1]); + } + + if (!vlogical_operand (operands[0], <MODE>mode) + && !vlogical_operand (operands[1], <MODE>mode)) + operands[1] = force_reg (<MODE>mode, operands[1]); + } + if (!BYTES_BIG_ENDIAN + && VECTOR_MEM_VSX_P (<MODE>mode) + && !TARGET_P9_VECTOR + && !gpr_or_gpr_p (operands[0], operands[1]) + && (memory_operand (operands[0], <MODE>mode) + ^ memory_operand (operands[1], <MODE>mode))) + { + rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode); + DONE; + } +}) + +;; Generic vector floating point load/store instructions. These will match +;; insns defined in vsx.md or altivec.md depending on the switches. 
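+;; As elsewhere in this file, the mode iterator in the pattern name is
+;; expanded once per mode, so "vector_load_<mode>" below stands for
+;; vector_load_v16qi, vector_load_v8hi, vector_load_v4si,
+;; vector_load_v2di, vector_load_v4sf, vector_load_v2df,
+;; vector_load_v1ti and vector_load_kf (the modes of VEC_M).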
+(define_expand "vector_load_<mode>" + [(set (match_operand:VEC_M 0 "vfloat_operand" "") + (match_operand:VEC_M 1 "memory_operand" ""))] + "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +(define_expand "vector_store_<mode>" + [(set (match_operand:VEC_M 0 "memory_operand" "") + (match_operand:VEC_M 1 "vfloat_operand" ""))] + "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +;; Splits if a GPR register was chosen for the move +(define_split + [(set (match_operand:VEC_L 0 "nonimmediate_operand" "") + (match_operand:VEC_L 1 "input_operand" ""))] + "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode) + && reload_completed + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1]) + && !quad_load_store_p (operands[0], operands[1])" + [(pc)] +{ + rs6000_split_multireg_move (operands[0], operands[1]); + DONE; +}) + +;; Vector floating point load/store instructions that uses the Altivec +;; instructions even if we are compiling for VSX, since the Altivec +;; instructions silently ignore the bottom 3 bits of the address, and VSX does +;; not. +(define_expand "vector_altivec_load_<mode>" + [(set (match_operand:VEC_M 0 "vfloat_operand" "") + (match_operand:VEC_M 1 "memory_operand" ""))] + "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + gcc_assert (VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)); + + if (VECTOR_MEM_VSX_P (<MODE>mode)) + { + operands[1] = rs6000_address_for_altivec (operands[1]); + rtx and_op = XEXP (operands[1], 0); + gcc_assert (GET_CODE (and_op) == AND); + rtx addr = XEXP (and_op, 0); + if (GET_CODE (addr) == PLUS) + emit_insn (gen_altivec_lvx_<mode>_2op (operands[0], XEXP (addr, 0), + XEXP (addr, 1))); + else + emit_insn (gen_altivec_lvx_<mode>_1op (operands[0], operands[1])); + DONE; + } +}") + +(define_expand "vector_altivec_store_<mode>" + [(set (match_operand:VEC_M 0 "memory_operand" "") + (match_operand:VEC_M 1 "vfloat_operand" ""))] + "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + gcc_assert (VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)); + + if (VECTOR_MEM_VSX_P (<MODE>mode)) + { + operands[0] = rs6000_address_for_altivec (operands[0]); + rtx and_op = XEXP (operands[0], 0); + gcc_assert (GET_CODE (and_op) == AND); + rtx addr = XEXP (and_op, 0); + if (GET_CODE (addr) == PLUS) + emit_insn (gen_altivec_stvx_<mode>_2op (operands[1], XEXP (addr, 0), + XEXP (addr, 1))); + else + emit_insn (gen_altivec_stvx_<mode>_1op (operands[1], operands[0])); + DONE; + } +}") + + + +;; Generic floating point vector arithmetic support +(define_expand "add<mode>3" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (plus:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") + (match_operand:VEC_F 2 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +(define_expand "sub<mode>3" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (minus:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") + (match_operand:VEC_F 2 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +(define_expand "mul<mode>3" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (mult:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") + (match_operand:VEC_F 2 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode)) + { + emit_insn (gen_altivec_mulv4sf3 (operands[0], operands[1], operands[2])); + DONE; + } +}) + +(define_expand "div<mode>3" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (div:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") + (match_operand:VEC_F 2 
"vfloat_operand" "")))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" +{ + if (RS6000_RECIP_AUTO_RE_P (<MODE>mode) + && can_create_pseudo_p () && flag_finite_math_only + && !flag_trapping_math && flag_reciprocal_math) + { + rs6000_emit_swdiv (operands[0], operands[1], operands[2], true); + DONE; + } +}) + +(define_expand "neg<mode>2" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (neg:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode)) + { + emit_insn (gen_altivec_negv4sf2 (operands[0], operands[1])); + DONE; + } +}") + +(define_expand "abs<mode>2" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (abs:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode)) + { + emit_insn (gen_altivec_absv4sf2 (operands[0], operands[1])); + DONE; + } +}") + +(define_expand "smin<mode>3" + [(set (match_operand:VEC_F 0 "register_operand" "") + (smin:VEC_F (match_operand:VEC_F 1 "register_operand" "") + (match_operand:VEC_F 2 "register_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +(define_expand "smax<mode>3" + [(set (match_operand:VEC_F 0 "register_operand" "") + (smax:VEC_F (match_operand:VEC_F 1 "register_operand" "") + (match_operand:VEC_F 2 "register_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + + +(define_expand "sqrt<mode>2" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (sqrt:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" +{ + if (<MODE>mode == V4SFmode + && !optimize_function_for_size_p (cfun) + && flag_finite_math_only && !flag_trapping_math + && flag_unsafe_math_optimizations) + { + rs6000_emit_swsqrt (operands[0], operands[1], 0); + DONE; + } +}) + +(define_expand "rsqrte<mode>2" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (unspec:VEC_F [(match_operand:VEC_F 1 "vfloat_operand" "")] + UNSPEC_RSQRT))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +(define_expand "re<mode>2" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (unspec:VEC_F [(match_operand:VEC_F 1 "vfloat_operand" "f")] + UNSPEC_FRES))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +(define_expand "ftrunc<mode>2" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (fix:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +(define_expand "vector_ceil<mode>2" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (unspec:VEC_F [(match_operand:VEC_F 1 "vfloat_operand" "")] + UNSPEC_FRIP))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +(define_expand "vector_floor<mode>2" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (unspec:VEC_F [(match_operand:VEC_F 1 "vfloat_operand" "")] + UNSPEC_FRIM))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +(define_expand "vector_btrunc<mode>2" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (fix:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +(define_expand "vector_copysign<mode>3" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (unspec:VEC_F [(match_operand:VEC_F 1 "vfloat_operand" "") + (match_operand:VEC_F 2 "vfloat_operand" "")] UNSPEC_COPYSIGN))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode)) + { + emit_insn 
(gen_altivec_copysign_v4sf3 (operands[0], operands[1], + operands[2])); + DONE; + } +}") + + +;; Vector comparisons +(define_expand "vcond<mode><mode>" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (if_then_else:VEC_F + (match_operator 3 "comparison_operator" + [(match_operand:VEC_F 4 "vfloat_operand" "") + (match_operand:VEC_F 5 "vfloat_operand" "")]) + (match_operand:VEC_F 1 "vfloat_operand" "") + (match_operand:VEC_F 2 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2], + operands[3], operands[4], operands[5])) + DONE; + else + FAIL; +}") + +(define_expand "vcond<mode><mode>" + [(set (match_operand:VEC_I 0 "vint_operand") + (if_then_else:VEC_I + (match_operator 3 "comparison_operator" + [(match_operand:VEC_I 4 "vint_operand") + (match_operand:VEC_I 5 "vint_operand")]) + (match_operand:VEC_I 1 "vector_int_reg_or_same_bit") + (match_operand:VEC_I 2 "vector_int_reg_or_same_bit")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2], + operands[3], operands[4], operands[5])) + DONE; + else + FAIL; +}") + +(define_expand "vcondv4sfv4si" + [(set (match_operand:V4SF 0 "vfloat_operand" "") + (if_then_else:V4SF + (match_operator 3 "comparison_operator" + [(match_operand:V4SI 4 "vint_operand" "") + (match_operand:V4SI 5 "vint_operand" "")]) + (match_operand:V4SF 1 "vfloat_operand" "") + (match_operand:V4SF 2 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode) + && VECTOR_UNIT_ALTIVEC_P (V4SImode)" + " +{ + if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2], + operands[3], operands[4], operands[5])) + DONE; + else + FAIL; +}") + +(define_expand "vcondv4siv4sf" + [(set (match_operand:V4SI 0 "vint_operand" "") + (if_then_else:V4SI + (match_operator 3 "comparison_operator" + [(match_operand:V4SF 4 "vfloat_operand" "") + (match_operand:V4SF 5 "vfloat_operand" "")]) + (match_operand:V4SI 1 "vint_operand" "") + (match_operand:V4SI 2 "vint_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode) + && VECTOR_UNIT_ALTIVEC_P (V4SImode)" + " +{ + if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2], + operands[3], operands[4], operands[5])) + DONE; + else + FAIL; +}") + +(define_expand "vcondu<mode><mode>" + [(set (match_operand:VEC_I 0 "vint_operand") + (if_then_else:VEC_I + (match_operator 3 "comparison_operator" + [(match_operand:VEC_I 4 "vint_operand") + (match_operand:VEC_I 5 "vint_operand")]) + (match_operand:VEC_I 1 "vector_int_reg_or_same_bit") + (match_operand:VEC_I 2 "vector_int_reg_or_same_bit")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2], + operands[3], operands[4], operands[5])) + DONE; + else + FAIL; +}") + +(define_expand "vconduv4sfv4si" + [(set (match_operand:V4SF 0 "vfloat_operand" "") + (if_then_else:V4SF + (match_operator 3 "comparison_operator" + [(match_operand:V4SI 4 "vint_operand" "") + (match_operand:V4SI 5 "vint_operand" "")]) + (match_operand:V4SF 1 "vfloat_operand" "") + (match_operand:V4SF 2 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode) + && VECTOR_UNIT_ALTIVEC_P (V4SImode)" + " +{ + if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2], + operands[3], operands[4], operands[5])) + DONE; + else + FAIL; +}") + +(define_expand "vector_eq<mode>" + [(set (match_operand:VEC_C 0 "vlogical_operand" "") + (eq:VEC_C 
(match_operand:VEC_C 1 "vlogical_operand" "") + (match_operand:VEC_C 2 "vlogical_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +(define_expand "vector_gt<mode>" + [(set (match_operand:VEC_C 0 "vlogical_operand" "") + (gt:VEC_C (match_operand:VEC_C 1 "vlogical_operand" "") + (match_operand:VEC_C 2 "vlogical_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +(define_expand "vector_ge<mode>" + [(set (match_operand:VEC_F 0 "vlogical_operand" "") + (ge:VEC_F (match_operand:VEC_F 1 "vlogical_operand" "") + (match_operand:VEC_F 2 "vlogical_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +; >= for integer vectors: swap operands and apply not-greater-than +(define_expand "vector_nlt<mode>" + [(set (match_operand:VEC_I 3 "vlogical_operand" "") + (gt:VEC_I (match_operand:VEC_I 2 "vlogical_operand" "") + (match_operand:VEC_I 1 "vlogical_operand" ""))) + (set (match_operand:VEC_I 0 "vlogical_operand" "") + (not:VEC_I (match_dup 3)))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + operands[3] = gen_reg_rtx_and_attrs (operands[0]); +}") + +(define_expand "vector_gtu<mode>" + [(set (match_operand:VEC_I 0 "vint_operand" "") + (gtu:VEC_I (match_operand:VEC_I 1 "vint_operand" "") + (match_operand:VEC_I 2 "vint_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +; >= for integer vectors: swap operands and apply not-greater-than +(define_expand "vector_nltu<mode>" + [(set (match_operand:VEC_I 3 "vlogical_operand" "") + (gtu:VEC_I (match_operand:VEC_I 2 "vlogical_operand" "") + (match_operand:VEC_I 1 "vlogical_operand" ""))) + (set (match_operand:VEC_I 0 "vlogical_operand" "") + (not:VEC_I (match_dup 3)))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + operands[3] = gen_reg_rtx_and_attrs (operands[0]); +}") + +(define_expand "vector_geu<mode>" + [(set (match_operand:VEC_I 0 "vint_operand" "") + (geu:VEC_I (match_operand:VEC_I 1 "vint_operand" "") + (match_operand:VEC_I 2 "vint_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +; <= for integer vectors: apply not-greater-than +(define_expand "vector_ngt<mode>" + [(set (match_operand:VEC_I 3 "vlogical_operand" "") + (gt:VEC_I (match_operand:VEC_I 1 "vlogical_operand" "") + (match_operand:VEC_I 2 "vlogical_operand" ""))) + (set (match_operand:VEC_I 0 "vlogical_operand" "") + (not:VEC_I (match_dup 3)))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + operands[3] = gen_reg_rtx_and_attrs (operands[0]); +}") + +(define_expand "vector_ngtu<mode>" + [(set (match_operand:VEC_I 3 "vlogical_operand" "") + (gtu:VEC_I (match_operand:VEC_I 1 "vlogical_operand" "") + (match_operand:VEC_I 2 "vlogical_operand" ""))) + (set (match_operand:VEC_I 0 "vlogical_operand" "") + (not:VEC_I (match_dup 3)))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + operands[3] = gen_reg_rtx_and_attrs (operands[0]); +}") + +(define_insn_and_split "*vector_uneq<mode>" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (uneq:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") + (match_operand:VEC_F 2 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "#" + "" + [(set (match_dup 3) + (gt:VEC_F (match_dup 1) + (match_dup 2))) + (set (match_dup 4) + (gt:VEC_F (match_dup 2) + (match_dup 1))) + (set (match_dup 0) + (not:VEC_F (ior:VEC_F (match_dup 3) + (match_dup 4))))] + " +{ + operands[3] = gen_reg_rtx (<MODE>mode); + operands[4] = gen_reg_rtx (<MODE>mode); +}") + +(define_insn_and_split "*vector_ltgt<mode>" + [(set (match_operand:VEC_F 0 
"vfloat_operand" "") + (ltgt:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") + (match_operand:VEC_F 2 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "#" + "" + [(set (match_dup 3) + (gt:VEC_F (match_dup 1) + (match_dup 2))) + (set (match_dup 4) + (gt:VEC_F (match_dup 2) + (match_dup 1))) + (set (match_dup 0) + (ior:VEC_F (match_dup 3) + (match_dup 4)))] + " +{ + operands[3] = gen_reg_rtx (<MODE>mode); + operands[4] = gen_reg_rtx (<MODE>mode); +}") + +(define_insn_and_split "*vector_ordered<mode>" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (ordered:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") + (match_operand:VEC_F 2 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "#" + "" + [(set (match_dup 3) + (ge:VEC_F (match_dup 1) + (match_dup 2))) + (set (match_dup 4) + (ge:VEC_F (match_dup 2) + (match_dup 1))) + (set (match_dup 0) + (ior:VEC_F (match_dup 3) + (match_dup 4)))] + " +{ + operands[3] = gen_reg_rtx (<MODE>mode); + operands[4] = gen_reg_rtx (<MODE>mode); +}") + +(define_insn_and_split "*vector_unordered<mode>" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (unordered:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") + (match_operand:VEC_F 2 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "#" + "" + [(set (match_dup 3) + (ge:VEC_F (match_dup 1) + (match_dup 2))) + (set (match_dup 4) + (ge:VEC_F (match_dup 2) + (match_dup 1))) + (set (match_dup 0) + (and:VEC_F (not:VEC_F (match_dup 3)) + (not:VEC_F (match_dup 4))))] + " +{ + operands[3] = gen_reg_rtx (<MODE>mode); + operands[4] = gen_reg_rtx (<MODE>mode); +}") + +;; Note the arguments for __builtin_altivec_vsel are op2, op1, mask +;; which is in the reverse order that we want +(define_expand "vector_select_<mode>" + [(set (match_operand:VEC_L 0 "vlogical_operand" "") + (if_then_else:VEC_L + (ne:CC (match_operand:VEC_L 3 "vlogical_operand" "") + (match_dup 4)) + (match_operand:VEC_L 2 "vlogical_operand" "") + (match_operand:VEC_L 1 "vlogical_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "operands[4] = CONST0_RTX (<MODE>mode);") + +(define_expand "vector_select_<mode>_uns" + [(set (match_operand:VEC_L 0 "vlogical_operand" "") + (if_then_else:VEC_L + (ne:CCUNS (match_operand:VEC_L 3 "vlogical_operand" "") + (match_dup 4)) + (match_operand:VEC_L 2 "vlogical_operand" "") + (match_operand:VEC_L 1 "vlogical_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "operands[4] = CONST0_RTX (<MODE>mode);") + +;; Expansions that compare vectors producing a vector result and a predicate, +;; setting CR6 to indicate a combined status +(define_expand "vector_eq_<mode>_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(eq:CC (match_operand:VEC_A 1 "vlogical_operand" "") + (match_operand:VEC_A 2 "vlogical_operand" ""))] + UNSPEC_PREDICATE)) + (set (match_operand:VEC_A 0 "vlogical_operand" "") + (eq:VEC_A (match_dup 1) + (match_dup 2)))])] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +;; This expansion handles the V16QI, V8HI, and V4SI modes in the +;; implementation of the vec_all_ne built-in functions on Power9. 
+(define_expand "vector_ne_<mode>_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(ne:CC (match_operand:VI 1 "vlogical_operand") + (match_operand:VI 2 "vlogical_operand"))] + UNSPEC_PREDICATE)) + (set (match_dup 3) + (ne:VI (match_dup 1) + (match_dup 2)))]) + (set (match_operand:SI 0 "register_operand" "=r") + (lt:SI (reg:CC CR6_REGNO) + (const_int 0)))] + "TARGET_P9_VECTOR" +{ + operands[3] = gen_reg_rtx (<MODE>mode); +}) + +;; This expansion handles the V16QI, V8HI, and V4SI modes in the +;; implementation of the vec_any_eq built-in functions on Power9. +(define_expand "vector_ae_<mode>_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(ne:CC (match_operand:VI 1 "vlogical_operand") + (match_operand:VI 2 "vlogical_operand"))] + UNSPEC_PREDICATE)) + (set (match_dup 3) + (ne:VI (match_dup 1) + (match_dup 2)))]) + (set (match_operand:SI 0 "register_operand" "=r") + (lt:SI (reg:CC CR6_REGNO) + (const_int 0))) + (set (match_dup 0) + (xor:SI (match_dup 0) + (const_int 1)))] + "TARGET_P9_VECTOR" +{ + operands[3] = gen_reg_rtx (<MODE>mode); +}) + +;; This expansion handles the V16QI, V8HI, and V4SI modes in the +;; implementation of the vec_all_nez and vec_any_eqz built-in +;; functions on Power9. +(define_expand "vector_nez_<mode>_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(unspec:VI + [(match_operand:VI 1 "vlogical_operand") + (match_operand:VI 2 "vlogical_operand")] + UNSPEC_NEZ_P)] + UNSPEC_PREDICATE)) + (set (match_operand:VI 0 "vlogical_operand") + (unspec:VI [(match_dup 1) + (match_dup 2)] + UNSPEC_NEZ_P))])] + "TARGET_P9_VECTOR" + "") + +;; This expansion handles the V2DI mode in the implementation of the +;; vec_all_ne built-in function on Power9. +;; +;; Since the Power9 "xvcmpne<mode>." instruction does not support DImode, +;; this expands into the same rtl that would be used for the Power8 +;; architecture. +(define_expand "vector_ne_v2di_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(eq:CC (match_operand:V2DI 1 "vlogical_operand") + (match_operand:V2DI 2 "vlogical_operand"))] + UNSPEC_PREDICATE)) + (set (match_dup 3) + (eq:V2DI (match_dup 1) + (match_dup 2)))]) + (set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (reg:CC CR6_REGNO) + (const_int 0)))] + "TARGET_P9_VECTOR" +{ + operands[3] = gen_reg_rtx (V2DImode); +}) + +;; This expansion handles the V2DI mode in the implementation of the +;; vec_any_eq built-in function on Power9. +;; +;; Since the Power9 "xvcmpne<mode>." instruction does not support DImode, +;; this expands into the same rtl that would be used for the Power8 +;; architecture. +(define_expand "vector_ae_v2di_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(eq:CC (match_operand:V2DI 1 "vlogical_operand") + (match_operand:V2DI 2 "vlogical_operand"))] + UNSPEC_PREDICATE)) + (set (match_dup 3) + (eq:V2DI (match_dup 1) + (match_dup 2)))]) + (set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (reg:CC CR6_REGNO) + (const_int 0))) + (set (match_dup 0) + (xor:SI (match_dup 0) + (const_int 1)))] + "TARGET_P9_VECTOR" +{ + operands[3] = gen_reg_rtx (V2DImode); +}) + +;; This expansion handles the V4SF and V2DF modes in the Power9 +;; implementation of the vec_all_ne built-in functions. Note that the +;; expansions for this pattern with these modes makes no use of power9- +;; specific instructions since there are no new power9 instructions +;; for vector compare not equal with floating point arguments. 
+(define_expand "vector_ne_<mode>_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(eq:CC (match_operand:VEC_F 1 "vlogical_operand") + (match_operand:VEC_F 2 "vlogical_operand"))] + UNSPEC_PREDICATE)) + (set (match_dup 3) + (eq:VEC_F (match_dup 1) + (match_dup 2)))]) + (set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (reg:CC CR6_REGNO) + (const_int 0)))] + "TARGET_P9_VECTOR" +{ + operands[3] = gen_reg_rtx (<MODE>mode); +}) + +;; This expansion handles the V4SF and V2DF modes in the Power9 +;; implementation of the vec_any_eq built-in functions. Note that the +;; expansions for this pattern with these modes makes no use of power9- +;; specific instructions since there are no new power9 instructions +;; for vector compare not equal with floating point arguments. +(define_expand "vector_ae_<mode>_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(eq:CC (match_operand:VEC_F 1 "vlogical_operand") + (match_operand:VEC_F 2 "vlogical_operand"))] + UNSPEC_PREDICATE)) + (set (match_dup 3) + (eq:VEC_F (match_dup 1) + (match_dup 2)))]) + (set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (reg:CC CR6_REGNO) + (const_int 0))) + (set (match_dup 0) + (xor:SI (match_dup 0) + (const_int 1)))] + "TARGET_P9_VECTOR" +{ + operands[3] = gen_reg_rtx (<MODE>mode); +}) + +(define_expand "vector_gt_<mode>_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(gt:CC (match_operand:VEC_A 1 "vlogical_operand" "") + (match_operand:VEC_A 2 "vlogical_operand" ""))] + UNSPEC_PREDICATE)) + (set (match_operand:VEC_A 0 "vlogical_operand" "") + (gt:VEC_A (match_dup 1) + (match_dup 2)))])] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +(define_expand "vector_ge_<mode>_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(ge:CC (match_operand:VEC_F 1 "vfloat_operand" "") + (match_operand:VEC_F 2 "vfloat_operand" ""))] + UNSPEC_PREDICATE)) + (set (match_operand:VEC_F 0 "vfloat_operand" "") + (ge:VEC_F (match_dup 1) + (match_dup 2)))])] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +(define_expand "vector_gtu_<mode>_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(gtu:CC (match_operand:VEC_I 1 "vint_operand" "") + (match_operand:VEC_I 2 "vint_operand" ""))] + UNSPEC_PREDICATE)) + (set (match_operand:VEC_I 0 "vlogical_operand" "") + (gtu:VEC_I (match_dup 1) + (match_dup 2)))])] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +;; AltiVec/VSX predicates. + +;; This expansion is triggered during expansion of predicate built-in +;; functions (built-ins defined with the RS6000_BUILTIN_P macro) by the +;; altivec_expand_predicate_builtin() function when the value of the +;; integer constant first argument equals zero (aka __CR6_EQ in altivec.h). +(define_expand "cr6_test_for_zero" + [(set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (reg:CC CR6_REGNO) + (const_int 0)))] + "TARGET_ALTIVEC || TARGET_VSX" + "") + +;; This expansion is triggered during expansion of predicate built-in +;; functions (built-ins defined with the RS6000_BUILTIN_P macro) by the +;; altivec_expand_predicate_builtin() function when the value of the +;; integer constant first argument equals one (aka __CR6_EQ_REV in altivec.h). 
+(define_expand "cr6_test_for_zero_reverse" + [(set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (reg:CC CR6_REGNO) + (const_int 0))) + (set (match_dup 0) + (xor:SI (match_dup 0) + (const_int 1)))] + "TARGET_ALTIVEC || TARGET_VSX" + "") + +;; This expansion is triggered during expansion of predicate built-in +;; functions (built-ins defined with the RS6000_BUILTIN_P macro) by the +;; altivec_expand_predicate_builtin() function when the value of the +;; integer constant first argument equals two (aka __CR6_LT in altivec.h). +(define_expand "cr6_test_for_lt" + [(set (match_operand:SI 0 "register_operand" "=r") + (lt:SI (reg:CC CR6_REGNO) + (const_int 0)))] + "TARGET_ALTIVEC || TARGET_VSX" + "") + +;; This expansion is triggered during expansion of predicate built-in +;; functions (built-ins defined with the RS6000_BUILTIN_P macro) by the +;; altivec_expand_predicate_builtin() function when the value of the +;; integer constant first argument equals three +;; (aka __CR6_LT_REV in altivec.h). +(define_expand "cr6_test_for_lt_reverse" + [(set (match_operand:SI 0 "register_operand" "=r") + (lt:SI (reg:CC CR6_REGNO) + (const_int 0))) + (set (match_dup 0) + (xor:SI (match_dup 0) + (const_int 1)))] + "TARGET_ALTIVEC || TARGET_VSX" + "") + + +;; Vector count leading zeros +(define_expand "clz<mode>2" + [(set (match_operand:VEC_I 0 "register_operand" "") + (clz:VEC_I (match_operand:VEC_I 1 "register_operand" "")))] + "TARGET_P8_VECTOR") + +;; Vector count trailing zeros +(define_expand "ctz<mode>2" + [(set (match_operand:VEC_I 0 "register_operand" "") + (ctz:VEC_I (match_operand:VEC_I 1 "register_operand" "")))] + "TARGET_P9_VECTOR") + +;; Vector population count +(define_expand "popcount<mode>2" + [(set (match_operand:VEC_I 0 "register_operand" "") + (popcount:VEC_I (match_operand:VEC_I 1 "register_operand" "")))] + "TARGET_P8_VECTOR") + +;; Vector parity +(define_expand "parity<mode>2" + [(set (match_operand:VEC_IP 0 "register_operand" "") + (parity:VEC_IP (match_operand:VEC_IP 1 "register_operand" "")))] + "TARGET_P9_VECTOR") + + +;; Same size conversions +(define_expand "float<VEC_int><mode>2" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (float:VEC_F (match_operand:<VEC_INT> 1 "vint_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode)) + { + emit_insn (gen_altivec_vcfsx (operands[0], operands[1], const0_rtx)); + DONE; + } +}") + +(define_expand "floatuns<VEC_int><mode>2" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (unsigned_float:VEC_F (match_operand:<VEC_INT> 1 "vint_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode)) + { + emit_insn (gen_altivec_vcfux (operands[0], operands[1], const0_rtx)); + DONE; + } +}") + +(define_expand "fix_trunc<mode><VEC_int>2" + [(set (match_operand:<VEC_INT> 0 "vint_operand" "") + (fix:<VEC_INT> (match_operand:VEC_F 1 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode)) + { + emit_insn (gen_altivec_vctsxs (operands[0], operands[1], const0_rtx)); + DONE; + } +}") + +(define_expand "fixuns_trunc<mode><VEC_int>2" + [(set (match_operand:<VEC_INT> 0 "vint_operand" "") + (unsigned_fix:<VEC_INT> (match_operand:VEC_F 1 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode)) + { + emit_insn 
(gen_altivec_vctuxs (operands[0], operands[1], const0_rtx)); + DONE; + } +}") + + +;; Vector initialization, set, extract +(define_expand "vec_init<mode>" + [(match_operand:VEC_E 0 "vlogical_operand" "") + (match_operand:VEC_E 1 "" "")] + "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + rs6000_expand_vector_init (operands[0], operands[1]); + DONE; +}) + +(define_expand "vec_set<mode>" + [(match_operand:VEC_E 0 "vlogical_operand" "") + (match_operand:<VEC_base> 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + rs6000_expand_vector_set (operands[0], operands[1], INTVAL (operands[2])); + DONE; +}) + +(define_expand "vec_extract<mode>" + [(match_operand:<VEC_base> 0 "register_operand" "") + (match_operand:VEC_E 1 "vlogical_operand" "") + (match_operand 2 "const_int_operand" "")] + "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + rs6000_expand_vector_extract (operands[0], operands[1], operands[2]); + DONE; +}) + +;; Convert double word types to single word types +(define_expand "vec_pack_trunc_v2df" + [(match_operand:V4SF 0 "vfloat_operand" "") + (match_operand:V2DF 1 "vfloat_operand" "") + (match_operand:V2DF 2 "vfloat_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode) && TARGET_ALTIVEC" +{ + rtx r1 = gen_reg_rtx (V4SFmode); + rtx r2 = gen_reg_rtx (V4SFmode); + + emit_insn (gen_vsx_xvcvdpsp (r1, operands[1])); + emit_insn (gen_vsx_xvcvdpsp (r2, operands[2])); + rs6000_expand_extract_even (operands[0], r1, r2); + DONE; +}) + +(define_expand "vec_pack_sfix_trunc_v2df" + [(match_operand:V4SI 0 "vint_operand" "") + (match_operand:V2DF 1 "vfloat_operand" "") + (match_operand:V2DF 2 "vfloat_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode) && TARGET_ALTIVEC" +{ + rtx r1 = gen_reg_rtx (V4SImode); + rtx r2 = gen_reg_rtx (V4SImode); + + emit_insn (gen_vsx_xvcvdpsxws (r1, operands[1])); + emit_insn (gen_vsx_xvcvdpsxws (r2, operands[2])); + rs6000_expand_extract_even (operands[0], r1, r2); + DONE; +}) + +(define_expand "vec_pack_ufix_trunc_v2df" + [(match_operand:V4SI 0 "vint_operand" "") + (match_operand:V2DF 1 "vfloat_operand" "") + (match_operand:V2DF 2 "vfloat_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode) && TARGET_ALTIVEC" +{ + rtx r1 = gen_reg_rtx (V4SImode); + rtx r2 = gen_reg_rtx (V4SImode); + + emit_insn (gen_vsx_xvcvdpuxws (r1, operands[1])); + emit_insn (gen_vsx_xvcvdpuxws (r2, operands[2])); + rs6000_expand_extract_even (operands[0], r1, r2); + DONE; +}) + +;; Convert single word types to double word +(define_expand "vec_unpacks_hi_v4sf" + [(match_operand:V2DF 0 "vfloat_operand" "") + (match_operand:V4SF 1 "vfloat_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)" +{ + rtx reg = gen_reg_rtx (V4SFmode); + + rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN); + emit_insn (gen_vsx_xvcvspdp (operands[0], reg)); + DONE; +}) + +(define_expand "vec_unpacks_lo_v4sf" + [(match_operand:V2DF 0 "vfloat_operand" "") + (match_operand:V4SF 1 "vfloat_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)" +{ + rtx reg = gen_reg_rtx (V4SFmode); + + rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN); + emit_insn (gen_vsx_xvcvspdp (operands[0], reg)); + DONE; +}) + +(define_expand "vec_unpacks_float_hi_v4si" + [(match_operand:V2DF 0 "vfloat_operand" "") + (match_operand:V4SI 1 "vint_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)" +{ + rtx reg = gen_reg_rtx (V4SImode); + + 
rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN); + emit_insn (gen_vsx_xvcvsxwdp (operands[0], reg)); + DONE; +}) + +(define_expand "vec_unpacks_float_lo_v4si" + [(match_operand:V2DF 0 "vfloat_operand" "") + (match_operand:V4SI 1 "vint_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)" +{ + rtx reg = gen_reg_rtx (V4SImode); + + rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN); + emit_insn (gen_vsx_xvcvsxwdp (operands[0], reg)); + DONE; +}) + +(define_expand "vec_unpacku_float_hi_v4si" + [(match_operand:V2DF 0 "vfloat_operand" "") + (match_operand:V4SI 1 "vint_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)" +{ + rtx reg = gen_reg_rtx (V4SImode); + + rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN); + emit_insn (gen_vsx_xvcvuxwdp (operands[0], reg)); + DONE; +}) + +(define_expand "vec_unpacku_float_lo_v4si" + [(match_operand:V2DF 0 "vfloat_operand" "") + (match_operand:V4SI 1 "vint_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)" +{ + rtx reg = gen_reg_rtx (V4SImode); + + rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN); + emit_insn (gen_vsx_xvcvuxwdp (operands[0], reg)); + DONE; +}) + + +;; Align vector loads with a permute. +(define_expand "vec_realign_load_<mode>" + [(match_operand:VEC_K 0 "vlogical_operand" "") + (match_operand:VEC_K 1 "vlogical_operand" "") + (match_operand:VEC_K 2 "vlogical_operand" "") + (match_operand:V16QI 3 "vlogical_operand" "")] + "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1], + operands[2], operands[3])); + else + { + /* We have changed lvsr to lvsl, so to complete the transformation + of vperm for LE, we must swap the inputs. */ + rtx unspec = gen_rtx_UNSPEC (<MODE>mode, + gen_rtvec (3, operands[2], + operands[1], operands[3]), + UNSPEC_VPERM); + emit_move_insn (operands[0], unspec); + } + DONE; +}) + +;; Under VSX, vectors of 4/8 byte alignments do not need to be aligned +;; since the load already handles it. +(define_expand "movmisalign<mode>" + [(set (match_operand:VEC_N 0 "nonimmediate_operand" "") + (match_operand:VEC_N 1 "any_operand" ""))] + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_ALLOW_MOVMISALIGN" + "") + +;; Vector shift right in bits. Currently supported only for shift +;; amounts that can be expressed as byte shifts (divisible by 8). +;; General shift amounts can be supported using vsro + vsr. We're +;; not expecting to see these yet (the vectorizer currently +;; generates only shifts by a whole number of vector elements). +;; Note that the vec_shr operation is actually defined as +;; 'shift toward element 0' so is a shr for LE and shl for BE. +(define_expand "vec_shr_<mode>" + [(match_operand:VEC_L 0 "vlogical_operand" "") + (match_operand:VEC_L 1 "vlogical_operand" "") + (match_operand:QI 2 "reg_or_short_operand" "")] + "TARGET_ALTIVEC" + " +{ + rtx bitshift = operands[2]; + rtx shift; + rtx insn; + rtx zero_reg, op1, op2; + HOST_WIDE_INT bitshift_val; + HOST_WIDE_INT byteshift_val; + + if (!
CONSTANT_P (bitshift)) + FAIL; + bitshift_val = INTVAL (bitshift); + if (bitshift_val & 0x7) + FAIL; + byteshift_val = (bitshift_val >> 3); + zero_reg = gen_reg_rtx (<MODE>mode); + emit_move_insn (zero_reg, CONST0_RTX (<MODE>mode)); + if (!BYTES_BIG_ENDIAN) + { + byteshift_val = 16 - byteshift_val; + op1 = zero_reg; + op2 = operands[1]; + } + else + { + op1 = operands[1]; + op2 = zero_reg; + } + + if (TARGET_VSX && (byteshift_val & 0x3) == 0) + { + shift = gen_rtx_CONST_INT (QImode, byteshift_val >> 2); + insn = gen_vsx_xxsldwi_<mode> (operands[0], op1, op2, shift); + } + else + { + shift = gen_rtx_CONST_INT (QImode, byteshift_val); + insn = gen_altivec_vsldoi_<mode> (operands[0], op1, op2, shift); + } + + emit_insn (insn); + DONE; +}") + +;; Expanders for rotate each element in a vector +(define_expand "vrotl<mode>3" + [(set (match_operand:VEC_I 0 "vint_operand" "") + (rotate:VEC_I (match_operand:VEC_I 1 "vint_operand" "") + (match_operand:VEC_I 2 "vint_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +;; Expanders for arithmetic shift left on each vector element +(define_expand "vashl<mode>3" + [(set (match_operand:VEC_I 0 "vint_operand" "") + (ashift:VEC_I (match_operand:VEC_I 1 "vint_operand" "") + (match_operand:VEC_I 2 "vint_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +;; Expanders for logical shift right on each vector element +(define_expand "vlshr<mode>3" + [(set (match_operand:VEC_I 0 "vint_operand" "") + (lshiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand" "") + (match_operand:VEC_I 2 "vint_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +;; Expanders for arithmetic shift right on each vector element +(define_expand "vashr<mode>3" + [(set (match_operand:VEC_I 0 "vint_operand" "") + (ashiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand" "") + (match_operand:VEC_I 2 "vint_operand" "")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + "") + +;; Vector reduction expanders for VSX +; The (VEC_reduc:... +; (op1) +; (unspec:... [(const_int 0)] UNSPEC_REDUC)) +; +; is to allow us to use a code iterator, but not completely list all of the +; vector rotates, etc. to prevent canonicalization + + +(define_expand "reduc_<VEC_reduc:VEC_reduc_name>_scal_<VEC_F:mode>" + [(match_operand:<VEC_base> 0 "register_operand" "") + (VEC_reduc:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") + (unspec:VEC_F [(const_int 0)] UNSPEC_REDUC))] + "VECTOR_UNIT_VSX_P (<VEC_F:MODE>mode)" + { + rtx vec = gen_reg_rtx (<VEC_F:MODE>mode); + rtx elt = BYTES_BIG_ENDIAN + ? gen_int_mode (GET_MODE_NUNITS (<VEC_F:MODE>mode) - 1, QImode) + : const0_rtx; + emit_insn (gen_vsx_reduc_<VEC_reduc:VEC_reduc_name>_<VEC_F:mode> (vec, + operand1)); + emit_insn (gen_vsx_extract_<VEC_F:mode> (operand0, vec, elt)); + DONE; + }) + + +;;; Expanders for vector insn patterns shared between the SPE and TARGET_PAIRED systems. 
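For context, the source-level construct these V2SF expanders exist for is plain two-element float vector arithmetic. A hedged sketch using GCC's generic vector extension follows; whether it actually maps to SPE or paired-single instructions depends on the target flags.

/* Sketch: a 2 x float vector type and an add that the addv2sf3
   expander below can serve; on SPE the expander also records the
   SPEFSCR clobber, as its C fragment shows.  */
typedef float v2sf __attribute__ ((vector_size (8)));

v2sf add2 (v2sf a, v2sf b)
{
  return a + b;
}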
+ +(define_expand "absv2sf2" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "") + (abs:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "")))] + "TARGET_PAIRED_FLOAT || TARGET_SPE" + "") + +(define_expand "negv2sf2" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "") + (neg:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "")))] + "TARGET_PAIRED_FLOAT || TARGET_SPE" + "") + +(define_expand "addv2sf3" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "") + (plus:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "") + (match_operand:V2SF 2 "gpc_reg_operand" "")))] + "TARGET_PAIRED_FLOAT || TARGET_SPE" + " +{ + if (TARGET_SPE) + { + /* We need to make a note that we clobber SPEFSCR. */ + rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + + XVECEXP (par, 0, 0) = gen_rtx_SET (operands[0], + gen_rtx_PLUS (V2SFmode, operands[1], operands[2])); + XVECEXP (par, 0, 1) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, SPEFSCR_REGNO)); + emit_insn (par); + DONE; + } +}") + +(define_expand "subv2sf3" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "") + (minus:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "") + (match_operand:V2SF 2 "gpc_reg_operand" "")))] + "TARGET_PAIRED_FLOAT || TARGET_SPE" + " +{ + if (TARGET_SPE) + { + /* We need to make a note that we clobber SPEFSCR. */ + rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + + XVECEXP (par, 0, 0) = gen_rtx_SET (operands[0], + gen_rtx_MINUS (V2SFmode, operands[1], operands[2])); + XVECEXP (par, 0, 1) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, SPEFSCR_REGNO)); + emit_insn (par); + DONE; + } +}") + +(define_expand "mulv2sf3" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "") + (mult:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "") + (match_operand:V2SF 2 "gpc_reg_operand" "")))] + "TARGET_PAIRED_FLOAT || TARGET_SPE" + " +{ + if (TARGET_SPE) + { + /* We need to make a note that we clobber SPEFSCR. */ + rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + + XVECEXP (par, 0, 0) = gen_rtx_SET (operands[0], + gen_rtx_MULT (V2SFmode, operands[1], operands[2])); + XVECEXP (par, 0, 1) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, SPEFSCR_REGNO)); + emit_insn (par); + DONE; + } +}") + +(define_expand "divv2sf3" + [(set (match_operand:V2SF 0 "gpc_reg_operand" "") + (div:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "") + (match_operand:V2SF 2 "gpc_reg_operand" "")))] + "TARGET_PAIRED_FLOAT || TARGET_SPE" + " +{ + if (TARGET_SPE) + { + /* We need to make a note that we clobber SPEFSCR. */ + rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + + XVECEXP (par, 0, 0) = gen_rtx_SET (operands[0], + gen_rtx_DIV (V2SFmode, operands[1], operands[2])); + XVECEXP (par, 0, 1) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, SPEFSCR_REGNO)); + emit_insn (par); + DONE; + } +}") diff --git a/gcc/config/powerpcspe/vsx.md b/gcc/config/powerpcspe/vsx.md new file mode 100644 index 000000000000..b669764ce8f3 --- /dev/null +++ b/gcc/config/powerpcspe/vsx.md @@ -0,0 +1,4128 @@ +;; VSX patterns. +;; Copyright (C) 2009-2017 Free Software Foundation, Inc. +;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com> + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; Iterator for comparison types +(define_code_iterator CMP_TEST [eq lt gt unordered]) + +;; Iterator for both scalar and vector floating point types supported by VSX +(define_mode_iterator VSX_B [DF V4SF V2DF]) + +;; Iterator for the 2 64-bit vector types +(define_mode_iterator VSX_D [V2DF V2DI]) + +;; Mode iterator to handle swapping words on little endian for the 128-bit +;; types that goes in a single vector register. +(define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)") + (TF "FLOAT128_VECTOR_P (TFmode)") + (TI "TARGET_VSX_TIMODE") + V1TI]) + +;; Iterator for the 2 32-bit vector types +(define_mode_iterator VSX_W [V4SF V4SI]) + +;; Iterator for the DF types +(define_mode_iterator VSX_DF [V2DF DF]) + +;; Iterator for vector floating point types supported by VSX +(define_mode_iterator VSX_F [V4SF V2DF]) + +;; Iterator for logical types supported by VSX +(define_mode_iterator VSX_L [V16QI + V8HI + V4SI + V2DI + V4SF + V2DF + V1TI + TI + (KF "FLOAT128_VECTOR_P (KFmode)") + (TF "FLOAT128_VECTOR_P (TFmode)")]) + +;; Iterator for memory moves. +(define_mode_iterator VSX_M [V16QI + V8HI + V4SI + V2DI + V4SF + V2DF + V1TI + (KF "FLOAT128_VECTOR_P (KFmode)") + (TF "FLOAT128_VECTOR_P (TFmode)") + (TI "TARGET_VSX_TIMODE")]) + +;; Map into the appropriate load/store name based on the type +(define_mode_attr VSm [(V16QI "vw4") + (V8HI "vw4") + (V4SI "vw4") + (V4SF "vw4") + (V2DF "vd2") + (V2DI "vd2") + (DF "d") + (TF "vd2") + (KF "vd2") + (V1TI "vd2") + (TI "vd2")]) + +;; Map into the appropriate suffix based on the type +(define_mode_attr VSs [(V16QI "sp") + (V8HI "sp") + (V4SI "sp") + (V4SF "sp") + (V2DF "dp") + (V2DI "dp") + (DF "dp") + (SF "sp") + (TF "dp") + (KF "dp") + (V1TI "dp") + (TI "dp")]) + +;; Map the register class used +(define_mode_attr VSr [(V16QI "v") + (V8HI "v") + (V4SI "v") + (V4SF "wf") + (V2DI "wd") + (V2DF "wd") + (DI "wi") + (DF "ws") + (SF "ww") + (TF "wp") + (KF "wq") + (V1TI "v") + (TI "wt")]) + +;; Map the register class used for float<->int conversions (floating point side) +;; VSr2 is the preferred register class, VSr3 is any register class that will +;; hold the data +(define_mode_attr VSr2 [(V2DF "wd") + (V4SF "wf") + (DF "ws") + (SF "ww") + (DI "wi") + (KF "wq") + (TF "wp")]) + +(define_mode_attr VSr3 [(V2DF "wa") + (V4SF "wa") + (DF "ws") + (SF "ww") + (DI "wi") + (KF "wq") + (TF "wp")]) + +;; Map the register class for sp<->dp float conversions, destination +(define_mode_attr VSr4 [(SF "ws") + (DF "f") + (V2DF "wd") + (V4SF "v")]) + +;; Map the register class for sp<->dp float conversions, source +(define_mode_attr VSr5 [(SF "ws") + (DF "f") + (V2DF "v") + (V4SF "wd")]) + +;; The VSX register class that a type can occupy, even if it is not the +;; preferred register class (VSr is the preferred register class that will get +;; allocated first). 
+(define_mode_attr VSa [(V16QI "wa") + (V8HI "wa") + (V4SI "wa") + (V4SF "wa") + (V2DI "wa") + (V2DF "wa") + (DI "wi") + (DF "ws") + (SF "ww") + (V1TI "wa") + (TI "wt") + (TF "wp") + (KF "wq")]) + +;; Same size integer type for floating point data +(define_mode_attr VSi [(V4SF "v4si") + (V2DF "v2di") + (DF "di")]) + +(define_mode_attr VSI [(V4SF "V4SI") + (V2DF "V2DI") + (DF "DI")]) + +;; Word size for same size conversion +(define_mode_attr VSc [(V4SF "w") + (V2DF "d") + (DF "d")]) + +;; Map into either s or v, depending on whether this is a scalar or vector +;; operation +(define_mode_attr VSv [(V16QI "v") + (V8HI "v") + (V4SI "v") + (V4SF "v") + (V2DI "v") + (V2DF "v") + (V1TI "v") + (DF "s") + (KF "v")]) + +;; Appropriate type for add ops (and other simple FP ops) +(define_mode_attr VStype_simple [(V2DF "vecdouble") + (V4SF "vecfloat") + (DF "fp")]) + +(define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d") + (V4SF "fp_addsub_s") + (DF "fp_addsub_d")]) + +;; Appropriate type for multiply ops +(define_mode_attr VStype_mul [(V2DF "vecdouble") + (V4SF "vecfloat") + (DF "dmul")]) + +(define_mode_attr VSfptype_mul [(V2DF "fp_mul_d") + (V4SF "fp_mul_s") + (DF "fp_mul_d")]) + +;; Appropriate type for divide ops. +(define_mode_attr VStype_div [(V2DF "vecdiv") + (V4SF "vecfdiv") + (DF "ddiv")]) + +(define_mode_attr VSfptype_div [(V2DF "fp_div_d") + (V4SF "fp_div_s") + (DF "fp_div_d")]) + +;; Appropriate type for sqrt ops. For now, just lump the vector sqrt with +;; the scalar sqrt +(define_mode_attr VStype_sqrt [(V2DF "dsqrt") + (V4SF "ssqrt") + (DF "dsqrt")]) + +(define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d") + (V4SF "fp_sqrt_s") + (DF "fp_sqrt_d")]) + +;; Iterator and modes for sp<->dp conversions +;; Because scalar SF values are represented internally as double, use the +;; V4SF type to represent this than SF. +(define_mode_iterator VSX_SPDP [DF V4SF V2DF]) + +(define_mode_attr VS_spdp_res [(DF "V4SF") + (V4SF "V2DF") + (V2DF "V4SF")]) + +(define_mode_attr VS_spdp_insn [(DF "xscvdpsp") + (V4SF "xvcvspdp") + (V2DF "xvcvdpsp")]) + +(define_mode_attr VS_spdp_type [(DF "fp") + (V4SF "vecdouble") + (V2DF "vecdouble")]) + +;; Map the scalar mode for a vector type +(define_mode_attr VS_scalar [(V1TI "TI") + (V2DF "DF") + (V2DI "DI") + (V4SF "SF") + (V4SI "SI") + (V8HI "HI") + (V16QI "QI")]) + +;; Map to a double-sized vector mode +(define_mode_attr VS_double [(V4SI "V8SI") + (V4SF "V8SF") + (V2DI "V4DI") + (V2DF "V4DF") + (V1TI "V2TI")]) + +;; Map register class for 64-bit element in 128-bit vector for direct moves +;; to/from gprs +(define_mode_attr VS_64dm [(V2DF "wk") + (V2DI "wj")]) + +;; Map register class for 64-bit element in 128-bit vector for normal register +;; to register moves +(define_mode_attr VS_64reg [(V2DF "ws") + (V2DI "wi")]) + +;; Iterators for loading constants with xxspltib +(define_mode_iterator VSINT_84 [V4SI V2DI DI SI]) +(define_mode_iterator VSINT_842 [V8HI V4SI V2DI]) + +;; Iterator for ISA 3.0 vector extract/insert of small integer vectors. +;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be +;; done on ISA 2.07 and not just ISA 3.0. +(define_mode_iterator VSX_EXTRACT_I [V16QI V8HI V4SI]) +(define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI]) + +(define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b") + (V8HI "h") + (V4SI "w")]) + +;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and +;; insert to validate the operand number. 
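As an illustration of what these operand predicates guard, here is a small hedged C sketch of constant-index element extract and insert through the altivec.h built-ins; the literal lane numbers are what the const_0_to_15/const_0_to_7/const_0_to_3 predicates named below validate on the RTL side.

#include <altivec.h>

/* Usage sketch only.  */
signed char lane5 (vector signed char v)
{
  return vec_extract (v, 5);      /* lane 5 of a 16-element vector  */
}

vector signed short set_lane3 (vector signed short v, short x)
{
  return vec_insert (x, v, 3);    /* lane 3 of an 8-element vector  */
}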
+(define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand") + (V8HI "const_0_to_7_operand") + (V4SI "const_0_to_3_operand")]) + +;; Mode attribute to give the constraint for vector extract and insert +;; operations. +(define_mode_attr VSX_EX [(V16QI "v") + (V8HI "v") + (V4SI "wa")]) + +;; Mode iterator for binary floating types other than double to +;; optimize convert to that floating point type from an extract +;; of an integer type +(define_mode_iterator VSX_EXTRACT_FL [SF + (IF "FLOAT128_2REG_P (IFmode)") + (KF "TARGET_FLOAT128_HW") + (TF "FLOAT128_2REG_P (TFmode) + || (FLOAT128_IEEE_P (TFmode) + && TARGET_FLOAT128_HW)")]) + +;; Mode iterator for binary floating types that have a direct conversion +;; from 64-bit integer to floating point +(define_mode_iterator FL_CONV [SF + DF + (KF "TARGET_FLOAT128_HW") + (TF "TARGET_FLOAT128_HW + && FLOAT128_IEEE_P (TFmode)")]) + +;; Iterator for the 2 short vector types to do a splat from an integer +(define_mode_iterator VSX_SPLAT_I [V16QI V8HI]) + +;; Mode attribute to give the count for the splat instruction to splat +;; the value in the 64-bit integer slot +(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")]) + +;; Mode attribute to give the suffix for the splat instruction +(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")]) + +;; Constants for creating unspecs +(define_c_enum "unspec" + [UNSPEC_VSX_CONCAT + UNSPEC_VSX_CVDPSXWS + UNSPEC_VSX_CVDPUXWS + UNSPEC_VSX_CVSPDP + UNSPEC_VSX_CVSPDPN + UNSPEC_VSX_CVDPSPN + UNSPEC_VSX_CVSXWDP + UNSPEC_VSX_CVUXWDP + UNSPEC_VSX_CVSXDSP + UNSPEC_VSX_CVUXDSP + UNSPEC_VSX_CVSPSXDS + UNSPEC_VSX_CVSPUXDS + UNSPEC_VSX_TDIV + UNSPEC_VSX_TSQRT + UNSPEC_VSX_SET + UNSPEC_VSX_ROUND_I + UNSPEC_VSX_ROUND_IC + UNSPEC_VSX_SLDWI + UNSPEC_VSX_XXSPLTW + UNSPEC_VSX_XXSPLTD + UNSPEC_VSX_DIVSD + UNSPEC_VSX_DIVUD + UNSPEC_VSX_MULSD + UNSPEC_VSX_XVCVSXDDP + UNSPEC_VSX_XVCVUXDDP + UNSPEC_VSX_XVCVDPSXDS + UNSPEC_VSX_XVCVDPUXDS + UNSPEC_VSX_SIGN_EXTEND + UNSPEC_VSX_VSLO + UNSPEC_VSX_EXTRACT + UNSPEC_VSX_SXEXPDP + UNSPEC_VSX_SXSIGDP + UNSPEC_VSX_SIEXPDP + UNSPEC_VSX_SCMPEXPDP + UNSPEC_VSX_STSTDC + UNSPEC_VSX_VXEXP + UNSPEC_VSX_VXSIG + UNSPEC_VSX_VIEXP + UNSPEC_VSX_VTSTDC + UNSPEC_VSX_VEC_INIT + UNSPEC_LXVL + UNSPEC_STXVL + UNSPEC_VCLZLSBB + UNSPEC_VCTZLSBB + UNSPEC_VEXTUBLX + UNSPEC_VEXTUHLX + UNSPEC_VEXTUWLX + UNSPEC_VEXTUBRX + UNSPEC_VEXTUHRX + UNSPEC_VEXTUWRX + UNSPEC_VCMPNEB + UNSPEC_VCMPNEZB + UNSPEC_VCMPNEH + UNSPEC_VCMPNEZH + UNSPEC_VCMPNEW + UNSPEC_VCMPNEZW + UNSPEC_XXEXTRACTUW + UNSPEC_XXINSERTW + ]) + +;; VSX moves + +;; The patterns for LE permuted loads and stores come before the general +;; VSX moves so they match first. +(define_insn_and_split "*vsx_le_perm_load_<mode>" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>") + (match_operand:VSX_D 1 "memory_operand" "Z"))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" + "#" + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" + [(set (match_dup 2) + (vec_select:<MODE> + (match_dup 1) + (parallel [(const_int 1) (const_int 0)]))) + (set (match_dup 0) + (vec_select:<MODE> + (match_dup 2) + (parallel [(const_int 1) (const_int 0)])))] + " +{ + operands[2] = can_create_pseudo_p () ? 
gen_reg_rtx_and_attrs (operands[0]) + : operands[0]; +} + " + [(set_attr "type" "vecload") + (set_attr "length" "8")]) + +(define_insn_and_split "*vsx_le_perm_load_<mode>" + [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>") + (match_operand:VSX_W 1 "memory_operand" "Z"))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" + "#" + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" + [(set (match_dup 2) + (vec_select:<MODE> + (match_dup 1) + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)]))) + (set (match_dup 0) + (vec_select:<MODE> + (match_dup 2) + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)])))] + " +{ + operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) + : operands[0]; +} + " + [(set_attr "type" "vecload") + (set_attr "length" "8")]) + +(define_insn_and_split "*vsx_le_perm_load_v8hi" + [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") + (match_operand:V8HI 1 "memory_operand" "Z"))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" + "#" + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" + [(set (match_dup 2) + (vec_select:V8HI + (match_dup 1) + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3)]))) + (set (match_dup 0) + (vec_select:V8HI + (match_dup 2) + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3)])))] + " +{ + operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) + : operands[0]; +} + " + [(set_attr "type" "vecload") + (set_attr "length" "8")]) + +(define_insn_and_split "*vsx_le_perm_load_v16qi" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (match_operand:V16QI 1 "memory_operand" "Z"))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" + "#" + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" + [(set (match_dup 2) + (vec_select:V16QI + (match_dup 1) + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)]))) + (set (match_dup 0) + (vec_select:V16QI + (match_dup 2) + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])))] + " +{ + operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) + : operands[0]; +} + " + [(set_attr "type" "vecload") + (set_attr "length" "8")]) + +(define_insn "*vsx_le_perm_store_<mode>" + [(set (match_operand:VSX_D 0 "memory_operand" "=Z") + (match_operand:VSX_D 1 "vsx_register_operand" "+<VSa>"))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" + "#" + [(set_attr "type" "vecstore") + (set_attr "length" "12")]) + +(define_split + [(set (match_operand:VSX_D 0 "memory_operand" "") + (match_operand:VSX_D 1 "vsx_register_operand" ""))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed" + [(set (match_dup 2) + (vec_select:<MODE> + (match_dup 1) + (parallel [(const_int 1) (const_int 0)]))) + (set (match_dup 0) + (vec_select:<MODE> + (match_dup 2) + (parallel [(const_int 1) (const_int 0)])))] +{ + operands[2] = can_create_pseudo_p () ? 
gen_reg_rtx_and_attrs (operands[1]) + : operands[1]; +}) + +;; The post-reload split requires that we re-permute the source +;; register in case it is still live. +(define_split + [(set (match_operand:VSX_D 0 "memory_operand" "") + (match_operand:VSX_D 1 "vsx_register_operand" ""))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed" + [(set (match_dup 1) + (vec_select:<MODE> + (match_dup 1) + (parallel [(const_int 1) (const_int 0)]))) + (set (match_dup 0) + (vec_select:<MODE> + (match_dup 1) + (parallel [(const_int 1) (const_int 0)]))) + (set (match_dup 1) + (vec_select:<MODE> + (match_dup 1) + (parallel [(const_int 1) (const_int 0)])))] + "") + +(define_insn "*vsx_le_perm_store_<mode>" + [(set (match_operand:VSX_W 0 "memory_operand" "=Z") + (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" + "#" + [(set_attr "type" "vecstore") + (set_attr "length" "12")]) + +(define_split + [(set (match_operand:VSX_W 0 "memory_operand" "") + (match_operand:VSX_W 1 "vsx_register_operand" ""))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed" + [(set (match_dup 2) + (vec_select:<MODE> + (match_dup 1) + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)]))) + (set (match_dup 0) + (vec_select:<MODE> + (match_dup 2) + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)])))] +{ + operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) + : operands[1]; +}) + +;; The post-reload split requires that we re-permute the source +;; register in case it is still live. +(define_split + [(set (match_operand:VSX_W 0 "memory_operand" "") + (match_operand:VSX_W 1 "vsx_register_operand" ""))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed" + [(set (match_dup 1) + (vec_select:<MODE> + (match_dup 1) + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)]))) + (set (match_dup 0) + (vec_select:<MODE> + (match_dup 1) + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)]))) + (set (match_dup 1) + (vec_select:<MODE> + (match_dup 1) + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)])))] + "") + +(define_insn "*vsx_le_perm_store_v8hi" + [(set (match_operand:V8HI 0 "memory_operand" "=Z") + (match_operand:V8HI 1 "vsx_register_operand" "+wa"))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" + "#" + [(set_attr "type" "vecstore") + (set_attr "length" "12")]) + +(define_split + [(set (match_operand:V8HI 0 "memory_operand" "") + (match_operand:V8HI 1 "vsx_register_operand" ""))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed" + [(set (match_dup 2) + (vec_select:V8HI + (match_dup 1) + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3)]))) + (set (match_dup 0) + (vec_select:V8HI + (match_dup 2) + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3)])))] +{ + operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) + : operands[1]; +}) + +;; The post-reload split requires that we re-permute the source +;; register in case it is still live. 
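In other words, the post-reload form is a three-step sequence: swap the still-live source register in place, perform the store, then swap the register back. A rough structural model in C (illustration only; it deliberately ignores the exact element numbering of the underlying stxvd2x form):

/* Structural sketch: permute the live source, store it, then
   re-permute so the register still holds its original value.  */
void store_and_keep_live (unsigned long long reg[2],
                          unsigned long long mem[2])
{
  unsigned long long t;

  t = reg[0]; reg[0] = reg[1]; reg[1] = t;   /* first permute         */
  mem[0] = reg[0]; mem[1] = reg[1];          /* the store itself      */
  t = reg[0]; reg[0] = reg[1]; reg[1] = t;   /* undo; source is live  */
}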
+(define_split + [(set (match_operand:V8HI 0 "memory_operand" "") + (match_operand:V8HI 1 "vsx_register_operand" ""))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed" + [(set (match_dup 1) + (vec_select:V8HI + (match_dup 1) + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3)]))) + (set (match_dup 0) + (vec_select:V8HI + (match_dup 1) + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3)]))) + (set (match_dup 1) + (vec_select:V8HI + (match_dup 1) + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3)])))] + "") + +(define_insn "*vsx_le_perm_store_v16qi" + [(set (match_operand:V16QI 0 "memory_operand" "=Z") + (match_operand:V16QI 1 "vsx_register_operand" "+wa"))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" + "#" + [(set_attr "type" "vecstore") + (set_attr "length" "12")]) + +(define_split + [(set (match_operand:V16QI 0 "memory_operand" "") + (match_operand:V16QI 1 "vsx_register_operand" ""))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed" + [(set (match_dup 2) + (vec_select:V16QI + (match_dup 1) + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)]))) + (set (match_dup 0) + (vec_select:V16QI + (match_dup 2) + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])))] +{ + operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) + : operands[1]; +}) + +;; The post-reload split requires that we re-permute the source +;; register in case it is still live. +(define_split + [(set (match_operand:V16QI 0 "memory_operand" "") + (match_operand:V16QI 1 "vsx_register_operand" ""))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed" + [(set (match_dup 1) + (vec_select:V16QI + (match_dup 1) + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)]))) + (set (match_dup 0) + (vec_select:V16QI + (match_dup 1) + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)]))) + (set (match_dup 1) + (vec_select:V16QI + (match_dup 1) + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])))] + "") + +;; Little endian word swapping for 128-bit types that are either scalars or the +;; special V1TI container class, which it is not appropriate to use vec_select +;; for the type. 
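The patterns below model the doubleword swap as a rotate of the whole 128-bit value by 64 bits, which is what xxpermdi %x0,%x1,%x1,2 performs on a VSX register. A minimal C sketch of that identity, assuming the value is held as two 64-bit halves:

#include <stdint.h>

/* Sketch: rotating a 128-bit quantity by 64 bits just exchanges its
   two 64-bit halves.  */
typedef struct { uint64_t hi, lo; } u128_halves;

static u128_halves rotate_by_64 (u128_halves x)
{
  u128_halves r = { x.lo, x.hi };
  return r;
}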
+(define_insn "*vsx_le_permute_<mode>" + [(set (match_operand:VSX_LE_128 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z") + (rotate:VSX_LE_128 + (match_operand:VSX_LE_128 1 "input_operand" "<VSa>,Z,<VSa>") + (const_int 64)))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" + "@ + xxpermdi %x0,%x1,%x1,2 + lxvd2x %x0,%y1 + stxvd2x %x1,%y0" + [(set_attr "length" "4") + (set_attr "type" "vecperm,vecload,vecstore")]) + +(define_insn_and_split "*vsx_le_undo_permute_<mode>" + [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,<VSa>") + (rotate:VSX_LE_128 + (rotate:VSX_LE_128 + (match_operand:VSX_LE_128 1 "vsx_register_operand" "0,<VSa>") + (const_int 64)) + (const_int 64)))] + "!BYTES_BIG_ENDIAN && TARGET_VSX" + "@ + # + xxlor %x0,%x1" + "" + [(set (match_dup 0) (match_dup 1))] +{ + if (reload_completed && REGNO (operands[0]) == REGNO (operands[1])) + { + emit_note (NOTE_INSN_DELETED); + DONE; + } +} + [(set_attr "length" "0,4") + (set_attr "type" "veclogical")]) + +(define_insn_and_split "*vsx_le_perm_load_<mode>" + [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>") + (match_operand:VSX_LE_128 1 "memory_operand" "Z"))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" + "#" + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" + [(set (match_dup 2) + (rotate:VSX_LE_128 (match_dup 1) + (const_int 64))) + (set (match_dup 0) + (rotate:VSX_LE_128 (match_dup 2) + (const_int 64)))] + " +{ + operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) + : operands[0]; +} + " + [(set_attr "type" "vecload") + (set_attr "length" "8")]) + +(define_insn "*vsx_le_perm_store_<mode>" + [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z") + (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>"))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" + "#" + [(set_attr "type" "vecstore") + (set_attr "length" "12")]) + +(define_split + [(set (match_operand:VSX_LE_128 0 "memory_operand" "") + (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR" + [(set (match_dup 2) + (rotate:VSX_LE_128 (match_dup 1) + (const_int 64))) + (set (match_dup 0) + (rotate:VSX_LE_128 (match_dup 2) + (const_int 64)))] +{ + operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) + : operands[0]; +}) + +;; Peephole to catch memory to memory transfers for TImode if TImode landed in +;; VSX registers on a little endian system. The vector types and IEEE 128-bit +;; floating point are handled by the more generic swap elimination pass. +(define_peephole2 + [(set (match_operand:TI 0 "vsx_register_operand" "") + (rotate:TI (match_operand:TI 1 "vsx_register_operand" "") + (const_int 64))) + (set (match_operand:TI 2 "vsx_register_operand" "") + (rotate:TI (match_dup 0) + (const_int 64)))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && TARGET_VSX_TIMODE && !TARGET_P9_VECTOR + && (rtx_equal_p (operands[0], operands[2]) + || peep2_reg_dead_p (2, operands[0]))" + [(set (match_dup 2) (match_dup 1))]) + +;; The post-reload split requires that we re-permute the source +;; register in case it is still live. 
+(define_split + [(set (match_operand:VSX_LE_128 0 "memory_operand" "") + (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))] + "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR" + [(set (match_dup 1) + (rotate:VSX_LE_128 (match_dup 1) + (const_int 64))) + (set (match_dup 0) + (rotate:VSX_LE_128 (match_dup 1) + (const_int 64))) + (set (match_dup 1) + (rotate:VSX_LE_128 (match_dup 1) + (const_int 64)))] + "") + +;; Vector constants that can be generated with XXSPLTIB that was added in ISA +;; 3.0. Both (const_vector [..]) and (vec_duplicate ...) forms are recognized. +(define_insn "xxspltib_v16qi" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))] + "TARGET_P9_VECTOR" +{ + operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff); + return "xxspltib %x0,%2"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "xxspltib_<mode>_nosplit" + [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa") + (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))] + "TARGET_P9_VECTOR" +{ + rtx op1 = operands[1]; + int value = 256; + int num_insns = -1; + + if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value) + || num_insns != 1) + gcc_unreachable (); + + operands[2] = GEN_INT (value & 0xff); + return "xxspltib %x0,%2"; +} + [(set_attr "type" "vecperm")]) + +(define_insn_and_split "*xxspltib_<mode>_split" + [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v") + (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))] + "TARGET_P9_VECTOR" + "#" + "&& 1" + [(const_int 0)] +{ + int value = 256; + int num_insns = -1; + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx tmp = ((can_create_pseudo_p ()) + ? gen_reg_rtx (V16QImode) + : gen_lowpart (V16QImode, op0)); + + if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value) + || num_insns != 2) + gcc_unreachable (); + + emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value))); + + if (<MODE>mode == V2DImode) + emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp)); + + else if (<MODE>mode == V4SImode) + emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp)); + + else if (<MODE>mode == V8HImode) + emit_insn (gen_altivec_vupkhsb (op0, tmp)); + + else + gcc_unreachable (); + + DONE; +} + [(set_attr "type" "vecperm") + (set_attr "length" "8")]) + + +;; Prefer using vector registers over GPRs. Prefer using ISA 3.0's XXSPLTIB +;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or +;; all 1's, since the machine does not have to wait for the previous +;; instruction using the register being set (such as a store waiting on a slow +;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
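For example, the all-zero and all-one vector constants this comment is about can be written directly in C; a hedged usage sketch follows (vec_splats is standard altivec.h, while which of xxspltib, vspltisw, xxlxor or xxlorc ends up being generated depends on the ISA level and the register allocation, per the preference described above).

#include <altivec.h>

/* Usage sketch only: 0 and -1 splats can be materialized directly in
   a vector register instead of being built in GPRs and moved over.  */
vector signed char all_zeros (void) { return vec_splats ((signed char) 0);  }
vector signed char all_ones (void)  { return vec_splats ((signed char) -1); }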
+ +;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR) +;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW +;; VSX 0/-1 GPR 0/-1 VMX const GPR const LVX (VMX) STVX (VMX) +(define_insn "*vsx_mov<mode>_64bit" + [(set (match_operand:VSX_M 0 "nonimmediate_operand" + "=ZwO, <VSa>, <VSa>, r, we, ?wQ, + ?&r, ??r, ??Y, ??r, wo, v, + ?<VSa>, *r, v, ??r, wZ, v") + + (match_operand:VSX_M 1 "input_operand" + "<VSa>, ZwO, <VSa>, we, r, r, + wQ, Y, r, r, wE, jwM, + ?jwM, jwM, W, W, v, wZ"))] + + "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) + && (register_operand (operands[0], <MODE>mode) + || register_operand (operands[1], <MODE>mode))" +{ + return rs6000_output_move_128bit (operands); +} + [(set_attr "type" + "vecstore, vecload, vecsimple, mffgpr, mftgpr, load, + store, load, store, *, vecsimple, vecsimple, + vecsimple, *, *, *, vecstore, vecload") + + (set_attr "length" + "4, 4, 4, 8, 4, 8, + 8, 8, 8, 8, 4, 4, + 4, 8, 20, 20, 4, 4")]) + +;; VSX store VSX load VSX move GPR load GPR store GPR move +;; XXSPLTIB VSPLTISW VSX 0/-1 GPR 0/-1 VMX const GPR const +;; LVX (VMX) STVX (VMX) +(define_insn "*vsx_mov<mode>_32bit" + [(set (match_operand:VSX_M 0 "nonimmediate_operand" + "=ZwO, <VSa>, <VSa>, ??r, ??Y, ??r, + wo, v, ?<VSa>, *r, v, ??r, + wZ, v") + + (match_operand:VSX_M 1 "input_operand" + "<VSa>, ZwO, <VSa>, Y, r, r, + wE, jwM, ?jwM, jwM, W, W, + v, wZ"))] + + "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) + && (register_operand (operands[0], <MODE>mode) + || register_operand (operands[1], <MODE>mode))" +{ + return rs6000_output_move_128bit (operands); +} + [(set_attr "type" + "vecstore, vecload, vecsimple, load, store, *, + vecsimple, vecsimple, vecsimple, *, *, *, + vecstore, vecload") + + (set_attr "length" + "4, 4, 4, 16, 16, 16, + 4, 4, 4, 16, 20, 32, + 4, 4")]) + +;; Explicit load/store expanders for the builtin functions +(define_expand "vsx_load_<mode>" + [(set (match_operand:VSX_M 0 "vsx_register_operand" "") + (match_operand:VSX_M 1 "memory_operand" ""))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + /* Expand to swaps if needed, prior to swap optimization. */ + if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR) + { + rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode); + DONE; + } +}) + +(define_expand "vsx_store_<mode>" + [(set (match_operand:VSX_M 0 "memory_operand" "") + (match_operand:VSX_M 1 "vsx_register_operand" ""))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + /* Expand to swaps if needed, prior to swap optimization. */ + if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR) + { + rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode); + DONE; + } +}) + +;; Explicit load/store expanders for the builtin functions for lxvd2x, etc., +;; when you really want their element-reversing behavior. 
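The element-reversing behavior these expanders expose is easiest to see as a plain loop; here is a conceptual C sketch of the V4SI case (semantics only, matching the (vec_select ... [3 2 1 0]) form of vsx_ld_elemrev_v4si below, not how the instruction is implemented).

#include <stdint.h>

/* Semantics sketch: element i of the loaded vector comes from element
   3 - i of the memory image.  */
static void load_elemrev_v4si (int32_t dst[4], const int32_t src[4])
{
  for (int i = 0; i < 4; i++)
    dst[i] = src[3 - i];
}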
+(define_insn "vsx_ld_elemrev_v2di" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") + (vec_select:V2DI + (match_operand:V2DI 1 "memory_operand" "Z") + (parallel [(const_int 1) (const_int 0)])))] + "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN" + "lxvd2x %x0,%y1" + [(set_attr "type" "vecload")]) + +(define_insn "vsx_ld_elemrev_v2df" + [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa") + (vec_select:V2DF + (match_operand:V2DF 1 "memory_operand" "Z") + (parallel [(const_int 1) (const_int 0)])))] + "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN" + "lxvd2x %x0,%y1" + [(set_attr "type" "vecload")]) + +(define_insn "vsx_ld_elemrev_v4si" + [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa") + (vec_select:V4SI + (match_operand:V4SI 1 "memory_operand" "Z") + (parallel [(const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] + "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN" + "lxvw4x %x0,%y1" + [(set_attr "type" "vecload")]) + +(define_insn "vsx_ld_elemrev_v4sf" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") + (vec_select:V4SF + (match_operand:V4SF 1 "memory_operand" "Z") + (parallel [(const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] + "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN" + "lxvw4x %x0,%y1" + [(set_attr "type" "vecload")]) + +(define_insn "vsx_ld_elemrev_v8hi" + [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") + (vec_select:V8HI + (match_operand:V8HI 1 "memory_operand" "Z") + (parallel [(const_int 7) (const_int 6) + (const_int 5) (const_int 4) + (const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] + "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" + "lxvh8x %x0,%y1" + [(set_attr "type" "vecload")]) + +(define_insn "vsx_ld_elemrev_v16qi" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (vec_select:V16QI + (match_operand:V16QI 1 "memory_operand" "Z") + (parallel [(const_int 15) (const_int 14) + (const_int 13) (const_int 12) + (const_int 11) (const_int 10) + (const_int 9) (const_int 8) + (const_int 7) (const_int 6) + (const_int 5) (const_int 4) + (const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] + "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" + "lxvb16x %x0,%y1" + [(set_attr "type" "vecload")]) + +(define_insn "vsx_st_elemrev_v2df" + [(set (match_operand:V2DF 0 "memory_operand" "=Z") + (vec_select:V2DF + (match_operand:V2DF 1 "vsx_register_operand" "wa") + (parallel [(const_int 1) (const_int 0)])))] + "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN" + "stxvd2x %x1,%y0" + [(set_attr "type" "vecstore")]) + +(define_insn "vsx_st_elemrev_v2di" + [(set (match_operand:V2DI 0 "memory_operand" "=Z") + (vec_select:V2DI + (match_operand:V2DI 1 "vsx_register_operand" "wa") + (parallel [(const_int 1) (const_int 0)])))] + "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN" + "stxvd2x %x1,%y0" + [(set_attr "type" "vecstore")]) + +(define_insn "vsx_st_elemrev_v4sf" + [(set (match_operand:V4SF 0 "memory_operand" "=Z") + (vec_select:V4SF + (match_operand:V4SF 1 "vsx_register_operand" "wa") + (parallel [(const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] + "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN" + "stxvw4x %x1,%y0" + [(set_attr "type" "vecstore")]) + +(define_insn "vsx_st_elemrev_v4si" + [(set (match_operand:V4SI 0 "memory_operand" "=Z") + (vec_select:V4SI + (match_operand:V4SI 1 "vsx_register_operand" "wa") + (parallel [(const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] + "VECTOR_MEM_VSX_P (V4SImode) && 
!BYTES_BIG_ENDIAN" + "stxvw4x %x1,%y0" + [(set_attr "type" "vecstore")]) + +(define_insn "vsx_st_elemrev_v8hi" + [(set (match_operand:V8HI 0 "memory_operand" "=Z") + (vec_select:V8HI + (match_operand:V8HI 1 "vsx_register_operand" "wa") + (parallel [(const_int 7) (const_int 6) + (const_int 5) (const_int 4) + (const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] + "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" + "stxvh8x %x1,%y0" + [(set_attr "type" "vecstore")]) + +(define_insn "vsx_st_elemrev_v16qi" + [(set (match_operand:V16QI 0 "memory_operand" "=Z") + (vec_select:V16QI + (match_operand:V16QI 1 "vsx_register_operand" "wa") + (parallel [(const_int 15) (const_int 14) + (const_int 13) (const_int 12) + (const_int 11) (const_int 10) + (const_int 9) (const_int 8) + (const_int 7) (const_int 6) + (const_int 5) (const_int 4) + (const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] + "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" + "stxvb16x %x1,%y0" + [(set_attr "type" "vecstore")]) + + +;; VSX vector floating point arithmetic instructions. The VSX scalar +;; instructions are now combined with the insn for the traditional floating +;; point unit. +(define_insn "*vsx_add<mode>3" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvadd<VSs> %x0,%x1,%x2" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +(define_insn "*vsx_sub<mode>3" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvsub<VSs> %x0,%x1,%x2" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +(define_insn "*vsx_mul<mode>3" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvmul<VSs> %x0,%x1,%x2" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_mul>")]) + +; Emulate vector with scalar for vec_mul in V2DImode +(define_insn_and_split "vsx_mul_v2di" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") + (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa") + (match_operand:V2DI 2 "vsx_register_operand" "wa")] + UNSPEC_VSX_MULSD))] + "VECTOR_MEM_VSX_P (V2DImode)" + "#" + "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress" + [(const_int 0)] + " +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op3 = gen_reg_rtx (DImode); + rtx op4 = gen_reg_rtx (DImode); + rtx op5 = gen_reg_rtx (DImode); + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); + emit_insn (gen_muldi3 (op5, op3, op4)); + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); + emit_insn (gen_muldi3 (op3, op3, op4)); + emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); + DONE; +}" + [(set_attr "type" "mul")]) + +(define_insn "*vsx_div<mode>3" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (div:VSX_F 
(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvdiv<VSs> %x0,%x1,%x2" + [(set_attr "type" "<VStype_div>") + (set_attr "fp_type" "<VSfptype_div>")]) + +; Emulate vector with scalar for vec_div in V2DImode +(define_insn_and_split "vsx_div_v2di" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") + (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa") + (match_operand:V2DI 2 "vsx_register_operand" "wa")] + UNSPEC_VSX_DIVSD))] + "VECTOR_MEM_VSX_P (V2DImode)" + "#" + "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress" + [(const_int 0)] + " +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op3 = gen_reg_rtx (DImode); + rtx op4 = gen_reg_rtx (DImode); + rtx op5 = gen_reg_rtx (DImode); + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); + emit_insn (gen_divdi3 (op5, op3, op4)); + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); + emit_insn (gen_divdi3 (op3, op3, op4)); + emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); + DONE; +}" + [(set_attr "type" "div")]) + +(define_insn_and_split "vsx_udiv_v2di" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") + (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa") + (match_operand:V2DI 2 "vsx_register_operand" "wa")] + UNSPEC_VSX_DIVUD))] + "VECTOR_MEM_VSX_P (V2DImode)" + "#" + "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress" + [(const_int 0)] + " +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op3 = gen_reg_rtx (DImode); + rtx op4 = gen_reg_rtx (DImode); + rtx op5 = gen_reg_rtx (DImode); + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); + emit_insn (gen_udivdi3 (op5, op3, op4)); + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); + emit_insn (gen_udivdi3 (op3, op3, op4)); + emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); + DONE; +}" + [(set_attr "type" "div")]) + +;; *tdiv* instruction returning the FG flag +(define_expand "vsx_tdiv<mode>3_fg" + [(set (match_dup 3) + (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "") + (match_operand:VSX_B 2 "vsx_register_operand" "")] + UNSPEC_VSX_TDIV)) + (set (match_operand:SI 0 "gpc_reg_operand" "") + (gt:SI (match_dup 3) + (const_int 0)))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" +{ + operands[3] = gen_reg_rtx (CCFPmode); +}) + +;; *tdiv* instruction returning the FE flag +(define_expand "vsx_tdiv<mode>3_fe" + [(set (match_dup 3) + (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "") + (match_operand:VSX_B 2 "vsx_register_operand" "")] + UNSPEC_VSX_TDIV)) + (set (match_operand:SI 0 "gpc_reg_operand" "") + (eq:SI (match_dup 3) + (const_int 0)))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" +{ + operands[3] = gen_reg_rtx (CCFPmode); +}) + +(define_insn "*vsx_tdiv<mode>3_internal" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x") + (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>") + (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")] + UNSPEC_VSX_TDIV))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "x<VSv>tdiv<VSs> %0,%x1,%x2" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +(define_insn 
"vsx_fre<mode>2" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")] + UNSPEC_FRES))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvre<VSs> %x0,%x1" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +(define_insn "*vsx_neg<mode>2" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvneg<VSs> %x0,%x1" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +(define_insn "*vsx_abs<mode>2" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvabs<VSs> %x0,%x1" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +(define_insn "vsx_nabs<mode>2" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (neg:VSX_F + (abs:VSX_F + (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvnabs<VSs> %x0,%x1" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +(define_insn "vsx_smax<mode>3" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvmax<VSs> %x0,%x1,%x2" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +(define_insn "*vsx_smin<mode>3" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvmin<VSs> %x0,%x1,%x2" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +(define_insn "*vsx_sqrt<mode>2" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvsqrt<VSs> %x0,%x1" + [(set_attr "type" "<VStype_sqrt>") + (set_attr "fp_type" "<VSfptype_sqrt>")]) + +(define_insn "*vsx_rsqrte<mode>2" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")] + UNSPEC_RSQRT))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvrsqrte<VSs> %x0,%x1" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +;; *tsqrt* returning the fg flag +(define_expand "vsx_tsqrt<mode>2_fg" + [(set (match_dup 2) + (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")] + UNSPEC_VSX_TSQRT)) + (set (match_operand:SI 0 "gpc_reg_operand" "") + (gt:SI (match_dup 2) + (const_int 0)))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" +{ + operands[2] = gen_reg_rtx (CCFPmode); +}) + +;; *tsqrt* returning the fe flag +(define_expand "vsx_tsqrt<mode>2_fe" + [(set (match_dup 2) + (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")] + UNSPEC_VSX_TSQRT)) + (set (match_operand:SI 0 "gpc_reg_operand" "") + (eq:SI (match_dup 2) + (const_int 0)))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" +{ + operands[2] = gen_reg_rtx (CCFPmode); +}) + +(define_insn "*vsx_tsqrt<mode>2_internal" + [(set 
(match_operand:CCFP 0 "cc_reg_operand" "=x,x") + (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")] + UNSPEC_VSX_TSQRT))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "x<VSv>tsqrt<VSs> %0,%x1" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +;; Fused vector multiply/add instructions. Support the classical Altivec +;; versions of fma, which allows the target to be a separate register from the +;; 3 inputs. Under VSX, the target must be either the addend or the first +;; multiply. + +(define_insn "*vsx_fmav4sf4" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v") + (fma:V4SF + (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v") + (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v") + (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))] + "VECTOR_UNIT_VSX_P (V4SFmode)" + "@ + xvmaddasp %x0,%x1,%x2 + xvmaddmsp %x0,%x1,%x3 + xvmaddasp %x0,%x1,%x2 + xvmaddmsp %x0,%x1,%x3 + vmaddfp %0,%1,%2,%3" + [(set_attr "type" "vecfloat")]) + +(define_insn "*vsx_fmav2df4" + [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa") + (fma:V2DF + (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa") + (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0") + (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "@ + xvmaddadp %x0,%x1,%x2 + xvmaddmdp %x0,%x1,%x3 + xvmaddadp %x0,%x1,%x2 + xvmaddmdp %x0,%x1,%x3" + [(set_attr "type" "vecdouble")]) + +(define_insn "*vsx_fms<mode>4" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>") + (fma:VSX_F + (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0") + (neg:VSX_F + (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "@ + xvmsuba<VSs> %x0,%x1,%x2 + xvmsubm<VSs> %x0,%x1,%x3 + xvmsuba<VSs> %x0,%x1,%x2 + xvmsubm<VSs> %x0,%x1,%x3" + [(set_attr "type" "<VStype_mul>")]) + +(define_insn "*vsx_nfma<mode>4" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>") + (neg:VSX_F + (fma:VSX_F + (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0") + (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "@ + xvnmadda<VSs> %x0,%x1,%x2 + xvnmaddm<VSs> %x0,%x1,%x3 + xvnmadda<VSs> %x0,%x1,%x2 + xvnmaddm<VSs> %x0,%x1,%x3" + [(set_attr "type" "<VStype_mul>") + (set_attr "fp_type" "<VSfptype_mul>")]) + +(define_insn "*vsx_nfmsv4sf4" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v") + (neg:V4SF + (fma:V4SF + (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v") + (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v") + (neg:V4SF + (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))] + "VECTOR_UNIT_VSX_P (V4SFmode)" + "@ + xvnmsubasp %x0,%x1,%x2 + xvnmsubmsp %x0,%x1,%x3 + xvnmsubasp %x0,%x1,%x2 + xvnmsubmsp %x0,%x1,%x3 + vnmsubfp %0,%1,%2,%3" + [(set_attr "type" "vecfloat")]) + +(define_insn "*vsx_nfmsv2df4" + [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa") + (neg:V2DF + (fma:V2DF + (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa") + (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0") + (neg:V2DF + (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "@ + xvnmsubadp 
%x0,%x1,%x2 + xvnmsubmdp %x0,%x1,%x3 + xvnmsubadp %x0,%x1,%x2 + xvnmsubmdp %x0,%x1,%x3" + [(set_attr "type" "vecdouble")]) + +;; Vector conditional expressions (no scalar version for these instructions) +(define_insn "vsx_eq<mode>" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvcmpeq<VSs> %x0,%x1,%x2" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +(define_insn "vsx_gt<mode>" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvcmpgt<VSs> %x0,%x1,%x2" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +(define_insn "*vsx_ge<mode>" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvcmpge<VSs> %x0,%x1,%x2" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +;; Compare vectors producing a vector result and a predicate, setting CR6 to +;; indicate a combined status +(define_insn "*vsx_eq_<mode>_p" + [(set (reg:CC CR6_REGNO) + (unspec:CC + [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))] + UNSPEC_PREDICATE)) + (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (eq:VSX_F (match_dup 1) + (match_dup 2)))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvcmpeq<VSs>. %x0,%x1,%x2" + [(set_attr "type" "<VStype_simple>")]) + +(define_insn "*vsx_gt_<mode>_p" + [(set (reg:CC CR6_REGNO) + (unspec:CC + [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))] + UNSPEC_PREDICATE)) + (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (gt:VSX_F (match_dup 1) + (match_dup 2)))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvcmpgt<VSs>. %x0,%x1,%x2" + [(set_attr "type" "<VStype_simple>")]) + +(define_insn "*vsx_ge_<mode>_p" + [(set (reg:CC CR6_REGNO) + (unspec:CC + [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))] + UNSPEC_PREDICATE)) + (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (ge:VSX_F (match_dup 1) + (match_dup 2)))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvcmpge<VSs>. 
%x0,%x1,%x2" + [(set_attr "type" "<VStype_simple>")]) + +;; Vector select +(define_insn "*vsx_xxsel<mode>" + [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (if_then_else:VSX_L + (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>") + (match_operand:VSX_L 4 "zero_constant" "")) + (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>") + (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))] + "VECTOR_MEM_VSX_P (<MODE>mode)" + "xxsel %x0,%x3,%x2,%x1" + [(set_attr "type" "vecmove")]) + +(define_insn "*vsx_xxsel<mode>_uns" + [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (if_then_else:VSX_L + (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>") + (match_operand:VSX_L 4 "zero_constant" "")) + (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>") + (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))] + "VECTOR_MEM_VSX_P (<MODE>mode)" + "xxsel %x0,%x3,%x2,%x1" + [(set_attr "type" "vecmove")]) + +;; Copy sign +(define_insn "vsx_copysign<mode>3" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (unspec:VSX_F + [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")] + UNSPEC_COPYSIGN))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvcpsgn<VSs> %x0,%x2,%x1" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +;; For the conversions, limit the register class for the integer value to be +;; the fprs because we don't want to add the altivec registers to movdi/movsi. +;; For the unsigned tests, there isn't a generic double -> unsigned conversion +;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX. +;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md +;; in allowing virtual registers. 
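+;; As an illustration, a loop that the vectorizer turns into a V4SI -> V4SF
+;; conversion, e.g. "for (i = 0; i < n; i++) f[i] = (float) si[i];", is
+;; expected to match the float pattern below and emit xvcvsxwsp.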
+(define_insn "vsx_float<VSi><mode>2" + [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>") + (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvcvsx<VSc><VSs> %x0,%x1" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +(define_insn "vsx_floatuns<VSi><mode>2" + [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>") + (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvcvux<VSc><VSs> %x0,%x1" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +(define_insn "vsx_fix_trunc<mode><VSi>2" + [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>") + (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "x<VSv>cv<VSs>sx<VSc>s %x0,%x1" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +(define_insn "vsx_fixuns_trunc<mode><VSi>2" + [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>") + (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "x<VSv>cv<VSs>ux<VSc>s %x0,%x1" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +;; Math rounding functions +(define_insn "vsx_x<VSv>r<VSs>i" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")] + UNSPEC_VSX_ROUND_I))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "x<VSv>r<VSs>i %x0,%x1" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +(define_insn "vsx_x<VSv>r<VSs>ic" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")] + UNSPEC_VSX_ROUND_IC))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "x<VSv>r<VSs>ic %x0,%x1" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +(define_insn "vsx_btrunc<mode>2" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvr<VSs>iz %x0,%x1" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +(define_insn "*vsx_b2trunc<mode>2" + [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")] + UNSPEC_FRIZ))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "x<VSv>r<VSs>iz %x0,%x1" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +(define_insn "vsx_floor<mode>2" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")] + UNSPEC_FRIM))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvr<VSs>im %x0,%x1" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + +(define_insn "vsx_ceil<mode>2" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") + (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")] + UNSPEC_FRIP))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "xvr<VSs>ip %x0,%x1" + [(set_attr "type" "<VStype_simple>") + (set_attr "fp_type" "<VSfptype_simple>")]) + + +;; VSX convert to/from double vector + +;; Convert between single and double precision 
+;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal +;; scalar single precision instructions internally use the double format. +;; Prefer the altivec registers, since we likely will need to do a vperm +(define_insn "vsx_<VS_spdp_insn>" + [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>") + (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")] + UNSPEC_VSX_CVSPDP))] + "VECTOR_UNIT_VSX_P (<MODE>mode)" + "<VS_spdp_insn> %x0,%x1" + [(set_attr "type" "<VS_spdp_type>")]) + +;; xscvspdp, represent the scalar SF type as V4SF +(define_insn "vsx_xscvspdp" + [(set (match_operand:DF 0 "vsx_register_operand" "=ws") + (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_CVSPDP))] + "VECTOR_UNIT_VSX_P (V4SFmode)" + "xscvspdp %x0,%x1" + [(set_attr "type" "fp")]) + +;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF +;; format of scalars is actually DF. +(define_insn "vsx_xscvdpsp_scalar" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") + (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")] + UNSPEC_VSX_CVSPDP))] + "VECTOR_UNIT_VSX_P (V4SFmode)" + "xscvdpsp %x0,%x1" + [(set_attr "type" "fp")]) + +;; Same as vsx_xscvspdp, but use SF as the type +(define_insn "vsx_xscvspdp_scalar2" + [(set (match_operand:SF 0 "vsx_register_operand" "=ww") + (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_CVSPDP))] + "VECTOR_UNIT_VSX_P (V4SFmode)" + "xscvspdp %x0,%x1" + [(set_attr "type" "fp")]) + +;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs +(define_insn "vsx_xscvdpspn" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww,?ww") + (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")] + UNSPEC_VSX_CVDPSPN))] + "TARGET_XSCVDPSPN" + "xscvdpspn %x0,%x1" + [(set_attr "type" "fp")]) + +(define_insn "vsx_xscvspdpn" + [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?ws") + (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wf,wa")] + UNSPEC_VSX_CVSPDPN))] + "TARGET_XSCVSPDPN" + "xscvspdpn %x0,%x1" + [(set_attr "type" "fp")]) + +(define_insn "vsx_xscvdpspn_scalar" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,?wa") + (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww,ww")] + UNSPEC_VSX_CVDPSPN))] + "TARGET_XSCVDPSPN" + "xscvdpspn %x0,%x1" + [(set_attr "type" "fp")]) + +;; Used by direct move to move a SFmode value from GPR to VSX register +(define_insn "vsx_xscvspdpn_directmove" + [(set (match_operand:SF 0 "vsx_register_operand" "=wa") + (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_CVSPDPN))] + "TARGET_XSCVSPDPN" + "xscvspdpn %x0,%x1" + [(set_attr "type" "fp")]) + +;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long) + +(define_expand "vsx_xvcvsxddp_scale" + [(match_operand:V2DF 0 "vsx_register_operand" "") + (match_operand:V2DI 1 "vsx_register_operand" "") + (match_operand:QI 2 "immediate_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode)" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + int scale = INTVAL(operands[2]); + emit_insn (gen_vsx_xvcvsxddp (op0, op1)); + if (scale != 0) + rs6000_scale_v2df (op0, op0, -scale); + DONE; +}) + +(define_insn "vsx_xvcvsxddp" + [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa") + (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_XVCVSXDDP))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvsxddp %x0,%x1" + [(set_attr "type" "vecdouble")]) + 
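+;; For example, vec_ctf on a V2DI value with a scale of 5 goes through one
+;; of these *_scale expanders: the xvcvsxddp/xvcvuxddp conversion is
+;; followed by an rs6000_scale_v2df multiply by 2^-5 when the scale is
+;; nonzero.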
+(define_expand "vsx_xvcvuxddp_scale" + [(match_operand:V2DF 0 "vsx_register_operand" "") + (match_operand:V2DI 1 "vsx_register_operand" "") + (match_operand:QI 2 "immediate_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode)" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + int scale = INTVAL(operands[2]); + emit_insn (gen_vsx_xvcvuxddp (op0, op1)); + if (scale != 0) + rs6000_scale_v2df (op0, op0, -scale); + DONE; +}) + +(define_insn "vsx_xvcvuxddp" + [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa") + (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_XVCVUXDDP))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvuxddp %x0,%x1" + [(set_attr "type" "vecdouble")]) + +(define_expand "vsx_xvcvdpsxds_scale" + [(match_operand:V2DI 0 "vsx_register_operand" "") + (match_operand:V2DF 1 "vsx_register_operand" "") + (match_operand:QI 2 "immediate_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode)" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx tmp; + int scale = INTVAL (operands[2]); + if (scale == 0) + tmp = op1; + else + { + tmp = gen_reg_rtx (V2DFmode); + rs6000_scale_v2df (tmp, op1, scale); + } + emit_insn (gen_vsx_xvcvdpsxds (op0, tmp)); + DONE; +}) + +(define_insn "vsx_xvcvdpsxds" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") + (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_XVCVDPSXDS))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvdpsxds %x0,%x1" + [(set_attr "type" "vecdouble")]) + +(define_expand "vsx_xvcvdpuxds_scale" + [(match_operand:V2DI 0 "vsx_register_operand" "") + (match_operand:V2DF 1 "vsx_register_operand" "") + (match_operand:QI 2 "immediate_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode)" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx tmp; + int scale = INTVAL (operands[2]); + if (scale == 0) + tmp = op1; + else + { + tmp = gen_reg_rtx (V2DFmode); + rs6000_scale_v2df (tmp, op1, scale); + } + emit_insn (gen_vsx_xvcvdpuxds (op0, tmp)); + DONE; +}) + +(define_insn "vsx_xvcvdpuxds" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") + (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_XVCVDPUXDS))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvdpuxds %x0,%x1" + [(set_attr "type" "vecdouble")]) + +;; Convert from 64-bit to 32-bit types +;; Note, favor the Altivec registers since the usual use of these instructions +;; is in vector converts and we need to use the Altivec vperm instruction. 
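+;; Roughly speaking, a single xvcvdpsxws/xvcvdpuxws only fills two of the
+;; four word elements of the V4SI result, so a V2DF -> V4SI conversion
+;; normally pairs two such results with a vperm, hence the preference for
+;; the Altivec register alternatives below.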
+ +(define_insn "vsx_xvcvdpsxws" + [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa") + (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")] + UNSPEC_VSX_CVDPSXWS))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvdpsxws %x0,%x1" + [(set_attr "type" "vecdouble")]) + +(define_insn "vsx_xvcvdpuxws" + [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa") + (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")] + UNSPEC_VSX_CVDPUXWS))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvdpuxws %x0,%x1" + [(set_attr "type" "vecdouble")]) + +(define_insn "vsx_xvcvsxdsp" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa") + (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")] + UNSPEC_VSX_CVSXDSP))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvsxdsp %x0,%x1" + [(set_attr "type" "vecfloat")]) + +(define_insn "vsx_xvcvuxdsp" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa") + (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")] + UNSPEC_VSX_CVUXDSP))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvuxdsp %x0,%x1" + [(set_attr "type" "vecdouble")]) + +;; Convert from 32-bit to 64-bit types +;; Provide both vector and scalar targets +(define_insn "vsx_xvcvsxwdp" + [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa") + (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")] + UNSPEC_VSX_CVSXWDP))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvsxwdp %x0,%x1" + [(set_attr "type" "vecdouble")]) + +(define_insn "vsx_xvcvsxwdp_df" + [(set (match_operand:DF 0 "vsx_register_operand" "=ws") + (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_CVSXWDP))] + "TARGET_VSX" + "xvcvsxwdp %x0,%x1" + [(set_attr "type" "vecdouble")]) + +(define_insn "vsx_xvcvuxwdp" + [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa") + (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")] + UNSPEC_VSX_CVUXWDP))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvuxwdp %x0,%x1" + [(set_attr "type" "vecdouble")]) + +(define_insn "vsx_xvcvuxwdp_df" + [(set (match_operand:DF 0 "vsx_register_operand" "=ws") + (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_CVUXWDP))] + "TARGET_VSX" + "xvcvuxwdp %x0,%x1" + [(set_attr "type" "vecdouble")]) + +(define_insn "vsx_xvcvspsxds" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa") + (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")] + UNSPEC_VSX_CVSPSXDS))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvspsxds %x0,%x1" + [(set_attr "type" "vecdouble")]) + +(define_insn "vsx_xvcvspuxds" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa") + (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")] + UNSPEC_VSX_CVSPUXDS))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvspuxds %x0,%x1" + [(set_attr "type" "vecdouble")]) + +;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since +;; since the xvrdpiz instruction does not truncate the value if the floating +;; point value is < LONG_MIN or > LONG_MAX. 
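+;; For example, with -ffast-math a per-element truncation written as
+;; "d[i] = (double) (long long) d[i];" can be recognized by the pattern
+;; below and become a single xvrdpiz.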
+(define_insn "*vsx_float_fix_v2df2" + [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa") + (float:V2DF + (fix:V2DI + (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations + && !flag_trapping_math && TARGET_FRIZ" + "xvrdpiz %x0,%x1" + [(set_attr "type" "vecdouble") + (set_attr "fp_type" "fp_addsub_d")]) + + +;; Permute operations + +;; Build a V2DF/V2DI vector from two scalars +(define_insn "vsx_concat_<mode>" + [(set (match_operand:VSX_D 0 "gpc_reg_operand" "=<VSa>,we") + (vec_concat:VSX_D + (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VS_64reg>,b") + (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VS_64reg>,b")))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + if (which_alternative == 0) + return (BYTES_BIG_ENDIAN + ? "xxpermdi %x0,%x1,%x2,0" + : "xxpermdi %x0,%x2,%x1,0"); + + else if (which_alternative == 1) + return (BYTES_BIG_ENDIAN + ? "mtvsrdd %x0,%1,%2" + : "mtvsrdd %x0,%2,%1"); + + else + gcc_unreachable (); +} + [(set_attr "type" "vecperm")]) + +;; Special purpose concat using xxpermdi to glue two single precision values +;; together, relying on the fact that internally scalar floats are represented +;; as doubles. This is used to initialize a V4SF vector with 4 floats +(define_insn "vsx_concat_v2sf" + [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa") + (unspec:V2DF + [(match_operand:SF 1 "vsx_register_operand" "ww") + (match_operand:SF 2 "vsx_register_operand" "ww")] + UNSPEC_VSX_CONCAT))] + "VECTOR_MEM_VSX_P (V2DFmode)" +{ + if (BYTES_BIG_ENDIAN) + return "xxpermdi %x0,%x1,%x2,0"; + else + return "xxpermdi %x0,%x2,%x1,0"; +} + [(set_attr "type" "vecperm")]) + +;; V4SImode initialization splitter +(define_insn_and_split "vsx_init_v4si" + [(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r") + (unspec:V4SI + [(match_operand:SI 1 "reg_or_cint_operand" "rn") + (match_operand:SI 2 "reg_or_cint_operand" "rn") + (match_operand:SI 3 "reg_or_cint_operand" "rn") + (match_operand:SI 4 "reg_or_cint_operand" "rn")] + UNSPEC_VSX_VEC_INIT)) + (clobber (match_scratch:DI 5 "=&r")) + (clobber (match_scratch:DI 6 "=&r"))] + "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rs6000_split_v4si_init (operands); + DONE; +}) + +;; xxpermdi for little endian loads and stores. We need several of +;; these since the form of the PARALLEL differs by mode. 
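+;; The underlying idea, roughly: lxvd2x/stxvd2x always transfer the two
+;; doublewords in big-endian order, so on little-endian targets they are
+;; paired with an "xxpermdi ...,2" that swaps the halves to restore the
+;; expected element order.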
+(define_insn "*vsx_xxpermdi2_le_<mode>" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>") + (vec_select:VSX_D + (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>") + (parallel [(const_int 1) (const_int 0)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)" + "xxpermdi %x0,%x1,%x1,2" + [(set_attr "type" "vecperm")]) + +(define_insn "*vsx_xxpermdi4_le_<mode>" + [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>") + (vec_select:VSX_W + (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>") + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)" + "xxpermdi %x0,%x1,%x1,2" + [(set_attr "type" "vecperm")]) + +(define_insn "*vsx_xxpermdi8_le_V8HI" + [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") + (vec_select:V8HI + (match_operand:V8HI 1 "vsx_register_operand" "wa") + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)" + "xxpermdi %x0,%x1,%x1,2" + [(set_attr "type" "vecperm")]) + +(define_insn "*vsx_xxpermdi16_le_V16QI" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (vec_select:V16QI + (match_operand:V16QI 1 "vsx_register_operand" "wa") + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)" + "xxpermdi %x0,%x1,%x1,2" + [(set_attr "type" "vecperm")]) + +;; lxvd2x for little endian loads. We need several of +;; these since the form of the PARALLEL differs by mode. +(define_insn "*vsx_lxvd2x2_le_<mode>" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>") + (vec_select:VSX_D + (match_operand:VSX_D 1 "memory_operand" "Z") + (parallel [(const_int 1) (const_int 0)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" + "lxvd2x %x0,%y1" + [(set_attr "type" "vecload")]) + +(define_insn "*vsx_lxvd2x4_le_<mode>" + [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>") + (vec_select:VSX_W + (match_operand:VSX_W 1 "memory_operand" "Z") + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" + "lxvd2x %x0,%y1" + [(set_attr "type" "vecload")]) + +(define_insn "*vsx_lxvd2x8_le_V8HI" + [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") + (vec_select:V8HI + (match_operand:V8HI 1 "memory_operand" "Z") + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR" + "lxvd2x %x0,%y1" + [(set_attr "type" "vecload")]) + +(define_insn "*vsx_lxvd2x16_le_V16QI" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (vec_select:V16QI + (match_operand:V16QI 1 "memory_operand" "Z") + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR" + "lxvd2x %x0,%y1" + [(set_attr "type" "vecload")]) + +;; stxvd2x for little endian stores. 
We need several of +;; these since the form of the PARALLEL differs by mode. +(define_insn "*vsx_stxvd2x2_le_<mode>" + [(set (match_operand:VSX_D 0 "memory_operand" "=Z") + (vec_select:VSX_D + (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>") + (parallel [(const_int 1) (const_int 0)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" + "stxvd2x %x1,%y0" + [(set_attr "type" "vecstore")]) + +(define_insn "*vsx_stxvd2x4_le_<mode>" + [(set (match_operand:VSX_W 0 "memory_operand" "=Z") + (vec_select:VSX_W + (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>") + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" + "stxvd2x %x1,%y0" + [(set_attr "type" "vecstore")]) + +(define_insn "*vsx_stxvd2x8_le_V8HI" + [(set (match_operand:V8HI 0 "memory_operand" "=Z") + (vec_select:V8HI + (match_operand:V8HI 1 "vsx_register_operand" "wa") + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR" + "stxvd2x %x1,%y0" + [(set_attr "type" "vecstore")]) + +(define_insn "*vsx_stxvd2x16_le_V16QI" + [(set (match_operand:V16QI 0 "memory_operand" "=Z") + (vec_select:V16QI + (match_operand:V16QI 1 "vsx_register_operand" "wa") + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15) + (const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])))] + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR" + "stxvd2x %x1,%y0" + [(set_attr "type" "vecstore")]) + +;; Convert a TImode value into V1TImode +(define_expand "vsx_set_v1ti" + [(match_operand:V1TI 0 "nonimmediate_operand" "") + (match_operand:V1TI 1 "nonimmediate_operand" "") + (match_operand:TI 2 "input_operand" "") + (match_operand:QI 3 "u5bit_cint_operand" "")] + "VECTOR_MEM_VSX_P (V1TImode)" +{ + if (operands[3] != const0_rtx) + gcc_unreachable (); + + emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1])); + DONE; +}) + +;; Set the element of a V2DI/VD2F mode +(define_insn "vsx_set_<mode>" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?<VSa>") + (unspec:VSX_D + [(match_operand:VSX_D 1 "vsx_register_operand" "wd,<VSa>") + (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>") + (match_operand:QI 3 "u5bit_cint_operand" "i,i")] + UNSPEC_VSX_SET))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + int idx_first = BYTES_BIG_ENDIAN ? 0 : 1; + if (INTVAL (operands[3]) == idx_first) + return \"xxpermdi %x0,%x2,%x1,1\"; + else if (INTVAL (operands[3]) == 1 - idx_first) + return \"xxpermdi %x0,%x1,%x2,0\"; + else + gcc_unreachable (); +} + [(set_attr "type" "vecperm")]) + +;; Extract a DF/DI element from V2DF/V2DI +;; Optimize cases were we can do a simple or direct move. +;; Or see if we can avoid doing the move at all + +;; There are some unresolved problems with reload that show up if an Altivec +;; register was picked. Limit the scalar value to FPRs for now. 
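+;; For instance, extracting one double from a V2DF may need no instruction
+;; at all (source and destination overlap), an fmr or xxlor register copy,
+;; an xxpermdi, or an mfvsrd/mfvsrld direct move to a GPR, depending on the
+;; element and on which registers are involved, as coded below.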
+ +(define_insn "vsx_extract_<mode>" + [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr") + + (vec_select:<VS_scalar> + (match_operand:VSX_D 1 "gpc_reg_operand" "<VSa>, <VSa>, wm, wo") + + (parallel + [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + int element = INTVAL (operands[2]); + int op0_regno = REGNO (operands[0]); + int op1_regno = REGNO (operands[1]); + int fldDM; + + gcc_assert (IN_RANGE (element, 0, 1)); + gcc_assert (VSX_REGNO_P (op1_regno)); + + if (element == VECTOR_ELEMENT_SCALAR_64BIT) + { + if (op0_regno == op1_regno) + return ASM_COMMENT_START " vec_extract to same register"; + + else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE + && TARGET_POWERPC64) + return "mfvsrd %0,%x1"; + + else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno)) + return "fmr %0,%1"; + + else if (VSX_REGNO_P (op0_regno)) + return "xxlor %x0,%x1,%x1"; + + else + gcc_unreachable (); + } + + else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno) + && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE) + return "mfvsrld %0,%x1"; + + else if (VSX_REGNO_P (op0_regno)) + { + fldDM = element << 1; + if (!BYTES_BIG_ENDIAN) + fldDM = 3 - fldDM; + operands[3] = GEN_INT (fldDM); + return "xxpermdi %x0,%x1,%x1,%3"; + } + + else + gcc_unreachable (); +} + [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")]) + +;; Optimize extracting a single scalar element from memory. +(define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load" + [(set (match_operand:<VS_scalar> 0 "register_operand" "=<VSX_D:VS_64reg>,wr") + (vec_select:<VSX_D:VS_scalar> + (match_operand:VSX_D 1 "memory_operand" "m,m") + (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")]))) + (clobber (match_scratch:P 3 "=&b,&b"))] + "VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)" + "#" + "&& reload_completed" + [(set (match_dup 0) (match_dup 4))] +{ + operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2], + operands[3], <VSX_D:VS_scalar>mode); +} + [(set_attr "type" "fpload,load") + (set_attr "length" "8")]) + +;; Optimize storing a single scalar element that is the right location to +;; memory +(define_insn "*vsx_extract_<mode>_store" + [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY") + (vec_select:<VS_scalar> + (match_operand:VSX_D 1 "register_operand" "d,wv,wb") + (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))] + "VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + stfd%U0%X0 %1,%0 + stxsd%U0x %x1,%y0 + stxsd %1,%0" + [(set_attr "type" "fpstore") + (set_attr "length" "4")]) + +;; Variable V2DI/V2DF extract shift +(define_insn "vsx_vslo_<mode>" + [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v") + (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v") + (match_operand:V2DI 2 "gpc_reg_operand" "v")] + UNSPEC_VSX_VSLO))] + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" + "vslo %0,%1,%2" + [(set_attr "type" "vecperm")]) + +;; Variable V2DI/V2DF extract +(define_insn_and_split "vsx_extract_<mode>_var" + [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,<VSa>,r") + (unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m") + (match_operand:DI 2 "gpc_reg_operand" "r,r,r")] + UNSPEC_VSX_EXTRACT)) + (clobber (match_scratch:DI 3 "=r,&b,&b")) + (clobber (match_scratch:V2DI 4 "=&v,X,X"))] + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rs6000_split_vec_extract_var (operands[0], operands[1], 
operands[2], + operands[3], operands[4]); + DONE; +}) + +;; Extract a SF element from V4SF +(define_insn_and_split "vsx_extract_v4sf" + [(set (match_operand:SF 0 "vsx_register_operand" "=ww") + (vec_select:SF + (match_operand:V4SF 1 "vsx_register_operand" "wa") + (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")]))) + (clobber (match_scratch:V4SF 3 "=0"))] + "VECTOR_UNIT_VSX_P (V4SFmode)" + "#" + "&& 1" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op3 = operands[3]; + rtx tmp; + HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2); + + if (ele == 0) + tmp = op1; + else + { + if (GET_CODE (op3) == SCRATCH) + op3 = gen_reg_rtx (V4SFmode); + emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele))); + tmp = op3; + } + emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp)); + DONE; +} + [(set_attr "length" "8") + (set_attr "type" "fp")]) + +(define_insn_and_split "*vsx_extract_v4sf_<mode>_load" + [(set (match_operand:SF 0 "register_operand" "=f,wv,wb,?r") + (vec_select:SF + (match_operand:V4SF 1 "memory_operand" "m,Z,m,m") + (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")]))) + (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))] + "VECTOR_MEM_VSX_P (V4SFmode)" + "#" + "&& reload_completed" + [(set (match_dup 0) (match_dup 4))] +{ + operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2], + operands[3], SFmode); +} + [(set_attr "type" "fpload,fpload,fpload,load") + (set_attr "length" "8")]) + +;; Variable V4SF extract +(define_insn_and_split "vsx_extract_v4sf_var" + [(set (match_operand:SF 0 "gpc_reg_operand" "=ww,ww,?r") + (unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m") + (match_operand:DI 2 "gpc_reg_operand" "r,r,r")] + UNSPEC_VSX_EXTRACT)) + (clobber (match_scratch:DI 3 "=r,&b,&b")) + (clobber (match_scratch:V2DI 4 "=&v,X,X"))] + "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rs6000_split_vec_extract_var (operands[0], operands[1], operands[2], + operands[3], operands[4]); + DONE; +}) + +;; Expand the builtin form of xxpermdi to canonical rtl. +(define_expand "vsx_xxpermdi_<mode>" + [(match_operand:VSX_L 0 "vsx_register_operand") + (match_operand:VSX_L 1 "vsx_register_operand") + (match_operand:VSX_L 2 "vsx_register_operand") + (match_operand:QI 3 "u5bit_cint_operand")] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + rtx target = operands[0]; + rtx op0 = operands[1]; + rtx op1 = operands[2]; + int mask = INTVAL (operands[3]); + rtx perm0 = GEN_INT ((mask >> 1) & 1); + rtx perm1 = GEN_INT ((mask & 1) + 2); + rtx (*gen) (rtx, rtx, rtx, rtx, rtx); + + if (<MODE>mode == V2DFmode) + gen = gen_vsx_xxpermdi2_v2df_1; + else + { + gen = gen_vsx_xxpermdi2_v2di_1; + if (<MODE>mode != V2DImode) + { + target = gen_lowpart (V2DImode, target); + op0 = gen_lowpart (V2DImode, op0); + op1 = gen_lowpart (V2DImode, op1); + } + } + emit_insn (gen (target, op0, op1, perm0, perm1)); + DONE; +}) + +;; Special version of xxpermdi that retains big-endian semantics. 
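+;; For example, a mask of 2 selects, in big-endian element numbering,
+;; doubleword 1 of the first input and doubleword 0 of the second, giving
+;; { op1[1], op2[0] }; bit 1 of the mask picks the doubleword of the first
+;; input and bit 0 that of the second.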
+(define_expand "vsx_xxpermdi_<mode>_be" + [(match_operand:VSX_L 0 "vsx_register_operand") + (match_operand:VSX_L 1 "vsx_register_operand") + (match_operand:VSX_L 2 "vsx_register_operand") + (match_operand:QI 3 "u5bit_cint_operand")] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + rtx target = operands[0]; + rtx op0 = operands[1]; + rtx op1 = operands[2]; + int mask = INTVAL (operands[3]); + rtx perm0 = GEN_INT ((mask >> 1) & 1); + rtx perm1 = GEN_INT ((mask & 1) + 2); + rtx (*gen) (rtx, rtx, rtx, rtx, rtx); + + if (<MODE>mode == V2DFmode) + gen = gen_vsx_xxpermdi2_v2df_1; + else + { + gen = gen_vsx_xxpermdi2_v2di_1; + if (<MODE>mode != V2DImode) + { + target = gen_lowpart (V2DImode, target); + op0 = gen_lowpart (V2DImode, op0); + op1 = gen_lowpart (V2DImode, op1); + } + } + /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a + transformation we don't want; it is necessary for + rs6000_expand_vec_perm_const_1 but not for this use. So we + prepare for that by reversing the transformation here. */ + if (BYTES_BIG_ENDIAN) + emit_insn (gen (target, op0, op1, perm0, perm1)); + else + { + rtx p0 = GEN_INT (3 - INTVAL (perm1)); + rtx p1 = GEN_INT (3 - INTVAL (perm0)); + emit_insn (gen (target, op1, op0, p0, p1)); + } + DONE; +}) + +(define_insn "vsx_xxpermdi2_<mode>_1" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd") + (vec_select:VSX_D + (vec_concat:<VS_double> + (match_operand:VSX_D 1 "vsx_register_operand" "wd") + (match_operand:VSX_D 2 "vsx_register_operand" "wd")) + (parallel [(match_operand 3 "const_0_to_1_operand" "") + (match_operand 4 "const_2_to_3_operand" "")])))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + int op3, op4, mask; + + /* For little endian, swap operands and invert/swap selectors + to get the correct xxpermdi. The operand swap sets up the + inputs as a little endian array. The selectors are swapped + because they are defined to use big endian ordering. The + selectors are inverted to get the correct doublewords for + little endian ordering. */ + if (BYTES_BIG_ENDIAN) + { + op3 = INTVAL (operands[3]); + op4 = INTVAL (operands[4]); + } + else + { + op3 = 3 - INTVAL (operands[4]); + op4 = 3 - INTVAL (operands[3]); + } + + mask = (op3 << 1) | (op4 - 2); + operands[3] = GEN_INT (mask); + + if (BYTES_BIG_ENDIAN) + return "xxpermdi %x0,%x1,%x2,%3"; + else + return "xxpermdi %x0,%x2,%x1,%3"; +} + [(set_attr "type" "vecperm")]) + +(define_expand "vec_perm_const<mode>" + [(match_operand:VSX_D 0 "vsx_register_operand" "") + (match_operand:VSX_D 1 "vsx_register_operand" "") + (match_operand:VSX_D 2 "vsx_register_operand" "") + (match_operand:V2DI 3 "" "")] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + if (rs6000_expand_vec_perm_const (operands)) + DONE; + else + FAIL; +}) + +;; Extraction of a single element in a small integer vector. Until ISA 3.0, +;; none of the small types were allowed in a vector register, so we had to +;; extract to a DImode and either do a direct move or store. +(define_expand "vsx_extract_<mode>" + [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand") + (vec_select:<VS_scalar> + (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand") + (parallel [(match_operand:QI 2 "const_int_operand")]))) + (clobber (match_scratch:VSX_EXTRACT_I 3))])] + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" +{ + /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. 
*/ + if (TARGET_VSX_SMALL_INTEGER && TARGET_P9_VECTOR) + { + emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1], + operands[2])); + DONE; + } +}) + +(define_insn "vsx_extract_<mode>_p9" + [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>") + (vec_select:<VS_scalar> + (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>") + (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")]))) + (clobber (match_scratch:SI 3 "=r,X"))] + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB + && TARGET_VSX_SMALL_INTEGER" +{ + if (which_alternative == 0) + return "#"; + + else + { + HOST_WIDE_INT elt = INTVAL (operands[2]); + HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG + ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt + : elt); + + HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode); + HOST_WIDE_INT offset = unit_size * elt_adj; + + operands[2] = GEN_INT (offset); + if (unit_size == 4) + return "xxextractuw %x0,%x1,%2"; + else + return "vextractu<wd> %0,%1,%2"; + } +} + [(set_attr "type" "vecsimple")]) + +(define_split + [(set (match_operand:<VS_scalar> 0 "int_reg_operand") + (vec_select:<VS_scalar> + (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand") + (parallel [(match_operand:QI 2 "const_int_operand")]))) + (clobber (match_operand:SI 3 "int_reg_operand"))] + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB + && TARGET_VSX_SMALL_INTEGER && reload_completed" + [(const_int 0)] +{ + rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0])); + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op3 = operands[3]; + HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode); + + emit_move_insn (op3, GEN_INT (offset)); + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_vextu<wd>lx (op0_si, op3, op1)); + else + emit_insn (gen_vextu<wd>rx (op0_si, op3, op1)); + DONE; +}) + +;; Optimize zero extracts to eliminate the AND after the extract. 
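+;; E.g. for "unsigned long u = vec_extract (vuc, n);" the zero_extend can
+;; be folded into the extraction itself, because the ISA 3.0 extract
+;; instructions already produce a zero-extended result, so no separate
+;; masking AND is needed afterwards.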
+(define_insn_and_split "*vsx_extract_<mode>_di_p9" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>") + (zero_extend:DI + (vec_select:<VS_scalar> + (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>") + (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))) + (clobber (match_scratch:SI 3 "=r,X"))] + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB + && TARGET_VSX_SMALL_INTEGER" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 4) + (vec_select:<VS_scalar> + (match_dup 1) + (parallel [(match_dup 2)]))) + (clobber (match_dup 3))])] +{ + operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0])); +}) + +;; Optimize stores to use the ISA 3.0 scalar store instructions +(define_insn_and_split "*vsx_extract_<mode>_store_p9" + [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m") + (vec_select:<VS_scalar> + (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v") + (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))) + (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r")) + (clobber (match_scratch:SI 4 "=X,&r"))] + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB + && TARGET_VSX_SMALL_INTEGER" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 3) + (vec_select:<VS_scalar> + (match_dup 1) + (parallel [(match_dup 2)]))) + (clobber (match_dup 4))]) + (set (match_dup 0) + (match_dup 3))]) + +(define_insn_and_split "*vsx_extract_si" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z") + (vec_select:SI + (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv") + (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")]))) + (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))] + "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT + && (!TARGET_P9_VECTOR || !TARGET_VSX_SMALL_INTEGER)" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx element = operands[2]; + rtx vec_tmp = operands[3]; + int value; + + if (!VECTOR_ELT_ORDER_BIG) + element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element)); + + /* If the value is in the correct position, we can avoid doing the VSPLT<x> + instruction. 
*/ + value = INTVAL (element); + if (value != 1) + { + if (TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER) + { + rtx si_tmp = gen_rtx_REG (SImode, REGNO (vec_tmp)); + emit_insn (gen_vsx_extract_v4si_p9 (si_tmp,src, element)); + } + else + emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element)); + } + else + vec_tmp = src; + + if (MEM_P (operands[0])) + { + if (can_create_pseudo_p ()) + dest = rs6000_address_for_fpconvert (dest); + + if (TARGET_VSX_SMALL_INTEGER) + emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp))); + else + emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp)))); + } + + else if (TARGET_VSX_SMALL_INTEGER) + emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp))); + else + emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)), + gen_rtx_REG (DImode, REGNO (vec_tmp))); + + DONE; +} + [(set_attr "type" "mftgpr,vecperm,fpstore") + (set_attr "length" "8")]) + +(define_insn_and_split "*vsx_extract_<mode>_p8" + [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r") + (vec_select:<VS_scalar> + (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v") + (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")]))) + (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))] + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT + && (!TARGET_P9_VECTOR || !TARGET_VSX_SMALL_INTEGER)" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx element = operands[2]; + rtx vec_tmp = operands[3]; + int value; + + if (!VECTOR_ELT_ORDER_BIG) + element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element)); + + /* If the value is in the correct position, we can avoid doing the VSPLT<x> + instruction. */ + value = INTVAL (element); + if (<MODE>mode == V16QImode) + { + if (value != 7) + emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element)); + else + vec_tmp = src; + } + else if (<MODE>mode == V8HImode) + { + if (value != 3) + emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element)); + else + vec_tmp = src; + } + else + gcc_unreachable (); + + emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)), + gen_rtx_REG (DImode, REGNO (vec_tmp))); + DONE; +} + [(set_attr "type" "mftgpr")]) + +;; Optimize extracting a single scalar element from memory. 
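+;; For example, "short s = vec_extract (*vp, 3);" does not have to load the
+;; whole vector: the split below uses rs6000_adjust_vec_address to form the
+;; address of element 3 and loads just that element with a scalar load.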
+(define_insn_and_split "*vsx_extract_<mode>_load" + [(set (match_operand:<VS_scalar> 0 "register_operand" "=r") + (vec_select:<VS_scalar> + (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m") + (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")]))) + (clobber (match_scratch:DI 3 "=&b"))] + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" + "#" + "&& reload_completed" + [(set (match_dup 0) (match_dup 4))] +{ + operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2], + operands[3], <VS_scalar>mode); +} + [(set_attr "type" "load") + (set_attr "length" "8")]) + +;; Variable V16QI/V8HI/V4SI extract +(define_insn_and_split "vsx_extract_<mode>_var" + [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r") + (unspec:<VS_scalar> + [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m") + (match_operand:DI 2 "gpc_reg_operand" "r,r,r")] + UNSPEC_VSX_EXTRACT)) + (clobber (match_scratch:DI 3 "=r,r,&b")) + (clobber (match_scratch:V2DI 4 "=X,&v,X"))] + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rs6000_split_vec_extract_var (operands[0], operands[1], operands[2], + operands[3], operands[4]); + DONE; +}) + +(define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I:mode>_<SDI:mode>_var" + [(set (match_operand:SDI 0 "gpc_reg_operand" "=r,r,r") + (zero_extend:SDI + (unspec:<VSX_EXTRACT_I:VS_scalar> + [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m") + (match_operand:DI 2 "gpc_reg_operand" "r,r,r")] + UNSPEC_VSX_EXTRACT))) + (clobber (match_scratch:DI 3 "=r,r,&b")) + (clobber (match_scratch:V2DI 4 "=X,&v,X"))] + "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT" + "#" + "&& reload_completed" + [(const_int 0)] +{ + machine_mode smode = <VSX_EXTRACT_I:MODE>mode; + rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])), + operands[1], operands[2], + operands[3], operands[4]); + DONE; +}) + +;; VSX_EXTRACT optimizations +;; Optimize double d = (double) vec_extract (vi, <n>) +;; Get the element into the top position and use XVCVSWDP/XVCVUWDP +(define_insn_and_split "*vsx_extract_si_<uns>float_df" + [(set (match_operand:DF 0 "gpc_reg_operand" "=ws") + (any_float:DF + (vec_select:SI + (match_operand:V4SI 1 "gpc_reg_operand" "v") + (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")])))) + (clobber (match_scratch:V4SI 3 "=v"))] + "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" + "#" + "&& 1" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx element = operands[2]; + rtx v4si_tmp = operands[3]; + int value; + + if (!VECTOR_ELT_ORDER_BIG) + element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element)); + + /* If the value is in the correct position, we can avoid doing the VSPLT<x> + instruction. */ + value = INTVAL (element); + if (value != 0) + { + if (GET_CODE (v4si_tmp) == SCRATCH) + v4si_tmp = gen_reg_rtx (V4SImode); + emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element)); + } + else + v4si_tmp = src; + + emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp)); + DONE; +}) + +;; Optimize <type> f = (<type>) vec_extract (vi, <n>) +;; where <type> is a floating point type that supported by the hardware that is +;; not double. First convert the value to double, and then to the desired +;; type. 
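+;; For instance, "float f = vec_extract (vsi, n);" is handled by splatting
+;; element n into the word that xvcvsxwdp reads, converting that word to
+;; double, and then narrowing the double result to SFmode.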
+(define_insn_and_split "*vsx_extract_si_<uns>float_<mode>" + [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww") + (any_float:VSX_EXTRACT_FL + (vec_select:SI + (match_operand:V4SI 1 "gpc_reg_operand" "v") + (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")])))) + (clobber (match_scratch:V4SI 3 "=v")) + (clobber (match_scratch:DF 4 "=ws"))] + "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" + "#" + "&& 1" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx element = operands[2]; + rtx v4si_tmp = operands[3]; + rtx df_tmp = operands[4]; + int value; + + if (!VECTOR_ELT_ORDER_BIG) + element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element)); + + /* If the value is in the correct position, we can avoid doing the VSPLT<x> + instruction. */ + value = INTVAL (element); + if (value != 0) + { + if (GET_CODE (v4si_tmp) == SCRATCH) + v4si_tmp = gen_reg_rtx (V4SImode); + emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element)); + } + else + v4si_tmp = src; + + if (GET_CODE (df_tmp) == SCRATCH) + df_tmp = gen_reg_rtx (DFmode); + + emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp)); + + if (<MODE>mode == SFmode) + emit_insn (gen_truncdfsf2 (dest, df_tmp)); + else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode)) + emit_insn (gen_extenddftf2_vsx (dest, df_tmp)); + else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode) + && TARGET_FLOAT128_HW) + emit_insn (gen_extenddftf2_hw (dest, df_tmp)); + else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode)) + emit_insn (gen_extenddfif2 (dest, df_tmp)); + else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW) + emit_insn (gen_extenddfkf2_hw (dest, df_tmp)); + else + gcc_unreachable (); + + DONE; +}) + +;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>) +;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE +;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char, +;; vector short or vector unsigned short. 
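+;; E.g. "double d = vec_extract (vsc, 5);" on ISA 3.0 can become a vector
+;; byte extract, a sign extension to DImode and a convert from integer,
+;; all without moving the value through a GPR.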
+(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>" + [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>") + (float:FL_CONV + (vec_select:<VSX_EXTRACT_I:VS_scalar> + (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v") + (parallel [(match_operand:QI 2 "const_int_operand" "n")])))) + (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))] + "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT + && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 3) + (vec_select:<VSX_EXTRACT_I:VS_scalar> + (match_dup 1) + (parallel [(match_dup 2)]))) + (clobber (scratch:SI))]) + (set (match_dup 4) + (sign_extend:DI (match_dup 3))) + (set (match_dup 0) + (float:<FL_CONV:MODE> (match_dup 4)))] +{ + operands[4] = gen_rtx_REG (DImode, REGNO (operands[3])); +}) + +(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>" + [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>") + (unsigned_float:FL_CONV + (vec_select:<VSX_EXTRACT_I:VS_scalar> + (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v") + (parallel [(match_operand:QI 2 "const_int_operand" "n")])))) + (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))] + "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT + && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 3) + (vec_select:<VSX_EXTRACT_I:VS_scalar> + (match_dup 1) + (parallel [(match_dup 2)]))) + (clobber (scratch:SI))]) + (set (match_dup 0) + (float:<FL_CONV:MODE> (match_dup 4)))] +{ + operands[4] = gen_rtx_REG (DImode, REGNO (operands[3])); +}) + +;; V4SI/V8HI/V16QI set operation on ISA 3.0 +(define_insn "vsx_set_<mode>_p9" + [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>") + (unspec:VSX_EXTRACT_I + [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0") + (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>") + (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")] + UNSPEC_VSX_SET))] + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER + && TARGET_UPPER_REGS_DI && TARGET_POWERPC64" +{ + int ele = INTVAL (operands[3]); + int nunits = GET_MODE_NUNITS (<MODE>mode); + + if (!VECTOR_ELT_ORDER_BIG) + ele = nunits - 1 - ele; + + operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele); + if (<MODE>mode == V4SImode) + return "xxinsertw %x0,%x2,%3"; + else + return "vinsert<wd> %0,%2,%3"; +} + [(set_attr "type" "vecperm")]) + +;; Expanders for builtins +(define_expand "vsx_mergel_<mode>" + [(use (match_operand:VSX_D 0 "vsx_register_operand" "")) + (use (match_operand:VSX_D 1 "vsx_register_operand" "")) + (use (match_operand:VSX_D 2 "vsx_register_operand" ""))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. 
*/ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + v = gen_rtvec (2, GEN_INT (0), GEN_INT (2)); + x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]); + } + else + { + v = gen_rtvec (2, GEN_INT (1), GEN_INT (3)); + x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]); + } + + x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +(define_expand "vsx_mergeh_<mode>" + [(use (match_operand:VSX_D 0 "vsx_register_operand" "")) + (use (match_operand:VSX_D 1 "vsx_register_operand" "")) + (use (match_operand:VSX_D 2 "vsx_register_operand" ""))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + rtvec v; + rtx x; + + /* Special handling for LE with -maltivec=be. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + v = gen_rtvec (2, GEN_INT (1), GEN_INT (3)); + x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]); + } + else + { + v = gen_rtvec (2, GEN_INT (0), GEN_INT (2)); + x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]); + } + + x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +;; V2DF/V2DI splat +;; We separate the register splat insn from the memory splat insn to force the +;; register allocator to generate the indexed form of the SPLAT when it is +;; given an offsettable memory reference. Otherwise, if the register and +;; memory insns were combined into a single insn, the register allocator will +;; load the value into a register, and then do a double word permute. +(define_expand "vsx_splat_<mode>" + [(set (match_operand:VSX_D 0 "vsx_register_operand") + (vec_duplicate:VSX_D + (match_operand:<VS_scalar> 1 "input_operand")))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + rtx op1 = operands[1]; + if (MEM_P (op1)) + operands[1] = rs6000_address_for_fpconvert (op1); + else if (!REG_P (op1)) + op1 = force_reg (<VSX_D:VS_scalar>mode, op1); +}) + +(define_insn "vsx_splat_<mode>_reg" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we") + (vec_duplicate:VSX_D + (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))] + "VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxpermdi %x0,%x1,%x1,0 + mtvsrdd %x0,%1,%1" + [(set_attr "type" "vecperm")]) + +(define_insn "vsx_splat_<VSX_D:mode>_mem" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>") + (vec_duplicate:VSX_D + (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))] + "VECTOR_MEM_VSX_P (<MODE>mode)" + "lxvdsx %x0,%y1" + [(set_attr "type" "vecload")]) + +;; V4SI splat support +(define_insn "vsx_splat_v4si" + [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we") + (vec_duplicate:V4SI + (match_operand:SI 1 "splat_input_operand" "r,Z")))] + "TARGET_P9_VECTOR" + "@ + mtvsrws %x0,%1 + lxvwsx %x0,%y1" + [(set_attr "type" "vecperm,vecload")]) + +;; SImode is not currently allowed in vector registers. 
This pattern +;; allows us to use direct move to get the value in a vector register +;; so that we can use XXSPLTW +(define_insn "vsx_splat_v4si_di" + [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we") + (vec_duplicate:V4SI + (truncate:SI + (match_operand:DI 1 "gpc_reg_operand" "wj,r"))))] + "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" + "@ + xxspltw %x0,%x1,1 + mtvsrws %x0,%1" + [(set_attr "type" "vecperm")]) + +;; V4SF splat (ISA 3.0) +(define_insn_and_split "vsx_splat_v4sf" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa") + (vec_duplicate:V4SF + (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))] + "TARGET_P9_VECTOR" + "@ + lxvwsx %x0,%y1 + # + mtvsrws %x0,%1" + "&& reload_completed && vsx_register_operand (operands[1], SFmode)" + [(set (match_dup 0) + (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN)) + (set (match_dup 0) + (unspec:V4SF [(match_dup 0) + (const_int 0)] UNSPEC_VSX_XXSPLTW))] + "" + [(set_attr "type" "vecload,vecperm,mftgpr") + (set_attr "length" "4,8,4")]) + +;; V4SF/V4SI splat from a vector element +(define_insn "vsx_xxspltw_<mode>" + [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>") + (vec_duplicate:VSX_W + (vec_select:<VS_scalar> + (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>") + (parallel + [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + if (!BYTES_BIG_ENDIAN) + operands[2] = GEN_INT (3 - INTVAL (operands[2])); + + return "xxspltw %x0,%x1,%2"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "vsx_xxspltw_<mode>_direct" + [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>") + (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "<VSa>") + (match_operand:QI 2 "u5bit_cint_operand" "i")] + UNSPEC_VSX_XXSPLTW))] + "VECTOR_MEM_VSX_P (<MODE>mode)" + "xxspltw %x0,%x1,%2" + [(set_attr "type" "vecperm")]) + +;; V16QI/V8HI splat support on ISA 2.07 +(define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di" + [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v") + (vec_duplicate:VSX_SPLAT_I + (truncate:<VS_scalar> + (match_operand:DI 1 "altivec_register_operand" "v"))))] + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" + "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>" + [(set_attr "type" "vecperm")]) + +;; V2DF/V2DI splat for use by vec_splat builtin +(define_insn "vsx_xxspltd_<mode>" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") + (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa") + (match_operand:QI 2 "u5bit_cint_operand" "i")] + UNSPEC_VSX_XXSPLTD))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + if ((VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 0) + || (!VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 1)) + return "xxpermdi %x0,%x1,%x1,0"; + else + return "xxpermdi %x0,%x1,%x1,3"; +} + [(set_attr "type" "vecperm")]) + +;; V4SF/V4SI interleave +(define_insn "vsx_xxmrghw_<mode>" + [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>") + (vec_select:VSX_W + (vec_concat:<VS_double> + (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>") + (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + if (BYTES_BIG_ENDIAN) + return "xxmrghw %x0,%x1,%x2"; + else + return "xxmrglw %x0,%x2,%x1"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "vsx_xxmrglw_<mode>" + [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>") + (vec_select:VSX_W + (vec_concat:<VS_double> + 
(match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>") + (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + if (BYTES_BIG_ENDIAN) + return "xxmrglw %x0,%x1,%x2"; + else + return "xxmrghw %x0,%x2,%x1"; +} + [(set_attr "type" "vecperm")]) + +;; Shift left double by word immediate +(define_insn "vsx_xxsldwi_<mode>" + [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>") + (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>") + (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>") + (match_operand:QI 3 "u5bit_cint_operand" "i")] + UNSPEC_VSX_SLDWI))] + "VECTOR_MEM_VSX_P (<MODE>mode)" + "xxsldwi %x0,%x1,%x2,%3" + [(set_attr "type" "vecperm")]) + + +;; Vector reduction insns and splitters + +(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df" + [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa") + (VEC_reduc:V2DF + (vec_concat:V2DF + (vec_select:DF + (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa") + (parallel [(const_int 1)])) + (vec_select:DF + (match_dup 1) + (parallel [(const_int 0)]))) + (match_dup 1))) + (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "#" + "" + [(const_int 0)] + " +{ + rtx tmp = (GET_CODE (operands[2]) == SCRATCH) + ? gen_reg_rtx (V2DFmode) + : operands[2]; + emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx)); + emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1])); + DONE; +}" + [(set_attr "length" "8") + (set_attr "type" "veccomplex")]) + +(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf" + [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa") + (VEC_reduc:V4SF + (unspec:V4SF [(const_int 0)] UNSPEC_REDUC) + (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))) + (clobber (match_scratch:V4SF 2 "=&wf,&wa")) + (clobber (match_scratch:V4SF 3 "=&wf,&wa"))] + "VECTOR_UNIT_VSX_P (V4SFmode)" + "#" + "" + [(const_int 0)] + " +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx tmp2, tmp3, tmp4; + + if (can_create_pseudo_p ()) + { + tmp2 = gen_reg_rtx (V4SFmode); + tmp3 = gen_reg_rtx (V4SFmode); + tmp4 = gen_reg_rtx (V4SFmode); + } + else + { + tmp2 = operands[2]; + tmp3 = operands[3]; + tmp4 = tmp2; + } + + emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx)); + emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1)); + emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3))); + emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3)); + DONE; +}" + [(set_attr "length" "16") + (set_attr "type" "veccomplex")]) + +;; Combiner patterns with the vector reduction patterns that knows we can get +;; to the top element of the V2DF array without doing an extract. + +(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar" + [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws") + (vec_select:DF + (VEC_reduc:V2DF + (vec_concat:V2DF + (vec_select:DF + (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa") + (parallel [(const_int 1)])) + (vec_select:DF + (match_dup 1) + (parallel [(const_int 0)]))) + (match_dup 1)) + (parallel [(const_int 1)]))) + (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "#" + "" + [(const_int 0)] + " +{ + rtx hi = gen_highpart (DFmode, operands[1]); + rtx lo = (GET_CODE (operands[2]) == SCRATCH) + ? 
gen_reg_rtx (DFmode) + : operands[2]; + + emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx)); + emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo)); + DONE; +}" + [(set_attr "length" "8") + (set_attr "type" "veccomplex")]) + +(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar" + [(set (match_operand:SF 0 "vfloat_operand" "=f,?f") + (vec_select:SF + (VEC_reduc:V4SF + (unspec:V4SF [(const_int 0)] UNSPEC_REDUC) + (match_operand:V4SF 1 "vfloat_operand" "wf,wa")) + (parallel [(const_int 3)]))) + (clobber (match_scratch:V4SF 2 "=&wf,&wa")) + (clobber (match_scratch:V4SF 3 "=&wf,&wa")) + (clobber (match_scratch:V4SF 4 "=0,0"))] + "VECTOR_UNIT_VSX_P (V4SFmode)" + "#" + "" + [(const_int 0)] + " +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx tmp2, tmp3, tmp4, tmp5; + + if (can_create_pseudo_p ()) + { + tmp2 = gen_reg_rtx (V4SFmode); + tmp3 = gen_reg_rtx (V4SFmode); + tmp4 = gen_reg_rtx (V4SFmode); + tmp5 = gen_reg_rtx (V4SFmode); + } + else + { + tmp2 = operands[2]; + tmp3 = operands[3]; + tmp4 = tmp2; + tmp5 = operands[4]; + } + + emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx)); + emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1)); + emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3))); + emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3)); + emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5)); + DONE; +}" + [(set_attr "length" "20") + (set_attr "type" "veccomplex")]) + + +;; Power8 Vector fusion. The fused ops must be physically adjacent. +(define_peephole + [(set (match_operand:P 0 "base_reg_operand" "") + (match_operand:P 1 "short_cint_operand" "")) + (set (match_operand:VSX_M 2 "vsx_register_operand" "") + (mem:VSX_M (plus:P (match_dup 0) + (match_operand:P 3 "int_reg_operand" ""))))] + "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR" + "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M:VSm>x %x2,%0,%3" + [(set_attr "length" "8") + (set_attr "type" "vecload")]) + +(define_peephole + [(set (match_operand:P 0 "base_reg_operand" "") + (match_operand:P 1 "short_cint_operand" "")) + (set (match_operand:VSX_M 2 "vsx_register_operand" "") + (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand" "") + (match_dup 0))))] + "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR" + "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M:VSm>x %x2,%0,%3" + [(set_attr "length" "8") + (set_attr "type" "vecload")]) + + +;; ISA 3.0 vector extend sign support + +(define_insn "vsx_sign_extend_qi_<mode>" + [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v") + (unspec:VSINT_84 + [(match_operand:V16QI 1 "vsx_register_operand" "v")] + UNSPEC_VSX_SIGN_EXTEND))] + "TARGET_P9_VECTOR" + "vextsb2<wd> %0,%1" + [(set_attr "type" "vecexts")]) + +(define_insn "vsx_sign_extend_hi_<mode>" + [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v") + (unspec:VSINT_84 + [(match_operand:V8HI 1 "vsx_register_operand" "v")] + UNSPEC_VSX_SIGN_EXTEND))] + "TARGET_P9_VECTOR" + "vextsh2<wd> %0,%1" + [(set_attr "type" "vecexts")]) + +(define_insn "*vsx_sign_extend_si_v2di" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=v") + (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")] + UNSPEC_VSX_SIGN_EXTEND))] + "TARGET_P9_VECTOR" + "vextsw2d %0,%1" + [(set_attr "type" "vecexts")]) + + +;; ISA 3.0 Binary Floating-Point Support + +;; VSX Scalar Extract Exponent Double-Precision +(define_insn "xsxexpdp" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_SXEXPDP))] + 
"TARGET_P9_VECTOR && TARGET_64BIT" + "xsxexpdp %0,%x1" + [(set_attr "type" "integer")]) + +;; VSX Scalar Extract Significand Double-Precision +(define_insn "xsxsigdp" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_SXSIGDP))] + "TARGET_P9_VECTOR && TARGET_64BIT" + "xsxsigdp %0,%x1" + [(set_attr "type" "integer")]) + +;; VSX Scalar Insert Exponent Double-Precision +(define_insn "xsiexpdp" + [(set (match_operand:DF 0 "vsx_register_operand" "=wa") + (unspec:DF [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "register_operand" "r")] + UNSPEC_VSX_SIEXPDP))] + "TARGET_P9_VECTOR && TARGET_64BIT" + "xsiexpdp %x0,%1,%2" + [(set_attr "type" "fpsimple")]) + +;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument +(define_insn "xsiexpdpf" + [(set (match_operand:DF 0 "vsx_register_operand" "=wa") + (unspec:DF [(match_operand:DF 1 "register_operand" "r") + (match_operand:DI 2 "register_operand" "r")] + UNSPEC_VSX_SIEXPDP))] + "TARGET_P9_VECTOR && TARGET_64BIT" + "xsiexpdp %x0,%1,%2" + [(set_attr "type" "fpsimple")]) + +;; VSX Scalar Compare Exponents Double-Precision +(define_expand "xscmpexpdp_<code>" + [(set (match_dup 3) + (compare:CCFP + (unspec:DF + [(match_operand:DF 1 "vsx_register_operand" "wa") + (match_operand:DF 2 "vsx_register_operand" "wa")] + UNSPEC_VSX_SCMPEXPDP) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (CMP_TEST:SI (match_dup 3) + (const_int 0)))] + "TARGET_P9_VECTOR" +{ + operands[3] = gen_reg_rtx (CCFPmode); +}) + +(define_insn "*xscmpexpdp" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (compare:CCFP + (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa") + (match_operand:DF 2 "vsx_register_operand" "wa")] + UNSPEC_VSX_SCMPEXPDP) + (match_operand:SI 3 "zero_constant" "j")))] + "TARGET_P9_VECTOR" + "xscmpexpdp %0,%x1,%x2" + [(set_attr "type" "fpcompare")]) + +;; VSX Scalar Test Data Class Double- and Single-Precision +;; (The lt bit is set if operand 1 is negative. The eq bit is set +;; if any of the conditions tested by operand 2 are satisfied. +;; The gt and unordered bits are cleared to zero.) +(define_expand "xststdc<Fvsx>" + [(set (match_dup 3) + (compare:CCFP + (unspec:SFDF + [(match_operand:SFDF 1 "vsx_register_operand" "wa") + (match_operand:SI 2 "u7bit_cint_operand" "n")] + UNSPEC_VSX_STSTDC) + (match_dup 4))) + (set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (match_dup 3) + (const_int 0)))] + "TARGET_P9_VECTOR" +{ + operands[3] = gen_reg_rtx (CCFPmode); + operands[4] = CONST0_RTX (SImode); +}) + +;; The VSX Scalar Test Data Class Double- and Single-Precision +;; instruction may also be used to test for negative value. 
+(define_expand "xststdcneg<Fvsx>" + [(set (match_dup 2) + (compare:CCFP + (unspec:SFDF + [(match_operand:SFDF 1 "vsx_register_operand" "wa") + (const_int 0)] + UNSPEC_VSX_STSTDC) + (match_dup 3))) + (set (match_operand:SI 0 "register_operand" "=r") + (lt:SI (match_dup 2) + (const_int 0)))] + "TARGET_P9_VECTOR" +{ + operands[2] = gen_reg_rtx (CCFPmode); + operands[3] = CONST0_RTX (SImode); +}) + +(define_insn "*xststdc<Fvsx>" + [(set (match_operand:CCFP 0 "" "=y") + (compare:CCFP + (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa") + (match_operand:SI 2 "u7bit_cint_operand" "n")] + UNSPEC_VSX_STSTDC) + (match_operand:SI 3 "zero_constant" "j")))] + "TARGET_P9_VECTOR" + "xststdc<Fvsx> %0,%x1,%2" + [(set_attr "type" "fpcompare")]) + +;; VSX Vector Extract Exponent Double and Single Precision +(define_insn "xvxexp<VSs>" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") + (unspec:VSX_F + [(match_operand:VSX_F 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_VXEXP))] + "TARGET_P9_VECTOR" + "xvxexp<VSs> %x0,%x1" + [(set_attr "type" "vecsimple")]) + +;; VSX Vector Extract Significand Double and Single Precision +(define_insn "xvxsig<VSs>" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") + (unspec:VSX_F + [(match_operand:VSX_F 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_VXSIG))] + "TARGET_P9_VECTOR" + "xvxsig<VSs> %x0,%x1" + [(set_attr "type" "vecsimple")]) + +;; VSX Vector Insert Exponent Double and Single Precision +(define_insn "xviexp<VSs>" + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") + (unspec:VSX_F + [(match_operand:VSX_F 1 "vsx_register_operand" "wa") + (match_operand:VSX_F 2 "vsx_register_operand" "wa")] + UNSPEC_VSX_VIEXP))] + "TARGET_P9_VECTOR" + "xviexp<VSs> %x0,%x1,%x2" + [(set_attr "type" "vecsimple")]) + +;; VSX Vector Test Data Class Double and Single Precision +;; The corresponding elements of the result vector are all ones +;; if any of the conditions tested by operand 3 are satisfied. +(define_insn "xvtstdc<VSs>" + [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa") + (unspec:<VSI> + [(match_operand:VSX_F 1 "vsx_register_operand" "wa") + (match_operand:SI 2 "u7bit_cint_operand" "n")] + UNSPEC_VSX_VTSTDC))] + "TARGET_P9_VECTOR" + "xvtstdc<VSs> %x0,%x1,%2" + [(set_attr "type" "vecsimple")]) + +;; ISA 3.0 String Operations Support + +;; Compare vectors producing a vector result and a predicate, setting CR6 +;; to indicate a combined status. This pattern matches v16qi, v8hi, and +;; v4si modes. It does not match v2df, v4sf, or v2di modes. There's no +;; need to match v4sf, v2df, or v2di modes because those are expanded +;; to use Power8 instructions. +(define_insn "*vsx_ne_<mode>_p" + [(set (reg:CC CR6_REGNO) + (unspec:CC + [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v") + (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))] + UNSPEC_PREDICATE)) + (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v") + (ne:VSX_EXTRACT_I (match_dup 1) + (match_dup 2)))] + "TARGET_P9_VECTOR" + "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "*vector_nez_<mode>_p" + [(set (reg:CC CR6_REGNO) + (unspec:CC [(unspec:VI + [(match_operand:VI 1 "gpc_reg_operand" "v") + (match_operand:VI 2 "gpc_reg_operand" "v")] + UNSPEC_NEZ_P)] + UNSPEC_PREDICATE)) + (set (match_operand:VI 0 "gpc_reg_operand" "=v") + (unspec:VI [(match_dup 1) + (match_dup 2)] + UNSPEC_NEZ_P))] + "TARGET_P9_VECTOR" + "vcmpnez<VSX_EXTRACT_WIDTH>. 
%0,%1,%2" + [(set_attr "type" "vecsimple")]) + +;; Load VSX Vector with Length +(define_expand "lxvl" + [(set (match_dup 3) + (match_operand:DI 2 "register_operand")) + (set (match_operand:V16QI 0 "vsx_register_operand") + (unspec:V16QI + [(match_operand:DI 1 "gpc_reg_operand") + (match_dup 3)] + UNSPEC_LXVL))] + "TARGET_P9_VECTOR && TARGET_64BIT" +{ + operands[3] = gen_reg_rtx (DImode); +}) + +(define_insn "*lxvl" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (unspec:V16QI + [(match_operand:DI 1 "gpc_reg_operand" "b") + (match_operand:DI 2 "register_operand" "+r")] + UNSPEC_LXVL))] + "TARGET_P9_VECTOR && TARGET_64BIT" + "sldi %2,%2, 56\; lxvl %x0,%1,%2" + [(set_attr "length" "8") + (set_attr "type" "vecload")]) + +;; Store VSX Vector with Length +(define_expand "stxvl" + [(set (match_dup 3) + (match_operand:DI 2 "register_operand")) + (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand")) + (unspec:V16QI + [(match_operand:V16QI 0 "vsx_register_operand") + (match_dup 3)] + UNSPEC_STXVL))] + "TARGET_P9_VECTOR && TARGET_64BIT" +{ + operands[3] = gen_reg_rtx (DImode); +}) + +(define_insn "*stxvl" + [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b")) + (unspec:V16QI + [(match_operand:V16QI 0 "vsx_register_operand" "wa") + (match_operand:DI 2 "register_operand" "+r")] + UNSPEC_STXVL))] + "TARGET_P9_VECTOR && TARGET_64BIT" + "sldi %2,%2\;stxvl %x0,%1,%2" + [(set_attr "length" "8") + (set_attr "type" "vecstore")]) + +;; Vector Compare Not Equal Byte +(define_insn "vcmpneb" + [(set (match_operand:V16QI 0 "altivec_register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "altivec_register_operand" "v") + (match_operand:V16QI 2 "altivec_register_operand" "v")] + UNSPEC_VCMPNEB))] + "TARGET_P9_VECTOR" + "vcmpneb %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +;; Vector Compare Not Equal or Zero Byte +(define_insn "vcmpnezb" + [(set (match_operand:V16QI 0 "altivec_register_operand" "=v") + (unspec:V16QI + [(match_operand:V16QI 1 "altivec_register_operand" "v") + (match_operand:V16QI 2 "altivec_register_operand" "v")] + UNSPEC_VCMPNEZB))] + "TARGET_P9_VECTOR" + "vcmpnezb %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +;; Vector Compare Not Equal Half Word +(define_insn "vcmpneh" + [(set (match_operand:V8HI 0 "altivec_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v") + (match_operand:V8HI 2 "altivec_register_operand" "v")] + UNSPEC_VCMPNEH))] + "TARGET_P9_VECTOR" + "vcmpneh %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +;; Vector Compare Not Equal or Zero Half Word +(define_insn "vcmpnezh" + [(set (match_operand:V8HI 0 "altivec_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v") + (match_operand:V8HI 2 "altivec_register_operand" "v")] + UNSPEC_VCMPNEZH))] + "TARGET_P9_VECTOR" + "vcmpnezh %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +;; Vector Compare Not Equal Word +(define_insn "vcmpnew" + [(set (match_operand:V4SI 0 "altivec_register_operand" "=v") + (unspec:V4SI + [(match_operand:V4SI 1 "altivec_register_operand" "v") + (match_operand:V4SI 2 "altivec_register_operand" "v")] + UNSPEC_VCMPNEH))] + "TARGET_P9_VECTOR" + "vcmpnew %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +;; Vector Compare Not Equal or Zero Word +(define_insn "vcmpnezw" + [(set (match_operand:V4SI 0 "altivec_register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v") + (match_operand:V4SI 2 "altivec_register_operand" "v")] + UNSPEC_VCMPNEZW))] + "TARGET_P9_VECTOR" + "vcmpnezw 
%0,%1,%2" + [(set_attr "type" "vecsimple")]) + +;; Vector Count Leading Zero Least-Significant Bits Byte +(define_insn "vclzlsbb" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI + [(match_operand:V16QI 1 "altivec_register_operand" "v")] + UNSPEC_VCLZLSBB))] + "TARGET_P9_VECTOR" + "vclzlsbb %0,%1" + [(set_attr "type" "vecsimple")]) + +;; Vector Count Trailing Zero Least-Significant Bits Byte +(define_insn "vctzlsbb" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI + [(match_operand:V16QI 1 "altivec_register_operand" "v")] + UNSPEC_VCTZLSBB))] + "TARGET_P9_VECTOR" + "vctzlsbb %0,%1" + [(set_attr "type" "vecsimple")]) + +;; Vector Extract Unsigned Byte Left-Indexed +(define_insn "vextublx" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI + [(match_operand:SI 1 "register_operand" "r") + (match_operand:V16QI 2 "altivec_register_operand" "v")] + UNSPEC_VEXTUBLX))] + "TARGET_P9_VECTOR" + "vextublx %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +;; Vector Extract Unsigned Byte Right-Indexed +(define_insn "vextubrx" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI + [(match_operand:SI 1 "register_operand" "r") + (match_operand:V16QI 2 "altivec_register_operand" "v")] + UNSPEC_VEXTUBRX))] + "TARGET_P9_VECTOR" + "vextubrx %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +;; Vector Extract Unsigned Half Word Left-Indexed +(define_insn "vextuhlx" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI + [(match_operand:SI 1 "register_operand" "r") + (match_operand:V8HI 2 "altivec_register_operand" "v")] + UNSPEC_VEXTUHLX))] + "TARGET_P9_VECTOR" + "vextuhlx %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +;; Vector Extract Unsigned Half Word Right-Indexed +(define_insn "vextuhrx" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI + [(match_operand:SI 1 "register_operand" "r") + (match_operand:V8HI 2 "altivec_register_operand" "v")] + UNSPEC_VEXTUHRX))] + "TARGET_P9_VECTOR" + "vextuhrx %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +;; Vector Extract Unsigned Word Left-Indexed +(define_insn "vextuwlx" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI + [(match_operand:SI 1 "register_operand" "r") + (match_operand:V4SI 2 "altivec_register_operand" "v")] + UNSPEC_VEXTUWLX))] + "TARGET_P9_VECTOR" + "vextuwlx %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +;; Vector Extract Unsigned Word Right-Indexed +(define_insn "vextuwrx" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI + [(match_operand:SI 1 "register_operand" "r") + (match_operand:V4SI 2 "altivec_register_operand" "v")] + UNSPEC_VEXTUWRX))] + "TARGET_P9_VECTOR" + "vextuwrx %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +;; Vector insert/extract word at arbitrary byte values. Note, the little +;; endian version needs to adjust the byte number, and the V4SI element in +;; vinsert4b. 
+(define_expand "vextract4b" + [(set (match_operand:DI 0 "gpc_reg_operand") + (unspec:DI [(match_operand:V16QI 1 "vsx_register_operand") + (match_operand:QI 2 "const_0_to_12_operand")] + UNSPEC_XXEXTRACTUW))] + "TARGET_P9_VECTOR" +{ + if (!VECTOR_ELT_ORDER_BIG) + operands[2] = GEN_INT (12 - INTVAL (operands[2])); +}) + +(define_insn_and_split "*vextract4b_internal" + [(set (match_operand:DI 0 "gpc_reg_operand" "=wj,r") + (unspec:DI [(match_operand:V16QI 1 "vsx_register_operand" "wa,v") + (match_operand:QI 2 "const_0_to_12_operand" "n,n")] + UNSPEC_XXEXTRACTUW))] + "TARGET_P9_VECTOR" + "@ + xxextractuw %x0,%x1,%2 + #" + "&& reload_completed && int_reg_operand (operands[0], DImode)" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op0_si = gen_rtx_REG (SImode, REGNO (op0)); + rtx op1_v4si = gen_rtx_REG (V4SImode, REGNO (op1)); + + emit_move_insn (op0, op2); + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_vextuwlx (op0_si, op0_si, op1_v4si)); + else + emit_insn (gen_vextuwrx (op0_si, op0_si, op1_v4si)); + DONE; +} + [(set_attr "type" "vecperm")]) + +(define_expand "vinsert4b" + [(set (match_operand:V16QI 0 "vsx_register_operand") + (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand") + (match_operand:V16QI 2 "vsx_register_operand") + (match_operand:QI 3 "const_0_to_12_operand")] + UNSPEC_XXINSERTW))] + "TARGET_P9_VECTOR" +{ + if (!VECTOR_ELT_ORDER_BIG) + { + rtx op1 = operands[1]; + rtx v4si_tmp = gen_reg_rtx (V4SImode); + emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx)); + operands[1] = v4si_tmp; + operands[3] = GEN_INT (12 - INTVAL (operands[3])); + } +}) + +(define_insn "*vinsert4b_internal" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa") + (match_operand:V16QI 2 "vsx_register_operand" "0") + (match_operand:QI 3 "const_0_to_12_operand" "n")] + UNSPEC_XXINSERTW))] + "TARGET_P9_VECTOR" + "xxinsertw %x0,%x1,%3" + [(set_attr "type" "vecperm")]) + +(define_expand "vinsert4b_di" + [(set (match_operand:V16QI 0 "vsx_register_operand") + (unspec:V16QI [(match_operand:DI 1 "vsx_register_operand") + (match_operand:V16QI 2 "vsx_register_operand") + (match_operand:QI 3 "const_0_to_12_operand")] + UNSPEC_XXINSERTW))] + "TARGET_P9_VECTOR" +{ + if (!VECTOR_ELT_ORDER_BIG) + operands[3] = GEN_INT (12 - INTVAL (operands[3])); +}) + +(define_insn "*vinsert4b_di_internal" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (unspec:V16QI [(match_operand:DI 1 "vsx_register_operand" "wj") + (match_operand:V16QI 2 "vsx_register_operand" "0") + (match_operand:QI 3 "const_0_to_12_operand" "n")] + UNSPEC_XXINSERTW))] + "TARGET_P9_VECTOR" + "xxinsertw %x0,%x1,%3" + [(set_attr "type" "vecperm")]) + + +;; Support for ISA 3.0 vector byte reverse + +;; Swap all bytes with in a vector +(define_insn "p9_xxbrq_v1ti" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa") + (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))] + "TARGET_P9_VECTOR" + "xxbrq %x0,%x1" + [(set_attr "type" "vecperm")]) + +(define_expand "p9_xxbrq_v16qi" + [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa")) + (use (match_operand:V16QI 1 "vsx_register_operand" "=wa"))] + "TARGET_P9_VECTOR" +{ + rtx op0 = gen_lowpart (V1TImode, operands[0]); + rtx op1 = gen_lowpart (V1TImode, operands[1]); + emit_insn (gen_p9_xxbrq_v1ti (op0, op1)); + DONE; +}) + +;; Swap all bytes in each 64-bit element +(define_insn "p9_xxbrd_<mode>" + [(set (match_operand:VSX_D 0 
"vsx_register_operand" "=wa") + (bswap:VSX_D (match_operand:VSX_D 1 "vsx_register_operand" "wa")))] + "TARGET_P9_VECTOR" + "xxbrd %x0,%x1" + [(set_attr "type" "vecperm")]) + +;; Swap all bytes in each 32-bit element +(define_insn "p9_xxbrw_<mode>" + [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") + (bswap:VSX_W (match_operand:VSX_W 1 "vsx_register_operand" "wa")))] + "TARGET_P9_VECTOR" + "xxbrw %x0,%x1" + [(set_attr "type" "vecperm")]) + +;; Swap all bytes in each 16-bit element +(define_insn "p9_xxbrh_v8hi" + [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") + (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))] + "TARGET_P9_VECTOR" + "xxbrh %x0,%x1" + [(set_attr "type" "vecperm")]) + + +;; Operand numbers for the following peephole2 +(define_constants + [(SFBOOL_TMP_GPR 0) ;; GPR temporary + (SFBOOL_TMP_VSX 1) ;; vector temporary + (SFBOOL_MFVSR_D 2) ;; move to gpr dest + (SFBOOL_MFVSR_A 3) ;; move to gpr src + (SFBOOL_BOOL_D 4) ;; and/ior/xor dest + (SFBOOL_BOOL_A1 5) ;; and/ior/xor arg1 + (SFBOOL_BOOL_A2 6) ;; and/ior/xor arg1 + (SFBOOL_SHL_D 7) ;; shift left dest + (SFBOOL_SHL_A 8) ;; shift left arg + (SFBOOL_MTVSR_D 9) ;; move to vecter dest + (SFBOOL_BOOL_A_DI 10) ;; SFBOOL_BOOL_A1/A2 as DImode + (SFBOOL_TMP_VSX_DI 11) ;; SFBOOL_TMP_VSX as DImode + (SFBOOL_MTVSR_D_V4SF 12)]) ;; SFBOOL_MTVSRD_D as V4SFmode + +;; Attempt to optimize some common GLIBC operations using logical operations to +;; pick apart SFmode operations. For example, there is code from e_powf.c +;; after macro expansion that looks like: +;; +;; typedef union { +;; float value; +;; uint32_t word; +;; } ieee_float_shape_type; +;; +;; float t1; +;; int32_t is; +;; +;; do { +;; ieee_float_shape_type gf_u; +;; gf_u.value = (t1); +;; (is) = gf_u.word; +;; } while (0); +;; +;; do { +;; ieee_float_shape_type sf_u; +;; sf_u.word = (is & 0xfffff000); +;; (t1) = sf_u.value; +;; } while (0); +;; +;; +;; This would result in two direct move operations (convert to memory format, +;; direct move to GPR, do the AND operation, direct move to VSX, convert to +;; scalar format). With this peephole, we eliminate the direct move to the +;; GPR, and instead move the integer mask value to the vector register after a +;; shift and do the VSX logical operation. 
+ +;; The insns for dealing with SFmode in GPR registers looks like: +;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN)) +;; +;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX)) +;; +;; (set (reg:DI reg3) (lshiftrt:DI (reg:DI reg3) (const_int 32))) +;; +;; (set (reg:DI reg5) (and:DI (reg:DI reg3) (reg:DI reg4))) +;; +;; (set (reg:DI reg6) (ashift:DI (reg:DI reg5) (const_int 32))) +;; +;; (set (reg:SF reg7) (unspec:SF [(reg:DI reg6)] UNSPEC_P8V_MTVSRD)) +;; +;; (set (reg:SF reg7) (unspec:SF [(reg:SF reg7)] UNSPEC_VSX_CVSPDPN)) + +(define_peephole2 + [(match_scratch:DI SFBOOL_TMP_GPR "r") + (match_scratch:V4SF SFBOOL_TMP_VSX "wa") + + ;; MFVSRD + (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand") + (unspec:DI [(match_operand:V4SF SFBOOL_MFVSR_A "vsx_register_operand")] + UNSPEC_P8V_RELOAD_FROM_VSX)) + + ;; SRDI + (set (match_dup SFBOOL_MFVSR_D) + (lshiftrt:DI (match_dup SFBOOL_MFVSR_D) + (const_int 32))) + + ;; AND/IOR/XOR operation on int + (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand") + (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand") + (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand"))) + + ;; SLDI + (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand") + (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand") + (const_int 32))) + + ;; MTVSRD + (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand") + (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))] + + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE + /* The REG_P (xxx) tests prevents SUBREG's, which allows us to use REGNO + to compare registers, when the mode is different. */ + && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D]) + && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D]) + && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D]) + && (REG_P (operands[SFBOOL_BOOL_A2]) + || CONST_INT_P (operands[SFBOOL_BOOL_A2])) + && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D]) + || peep2_reg_dead_p (3, operands[SFBOOL_MFVSR_D])) + && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1]) + || (REG_P (operands[SFBOOL_BOOL_A2]) + && REGNO (operands[SFBOOL_MFVSR_D]) + == REGNO (operands[SFBOOL_BOOL_A2]))) + && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A]) + && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D]) + || peep2_reg_dead_p (4, operands[SFBOOL_BOOL_D])) + && peep2_reg_dead_p (5, operands[SFBOOL_SHL_D])" + [(set (match_dup SFBOOL_TMP_GPR) + (ashift:DI (match_dup SFBOOL_BOOL_A_DI) + (const_int 32))) + + (set (match_dup SFBOOL_TMP_VSX_DI) + (match_dup SFBOOL_TMP_GPR)) + + (set (match_dup SFBOOL_MTVSR_D_V4SF) + (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A) + (match_dup SFBOOL_TMP_VSX)))] +{ + rtx bool_a1 = operands[SFBOOL_BOOL_A1]; + rtx bool_a2 = operands[SFBOOL_BOOL_A2]; + int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]); + int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]); + int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]); + + if (CONST_INT_P (bool_a2)) + { + rtx tmp_gpr = operands[SFBOOL_TMP_GPR]; + emit_move_insn (tmp_gpr, bool_a2); + operands[SFBOOL_BOOL_A_DI] = tmp_gpr; + } + else + { + int regno_bool_a1 = REGNO (bool_a1); + int regno_bool_a2 = REGNO (bool_a2); + int regno_bool_a = (regno_mfvsr_d == regno_bool_a1 + ? 
regno_bool_a2 : regno_bool_a1); + operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a); + } + + operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx); + operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d); +}) diff --git a/gcc/config/powerpcspe/vxworks.h b/gcc/config/powerpcspe/vxworks.h new file mode 100644 index 000000000000..ccf6a666752e --- /dev/null +++ b/gcc/config/powerpcspe/vxworks.h @@ -0,0 +1,147 @@ +/* Definitions of target machine for GNU compiler. Vxworks PowerPC version. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + Contributed by CodeSourcery, LLC. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* Note to future editors: VxWorks is mostly an EABI target. We do + not use rs6000/eabi.h because we would have to override most of + it anyway. However, if you change that file, consider making + analogous changes here too. */ + +/* CPP predefined macros. */ + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__ppc"); \ + builtin_define ("__PPC__"); \ + builtin_define ("__EABI__"); \ + builtin_define ("__ELF__"); \ + if (!TARGET_SOFT_FLOAT) \ + builtin_define ("__hardfp"); \ + \ + /* C89 namespace violation! */ \ + builtin_define ("CPU_FAMILY=PPC"); \ + \ + VXWORKS_OS_CPP_BUILTINS (); \ + } \ + while (0) + +/* Only big endian PPC is supported by VxWorks. */ +#undef BYTES_BIG_ENDIAN +#define BYTES_BIG_ENDIAN 1 +#undef WORDS_BIG_ENDIAN +#define WORDS_BIG_ENDIAN 1 + +/* We have to kill off the entire specs set created by rs6000/sysv4.h + and substitute our own set. The top level vxworks.h has done some + of this for us. */ + +#undef SUBTARGET_EXTRA_SPECS +#undef CPP_SPEC +#undef CC1_SPEC +#undef ASM_SPEC + +#define SUBTARGET_EXTRA_SPECS /* none needed */ + +/* VxWorks and VxWorksAE (aka 653) expect different CPU values to designate + SPE on 8548. We define a dedicated macro for the base VxWorks here, which + the AE configuration will override. */ + +#define VXCPU_FOR_8548 "PPC85XX" + +/* FIXME: The only reason we allow no -mcpu switch at all is because + config-ml.in insists on a "." multilib. 
*/ +#define CPP_SPEC \ +"%{!DCPU=*: \ + %{mcpu=403 : -DCPU=PPC403 ; \ + mcpu=405 : -DCPU=PPC405 ; \ + mcpu=440 : -DCPU=PPC440 ; \ + mcpu=464 : -DCPU=PPC464 ; \ + mcpu=476 : -DCPU=PPC476 ; \ + mcpu=603 : -DCPU=PPC603 ; \ + mcpu=604 : -DCPU=PPC604 ; \ + mcpu=860 : -DCPU=PPC860 ; \ + mcpu=8540: -DCPU=PPC85XX ; \ + mcpu=8548: -DCPU=" VXCPU_FOR_8548 "; \ + : -DCPU=PPC604 }}" \ +VXWORKS_ADDITIONAL_CPP_SPEC + +#define CC1_SPEC \ +"%{G*} %{mno-sdata:-msdata=none} %{msdata:-msdata=default} \ + %{mlittle|mlittle-endian:-mstrict-align}" + +#define ASM_SPEC \ +"%(asm_cpu) \ + %{,assembler|,assembler-with-cpp: %{mregnames} %{mno-regnames}} \ + %{mrelocatable} %{mrelocatable-lib} %{" FPIC_SPEC ":-K PIC} -mbig" + +#undef LIB_SPEC +#define LIB_SPEC VXWORKS_LIB_SPEC +#undef LINK_SPEC +#define LINK_SPEC VXWORKS_LINK_SPEC +#undef STARTFILE_SPEC +#define STARTFILE_SPEC VXWORKS_STARTFILE_SPEC +#undef ENDFILE_SPEC +#define ENDFILE_SPEC VXWORKS_ENDFILE_SPEC + +/* There is no default multilib. */ +#undef MULTILIB_DEFAULTS + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_EABI | MASK_STRICT_ALIGN) + +#undef PROCESSOR_DEFAULT +#define PROCESSOR_DEFAULT PROCESSOR_PPC604 + +/* Nor sdata, for kernel mode. We use this in + SUBSUBTARGET_INITIALIZE_OPTIONS, after rs6000_rtp has been initialized. */ +#undef SDATA_DEFAULT_SIZE +#define SDATA_DEFAULT_SIZE (TARGET_VXWORKS_RTP ? 8 : 0) + +/* Enforce 16bytes alignment for the stack pointer, to permit general + compliance with e.g. Altivec instructions requirements. Make sure + this isn't overruled by the EABI constraints. */ + +#undef STACK_BOUNDARY +#define STACK_BOUNDARY (16*BITS_PER_UNIT) + +#undef PREFERRED_STACK_BOUNDARY +#define PREFERRED_STACK_BOUNDARY STACK_BOUNDARY + +#undef ABI_STACK_BOUNDARY + +#undef SUBSUBTARGET_OVERRIDE_OPTIONS +#define SUBSUBTARGET_OVERRIDE_OPTIONS \ + do { \ + if (!global_options_set.x_g_switch_value) \ + g_switch_value = SDATA_DEFAULT_SIZE; \ + VXWORKS_OVERRIDE_OPTIONS; \ + } while (0) + +/* No _mcount profiling on VxWorks. */ +#undef FUNCTION_PROFILER +#define FUNCTION_PROFILER(FILE,LABELNO) VXWORKS_FUNCTION_PROFILER(FILE,LABELNO) + +/* Define this to be nonzero if static stack checking is supported. */ +#define STACK_CHECK_STATIC_BUILTIN 1 + +/* This platform supports the probing method of stack checking (RTP mode). + 8K is reserved in the stack to propagate exceptions in case of overflow. */ +#define STACK_CHECK_PROTECT 8192 diff --git a/gcc/config/powerpcspe/vxworksae.h b/gcc/config/powerpcspe/vxworksae.h new file mode 100644 index 000000000000..27bf470828b7 --- /dev/null +++ b/gcc/config/powerpcspe/vxworksae.h @@ -0,0 +1,28 @@ +/* Definitions of target machine for GNU compiler. PowerPC VxworksAE version. + Copyright (C) 2005-2017 Free Software Foundation, Inc. + Contributed by CodeSourcery, LLC. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* VxWorksAE for E500V2 expects a specific CPU value to designate 8548. 
*/ +#undef VXCPU_FOR_8548 +#define VXCPU_FOR_8548 "PPCE500V2" + +/* This platform supports the probing method of stack checking and + requires 4K of space for executing a possible last chance handler. */ +#undef STACK_CHECK_PROTECT +#define STACK_CHECK_PROTECT 4096 diff --git a/gcc/config/powerpcspe/vxworksmils.h b/gcc/config/powerpcspe/vxworksmils.h new file mode 100644 index 000000000000..7b1e2cc4e4cd --- /dev/null +++ b/gcc/config/powerpcspe/vxworksmils.h @@ -0,0 +1,29 @@ +/* PowerPC VxWorks MILS target definitions for GNU compiler. Overrides + on top of the canonical VxWorks definitions. + + Copyright (C) 2014-2017 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* This platform supports the probing method of stack checking and + requires 4K of space for executing a possible last chance handler. */ +#undef STACK_CHECK_PROTECT +#define STACK_CHECK_PROTECT 4096 + +/* VxWorksMILS for E500V2 expects a specific CPU value to designate 8548. */ +#undef VXCPU_FOR_8548 +#define VXCPU_FOR_8548 "PPC85XX" diff --git a/gcc/config/powerpcspe/x-aix b/gcc/config/powerpcspe/x-aix new file mode 100644 index 000000000000..029b21e41425 --- /dev/null +++ b/gcc/config/powerpcspe/x-aix @@ -0,0 +1,5 @@ +# genautomata requires more than 1GB of data +build/genautomata : override LDFLAGS += -Wl,-bmaxdata:0x40000000 + +$(COMPILERS) : override LDFLAGS += -Wl,-bmaxdata:0x40000000 + diff --git a/gcc/config/powerpcspe/x-darwin b/gcc/config/powerpcspe/x-darwin new file mode 100644 index 000000000000..984fc5fa9d5b --- /dev/null +++ b/gcc/config/powerpcspe/x-darwin @@ -0,0 +1,3 @@ +host-ppc-darwin.o : $(srcdir)/config/powerpcspe/host-darwin.c + $(COMPILE) $< + $(POSTCOMPILE) diff --git a/gcc/config/powerpcspe/x-darwin64 b/gcc/config/powerpcspe/x-darwin64 new file mode 100644 index 000000000000..8a91231642f6 --- /dev/null +++ b/gcc/config/powerpcspe/x-darwin64 @@ -0,0 +1,3 @@ +host-ppc64-darwin.o : $(srcdir)/config/powerpcspe/host-ppc64-darwin.c + $(COMPILE) $< + $(POSTCOMPILE) diff --git a/gcc/config/powerpcspe/x-linux-relax b/gcc/config/powerpcspe/x-linux-relax new file mode 100644 index 000000000000..2743a94e43d1 --- /dev/null +++ b/gcc/config/powerpcspe/x-linux-relax @@ -0,0 +1,2 @@ +# At -O0 cc1 etc. 
are too large and -Wl,--relax is needed +$(COMPILERS) : override LDFLAGS += -Wl,--relax diff --git a/gcc/config/powerpcspe/x-powerpcspe b/gcc/config/powerpcspe/x-powerpcspe new file mode 100644 index 000000000000..57d5f70aa2c8 --- /dev/null +++ b/gcc/config/powerpcspe/x-powerpcspe @@ -0,0 +1,3 @@ +driver-powerpcspe.o : $(srcdir)/config/powerpcspe/driver-powerpcspe.c \ + $(CONFIG_H) $(SYSTEM_H) $(TM_H) coretypes.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< diff --git a/gcc/config/powerpcspe/x86intrin.h b/gcc/config/powerpcspe/x86intrin.h new file mode 100644 index 000000000000..4aa33fdbbf59 --- /dev/null +++ b/gcc/config/powerpcspe/x86intrin.h @@ -0,0 +1,43 @@ +/* Copyright (C) 2008-2017 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef NO_WARN_X86_INTRINSICS +/* This header is distributed to simplify porting x86_64 code that + makes explicit use of Intel intrinsics to powerpc64le. + It is the user's responsibility to determine if the results are + acceptable and make additional changes as necessary. + Note that much code that uses Intel intrinsics can be rewritten in + standard C or GNU C extensions, which are more portable and better + optimized across multiple targets. */ +#warning "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this warning." +#endif + +#ifndef _X86INTRIN_H_INCLUDED +#define _X86INTRIN_H_INCLUDED + +#include <bmiintrin.h> + +#include <bmi2intrin.h> + + +#endif /* _X86INTRIN_H_INCLUDED */ diff --git a/gcc/config/powerpcspe/xcoff.h b/gcc/config/powerpcspe/xcoff.h new file mode 100644 index 000000000000..36f40f4b11ea --- /dev/null +++ b/gcc/config/powerpcspe/xcoff.h @@ -0,0 +1,316 @@ +/* Definitions of target machine for GNU compiler, + for some generic XCOFF file format + Copyright (C) 2001-2017 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#define TARGET_OBJECT_FORMAT OBJECT_XCOFF + +/* The RS/6000 uses the XCOFF format. 
*/ +#define XCOFF_DEBUGGING_INFO 1 + +/* Define if the object format being used is COFF or a superset. */ +#define OBJECT_FORMAT_COFF + +/* Define the magic numbers that we recognize as COFF. + + AIX 4.3 adds U803XTOCMAGIC (0757) for 64-bit objects and AIX V5 adds + U64_TOCMAGIC (0767), but collect2.c does not include files in the + correct order to conditionally define the symbolic name in this macro. + + The AIX linker accepts import/export files as object files, + so accept "#!" (0x2321) magic number. */ +#define MY_ISCOFF(magic) \ + ((magic) == U802WRMAGIC || (magic) == U802ROMAGIC \ + || (magic) == U802TOCMAGIC || (magic) == 0757 || (magic) == 0767 \ + || (magic) == 0x2321) + +/* We don't have GAS for the RS/6000 yet, so don't write out special + .stabs in cc1plus. */ + +#define FASCIST_ASSEMBLER + +/* We define this to prevent the name mangler from putting dollar signs into + function names. */ + +#define NO_DOLLAR_IN_LABEL + +/* We define this to 0 so that gcc will never accept a dollar sign in a + variable name. This is needed because the AIX assembler will not accept + dollar signs. */ + +#define DOLLARS_IN_IDENTIFIERS 0 + +/* AIX .align pseudo-op accept value from 0 to 12, corresponding to + log base 2 of the alignment in bytes; 12 = 4096 bytes = 32768 bits. */ + +#define MAX_OFILE_ALIGNMENT 32768 + +/* Default alignment factor for csect directives, chosen to honor + BIGGEST_ALIGNMENT. */ +#define XCOFF_CSECT_DEFAULT_ALIGNMENT_STR "4" + +/* Return nonzero if this entry is to be written into the constant + pool in a special way. We do so if this is a SYMBOL_REF, LABEL_REF + or a CONST containing one of them. If -mfp-in-toc (the default), + we also do this for floating-point constants. We actually can only + do this if the FP formats of the target and host machines are the + same, but we can't check that since not every file that uses these + target macros includes real.h. We also do this when we can write the + entry into the TOC and the entry is not larger than a TOC entry. */ + +#define ASM_OUTPUT_SPECIAL_POOL_ENTRY_P(X, MODE) \ + (TARGET_TOC \ + && (GET_CODE (X) == SYMBOL_REF \ + || (GET_CODE (X) == CONST && GET_CODE (XEXP (X, 0)) == PLUS \ + && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF) \ + || GET_CODE (X) == LABEL_REF \ + || (GET_CODE (X) == CONST_INT \ + && GET_MODE_BITSIZE (MODE) <= GET_MODE_BITSIZE (Pmode)) \ + || (GET_CODE (X) == CONST_DOUBLE \ + && (TARGET_MINIMAL_TOC \ + || (SCALAR_FLOAT_MODE_P (GET_MODE (X)) \ + && ! 
TARGET_NO_FP_IN_TOC))))) + +#undef TARGET_DEBUG_UNWIND_INFO +#define TARGET_DEBUG_UNWIND_INFO rs6000_xcoff_debug_unwind_info +#define TARGET_ASM_OUTPUT_ANCHOR rs6000_xcoff_asm_output_anchor +#define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_xcoff_asm_globalize_decl_name +#define TARGET_ASM_GLOBALIZE_LABEL rs6000_xcoff_asm_globalize_label +#define TARGET_ASM_INIT_SECTIONS rs6000_xcoff_asm_init_sections +#define TARGET_ASM_RELOC_RW_MASK rs6000_xcoff_reloc_rw_mask +#define TARGET_ASM_NAMED_SECTION rs6000_xcoff_asm_named_section +#define TARGET_ASM_SELECT_SECTION rs6000_xcoff_select_section +#define TARGET_ASM_SELECT_RTX_SECTION rs6000_xcoff_select_rtx_section +#define TARGET_ASM_UNIQUE_SECTION rs6000_xcoff_unique_section +#define TARGET_ASM_FUNCTION_RODATA_SECTION default_no_function_rodata_section +#define TARGET_STRIP_NAME_ENCODING rs6000_xcoff_strip_name_encoding +#define TARGET_SECTION_TYPE_FLAGS rs6000_xcoff_section_type_flags +#ifdef HAVE_AS_TLS +#define TARGET_ENCODE_SECTION_INFO rs6000_xcoff_encode_section_info +#endif +#define ASM_OUTPUT_ALIGNED_DECL_COMMON rs6000_xcoff_asm_output_aligned_decl_common + +/* FP save and restore routines. */ +#define SAVE_FP_PREFIX "._savef" +#define SAVE_FP_SUFFIX "" +#define RESTORE_FP_PREFIX "._restf" +#define RESTORE_FP_SUFFIX "" + +/* Function name to call to do profiling. */ +#undef RS6000_MCOUNT +#define RS6000_MCOUNT ".__mcount" + +/* This outputs NAME to FILE up to the first null or '['. */ + +#define RS6000_OUTPUT_BASENAME(FILE, NAME) \ + assemble_name ((FILE), (*targetm.strip_name_encoding) (NAME)) + +/* This is how to output the definition of a user-level label named NAME, + such as the label on a static function or variable NAME. */ + +#define ASM_OUTPUT_LABEL(FILE,NAME) \ + do { RS6000_OUTPUT_BASENAME (FILE, NAME); fputs (":\n", FILE); } while (0) + +/* This is how to output a command to make the user-level label named NAME + defined for reference from other files. */ + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.globl " + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START rs6000_xcoff_file_start +#define TARGET_ASM_FILE_END rs6000_xcoff_file_end +#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE +#define TARGET_ASM_FILE_START_FILE_DIRECTIVE false + +/* This macro produces the initial definition of a function name. */ + +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + rs6000_xcoff_declare_function_name ((FILE), (NAME), (DECL)) +#undef ASM_DECLARE_OBJECT_NAME +#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \ + rs6000_xcoff_declare_object_name ((FILE), (NAME), (DECL)) + +/* Output a reference to SYM on FILE. */ + +#define ASM_OUTPUT_SYMBOL_REF(FILE, SYM) \ + rs6000_output_symbol_ref (FILE, SYM) + +/* This says how to output an external. + Dollar signs are converted to underscores. */ + +#undef ASM_OUTPUT_EXTERNAL +#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \ +{ char *buffer = (char *) alloca (strlen (NAME) + 1); \ + char *p; \ + int dollar_inside = 0; \ + strcpy (buffer, NAME); \ + p = strchr (buffer, '$'); \ + while (p) { \ + *p = '_'; \ + dollar_inside++; \ + p = strchr (p + 1, '$'); \ + } \ + if (dollar_inside) { \ + fputs ("\t.extern .", FILE); \ + RS6000_OUTPUT_BASENAME (FILE, buffer); \ + putc ('\n', FILE); \ + fprintf (FILE, "\t.rename .%s,\".%s\"\n", buffer, NAME); \ + } \ +} + +/* This is how to output a reference to a user-level label named NAME. + `assemble_name' uses this. 
*/ + +#define ASM_OUTPUT_LABELREF(FILE,NAME) \ + asm_fprintf ((FILE), "%U%s", rs6000_xcoff_strip_dollar (NAME)); + +/* This is how to output an internal label prefix. rs6000.c uses this + when generating traceback tables. */ + +#define ASM_OUTPUT_INTERNAL_LABEL_PREFIX(FILE,PREFIX) \ + fprintf (FILE, "%s..", PREFIX) + +/* This is how to output a label for a jump table. Arguments are the same as + for (*targetm.asm_out.internal_label), except the insn for the jump table is + passed. */ + +#define ASM_OUTPUT_CASE_LABEL(FILE,PREFIX,NUM,TABLEINSN) \ +{ ASM_OUTPUT_ALIGN (FILE, 2); (*targetm.asm_out.internal_label) (FILE, PREFIX, NUM); } + +/* This is how to store into the string LABEL + the symbol_ref name of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. + This is suitable for output with `assemble_name'. */ + +#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \ + sprintf (LABEL, "*%s..%u", rs6000_xcoff_strip_dollar (PREFIX), (unsigned) (NUM)) + +/* This is how to output an assembler line to define N characters starting + at P to FILE. */ + +#define ASM_OUTPUT_ASCII(FILE, P, N) output_ascii ((FILE), (P), (N)) + +/* This is how to advance the location counter by SIZE bytes. */ + +#define SKIP_ASM_OP "\t.space " + +#define ASM_OUTPUT_SKIP(FILE,SIZE) \ + fprintf (FILE, "%s" HOST_WIDE_INT_PRINT_UNSIGNED"\n", SKIP_ASM_OP, (SIZE)) + +/* This says how to output an assembler line + to define a global common symbol. */ + +#define COMMON_ASM_OP "\t.comm " + +/* This says how to output an assembler line + to define a local common symbol. + The assembler in AIX 6.1 and later supports an alignment argument. + For earlier releases of AIX, we try to maintain + alignment after preceding TOC section if it was aligned + for 64-bit mode. */ + +#define LOCAL_COMMON_ASM_OP "\t.lcomm " + +#if TARGET_AIX_VERSION >= 61 +#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \ + do { fputs (LOCAL_COMMON_ASM_OP, (FILE)); \ + RS6000_OUTPUT_BASENAME ((FILE), (NAME)); \ + if ((ALIGN) > 32) \ + fprintf ((FILE), "," HOST_WIDE_INT_PRINT_UNSIGNED",%s%u_,%u\n", \ + (SIZE), xcoff_bss_section_name, \ + floor_log2 ((ALIGN) / BITS_PER_UNIT), \ + floor_log2 ((ALIGN) / BITS_PER_UNIT)); \ + else if ((SIZE) > 4) \ + fprintf ((FILE), "," HOST_WIDE_INT_PRINT_UNSIGNED",%s3_,3\n", \ + (SIZE), xcoff_bss_section_name); \ + else \ + fprintf ((FILE), "," HOST_WIDE_INT_PRINT_UNSIGNED",%s,2\n", \ + (SIZE), xcoff_bss_section_name); \ + } while (0) +#endif + +#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \ + do { fputs (LOCAL_COMMON_ASM_OP, (FILE)); \ + RS6000_OUTPUT_BASENAME ((FILE), (NAME)); \ + fprintf ((FILE), "," HOST_WIDE_INT_PRINT_UNSIGNED",%s\n", \ + (TARGET_32BIT ? (SIZE) : (ROUNDED)), \ + xcoff_bss_section_name); \ + } while (0) + +#ifdef HAVE_AS_TLS +#define ASM_OUTPUT_TLS_COMMON(FILE, DECL, NAME, SIZE) \ + do { fputs (COMMON_ASM_OP, (FILE)); \ + RS6000_OUTPUT_BASENAME ((FILE), (NAME)); \ + fprintf ((FILE), "[UL]," HOST_WIDE_INT_PRINT_UNSIGNED"\n", \ + (SIZE)); \ + } while (0) +#endif + +/* This is how we tell the assembler that two symbols have the same value. */ +#define SET_ASM_OP "\t.set " + +/* This is how we tell the assembler to equate two values. + The semantic of AIX assembler's .set do not correspond to middle-end expectations. + We output aliases as alternative symbols in the front of the definition + via DECLARE_FUNCTION_NAME and DECLARE_OBJECT_NAME. + We still need to define this macro to let middle-end know that aliases are + supported. 
+ */ +#define ASM_OUTPUT_DEF(FILE,LABEL1,LABEL2) do { } while (0) + +/* Used by rs6000_assemble_integer, among others. */ + +/* Used by rs6000_assemble_integer, among others. */ +#define DOUBLE_INT_ASM_OP "\t.llong\t" + +/* Output before instructions. */ +#define TEXT_SECTION_ASM_OP "\t.csect .text[PR]" + +/* Output before writable data. */ +#define DATA_SECTION_ASM_OP \ + "\t.csect .data[RW]," XCOFF_CSECT_DEFAULT_ALIGNMENT_STR + + +/* The eh_frames are put in the read-only text segment. + Local code labels/function will also be in the local text segment so use + PC relative addressing. + Global symbols must be in the data segment to allow loader relocations. + So use DW_EH_PE_indirect to allocate a slot in the local data segment. + There is no constant offset to this data segment from the text segment, + so use addressing relative to the data segment. + */ +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \ + (((GLOBAL) ? DW_EH_PE_indirect | DW_EH_PE_datarel : DW_EH_PE_pcrel) \ + | (TARGET_64BIT ? DW_EH_PE_sdata8 : DW_EH_PE_sdata4)) + +#define EH_FRAME_THROUGH_COLLECT2 1 +#define EH_TABLES_CAN_BE_READ_ONLY 1 + +/* AIX Assembler implicitly assumes DWARF 64 bit extension in 64 bit mode. */ +#define DWARF_OFFSET_SIZE PTR_SIZE + +#define ASM_OUTPUT_DWARF_PCREL(FILE,SIZE,LABEL) \ + rs6000_asm_output_dwarf_pcrel ((FILE), (SIZE), (LABEL)); + +#define ASM_OUTPUT_DWARF_DATAREL(FILE,SIZE,LABEL) \ + rs6000_asm_output_dwarf_datarel ((FILE), (SIZE), (LABEL)); + +#define MAKE_DECL_ONE_ONLY(DECL) (DECL_WEAK (DECL) = 1) + diff --git a/gcc/config/powerpcspe/xfpu.h b/gcc/config/powerpcspe/xfpu.h new file mode 100644 index 000000000000..5cecdeddbd6e --- /dev/null +++ b/gcc/config/powerpcspe/xfpu.h @@ -0,0 +1,26 @@ +/* Definitions for Xilinx PowerPC 405/440 APU. + + Copyright (C) 2008-2017 Free Software Foundation, Inc. + Contributed by Michael Eager (eager@eagercon.com) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + + +/* Undefine definitions from rs6000.h. */ +#undef TARGET_XILINX_FPU + +#define TARGET_XILINX_FPU (rs6000_xilinx_fpu) diff --git a/gcc/config/powerpcspe/xfpu.md b/gcc/config/powerpcspe/xfpu.md new file mode 100644 index 000000000000..c31c7697b2df --- /dev/null +++ b/gcc/config/powerpcspe/xfpu.md @@ -0,0 +1,140 @@ +;; Scheduling description for the Xilinx PowerPC 405 APU Floating Point Unit. +;; Copyright (C) 2008-2017 Free Software Foundation, Inc. +;; Contributed by Michael Eager (eager@eagercon.com). +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;;---------------------------------------------------- +;; Xilinx APU FPU Pipeline Description +;; +;; - attr 'type' and 'fp_type' should definitely +;; be cleaned up at some point in the future. +;; ddiv,sdiv,dmul,smul etc are quite confusing. +;; Should use consistent fp* attrs. 'fp_type' +;; should also go away, leaving us only with 'fp' +;; +;;---------------------------------------------------- + +;; ------------------------------------------------------------------------- +;; Latencies +;; Latest latency figures (all in FCB cycles). PowerPC to FPU frequency ratio +;; assumed to be 1/2. (most common deployment) +;; Add 2 PPC cycles for (register file access + wb) and 2 PPC cycles +;; for issue (from PPC) +;; SP DP +;; Loads: 4 6 +;; Stores: 1 2 (from availability of data) +;; Move/Abs/Neg: 1 1 +;; Add/Subtract: 5 7 +;; Multiply: 4 11 +;; Multiply-add: 10 19 +;; Convert (any): 4 6 +;; Divide/Sqrt: 27 56 +;; Compares: 1 2 +;; +;; bypasses needed for forwarding capability of the FPU. +;; Add this at some future time. +;; ------------------------------------------------------------------------- +(define_automaton "Xfpu") +(define_cpu_unit "Xfpu_issue,Xfpu_addsub,Xfpu_mul,Xfpu_div,Xfpu_sqrt" "Xfpu") + + +(define_insn_reservation "fp-default" 2 + (and (and + (eq_attr "type" "fp,fpsimple") + (eq_attr "fp_type" "fp_default")) + (eq_attr "cpu" "ppc405")) + "Xfpu_issue*2") + +(define_insn_reservation "fp-compare" 6 + (and (eq_attr "type" "fpcompare") ;; Inconsistent naming + (eq_attr "cpu" "ppc405")) + "Xfpu_issue*2,Xfpu_addsub") + +(define_insn_reservation "fp-addsub-s" 14 + (and (and + (eq_attr "type" "fp,fpsimple") + (eq_attr "fp_type" "fp_addsub_s")) + (eq_attr "cpu" "ppc405")) + "Xfpu_issue*2,Xfpu_addsub") + +(define_insn_reservation "fp-addsub-d" 18 + (and (and + (eq_attr "type" "fp,fpsimple") + (eq_attr "fp_type" "fp_addsub_d")) + (eq_attr "cpu" "ppc405")) + "Xfpu_issue*2,Xfpu_addsub") + +(define_insn_reservation "fp-mul-s" 12 + (and (and + (eq_attr "type" "fp") + (eq_attr "fp_type" "fp_mul_s")) + (eq_attr "cpu" "ppc405")) + "Xfpu_issue*2,Xfpu_mul") + +(define_insn_reservation "fp-mul-d" 16 ;; Actually 28. Long latencies are killing the automaton formation. Need to figure out why. + (and (and + (eq_attr "type" "fp") + (eq_attr "fp_type" "fp_mul_d")) + (eq_attr "cpu" "ppc405")) + "Xfpu_issue*2,Xfpu_mul") + +(define_insn_reservation "fp-div-s" 24 ;; Actually 34 + (and (eq_attr "type" "sdiv") ;; Inconsistent attr naming + (eq_attr "cpu" "ppc405")) + "Xfpu_issue*2,Xfpu_div*10") ;; Unpipelined + +(define_insn_reservation "fp-div-d" 34 ;; Actually 116 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppc405")) ;; Inconsistent attr naming + "Xfpu_issue*2,Xfpu_div*10") ;; Unpipelined + +(define_insn_reservation "fp-maddsub-s" 24 + (and (and + (eq_attr "type" "fp") + (eq_attr "fp_type" "fp_maddsub_s")) + (eq_attr "cpu" "ppc405")) + "Xfpu_issue*2,Xfpu_mul,nothing*7,Xfpu_addsub") + +(define_insn_reservation "fp-maddsub-d" 34 ;; Actually 42 + (and (and + (eq_attr "type" "dmul") ;; Inconsistent attr naming + (eq_attr "fp_type" "fp_maddsub_d")) + (eq_attr "cpu" "ppc405")) + "Xfpu_issue*2,Xfpu_mul,nothing*7,Xfpu_addsub") + +(define_insn_reservation "fp-load" 10 ;; FIXME. Is double/single precision the same ? 
+ (and (eq_attr "type" "fpload") + (eq_attr "cpu" "ppc405")) + "Xfpu_issue*10") + +(define_insn_reservation "fp-store" 4 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "ppc405")) + "Xfpu_issue*4") + +(define_insn_reservation "fp-sqrt-s" 24 ;; Actually 56 + (and (eq_attr "type" "ssqrt") + (eq_attr "cpu" "ppc405")) + "Xfpu_issue*2,Xfpu_sqrt*10") ;; Unpipelined + + +(define_insn_reservation "fp-sqrt-d" 34 ;; Actually 116 + (and (eq_attr "type" "dsqrt") + (eq_attr "cpu" "ppc405")) + "Xfpu_issue*2,Xfpu_sqrt*10") ;; Unpipelined + diff --git a/gcc/config/powerpcspe/xilinx.h b/gcc/config/powerpcspe/xilinx.h new file mode 100644 index 000000000000..3f1c71d175bd --- /dev/null +++ b/gcc/config/powerpcspe/xilinx.h @@ -0,0 +1,47 @@ +/* Support for GCC on Xilinx embedded PowerPC systems + Copyright (C) 2008-2017 Free Software Foundation, Inc. + Contributed by Michael Eager, eager@eagercon.com + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* Set defaults for Xilinx embedded target boards. */ + +#undef CPP_SPEC +#define CPP_SPEC "\ +-mxilinx-fpu \ +%{mfpu=sp_lite: -DHAVE_XFPU_SP_LITE} \ +%{mfpu=sp_full: -DHAVE_XFPU_SP_FULL} \ +%{mfpu=dp_lite: -DHAVE_XFPU_DP_LITE} \ +%{mfpu=dp_full: -DHAVE_XFPU_DP_FULL} \ +%{mfpu=*: -DHAVE_XFPU}" + +#undef LIB_DEFAULT_SPEC +#define LIB_DEFAULT_SPEC "\ +%{!nostdlib: --start-group -lxil -lc -lm --end-group \ +%{mppcperflib: %{mfpu=*: -lppcstr405 -lgcc} \ +%{!mfpu=*: -lppcstr405 -lppcfp -lgcc}} \ +%{!mppcperflib: -lgcc}}" + +#undef STARTFILE_DEFAULT_SPEC +#define STARTFILE_DEFAULT_SPEC "\ +ecrti.o%s %{pg: %{!mno-clearbss: xil-pgcrt0.o%s} \ +%{mno-clearbss: xil-sim-pgcrt0.o%s}} \ +%{!pg: %{!mno-clearbss: xil-crt0.o%s} \ +%{mno-clearbss: xil-sim-crt0.o%s}} crtbegin.o%s" + +#undef LINK_START_DEFAULT_SPEC +#define LINK_START_DEFAULT_SPEC "-T xilinx.ld%s" diff --git a/gcc/config/powerpcspe/xilinx.opt b/gcc/config/powerpcspe/xilinx.opt new file mode 100644 index 000000000000..29aaf51f81ea --- /dev/null +++ b/gcc/config/powerpcspe/xilinx.opt @@ -0,0 +1,32 @@ +; Xilinx embedded PowerPC options. + +; Copyright (C) 2011-2017 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. + +; See the GCC internals manual (options.texi) for a description of +; this file's format. + +; Please try to keep this file in ASCII collating order. 
+ +mno-clearbss +Target RejectNegative + +mppcperflib +Target RejectNegative + +; This comment is to ensure we retain the blank line above.
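
A side note on the xcoff.h hunk above: ASM_OUTPUT_INTERNAL_LABEL_PREFIX emits the class prefix followed by "..", and ASM_GENERATE_INTERNAL_LABEL builds internal symbol names with the format string "*%s..%u", so XCOFF internal labels take the form "*<prefix>..<number>". The small standalone C sketch below only reproduces that string formatting for illustration; it is not part of the patch, the helper name, buffer size, and sample prefixes are invented for the demo, and the rs6000_xcoff_strip_dollar filtering done by the real macro is omitted.

/* Illustrative sketch only -- not part of the patch.  Mimics the
   "*%s..%u" format used by ASM_GENERATE_INTERNAL_LABEL in xcoff.h.  */
#include <stdio.h>

static void
generate_internal_label (char *label, const char *prefix, unsigned num)
{
  /* Same layout as the macro: leading '*', prefix, "..", number.  */
  sprintf (label, "*%s..%u", prefix, num);
}

int
main (void)
{
  char label[32];

  generate_internal_label (label, "LC", 42);
  printf ("%s\n", label);   /* prints "*LC..42" */

  generate_internal_label (label, "L", 7);
  printf ("%s\n", label);   /* prints "*L..7" */

  return 0;
}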