diff --git a/gcc/config.gcc b/gcc/config.gcc index 9c397156868ae686e91be611f6a2c29b6667244c..0782cbc6e9154eb0aa5130e33097260ca4e419ea 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -4529,7 +4529,7 @@ case "${target}" in for which in arch tune; do eval "val=\$with_$which" case ${val} in - "" | fiji | gfx900 | gfx906 | gfx908 | gfx90a) + "" | fiji | gfx900 | gfx906 | gfx908 | gfx90a | gfx1030) # OK ;; *) diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h index 0b5610bbcbe628998531210aae3778db90cfe02e..aa1294cf48fdba73e00152a02bfd6db19e699708 100644 --- a/gcc/config/gcn/gcn-hsa.h +++ b/gcc/config/gcn/gcn-hsa.h @@ -75,7 +75,7 @@ extern unsigned int gcn_local_sym_hash (const char *name); supported for gcn. */ #define GOMP_SELF_SPECS "" -#define NO_XNACK "!march=*:;march=fiji:;" +#define NO_XNACK "!march=*:;march=fiji:;march=gfx1030:;" #define NO_SRAM_ECC "!march=*:;march=fiji:;march=gfx900:;march=gfx906:;" /* In HSACOv4 no attribute setting means the binary supports "any" hardware @@ -92,6 +92,7 @@ extern unsigned int gcn_local_sym_hash (const char *name); "%{!march=*|march=fiji:--amdhsa-code-object-version=3} " \ "%{" NO_XNACK XNACKOPT "}" \ "%{" NO_SRAM_ECC SRAMOPT "} " \ + "%{march=gfx1030:-mattr=+wavefrontsize64} " \ "-filetype=obj" #define LINK_SPEC "--pie --export-dynamic" #define LIB_SPEC "-lc" diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h index f780a7c17fefb5ae01b57c692e08a3c360396f7e..b4f494d868cd7cd1b971e57f0a9b58f697a36ef4 100644 --- a/gcc/config/gcn/gcn-opts.h +++ b/gcc/config/gcn/gcn-opts.h @@ -24,7 +24,8 @@ enum processor_type PROCESSOR_VEGA10, // gfx900 PROCESSOR_VEGA20, // gfx906 PROCESSOR_GFX908, - PROCESSOR_GFX90a + PROCESSOR_GFX90a, + PROCESSOR_GFX1030 }; #define TARGET_FIJI (gcn_arch == PROCESSOR_FIJI) @@ -32,12 +33,14 @@ enum processor_type #define TARGET_VEGA20 (gcn_arch == PROCESSOR_VEGA20) #define TARGET_GFX908 (gcn_arch == PROCESSOR_GFX908) #define TARGET_GFX90a (gcn_arch == PROCESSOR_GFX90a) +#define 
TARGET_GFX1030 (gcn_arch == PROCESSOR_GFX1030) /* Set in gcn_option_override. */ extern enum gcn_isa { ISA_UNKNOWN, ISA_GCN3, ISA_GCN5, + ISA_RDNA2, ISA_CDNA1, ISA_CDNA2 } gcn_isa; @@ -50,6 +53,8 @@ extern enum gcn_isa { #define TARGET_CDNA1_PLUS (gcn_isa >= ISA_CDNA1) #define TARGET_CDNA2 (gcn_isa == ISA_CDNA2) #define TARGET_CDNA2_PLUS (gcn_isa >= ISA_CDNA2) +#define TARGET_RDNA2 (gcn_isa == ISA_RDNA2) + #define TARGET_M0_LDS_LIMIT (TARGET_GCN3) #define TARGET_PACKED_WORK_ITEMS (TARGET_CDNA2_PLUS) diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index 32b170e852263b0ad5d2ebeb40a8370ddd6e2102..c128c819c8939cebe2491ee949acd11cc737eef2 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -1412,7 +1412,7 @@ [(match_operand:V_noHI 1 "register_operand" " v") (match_operand:SI 2 "const_int_operand" " n")] UNSPEC_MOV_DPP_SHR))] - "" + "!TARGET_RDNA2" { return gcn_expand_dpp_shr_insn (<MODE>mode, "v_mov_b32", UNSPEC_MOV_DPP_SHR, INTVAL (operands[2])); @@ -1548,7 +1548,7 @@ (match_dup 1)) (match_dup 1))))] "" - "v_addc%^_u32\t%0, %4, %2, %1, %3" + "{v_addc%^_u32|v_add_co_ci_u32}\t%0, %4, %2, %1, %3" [(set_attr "type" "vop2,vop3b") (set_attr "length" "4,8")]) @@ -1613,10 +1613,10 @@ (match_dup 1))))] "" "@ - v_subb%^_u32\t%0, %4, %1, %2, %3 - v_subb%^_u32\t%0, %4, %1, %2, %3 - v_subbrev%^_u32\t%0, %4, %2, %1, %3 - v_subbrev%^_u32\t%0, %4, %2, %1, %3" + {v_subb%^_u32|v_sub_co_ci_u32}\t%0, %4, %1, %2, %3 + {v_subb%^_u32|v_sub_co_ci_u32}\t%0, %4, %1, %2, %3 + {v_subbrev%^_u32|v_subrev_co_ci_u32}\t%0, %4, %2, %1, %3 + {v_subbrev%^_u32|v_subrev_co_ci_u32}\t%0, %4, %2, %1, %3" [(set_attr "type" "vop2,vop3b,vop2,vop3b") (set_attr "length" "4,8,4,8")]) @@ -3667,11 +3667,11 @@ ;; {{{ Vector comparison/merge (define_insn "vec_cmp<mode>di" - [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg") + [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg, e, e") (match_operator:DI 1 "gcn_fp_compare_operator" - 
[(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA") - (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v")])) - (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X"))] + [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA,vSv, B") + (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v, v, v")])) + (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X, X, X"))] "" "@ v_cmp%E1\tvcc, %2, %3 @@ -3679,9 +3679,12 @@ v_cmpx%E1\tvcc, %2, %3 v_cmpx%E1\tvcc, %2, %3 v_cmp%E1\t%0, %2, %3 - v_cmp%E1\t%0, %2, %3" - [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a") - (set_attr "length" "4,8,4,8,8,8")]) + v_cmp%E1\t%0, %2, %3 + v_cmpx%E1\t%2, %3 + v_cmpx%E1\t%2, %3" + [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc") + (set_attr "length" "4,8,4,8,8,8,4,8") + (set_attr "rdna" "*,*,no,no,*,*,yes,yes")]) (define_expand "vec_cmpu<mode>di" [(match_operand:DI 0 "register_operand") @@ -3716,13 +3719,13 @@ }) (define_insn "vec_cmp<mode>di_exec" - [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg") + [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg, e, e") (and:DI (match_operator 1 "gcn_fp_compare_operator" - [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA") - (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v")]) - (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e"))) - (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X"))] + [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA,vSv, B") + (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v, v, v")]) + (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e, e, e"))) + (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X, X, X"))] "" "@ v_cmp%E1\tvcc, %2, %3 @@ -3730,9 +3733,12 @@ v_cmpx%E1\tvcc, %2, %3 v_cmpx%E1\tvcc, %2, %3 v_cmp%E1\t%0, %2, %3 - v_cmp%E1\t%0, %2, %3" - [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a") - (set_attr "length" "4,8,4,8,8,8")]) + v_cmp%E1\t%0, %2, %3 + 
v_cmpx%E1\t%2, %3 + v_cmpx%E1\t%2, %3" + [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc") + (set_attr "length" "4,8,4,8,8,8,4,8") + (set_attr "rdna" "*,*,no,no,*,*,yes,yes")]) (define_expand "vec_cmpu<mode>di_exec" [(match_operand:DI 0 "register_operand") @@ -3772,42 +3778,48 @@ }) (define_insn "vec_cmp<mode>di_dup" - [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg") + [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg, e,e") (match_operator:DI 1 "gcn_fp_compare_operator" [(vec_duplicate:V_noQI (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" - " Sv, B,Sv,B, A")) - (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v")])) - (clobber (match_scratch:DI 4 "= X,X,cV,cV, X"))] + " Sv, B,Sv,B, A,Sv,B")) + (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v, v,v")])) + (clobber (match_scratch:DI 4 "= X,X,cV,cV, X, X,X"))] "" "@ v_cmp%E1\tvcc, %2, %3 v_cmp%E1\tvcc, %2, %3 v_cmpx%E1\tvcc, %2, %3 v_cmpx%E1\tvcc, %2, %3 - v_cmp%E1\t%0, %2, %3" - [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a") - (set_attr "length" "4,8,4,8,8")]) + v_cmp%E1\t%0, %2, %3 + v_cmpx%E1\t%2, %3 + v_cmpx%E1\t%2, %3" + [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc") + (set_attr "length" "4,8,4,8,8,4,8") + (set_attr "rdna" "*,*,no,no,*,yes,yes")]) (define_insn "vec_cmp<mode>di_dup_exec" - [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg") + [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg, e,e") (and:DI (match_operator 1 "gcn_fp_compare_operator" [(vec_duplicate:V_noQI (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" - " Sv, B,Sv,B, A")) - (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v")]) - (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e"))) - (clobber (match_scratch:DI 5 "= X,X,cV,cV, X"))] + " Sv, B,Sv,B, A,Sv,B")) + (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v, v,v")]) + (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e, e,e"))) + (clobber (match_scratch:DI 5 "= X,X,cV,cV, X, 
X,X"))] "" "@ v_cmp%E1\tvcc, %2, %3 v_cmp%E1\tvcc, %2, %3 v_cmpx%E1\tvcc, %2, %3 v_cmpx%E1\tvcc, %2, %3 - v_cmp%E1\t%0, %2, %3" - [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a") - (set_attr "length" "4,8,4,8,8")]) + v_cmp%E1\t%0, %2, %3 + v_cmpx%E1\t%2, %3 + v_cmpx%E1\t%2, %3" + [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc") + (set_attr "length" "4,8,4,8,8,4,8") + (set_attr "rdna" "*,*,no,no,*,yes,yes")]) (define_expand "vcond_mask_<mode>di" [(parallel @@ -4176,7 +4188,7 @@ (unspec:<SCALAR_MODE> [(match_operand:V_ALL 1 "register_operand")] REDUC_UNSPEC))] - "" + "!TARGET_RDNA2" { rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1], <reduc_unspec>); @@ -4229,7 +4241,8 @@ REDUC_UNSPEC))] ; GCN3 requires a carry out, GCN5 not "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode) - && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)" + && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR) + && !TARGET_RDNA2" { return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>", <reduc_unspec>, INTVAL (operands[3])); @@ -4274,7 +4287,7 @@ (match_operand:SI 3 "const_int_operand" "n")] UNSPEC_PLUS_CARRY_DPP_SHR)) (clobber (reg:DI VCC_REG))] - "" + "!TARGET_RDNA2" { return gcn_expand_dpp_shr_insn (<VnSI>mode, "v_add%^_u32", UNSPEC_PLUS_CARRY_DPP_SHR, @@ -4292,7 +4305,7 @@ (match_operand:DI 4 "register_operand" "cV")] UNSPEC_PLUS_CARRY_IN_DPP_SHR)) (clobber (reg:DI VCC_REG))] - "" + "!TARGET_RDNA2" { return gcn_expand_dpp_shr_insn (<MODE>mode, "v_addc%^_u32", UNSPEC_PLUS_CARRY_IN_DPP_SHR, diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index ef3b6472a52d693f8c9427a634a4779a5b202ef7..6f85f55803c811621f216b3fc127c4c8d74af852 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -136,6 +136,7 @@ gcn_option_override (void) : gcn_arch == PROCESSOR_VEGA20 ? ISA_GCN5 : gcn_arch == PROCESSOR_GFX908 ? ISA_CDNA1 : gcn_arch == PROCESSOR_GFX90a ? ISA_CDNA2 + : gcn_arch == PROCESSOR_GFX1030 ? 
ISA_RDNA2 : ISA_UNKNOWN); gcc_assert (gcn_isa != ISA_UNKNOWN); @@ -1616,6 +1617,8 @@ gcn_global_address_p (rtx addr) { rtx base = XEXP (addr, 0); rtx offset = XEXP (addr, 1); + int offsetbits = (TARGET_RDNA2 ? 11 : 12); bool immediate_p = (CONST_INT_P (offset) - && INTVAL (offset) >= -(1 << 12) - && INTVAL (offset) < (1 << 12)); + /* Signed 12/13-bit immediate. */ + && INTVAL (offset) >= -(1 << offsetbits) + && INTVAL (offset) < (1 << offsetbits)); @@ -1748,10 +1751,11 @@ gcn_addr_space_legitimate_address_p (machine_mode mode, rtx x, bool strict, rtx base = XEXP (x, 0); rtx offset = XEXP (x, 1); + int offsetbits = (TARGET_RDNA2 ? 11 : 12); bool immediate_p = (GET_CODE (offset) == CONST_INT - /* Signed 13-bit immediate. */ - && INTVAL (offset) >= -(1 << 12) - && INTVAL (offset) < (1 << 12) + /* Signed 12/13-bit immediate. */ + && INTVAL (offset) >= -(1 << offsetbits) + && INTVAL (offset) < (1 << offsetbits) /* The low bits of the offset are ignored, even when they're meant to realign the pointer. */ && !(INTVAL (offset) & 0x3)); @@ -3029,6 +3033,8 @@ gcn_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait, return gcn_arch == PROCESSOR_GFX908; if (strcmp (name, "gfx90a") == 0) return gcn_arch == PROCESSOR_GFX90a; + if (strcmp (name, "gfx1030") == 0) + return gcn_arch == PROCESSOR_GFX1030; return 0; default: gcc_unreachable (); @@ -3610,9 +3616,11 @@ gcn_expand_epilogue (void) set_mem_addr_space (retptr_mem, ADDR_SPACE_SCALAR_FLAT); emit_move_insn (kernarg_reg, retptr_mem); - rtx retval_mem = gen_rtx_MEM (SImode, kernarg_reg); - rtx scalar_retval = gen_rtx_REG (SImode, FIRST_PARM_REG); - set_mem_addr_space (retval_mem, ADDR_SPACE_SCALAR_FLAT); + rtx retval_addr = gen_rtx_REG (DImode, FIRST_VPARM_REG); + emit_move_insn (retval_addr, kernarg_reg); + rtx retval_mem = gen_rtx_MEM (SImode, retval_addr); + rtx scalar_retval = gen_rtx_REG (SImode, FIRST_VPARM_REG + 2); + set_mem_addr_space (retval_mem, ADDR_SPACE_FLAT); emit_move_insn (scalar_retval, gen_rtx_REG (SImode, RETURN_VALUE_REG)); emit_move_insn (retval_mem, scalar_retval); } @@ -6454,6 +6462,11 @@ output_file_start (void) case 
PROCESSOR_GFX90a: cpu = "gfx90a"; break; + case PROCESSOR_GFX1030: + cpu = "gfx1030"; + xnack = ""; + sram_ecc = ""; + break; default: gcc_unreachable (); } diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h index 4ff9a5d4d12c21c16aac6fe09376f651e9db760e..6372f49d379204c19530bbd94fc6db9c8b2f0a0b 100644 --- a/gcc/config/gcn/gcn.h +++ b/gcc/config/gcn/gcn.h @@ -28,6 +28,8 @@ builtin_define ("__CDNA1__"); \ else if (TARGET_CDNA2) \ builtin_define ("__CDNA2__"); \ + else if (TARGET_RDNA2) \ + builtin_define ("__RDNA2__"); \ if (TARGET_FIJI) \ { \ builtin_define ("__fiji__"); \ @@ -43,6 +45,8 @@ builtin_define ("__gfx90a__"); \ } while (0) +#define ASSEMBLER_DIALECT (TARGET_RDNA2 ? 1 : 0) + /* Support for a compile-time default architecture and tuning. The rules are: --with-arch is ignored if -march is specified. diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index 30fe9e34a35f24a05b102758c41bb4129d98769c..a3d8beefd6d97bb7c0418fa6054fbf7d129477e3 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -285,9 +285,16 @@ ; Disable alternatives that only apply to specific ISA variants. 
(define_attr "gcn_version" "gcn3,gcn5" (const_string "gcn3")) +(define_attr "rdna" "any,no,yes" (const_string "any")) (define_attr "enabled" "" - (cond [(eq_attr "gcn_version" "gcn3") (const_int 1) + (cond [(and (eq_attr "rdna" "no") + (ne (symbol_ref "TARGET_RDNA2") (const_int 0))) + (const_int 0) + (and (eq_attr "rdna" "yes") + (eq (symbol_ref "TARGET_RDNA2") (const_int 0))) + (const_int 0) + (eq_attr "gcn_version" "gcn3") (const_int 1) (and (eq_attr "gcn_version" "gcn5") (ne (symbol_ref "TARGET_GCN5_PLUS") (const_int 0))) (const_int 1)] @@ -812,7 +819,7 @@ if (cfun && cfun->machine && cfun->machine->normal_function) return "s_setpc_b64\ts[18:19]"; else - return "s_waitcnt\tlgkmcnt(0)\;s_dcache_wb\;s_endpgm"; + return "s_waitcnt\tlgkmcnt(0)\;s_endpgm"; } [(set_attr "type" "sop1") (set_attr "length" "12")]) @@ -1179,7 +1186,7 @@ "" "@ s_addc_u32\t%0, %1, %2 - v_addc%^_u32\t%0, vcc, %2, %1, vcc" + {v_addc%^_u32|v_add_co_ci_u32}\t%0, vcc, %2, %1, vcc" [(set_attr "type" "sop2,vop2") (set_attr "length" "8,4")]) @@ -1195,7 +1202,7 @@ "" "@ s_addc_u32\t%0, %1, 0 - v_addc%^_u32\t%0, vcc, 0, %1, vcc" + {v_addc%^_u32|v_add_co_ci_u32}\t%0, vcc, 0, %1, vcc" [(set_attr "type" "sop2,vop2") (set_attr "length" "4")]) @@ -1225,7 +1232,8 @@ gen_rtx_REG (DImode, CC_SAVE_REG) }; output_asm_insn ("v_add%^_u32\t%L0, %3, %L2, %L1", new_operands); - output_asm_insn ("v_addc%^_u32\t%H0, %3, %H2, %H1, %3", new_operands); + output_asm_insn ("{v_addc%^_u32|v_add_co_ci_u32}\t%H0, %3, %H2, %H1, %3", + new_operands); } else { @@ -1363,7 +1371,7 @@ s_mul_i32\t%0, %1, %2 s_mulk_i32\t%0, %2 s_mul_i32\t%0, %1, %2 - v_mul_lo_i32\t%0, %1, %2" + v_mul_lo_u32\t%0, %1, %2" [(set_attr "type" "sop2,sopk,sop2,vop3a") (set_attr "length" "4,4,8,4")]) @@ -1885,7 +1893,7 @@ [(set (match_operand:BLK 0) (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))] "" - "buffer_wbinvl1_vol" + "{buffer_wbinvl1_vol|buffer_gl0_inv}" [(set_attr "type" "mubuf") (set_attr "length" "4")]) @@ -2004,6 +2012,7 @@ (use 
(match_operand:SIDI 2 "immediate_operand" " i, i, i"))] "" { + /* FIXME: RDNA cache instructions may be too conservative? */ switch (INTVAL (operands[2])) { case MEMMODEL_RELAXED: @@ -2026,11 +2035,17 @@ return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)\;" "s_dcache_wb_vol"; case 1: - return "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;" - "buffer_wbinvl1_vol"; + return (TARGET_RDNA2 + ? "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;" + "buffer_gl0_inv" + : "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;" + "buffer_wbinvl1_vol"); case 2: - return "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;" - "buffer_wbinvl1_vol"; + return (TARGET_RDNA2 + ? "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;" + "buffer_gl0_inv" + : "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;" + "buffer_wbinvl1_vol"); } break; case MEMMODEL_ACQ_REL: @@ -2042,11 +2057,17 @@ return "s_dcache_wb_vol\;s_load%o0\t%0, %A1 glc\;" "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol"; case 1: - return "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 glc\;" - "s_waitcnt\t0\;buffer_wbinvl1_vol"; + return (TARGET_RDNA2 + ? "buffer_gl0_inv\;flat_load%o0\t%0, %A1%O1 glc\;" + "s_waitcnt\t0\;buffer_gl0_inv" + : "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 glc\;" + "s_waitcnt\t0\;buffer_wbinvl1_vol"); case 2: - return "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 glc\;" - "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"; + return (TARGET_RDNA2 + ? 
"buffer_gl0_inv\;global_load%o0\t%0, %A1%O1 glc\;" + "s_waitcnt\tvmcnt(0)\;buffer_gl0_inv" + : "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 glc\;" + "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"); } break; } @@ -2054,7 +2075,8 @@ } [(set_attr "type" "smem,flat,flat") (set_attr "length" "20") - (set_attr "gcn_version" "gcn5,*,gcn5")]) + (set_attr "gcn_version" "gcn5,*,gcn5") + (set_attr "rdna" "no,*,*")]) (define_insn "atomic_store<mode>" [(set (match_operand:SIDI 0 "memory_operand" "=RS,RF,RM") @@ -2084,9 +2106,13 @@ case 0: return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc"; case 1: - return "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc"; + return (TARGET_RDNA2 + ? "buffer_gl0_inv\;flat_store%o1\t%A0, %1%O0 glc" + : "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc"); case 2: - return "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc"; + return (TARGET_RDNA2 + ? "buffer_gl0_inv\;global_store%o1\t%A0, %1%O0 glc" + : "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc"); } break; case MEMMODEL_ACQ_REL: @@ -2098,11 +2124,17 @@ return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc\;" "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol"; case 1: - return "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc\;" - "s_waitcnt\t0\;buffer_wbinvl1_vol"; + return (TARGET_RDNA2 + ? "buffer_gl0_inv\;flat_store%o1\t%A0, %1%O0 glc\;" + "s_waitcnt\t0\;buffer_gl0_inv" + : "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc\;" + "s_waitcnt\t0\;buffer_wbinvl1_vol"); case 2: - return "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc\;" - "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"; + return (TARGET_RDNA2 + ? 
"buffer_gl0_inv\;global_store%o1\t%A0, %1%O0 glc\;" + "s_waitcnt\tvmcnt(0)\;buffer_gl0_inv" + : "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc\;" + "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"); } break; } @@ -2110,7 +2142,8 @@ } [(set_attr "type" "smem,flat,flat") (set_attr "length" "20") - (set_attr "gcn_version" "gcn5,*,gcn5")]) + (set_attr "gcn_version" "gcn5,*,gcn5") + (set_attr "rdna" "no,*,*")]) (define_insn "atomic_exchange<mode>" [(set (match_operand:SIDI 0 "register_operand" "=Sm, v, v") @@ -2145,11 +2178,17 @@ return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)\;" "s_dcache_wb_vol\;s_dcache_inv_vol"; case 1: - return "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;" - "buffer_wbinvl1_vol"; + return (TARGET_RDNA2 + ? "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;" + "buffer_gl0_inv" + : "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;" + "buffer_wbinvl1_vol"); case 2: - return "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;" - "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"; + return (TARGET_RDNA2 + ? "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;" + "s_waitcnt\tvmcnt(0)\;buffer_gl0_inv" + : "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;" + "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"); } break; case MEMMODEL_RELEASE: @@ -2160,12 +2199,19 @@ return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;" "s_waitcnt\tlgkmcnt(0)"; case 1: - return "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;" - "s_waitcnt\t0"; + return (TARGET_RDNA2 + ? "buffer_gl0_inv\;flat_atomic_swap<X>\t%0, %1, %2 glc\;" + "s_waitcnt\t0" + : "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;" + "s_waitcnt\t0"); case 2: - return "buffer_wbinvl1_vol\;" - "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;" - "s_waitcnt\tvmcnt(0)"; + return (TARGET_RDNA2 + ? 
"buffer_gl0_inv\;" + "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;" + "s_waitcnt\tvmcnt(0)" + : "buffer_wbinvl1_vol\;" + "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;" + "s_waitcnt\tvmcnt(0)"); } break; case MEMMODEL_ACQ_REL: @@ -2177,12 +2223,19 @@ return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;" "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol"; case 1: - return "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;" - "s_waitcnt\t0\;buffer_wbinvl1_vol"; + return (TARGET_RDNA2 + ? "buffer_gl0_inv\;flat_atomic_swap<X>\t%0, %1, %2 glc\;" + "s_waitcnt\t0\;buffer_gl0_inv" + : "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;" + "s_waitcnt\t0\;buffer_wbinvl1_vol"); case 2: - return "buffer_wbinvl1_vol\;" - "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;" - "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"; + return (TARGET_RDNA2 + ? "buffer_gl0_inv\;" + "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;" + "s_waitcnt\tvmcnt(0)\;buffer_gl0_inv" + : "buffer_wbinvl1_vol\;" + "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;" + "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"); } break; } @@ -2190,7 +2243,8 @@ } [(set_attr "type" "smem,flat,flat") (set_attr "length" "20") - (set_attr "gcn_version" "gcn5,*,gcn5")]) + (set_attr "gcn_version" "gcn5,*,gcn5") + (set_attr "rdna" "no,*,*")]) ;; }}} ;; {{{ OpenACC / OpenMP diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt index 36c2b53528440f71d1f5319a8df0314985f8ec96..7a852c51c84c3246f87b8b4c39edc172f73caf03 100644 --- a/gcc/config/gcn/gcn.opt +++ b/gcc/config/gcn/gcn.opt @@ -40,6 +40,9 @@ Enum(gpu_type) String(gfx908) Value(PROCESSOR_GFX908) EnumValue Enum(gpu_type) String(gfx90a) Value(PROCESSOR_GFX90a) +EnumValue +Enum(gpu_type) String(gfx1030) Value(PROCESSOR_GFX1030) + march= Target RejectNegative Joined ToLower Enum(gpu_type) Var(gcn_arch) Init(PROCESSOR_FIJI) Specify the name of the target GPU. 
diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc index 8b608bf024efd07e39b8bd4dc1e3b90895be45af..f6d56b798e149b391c0d2f4757bd78689722810d 100644 --- a/gcc/config/gcn/mkoffload.cc +++ b/gcc/config/gcn/mkoffload.cc @@ -57,6 +57,8 @@ #define EF_AMDGPU_MACH_AMDGCN_GFX908 0x30 #undef EF_AMDGPU_MACH_AMDGCN_GFX90a #define EF_AMDGPU_MACH_AMDGCN_GFX90a 0x3f +#undef EF_AMDGPU_MACH_AMDGCN_GFX1030 +#define EF_AMDGPU_MACH_AMDGCN_GFX1030 0x36 #define EF_AMDGPU_FEATURE_XNACK_V4 0x300 /* Mask. */ #define EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 0x000 @@ -942,6 +944,8 @@ main (int argc, char **argv) elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX908; else if (strcmp (argv[i], "-march=gfx90a") == 0) elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX90a; + else if (strcmp (argv[i], "-march=gfx1030") == 0) + elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX1030; #define STR "-mstack-size=" else if (startswith (argv[i], STR)) gcn_stack_size = atoi (argv[i] + strlen (STR)); diff --git a/gcc/config/gcn/t-omp-device b/gcc/config/gcn/t-omp-device index 538624f7ec7d77a1c94e399f85287f282740a905..b1cd998a8b1c4b210107df40e362a37cdcf82577 100644 --- a/gcc/config/gcn/t-omp-device +++ b/gcc/config/gcn/t-omp-device @@ -1,4 +1,4 @@ omp-device-properties-gcn: $(srcdir)/config/gcn/gcn.cc echo kind: gpu > $@ echo arch: amdgcn gcn >> $@ - echo isa: fiji gfx803 gfx900 gfx906 gfx908 gfx90a >> $@ + echo isa: fiji gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 >> $@ diff --git a/libgcc/config/gcn/amdgcn_veclib.h b/libgcc/config/gcn/amdgcn_veclib.h index 15ea20bcd557d8b4e765f422bec49a192a1cb899..88df5c7df914a46c707fb1dd63999608d0921f41 100644 --- a/libgcc/config/gcn/amdgcn_veclib.h +++ b/libgcc/config/gcn/amdgcn_veclib.h @@ -229,7 +229,8 @@ do { \ #if defined (__GCN3__) || defined (__GCN5__) \ - || defined (__CDNA1__) || defined (__CDNA2__) + || defined (__CDNA1__) || defined (__CDNA2__) \ + || defined (__RDNA2__) #define CDNA3_PLUS 0 #else #define CDNA3_PLUS 1 diff --git a/libgomp/plugin/plugin-gcn.c 
b/libgomp/plugin/plugin-gcn.c index ef22d48da795de670c307f32f6922f00be0e9bfc..4328d3de14e368a42153fc9ef2d511fd7bc228ad 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -377,7 +377,8 @@ typedef enum { EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c, EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f, EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030, - EF_AMDGPU_MACH_AMDGCN_GFX90a = 0x03f + EF_AMDGPU_MACH_AMDGCN_GFX90a = 0x03f, + EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036 } EF_AMDGPU_MACH; const static int EF_AMDGPU_MACH_MASK = 0x000000ff; @@ -1633,6 +1634,7 @@ const static char *gcn_gfx900_s = "gfx900"; const static char *gcn_gfx906_s = "gfx906"; const static char *gcn_gfx908_s = "gfx908"; const static char *gcn_gfx90a_s = "gfx90a"; +const static char *gcn_gfx1030_s = "gfx1030"; const static int gcn_isa_name_len = 6; /* Returns the name that the HSA runtime uses for the ISA or NULL if we do not @@ -1652,6 +1654,8 @@ isa_hsa_name (int isa) { return gcn_gfx908_s; case EF_AMDGPU_MACH_AMDGCN_GFX90a: return gcn_gfx90a_s; + case EF_AMDGPU_MACH_AMDGCN_GFX1030: + return gcn_gfx1030_s; } return NULL; } @@ -1691,6 +1695,10 @@ isa_code(const char *isa) { if (!strncmp (isa, gcn_gfx90a_s, gcn_isa_name_len)) return EF_AMDGPU_MACH_AMDGCN_GFX90a; + /* "gfx1030" is longer than gcn_isa_name_len, so compare the whole name; otherwise any "gfx103x" agent would be taken for gfx1030. */ + if (!strncmp (isa, gcn_gfx1030_s, strlen (gcn_gfx1030_s))) + return EF_AMDGPU_MACH_AMDGCN_GFX1030; + return -1; } diff --git a/libgomp/team.c b/libgomp/team.c index b4fd6f2704c394fab6bd7ffdd0f08267608de42c..0edc6e5bf2875c1661d73022eef2f5d9cb5c406d 100644 --- a/libgomp/team.c +++ b/libgomp/team.c @@ -253,8 +253,7 @@ gomp_free_pool_helper (void *thread_pool) #elif defined(__nvptx__) asm ("exit;"); #elif defined(__AMDGCN__) - asm ("s_dcache_wb\n\t" - "s_endpgm"); + asm ("s_endpgm"); #else #error gomp_free_pool_helper must terminate the thread #endif