diff --git a/gcc/Makefile.in b/gcc/Makefile.in index abdeb8f563d6c8265f77b3d905214d3ba4f5a7c4..51c25b06e081cad15ab25a91779967d026c54f29 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -3052,6 +3052,7 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/tree-ssa-operands.h \ $(srcdir)/tree-profile.cc $(srcdir)/tree-nested.cc \ $(srcdir)/omp-offload.h \ + $(srcdir)/omp-general.h \ $(srcdir)/omp-general.cc \ $(srcdir)/omp-low.cc \ $(srcdir)/targhooks.cc $(out_file) $(srcdir)/passes.cc \ @@ -3078,7 +3079,6 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/ipa-strub.cc \ $(srcdir)/internal-fn.h \ $(srcdir)/calls.cc \ - $(srcdir)/omp-general.h \ $(srcdir)/analyzer/analyzer-language.cc \ @all_gtfiles@ diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc index d2f45912cc43f2ba67b207dea21318cda2ef61d8..36c8f79320e0ef11f3085a30d2c526a1fecfbadf 100644 --- a/gcc/c/c-parser.cc +++ b/gcc/c/c-parser.cc @@ -26933,7 +26933,7 @@ c_finish_omp_declare_variant (c_parser *parser, tree fndecl, tree parms) ctx = c_parser_omp_context_selector_specification (parser, parms); if (ctx == error_mark_node) goto fail; - ctx = omp_check_context_selector (match_loc, ctx); + ctx = omp_check_context_selector (match_loc, ctx, false); if (ctx != error_mark_node && variant != error_mark_node) { if (TREE_CODE (variant) != FUNCTION_DECL) @@ -27195,7 +27195,7 @@ c_finish_omp_declare_variant (c_parser *parser, tree fndecl, tree parms) tree construct = omp_get_context_selector_list (ctx, OMP_TRAIT_SET_CONSTRUCT); omp_mark_declare_variant (match_loc, variant, construct); - if (omp_context_selector_matches (ctx)) + if (omp_context_selector_matches (ctx, NULL_TREE, false)) { tree attr = tree_cons (get_identifier ("omp declare variant base"), build_tree_list (variant, ctx), diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 123cebbbba9b03511e1d90d1d02732d3c4d14309..464d33f213cb30c8f77f9f032792463ef44288e7 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -904,6 +904,7 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node ipcp_clone (false), declare_variant_alt (false), calls_declare_variant_alt (false), gc_candidate (false), called_by_ifunc_resolver (false), + has_omp_variant_constructs (false), m_uid (uid), m_summary_id (-1) {} @@ -1505,6 +1506,8 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node unsigned gc_candidate : 1; /* Set if the function is called by an IFUNC resolver. */ unsigned called_by_ifunc_resolver : 1; + /* True if the function contains unresolved OpenMP metadirectives. */ + unsigned has_omp_variant_constructs : 1; private: /* Unique id of the node. */ diff --git a/gcc/cgraphclones.cc b/gcc/cgraphclones.cc index cff7776241a87162beefa360a98fefdaeb86dbbe..cc46b6e39cc79026089e6d553ff8ae92cea9346d 100644 --- a/gcc/cgraphclones.cc +++ b/gcc/cgraphclones.cc @@ -389,6 +389,7 @@ cgraph_node::create_clone (tree new_decl, profile_count prof_count, prof_count = count.combine_with_ipa_count (prof_count); new_node->count = prof_count; new_node->calls_declare_variant_alt = this->calls_declare_variant_alt; + new_node->has_omp_variant_constructs = this->has_omp_variant_constructs; /* Update IPA profile. Local profiles need no updating in original. */ if (update_original) diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc index 5c6a4996a89edcdc5a0fcb04668507872e635b2b..90e9a7fc7cb56baa321aef8fccb5960a7b09e954 100644 --- a/gcc/cp/decl.cc +++ b/gcc/cp/decl.cc @@ -8626,7 +8626,7 @@ omp_declare_variant_finalize_one (tree decl, tree attr) tree construct = omp_get_context_selector_list (ctx, OMP_TRAIT_SET_CONSTRUCT); omp_mark_declare_variant (match_loc, variant, construct); - if (!omp_context_selector_matches (ctx)) + if (!omp_context_selector_matches (ctx, NULL_TREE, false)) return true; TREE_PURPOSE (TREE_VALUE (attr)) = variant; diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index 9600b14091642c47d50265ce1f0fbb47c610e04d..a91c7aae3757453867e9705cc1e4adbcf7a3b154 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -50195,7 +50195,7 @@ cp_finish_omp_declare_variant (cp_parser *parser, cp_token *pragma_tok, ctx = cp_parser_omp_context_selector_specification (parser, true); if (ctx == error_mark_node) goto fail; - ctx = omp_check_context_selector (match_loc, ctx); + ctx = omp_check_context_selector (match_loc, ctx, false); if (ctx != error_mark_node && variant != error_mark_node) { tree match_loc_node diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc index 635fcfda356a2ac93793b46957df50d7e956f6ca..2c6192820cc69cf9efe4dbb4e1e2e88b346b2fbf 100644 --- a/gcc/fortran/trans-openmp.cc +++ b/gcc/fortran/trans-openmp.cc @@ -8760,7 +8760,7 @@ gfc_trans_omp_declare_variant (gfc_namespace *ns) continue; } set_selectors = omp_check_context_selector - (gfc_get_location (&odv->where), set_selectors); + (gfc_get_location (&odv->where), set_selectors, false); if (set_selectors != error_mark_node) { if (!variant_proc_sym->attr.implicit_type @@ -8809,7 +8809,8 @@ gfc_trans_omp_declare_variant (gfc_namespace *ns) omp_mark_declare_variant (gfc_get_location (&odv->where), gfc_get_symbol_decl (variant_proc_sym), construct); - if (omp_context_selector_matches (set_selectors)) + if (omp_context_selector_matches (set_selectors, + NULL_TREE, false)) { tree id = get_identifier ("omp declare variant base"); tree variant = gfc_get_symbol_decl (variant_proc_sym); diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc index 58a9d2a748d6c0716e08cf22fcc874260b6e76f4..7b9bf41e3d3f2c4304ec4c418483ca525c86ddf7 100644 --- a/gcc/gimplify.cc +++ b/gcc/gimplify.cc @@ -120,6 +120,11 @@ tree_associate_condition_with_expr (tree stmt, unsigned uid) /* Hash set of poisoned variables in a bind expr. */ static hash_set<tree> *asan_poisoned_variables = NULL; +/* Hash set of already-resolved calls to OpenMP "declare variant" + functions. A call can resolve to the original function and + we don't want to repeat the resolution multiple times. */ +static hash_set<tree> *omp_resolved_variant_calls = NULL; + enum gimplify_omp_var_data { GOVD_SEEN = 0x000001, @@ -3847,12 +3852,180 @@ maybe_fold_stmt (gimple_stmt_iterator *gsi) return fold_stmt (gsi); } +static tree +expand_late_variant_directive (vec<struct omp_variant> all_candidates, + tree construct_context); + + +/* Helper function for calls to omp_dynamic_cond: find the current + enclosing block in the gimplification context. */ +static tree +find_supercontext (void) +{ + vec<gbind *>stack = gimple_bind_expr_stack (); + for (int i = stack.length () - 1; i >= 0; i++) + { + gbind *b = stack[i]; + if (b->block) + return b->block; + } + return NULL_TREE; +} + + +/* Helper function for gimplify_call_expr: handle "declare variant" + resolution and expansion. Arguments are as for gimplify_call_expr. + If *EXPR_P is unchanged, the return value should be ignored and the + normal gimplify_call_expr handling should be applied. Otherwise GS_OK + is returned if the new *EXPR_P is something that needs to be further + gimplified. */ + +static enum gimplify_status +gimplify_variant_call_expr (tree *expr_p, gimple_seq *pre_p, + fallback_t fallback) +{ + /* If we've already processed this call, stop now. This can happen + if the variant call resolves to the original function, or to + a dynamic conditional that includes the default call to the original + function. */ + gcc_assert (omp_resolved_variant_calls != NULL); + if (omp_resolved_variant_calls->contains (*expr_p)) + return GS_OK; + + tree fndecl = get_callee_fndecl (*expr_p); + tree fnptrtype = TREE_TYPE (CALL_EXPR_FN (*expr_p)); + location_t loc = EXPR_LOCATION (*expr_p); + tree construct_context = omp_get_construct_context (); + vec<struct omp_variant> all_candidates + = omp_declare_variant_candidates (fndecl, construct_context); + gcc_assert (!all_candidates.is_empty ()); + vec<struct omp_variant> candidates + = omp_get_dynamic_candidates (all_candidates, construct_context); + + /* If the variant call could be resolved now, build a nest of COND_EXPRs + if there are dynamic candidates, and/or a new CALL_EXPR for each + candidate call. */ + if (!candidates.is_empty ()) + { + int n = candidates.length (); + tree tail = NULL_TREE; + + for (int i = n - 1; i >= 0; i--) + { + if (tail) + gcc_assert (candidates[i].dynamic_selector); + else + gcc_assert (!candidates[i].dynamic_selector); + if (candidates[i].alternative == fndecl) + { + /* We should only get the original function back as the + default. */ + gcc_assert (!tail); + omp_resolved_variant_calls->add (*expr_p); + tail = *expr_p; + } + else + { + /* For the final static selector, we can re-use the old + CALL_EXPR and just replace the function. Otherwise, + make a copy of it. */ + tree thiscall = tail ? unshare_expr (*expr_p) : *expr_p; + CALL_EXPR_FN (thiscall) = build1 (ADDR_EXPR, fnptrtype, + candidates[i].alternative); + if (!tail) + tail = thiscall; + else + tail = build3 (COND_EXPR, TREE_TYPE (*expr_p), + omp_dynamic_cond (candidates[i].selector, + find_supercontext ()), + thiscall, tail); + } + } + *expr_p = tail; + return GS_OK; + } + + /* If we couldn't resolve the variant call now, expand it into a loop using + a switch and OMP_NEXT_VARIANT for dispatch. The ompdevlow pass will + handle OMP_NEXT_VARIANT expansion. */ + else + { + /* If we need a usable return value, we need a temporary + and an assignment in each alternative. This logic was borrowed + from gimplify_cond_expr. */ + tree type = TREE_TYPE (*expr_p); + bool want_value = (fallback != fb_none && !VOID_TYPE_P (type)); + bool pointerize = false; + tree tmp = NULL_TREE, result = NULL_TREE; + + if (want_value) + { + /* If either an rvalue is ok or we do not require an lvalue, + create the temporary. But we cannot do that if the type is + addressable. */ + if (((fallback & fb_rvalue) || !(fallback & fb_lvalue)) + && !TREE_ADDRESSABLE (type)) + { + tmp = create_tmp_var (type, "iftmp"); + result = tmp; + } + + /* Otherwise, only create and copy references to the values. */ + else + { + pointerize = true; + type = build_pointer_type (type); + tmp = create_tmp_var (type, "iftmp"); + result = build_simple_mem_ref_loc (loc, tmp); + } + } + + /* Preprocess the all_candidates array so that the alternative field of + each element holds the actual function call expression and possible + assignment, instead of just the decl for the variant function. */ + for (unsigned int i = 0; i < all_candidates.length (); i++) + { + tree decl = all_candidates[i].alternative; + tree thiscall; + + /* We need to turn the decl from the candidate into a function + call and possible assignment, gimplify it, and stuff that in + the directive seq of the gomp_variant. */ + if (decl == fndecl) + { + thiscall = *expr_p; + omp_resolved_variant_calls->add (*expr_p); + } + else + { + thiscall = unshare_expr (*expr_p); + CALL_EXPR_FN (thiscall) = build1 (ADDR_EXPR, fnptrtype, decl); + } + if (pointerize) + thiscall = build_fold_addr_expr_loc (loc, thiscall); + if (want_value) + thiscall = build2 (INIT_EXPR, type, tmp, thiscall); + all_candidates[i].alternative = thiscall; + } + + cgraph_node::get (cfun->decl)->has_omp_variant_constructs = 1; + tree expansion = expand_late_variant_directive (all_candidates, + construct_context); + for (tree_stmt_iterator tsi = tsi_start (expansion); !tsi_end_p (tsi); + tsi_delink (&tsi)) + gimplify_stmt (tsi_stmt_ptr (tsi), pre_p); + *expr_p = result; + return GS_ALL_DONE; + } +} + /* Gimplify the CALL_EXPR node *EXPR_P into the GIMPLE sequence PRE_P. WANT_VALUE is true if the result of the call is desired. */ static enum gimplify_status -gimplify_call_expr (tree *expr_p, gimple_seq *pre_p, bool want_value) +gimplify_call_expr (tree *expr_p, gimple_seq *pre_p, fallback_t fallback) { + bool want_value = (fallback != fb_none); tree fndecl, parms, p, fnptrtype; enum gimplify_status ret; int i, nargs; @@ -4029,17 +4202,43 @@ gimplify_call_expr (tree *expr_p, gimple_seq *pre_p, bool want_value) /* Remember the original function pointer type. */ fnptrtype = TREE_TYPE (CALL_EXPR_FN (*expr_p)); + /* Handle "declare variant" substitution. */ if (flag_openmp && fndecl && cfun - && (cfun->curr_properties & PROP_gimple_any) == 0) - { - tree variant = omp_resolve_declare_variant (fndecl); - if (variant != fndecl) + && (cfun->curr_properties & PROP_gimple_any) == 0 + && !omp_has_novariants () + && lookup_attribute ("omp declare variant base", + DECL_ATTRIBUTES (fndecl))) + { + tree orig = *expr_p; + enum gimplify_status ret + = gimplify_variant_call_expr (expr_p, pre_p, fallback); + /* This may resolve to the same call, or the call expr with just + the function replaced, in which case we should just continue to + gimplify it normally. Otherwise, if we get something else back, + stop here and re-gimplify the whole replacement expr. */ + if (*expr_p != orig) { - CALL_EXPR_FN (*expr_p) = build1 (ADDR_EXPR, fnptrtype, variant); - variant_substituted_p = true; + /* FIXME: The dispatch construct argument-munging code below + breaks when variant substitution returns a conditional + instead of just a (possibly modified) CALL_EXPR. The "right" + solution is probably to move the argument-munging to + a separate function called from gimplify_variant_call_expr, + where we generate the new calls. That would also be more + satisfying from an engineering perspective as it would get + the large blob of complicated OpenMP-specific code out of + general function gimplification here. See PR 118457. */ + if (omp_dispatch_p + && gimplify_omp_ctxp != NULL + && !gimplify_omp_ctxp->in_call_args) + sorry_at (EXPR_LOCATION (orig), + "late or dynamic variant resolution required for " + "call in a %<dispatch%> construct"); + return ret; } + if (get_callee_fndecl (*expr_p) != fndecl) + variant_substituted_p = true; } /* There is a sequence point before the call, so any side effects in @@ -6741,6 +6940,7 @@ is_gimple_stmt (tree t) case OMP_TASKGROUP: case OMP_ORDERED: case OMP_CRITICAL: + case OMP_METADIRECTIVE: case OMP_TASK: case OMP_TARGET: case OMP_TARGET_DATA: @@ -15192,6 +15392,7 @@ omp_has_nocontext (void) return 0; } +#if 0 /* Return 0 if CONSTRUCTS selectors don't match the OpenMP context, -1 if unknown yet (simd is involved, won't be known until vectorization) and 1 if they do. If SCORES is non-NULL, it should point to an array @@ -15338,6 +15539,78 @@ omp_construct_selector_matches (enum tree_code *constructs, int nconstructs, return simd_seen ? -1 : 1; return 0; } +#endif + +/* Collect a list of traits for enclosing constructs in the current + OpenMP context. The list is in the same format as the trait selector + list of construct trait sets built by the front ends. + + Per the OpenMP specification, the construct trait set includes constructs + up to an enclosing "target" construct. If there is no "target" construct, + then additional things may be added to the construct trait set (simd for + simd clones, additional constructs associated with "declare variant", + the target trait for "declare target"); those are not handled here. + In particular simd clones are not known during gimplification so + matching/scoring of context selectors that might involve them needs + to be deferred to the omp_device_lower pass. */ + +tree +omp_get_construct_context (void) +{ + tree result = NULL_TREE; + for (struct gimplify_omp_ctx *ctx = gimplify_omp_ctxp; ctx;) + { + if (((ctx->region_type & (ORT_TARGET | ORT_IMPLICIT_TARGET | ORT_ACC)) + == ORT_TARGET) + && ctx->code == OMP_TARGET) + { + result = make_trait_selector (OMP_TRAIT_CONSTRUCT_TARGET, + NULL_TREE, NULL_TREE, result); + /* We're not interested in any outer constructs. */ + break; + } + else if ((ctx->region_type & ORT_PARALLEL) && ctx->code == OMP_PARALLEL) + result = make_trait_selector (OMP_TRAIT_CONSTRUCT_PARALLEL, + NULL_TREE, NULL_TREE, result); + else if ((ctx->region_type & ORT_TEAMS) && ctx->code == OMP_TEAMS) + result = make_trait_selector (OMP_TRAIT_CONSTRUCT_TEAMS, + NULL_TREE, NULL_TREE, result); + else if (ctx->region_type == ORT_WORKSHARE && ctx->code == OMP_FOR) + result = make_trait_selector (OMP_TRAIT_CONSTRUCT_FOR, + NULL_TREE, NULL_TREE, result); + else if (ctx->code == OMP_DISPATCH && omp_has_nocontext () != 1) + result = make_trait_selector (OMP_TRAIT_CONSTRUCT_DISPATCH, + NULL_TREE, NULL_TREE, result); + else if (ctx->region_type == ORT_SIMD + && ctx->code == OMP_SIMD + && !omp_find_clause (ctx->clauses, OMP_CLAUSE_BIND)) + { + tree props = NULL_TREE; + tree *last = &props; + for (tree c = ctx->clauses; c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SIMDLEN + || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_INBRANCH + || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NOTINBRANCH) + { + *last = unshare_expr (c); + last = &(OMP_CLAUSE_CHAIN (c)); + } + result = make_trait_selector (OMP_TRAIT_CONSTRUCT_SIMD, + NULL_TREE, props, result); + } + else if (ctx->region_type == ORT_WORKSHARE + && ctx->code == OMP_LOOP + && ctx->outer_context + && ctx->outer_context->region_type == ORT_COMBINED_PARALLEL + && ctx->outer_context->outer_context + && ctx->outer_context->outer_context->code == OMP_LOOP + && ctx->outer_context->outer_context->distribute) + ctx = ctx->outer_context->outer_context; + ctx = ctx->outer_context; + } + + return result; +} /* Gimplify OACC_CACHE. */ @@ -18476,7 +18749,15 @@ gimplify_omp_dispatch (tree *expr_p, gimple_seq *pre_p) DECL_NAME (base_fndecl)); } - tree variant_fndecl = omp_resolve_declare_variant (base_fndecl); + tree construct_context = omp_get_construct_context (); + vec<struct omp_variant> all_candidates + = omp_declare_variant_candidates (base_fndecl, construct_context); + gcc_assert (!all_candidates.is_empty ()); + vec<struct omp_variant> candidates + = omp_get_dynamic_candidates (all_candidates, construct_context); + tree variant_fndecl + = (candidates.length () == 1 ? candidates[0].alternative : NULL_TREE); + if (base_fndecl != variant_fndecl && (omp_has_novariants () == -1 || omp_has_nocontext () == -1)) { @@ -18638,6 +18919,228 @@ gimplify_omp_dispatch (tree *expr_p, gimple_seq *pre_p) return GS_ALL_DONE; } +/* Expand a metadirective that has been resolved at gimplification time + into the candidate directive variants in CANDIDATES. */ + +static enum gimplify_status +expand_omp_metadirective (vec<struct omp_variant> &candidates, + gimple_seq *pre_p) +{ + auto_vec<tree> selectors; + auto_vec<tree> directive_labels; + auto_vec<gimple_seq> directive_bodies; + tree body_label = NULL_TREE; + tree end_label = create_artificial_label (UNKNOWN_LOCATION); + + /* Construct bodies for each candidate. */ + for (unsigned i = 0; i < candidates.length(); i++) + { + struct omp_variant &candidate = candidates[i]; + gimple_seq body = NULL; + + selectors.safe_push (omp_dynamic_cond (candidate.selector, + find_supercontext ())); + directive_labels.safe_push (create_artificial_label (UNKNOWN_LOCATION)); + + gimplify_seq_add_stmt (&body, + gimple_build_label (directive_labels.last ())); + if (candidate.alternative != NULL_TREE) + gimplify_stmt (&candidate.alternative, &body); + if (candidate.body != NULL_TREE) + { + if (body_label != NULL_TREE) + gimplify_seq_add_stmt (&body, gimple_build_goto (body_label)); + else + { + body_label = create_artificial_label (UNKNOWN_LOCATION); + gimplify_seq_add_stmt (&body, gimple_build_label (body_label)); + gimplify_stmt (&candidate.body, &body); + } + } + + directive_bodies.safe_push (body); + } + + auto_vec<tree> cond_labels; + + cond_labels.safe_push (NULL_TREE); + for (unsigned i = 1; i < candidates.length () - 1; i++) + cond_labels.safe_push (create_artificial_label (UNKNOWN_LOCATION)); + if (candidates.length () > 1) + cond_labels.safe_push (directive_labels.last ()); + + /* Generate conditionals to test each dynamic selector in turn, executing + the directive candidate if successful. */ + for (unsigned i = 0; i < candidates.length () - 1; i++) + { + if (i != 0) + gimplify_seq_add_stmt (pre_p, gimple_build_label (cond_labels [i])); + + enum gimplify_status ret = gimplify_expr (&selectors[i], pre_p, NULL, + is_gimple_val, fb_rvalue); + if (ret == GS_ERROR || ret == GS_UNHANDLED) + return ret; + + gcond *cond_stmt + = gimple_build_cond_from_tree (selectors[i], directive_labels[i], + cond_labels[i + 1]); + + gimplify_seq_add_stmt (pre_p, cond_stmt); + gimplify_seq_add_seq (pre_p, directive_bodies[i]); + gimplify_seq_add_stmt (pre_p, gimple_build_goto (end_label)); + } + + gimplify_seq_add_seq (pre_p, directive_bodies.last ()); + gimplify_seq_add_stmt (pre_p, gimple_build_label (end_label)); + + return GS_ALL_DONE; +} + +/* Expand a variant construct that requires late resolution in the ompdevlow + pass. It's a bit easier to do this in tree form and then gimplify that, + than to emit gimple. The output is going to look something like: + + switch_var = OMP_NEXT_VARIANT (0, state); + loop_label: + switch (switch_var) + { + case 1: + if (dynamic_selector_predicate_1) + { + alternative_1; + goto end_label; + } + else + { + switch_var = OMP_NEXT_VARIANT (1, state); + goto loop_label; + } + case 2: + ... + } + end_label: + + OMP_NEXT_VARIANT is a magic cookie that is replaced with the switch variable + index of the next variant to try, after late resolution. */ + +static tree +expand_late_variant_directive (vec<struct omp_variant> all_candidates, + tree construct_context) +{ + tree body_label = NULL_TREE; + tree standalone_body = NULL_TREE; + tree loop_label = create_artificial_label (UNKNOWN_LOCATION); + tree end_label = create_artificial_label (UNKNOWN_LOCATION); + tree selectors = make_tree_vec (all_candidates.length ()); + tree switch_body = NULL_TREE; + tree switch_var = create_tmp_var (integer_type_node, "variant"); + tree state = tree_cons (NULL_TREE, construct_context, selectors); + + for (unsigned int i = 0; i < all_candidates.length (); i++) + { + tree selector = all_candidates[i].selector; + tree alternative = all_candidates[i].alternative; + tree body = all_candidates[i].body; + TREE_VEC_ELT (selectors, i) = selector; + + /* Case label. Numbering is 1-based. */ + tree case_val = build_int_cst (integer_type_node, i + 1); + tree case_label + = build_case_label (case_val, NULL_TREE, + create_artificial_label (UNKNOWN_LOCATION)); + append_to_statement_list (case_label, &switch_body); + + /* The actual body of the variant. */ + tree variant_body = NULL_TREE; + append_to_statement_list (alternative, &variant_body); + + if (body != NULL_TREE) + { + if (standalone_body == NULL) + { + standalone_body = body; + body_label = create_artificial_label (UNKNOWN_LOCATION); + } + append_to_statement_list (build1 (GOTO_EXPR, void_type_node, + body_label), + &variant_body); + } + else + append_to_statement_list (build1 (GOTO_EXPR, void_type_node, + end_label), + &variant_body); + + /* If this is a dynamic selector, wrap variant_body with a conditional. + If the predicate doesn't match, the else clause sets switch_var and + jumps to loop_var to try again. */ + tree dynamic_selector = omp_dynamic_cond (selector, find_supercontext ()); + if (dynamic_selector) + { + tree else_stmt = NULL_TREE; + tree next = build2 (OMP_NEXT_VARIANT, integer_type_node, + case_val, state); + append_to_statement_list (build2 (MODIFY_EXPR, integer_type_node, + switch_var, next), + &else_stmt); + append_to_statement_list (build1 (GOTO_EXPR, void_type_node, + loop_label), + &else_stmt); + variant_body = build3 (COND_EXPR, void_type_node, dynamic_selector, + variant_body, else_stmt); + } + append_to_statement_list (variant_body, &switch_body); + } + + /* Put it all together. */ + tree result = NULL_TREE; + tree first = build2 (OMP_NEXT_VARIANT, integer_type_node, integer_zero_node, + state); + append_to_statement_list (build2 (MODIFY_EXPR, integer_type_node, + switch_var, first), + &result); + append_to_statement_list (build1 (LABEL_EXPR, void_type_node, loop_label), + &result); + append_to_statement_list (build2 (SWITCH_EXPR, integer_type_node, + switch_var, switch_body), + &result); + if (standalone_body) + { + append_to_statement_list (build1 (LABEL_EXPR, void_type_node, + body_label), + &result); + append_to_statement_list (standalone_body, &result); + } + append_to_statement_list (build1 (LABEL_EXPR, void_type_node, end_label), + &result); + cgraph_node::get (cfun->decl)->has_omp_variant_constructs = 1; + return result; +} + + +/* Gimplify an OMP_METADIRECTIVE construct. EXPR is the tree version. + The metadirective will be resolved at this point if possible, otherwise + a GIMPLE_OMP_VARIANT_CONSTRUCT is created. */ + +static enum gimplify_status +gimplify_omp_metadirective (tree *expr_p, gimple_seq *pre_p, gimple_seq *, + bool (*) (tree), fallback_t) +{ + /* Try to resolve the metadirective. */ + tree construct_context = omp_get_construct_context (); + vec<struct omp_variant> all_candidates + = omp_metadirective_candidates (*expr_p, construct_context); + vec<struct omp_variant> candidates + = omp_get_dynamic_candidates (all_candidates, construct_context); + if (!candidates.is_empty ()) + return expand_omp_metadirective (candidates, pre_p); + + /* The metadirective cannot be resolved yet. Turn it into a loop with + a nested switch statement, using OMP_NEXT_VARIANT to set the control + variable for the switch. */ + *expr_p = expand_late_variant_directive (all_candidates, construct_context); + return GS_OK; +} + /* Convert the GENERIC expression tree *EXPR_P to GIMPLE. If the expression produces a value to be used as an operand inside a GIMPLE statement, the value will be stored back in *EXPR_P. This value will @@ -18877,7 +19380,7 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, break; case CALL_EXPR: - ret = gimplify_call_expr (expr_p, pre_p, fallback != fb_none); + ret = gimplify_call_expr (expr_p, pre_p, fallback); /* C99 code may assign to an array in a structure returned from a function, and this has undefined behavior only on @@ -19585,6 +20088,22 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, ret = gimplify_omp_dispatch (expr_p, pre_p); break; + case OMP_METADIRECTIVE: + ret = gimplify_omp_metadirective (expr_p, pre_p, post_p, + gimple_test_f, fallback); + break; + + case OMP_NEXT_VARIANT: + case OMP_TARGET_DEVICE_MATCHES: + /* These are placeholders for constants. There's nothing to do with + them here but we must mark the containing function as needing + to run the ompdevlow pass to resolve them. Note that + OMP_TARGET_DEVICE_MATCHES, in particular, may be inserted by + the front ends. */ + cgraph_node::get (cfun->decl)->has_omp_variant_constructs = 1; + ret = GS_ALL_DONE; + break; + case TRANSACTION_EXPR: ret = gimplify_transaction (expr_p, pre_p); break; @@ -20403,7 +20922,16 @@ gimplify_function_tree (tree fndecl) if (asan_sanitize_use_after_scope ()) asan_poisoned_variables = new hash_set<tree> (); + if (flag_openmp) + omp_resolved_variant_calls = new hash_set<tree> (); + bind = gimplify_body (fndecl, true); + + if (omp_resolved_variant_calls) + { + delete omp_resolved_variant_calls; + omp_resolved_variant_calls = NULL; + } if (asan_poisoned_variables) { delete asan_poisoned_variables; diff --git a/gcc/gimplify.h b/gcc/gimplify.h index 148fb8ff32b029a0cb0f10ecb52800065ffff42e..b66ceb3ce03a82cf0a805d89abb649e7b8f5e19e 100644 --- a/gcc/gimplify.h +++ b/gcc/gimplify.h @@ -76,7 +76,7 @@ extern void omp_firstprivatize_variable (struct gimplify_omp_ctx *, tree); extern enum gimplify_status gimplify_expr (tree *, gimple_seq *, gimple_seq *, bool (*) (tree), fallback_t); -int omp_construct_selector_matches (enum tree_code *, int, int *); +extern tree omp_get_construct_context (void); int omp_has_novariants (void); extern void gimplify_type_sizes (tree, gimple_seq *); diff --git a/gcc/lto-cgraph.cc b/gcc/lto-cgraph.cc index f3098dbc833f586aad4f9bad7d3c1071bdf10338..d6b0b8470fa617837a52beb6a33ffe8b6eded43e 100644 --- a/gcc/lto-cgraph.cc +++ b/gcc/lto-cgraph.cc @@ -552,6 +552,7 @@ lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node, bp_pack_value (&bp, node->parallelized_function, 1); bp_pack_value (&bp, node->declare_variant_alt, 1); bp_pack_value (&bp, node->calls_declare_variant_alt, 1); + bp_pack_value (&bp, node->has_omp_variant_constructs, 1); /* Stream thunk info always because we use it in ipa_polymorphic_call_context::ipa_polymorphic_call_context @@ -1260,6 +1261,7 @@ input_overwrite_node (struct lto_file_decl_data *file_data, node->parallelized_function = bp_unpack_value (bp, 1); node->declare_variant_alt = bp_unpack_value (bp, 1); node->calls_declare_variant_alt = bp_unpack_value (bp, 1); + node->has_omp_variant_constructs = bp_unpack_value (bp, 1); *has_thunk_info = bp_unpack_value (bp, 1); node->resolution = bp_unpack_enum (bp, ld_plugin_symbol_resolution, LDPR_NUM_KNOWN); diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def index b49e1a89bcc79ace8be4afc0fac23b6366cc6cfd..96de7077e79011f5b483f4dd948408e20df048ed 100644 --- a/gcc/omp-builtins.def +++ b/gcc/omp-builtins.def @@ -88,6 +88,8 @@ DEF_GOMP_BUILTIN (BUILT_IN_OMP_SET_DEFAULT_DEVICE, "omp_set_default_device", BT_FN_INT, ATTR_NOTHROW_LEAF_LIST) DEF_GOMP_BUILTIN (BUILT_IN_OMP_GET_INTEROP_INT, "omp_get_interop_int", BT_FN_PTRMODE_PTR_INT_PTR, ATTR_NOTHROW_LEAF_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_OMP_GET_NUM_DEVICES, "omp_get_num_devices", + BT_FN_INT, ATTR_NOTHROW_LEAF_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_ATOMIC_START, "GOMP_atomic_start", BT_FN_VOID, ATTR_NOTHROW_LEAF_LIST) diff --git a/gcc/omp-expand.cc b/gcc/omp-expand.cc index 0653d050db0f99e3eabcd8a5f971bd82b03a508b..0ce3e0b36804d68cda85f80ffd0a05ec682fea9a 100644 --- a/gcc/omp-expand.cc +++ b/gcc/omp-expand.cc @@ -1510,6 +1510,8 @@ expand_omp_taskreg (struct omp_region *region) child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops; cgraph_node *node = cgraph_node::get_create (child_fn); node->parallelized_function = 1; + node->has_omp_variant_constructs + |= cgraph_node::get (cfun->decl)->has_omp_variant_constructs; cgraph_node::add_new_function (child_fn, true); bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl) @@ -10051,6 +10053,8 @@ expand_omp_target (struct omp_region *region) child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops; cgraph_node *node = cgraph_node::get_create (child_fn); node->parallelized_function = 1; + node->has_omp_variant_constructs + |= cgraph_node::get (cfun->decl)->has_omp_variant_constructs; cgraph_node::add_new_function (child_fn, true); /* Add the new function to the offload table. */ diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc index 4aab347f7b38f4d1c3bed3df873210a59df30c61..eafa4943073dde3056defa4232e011e2644735b9 100644 --- a/gcc/omp-general.cc +++ b/gcc/omp-general.cc @@ -1121,29 +1121,37 @@ omp_offload_device_kind_arch_isa (const char *props, const char *prop) region or when unsure, return false otherwise. */ static bool -omp_maybe_offloaded (void) +omp_maybe_offloaded (tree construct_context) { + /* No offload targets available? */ if (!ENABLE_OFFLOADING) return false; const char *names = getenv ("OFFLOAD_TARGET_NAMES"); if (names == NULL || *names == '\0') return false; + /* Parsing is too early to tell. */ if (symtab->state == PARSING) /* Maybe. */ return true; + + /* Late resolution of offloaded code happens in the offload compiler, + where it's treated as native code instead. So return false here. */ if (cfun && cfun->after_inlining) return false; + + /* Check if the function is marked for offloading (either explicitly + or via omp_discover_implicit_declare_target). */ if (current_function_decl && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (current_function_decl))) return true; - if (cfun && (cfun->curr_properties & PROP_gimple_any) == 0) - { - enum tree_code construct = OMP_TARGET; - if (omp_construct_selector_matches (&construct, 1, NULL)) - return true; - } + + /* Check for nesting inside a target directive. */ + for (tree ts = construct_context; ts; ts = TREE_CHAIN (ts)) + if (OMP_TS_CODE (ts) == OMP_TRAIT_CONSTRUCT_TARGET) + return true; + return false; } @@ -1287,6 +1295,9 @@ omp_context_name_list_prop (tree prop) case IDENTIFIER_NODE: return IDENTIFIER_POINTER (val); case STRING_CST: +#ifdef ACCEL_COMPILER + return TREE_STRING_POINTER (val); +#else { const char *ret = TREE_STRING_POINTER (val); if ((size_t) TREE_STRING_LENGTH (val) @@ -1294,16 +1305,29 @@ omp_context_name_list_prop (tree prop) return ret; return NULL; } +#endif default: return NULL; } } + +/* Helper function called via walk_tree, to determine if *TP is a + PARM_DECL. */ +static tree +expr_uses_parm_decl (tree *tp, int *walk_subtrees ATTRIBUTE_UNUSED, + void *data ATTRIBUTE_UNUSED) +{ + if (TREE_CODE (*tp) == PARM_DECL) + return *tp; + return NULL_TREE; +} + /* Diagnose errors in an OpenMP context selector, return CTX if it is correct or error_mark_node otherwise. */ tree -omp_check_context_selector (location_t loc, tree ctx) +omp_check_context_selector (location_t loc, tree ctx, bool metadirective_p) { bool tss_seen[OMP_TRAIT_SET_LAST], ts_seen[OMP_TRAIT_LAST]; @@ -1314,10 +1338,6 @@ omp_check_context_selector (location_t loc, tree ctx) bool saw_any_prop = false; bool saw_other_prop = false; - /* We can parse this, but not handle it yet. */ - if (tss_code == OMP_TRAIT_SET_TARGET_DEVICE) - sorry_at (loc, "%<target_device%> selector set is not supported yet"); - /* Each trait-set-selector-name can only be specified once. */ if (tss_seen[tss_code]) { @@ -1401,6 +1421,35 @@ omp_check_context_selector (location_t loc, tree ctx) } } + /* This restriction is documented in the spec in the section + for the metadirective "when" clause (7.4.1 in the 5.2 spec). */ + if (metadirective_p + && ts_code == OMP_TRAIT_CONSTRUCT_SIMD + && OMP_TS_PROPERTIES (ts)) + { + error_at (loc, + "properties must not be specified for the %<simd%> " + "selector in a %<metadirective%> context-selector"); + return error_mark_node; + } + + /* Reject expressions that reference parameter variables in + "declare variant", as this is not yet implemented. FIXME; + see PR middle-end/113904. */ + if (!metadirective_p + && (ts_code == OMP_TRAIT_DEVICE_NUM + || ts_code == OMP_TRAIT_USER_CONDITION)) + { + tree exp = OMP_TS_PROPERTIES (ts); + if (walk_tree (&exp, expr_uses_parm_decl, NULL, NULL)) + { + sorry_at (loc, + "reference to function parameter in " + "%<declare variant%> dynamic selector expression"); + return error_mark_node; + } + } + /* Check for unknown properties. */ if (omp_ts_map[ts_code].valid_properties == NULL) continue; @@ -1465,6 +1514,9 @@ omp_check_context_selector (location_t loc, tree ctx) return ctx; } +/* Forward declarations. */ +static int omp_context_selector_set_compare (enum omp_tss_code, tree, tree); +static int omp_construct_simd_compare (tree, tree, bool); /* Register VARIANT as variant of some base function marked with #pragma omp declare variant. CONSTRUCT is corresponding list of @@ -1528,6 +1580,102 @@ make_trait_property (tree name, tree value, tree chain) return tree_cons (name, value, chain); } +/* Constructor for metadirective variants. */ +tree +make_omp_metadirective_variant (tree selector, tree directive, tree body) +{ + return build_tree_list (selector, build_tree_list (directive, body)); +} + +/* If the construct selector traits SELECTOR_TRAITS match the corresponding + OpenMP context traits CONTEXT_TRAITS, return true and set *SCORE to the + corresponding score if it is non-null. */ +static bool +omp_construct_traits_match (tree selector_traits, tree context_traits, + score_wide_int *score) +{ + int slength = list_length (selector_traits); + int clength = list_length (context_traits); + + /* Trivial failure: the selector has more traits than the OpenMP context. */ + if (slength > clength) + return false; + + /* There's only one trait in the selector and it doesn't have any properties + to match. */ + if (slength == 1 && !OMP_TS_PROPERTIES (selector_traits)) + { + int p = 0, i = 1; + enum omp_ts_code code = OMP_TS_CODE (selector_traits); + for (tree t = context_traits; t; t = TREE_CHAIN (t), i++) + if (OMP_TS_CODE (t) == code) + p = i; + if (p != 0) + { + if (score) + *score = wi::shifted_mask <score_wide_int> (p - 1, 1, false); + return true; + } + else + return false; + } + + /* Now handle the more general cases. + Both lists of traits are ordered from outside in, corresponding to + the c1, ..., cN numbering for the OpenMP context specified in + in section 7.1 of the OpenMP 5.2 spec. Section 7.3 of the spec says + "if the traits that correspond to the construct selector set appear + multiple times in the OpenMP context, the highest valued subset of + context traits that contains all trait selectors in the same order + are used". This means that we want to start the search for a match + from the end of the list, rather than the beginning. To facilitate + that, transfer the lists to temporary arrays to allow random access + to the elements (their order remains outside in). */ + int i, j; + tree s, c; + + tree *sarray = (tree *) alloca (slength * sizeof (tree)); + for (s = selector_traits, i = 0; s; s = TREE_CHAIN (s), i++) + sarray[i] = s; + + tree *carray = (tree *) alloca (clength * sizeof (tree)); + for (c = context_traits, j = 0; c; c = TREE_CHAIN (c), j++) + carray[j] = c; + + /* The variable "i" indexes the selector, "j" indexes the OpenMP context. + Find the "j" corresponding to each sarray[i]. Note that the spec uses + "p" as the 1-based position, but "j" is zero-based, e.g. equal to + p - 1. */ + score_wide_int result = 0; + j = clength - 1; + for (i = slength - 1; i >= 0; i--) + { + enum omp_ts_code code = OMP_TS_CODE (sarray[i]); + tree props = OMP_TS_PROPERTIES (sarray[i]); + for (; j >= 0; j--) + { + if (OMP_TS_CODE (carray[j]) != code) + continue; + if (code == OMP_TRAIT_CONSTRUCT_SIMD + && props + && omp_construct_simd_compare (props, + OMP_TS_PROPERTIES (carray[j]), + true) > 0) + continue; + break; + } + /* If j >= 0, we have a match for this trait at position j. */ + if (j < 0) + return false; + result += wi::shifted_mask <score_wide_int> (j, 1, false); + j--; + } + if (score) + *score = result; + return true; +} + +#if 0 /* Return 1 if context selector matches the current OpenMP context, 0 if it does not and -1 if it is unknown and need to be determined later. Some properties can be checked right away during parsing (this routine), @@ -1919,181 +2067,742 @@ omp_context_selector_matches (tree ctx) } return ret; } +#endif -/* Compare construct={simd} CLAUSES1 with CLAUSES2, return 0/-1/1/2 as - in omp_context_selector_set_compare. */ +/* Return 1 if context selector CTX matches the current OpenMP context, 0 + if it does not and -1 if it is unknown and need to be determined later. + Some properties can be checked right away during parsing, others need + to wait until the whole TU is parsed, others need to wait until + IPA, others until vectorization. -static int -omp_construct_simd_compare (tree clauses1, tree clauses2) + CONSTRUCT_CONTEXT is a list of construct traits from the OpenMP context, + which must be collected by omp_get_construct_context during + gimplification. It is ignored (and may be null) if this function is + called during parsing. Otherwise COMPLETE_P should indicate whether + CONSTRUCT_CONTEXT is known to be complete and not missing constructs + filled in later during compilation. + + Dynamic properties (which are evaluated at run-time) should always + return 1. */ + +int +omp_context_selector_matches (tree ctx, + tree construct_context, + bool complete_p) { - if (clauses1 == NULL_TREE) - return clauses2 == NULL_TREE ? 0 : -1; - if (clauses2 == NULL_TREE) - return 1; + int ret = 1; + bool maybe_offloaded = omp_maybe_offloaded (construct_context); - int r = 0; - struct declare_variant_simd_data { - bool inbranch, notinbranch; - tree simdlen; - auto_vec<tree,16> data_sharing; - auto_vec<tree,16> aligned; - declare_variant_simd_data () - : inbranch(false), notinbranch(false), simdlen(NULL_TREE) {} - } data[2]; - unsigned int i; - for (i = 0; i < 2; i++) - for (tree c = i ? clauses2 : clauses1; c; c = OMP_CLAUSE_CHAIN (c)) - { - vec<tree> *v; - switch (OMP_CLAUSE_CODE (c)) - { - case OMP_CLAUSE_INBRANCH: - data[i].inbranch = true; - continue; - case OMP_CLAUSE_NOTINBRANCH: - data[i].notinbranch = true; - continue; - case OMP_CLAUSE_SIMDLEN: - data[i].simdlen = OMP_CLAUSE_SIMDLEN_EXPR (c); - continue; - case OMP_CLAUSE_UNIFORM: - case OMP_CLAUSE_LINEAR: - v = &data[i].data_sharing; - break; - case OMP_CLAUSE_ALIGNED: - v = &data[i].aligned; - break; - default: - gcc_unreachable (); - } - unsigned HOST_WIDE_INT argno = tree_to_uhwi (OMP_CLAUSE_DECL (c)); - if (argno >= v->length ()) - v->safe_grow_cleared (argno + 1, true); - (*v)[argno] = c; - } - /* Here, r is used as a bitmask, 2 is set if CLAUSES1 has something - CLAUSES2 doesn't, 1 is set if CLAUSES2 has something CLAUSES1 - doesn't. Thus, r == 3 implies return value 2, r == 1 implies - -1, r == 2 implies 1 and r == 0 implies 0. */ - if (data[0].inbranch != data[1].inbranch) - r |= data[0].inbranch ? 2 : 1; - if (data[0].notinbranch != data[1].notinbranch) - r |= data[0].notinbranch ? 2 : 1; - if (!simple_cst_equal (data[0].simdlen, data[1].simdlen)) - { - if (data[0].simdlen && data[1].simdlen) - return 2; - r |= data[0].simdlen ? 2 : 1; - } - if (data[0].data_sharing.length () < data[1].data_sharing.length () - || data[0].aligned.length () < data[1].aligned.length ()) - r |= 1; - tree c1, c2; - FOR_EACH_VEC_ELT (data[0].data_sharing, i, c1) + for (tree tss = ctx; tss; tss = TREE_CHAIN (tss)) { - c2 = (i < data[1].data_sharing.length () - ? data[1].data_sharing[i] : NULL_TREE); - if ((c1 == NULL_TREE) != (c2 == NULL_TREE)) + enum omp_tss_code set = OMP_TSS_CODE (tss); + tree selectors = OMP_TSS_TRAIT_SELECTORS (tss); + + /* Immediately reject the match if there are any ignored + selectors present. */ + for (tree ts = selectors; ts; ts = TREE_CHAIN (ts)) + if (OMP_TS_CODE (ts) == OMP_TRAIT_INVALID) + return 0; + + if (set == OMP_TRAIT_SET_CONSTRUCT) { - r |= c1 != NULL_TREE ? 2 : 1; + /* We cannot resolve the construct selector during parsing because + the OpenMP context (and CONSTRUCT_CONTEXT) isn't available + until gimplification. */ + if (symtab->state == PARSING) + { + ret = -1; + continue; + } + + gcc_assert (selectors); + + /* During gimplification, CONSTRUCT_CONTEXT is partial, and doesn't + include a construct for "declare simd" that may be added + when there is not an enclosing "target" construct. We might + be able to find a positive match against the partial context + (although we cannot yet score it accurately), but if we can't, + treat it as unknown instead of no match. */ + if (!omp_construct_traits_match (selectors, construct_context, NULL)) + { + /* If we've got a complete context, it's definitely a failed + match. */ + if (complete_p) + return 0; + + /* If the selector doesn't include simd, then we don't have + to worry about whether "declare simd" would cause it to + match; so this is also a definite failure. */ + bool have_simd = false; + for (tree ts = selectors; ts; ts = TREE_CHAIN (ts)) + if (OMP_TS_CODE (ts) == OMP_TRAIT_CONSTRUCT_SIMD) + { + have_simd = true; + break; + } + if (!have_simd) + return 0; + else + ret = -1; + } continue; } - if (c1 == NULL_TREE) - continue; - if (OMP_CLAUSE_CODE (c1) != OMP_CLAUSE_CODE (c2)) - return 2; - if (OMP_CLAUSE_CODE (c1) != OMP_CLAUSE_LINEAR) - continue; - if (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE (c1) - != OMP_CLAUSE_LINEAR_VARIABLE_STRIDE (c2)) - return 2; - if (OMP_CLAUSE_LINEAR_KIND (c1) != OMP_CLAUSE_LINEAR_KIND (c2)) - return 2; - if (!simple_cst_equal (OMP_CLAUSE_LINEAR_STEP (c1), - OMP_CLAUSE_LINEAR_STEP (c2))) - return 2; - } - FOR_EACH_VEC_ELT (data[0].aligned, i, c1) - { - c2 = i < data[1].aligned.length () ? data[1].aligned[i] : NULL_TREE; - if ((c1 == NULL_TREE) != (c2 == NULL_TREE)) + else if (set == OMP_TRAIT_SET_TARGET_DEVICE) + /* The target_device set is dynamic, so treat it as always + resolvable. However, the current implementation doesn't + support it in a target region, so diagnose that as an error. + FIXME: maybe make this a warning and return 0 instead? */ { - r |= c1 != NULL_TREE ? 2 : 1; + for (tree ts = construct_context; ts; ts = TREE_CHAIN (ts)) + if (OMP_TS_CODE (ts) == OMP_TRAIT_CONSTRUCT_TARGET) + sorry ("%<target_device%> selector set inside of %<target%> " + "directive"); continue; } - if (c1 == NULL_TREE) - continue; - if (!simple_cst_equal (OMP_CLAUSE_ALIGNED_ALIGNMENT (c1), - OMP_CLAUSE_ALIGNED_ALIGNMENT (c2))) - return 2; - } - switch (r) - { - case 0: return 0; - case 1: return -1; - case 2: return 1; - case 3: return 2; - default: gcc_unreachable (); - } -} - -/* Compare properties of selectors SEL from SET other than construct. - CTX1 and CTX2 are the lists of properties to compare. - Return 0/-1/1/2 as in omp_context_selector_set_compare. - Unlike set names or selector names, properties can have duplicates. */ -static int -omp_context_selector_props_compare (enum omp_tss_code set, - enum omp_ts_code sel, - tree ctx1, tree ctx2) -{ - int ret = 0; - for (int pass = 0; pass < 2; pass++) - for (tree p1 = pass ? ctx2 : ctx1; p1; p1 = TREE_CHAIN (p1)) - { - tree p2; - for (p2 = pass ? ctx1 : ctx2; p2; p2 = TREE_CHAIN (p2)) - if (OMP_TP_NAME (p1) == OMP_TP_NAME (p2)) + for (tree ts = selectors; ts; ts = TREE_CHAIN (ts)) + { + enum omp_ts_code sel = OMP_TS_CODE (ts); + switch (sel) { - if (OMP_TP_NAME (p1) == NULL_TREE) - { - if (set == OMP_TRAIT_SET_USER - && sel == OMP_TRAIT_USER_CONDITION) - { - if (integer_zerop (OMP_TP_VALUE (p1)) - != integer_zerop (OMP_TP_VALUE (p2))) - return 2; - break; - } - if (simple_cst_equal (OMP_TP_VALUE (p1), OMP_TP_VALUE (p2))) - break; - } - else if (OMP_TP_NAME (p1) == OMP_TP_NAMELIST_NODE) + case OMP_TRAIT_IMPLEMENTATION_VENDOR: + gcc_assert (set == OMP_TRAIT_SET_IMPLEMENTATION); + for (tree p = OMP_TS_PROPERTIES (ts); p; p = TREE_CHAIN (p)) { - /* Handle string constant vs identifier comparison for - name-list properties. */ - const char *n1 = omp_context_name_list_prop (p1); - const char *n2 = omp_context_name_list_prop (p2); - if (n1 && n2 && !strcmp (n1, n2)) - break; + const char *prop = omp_context_name_list_prop (p); + if (prop == NULL) + return 0; + if (!strcmp (prop, "gnu")) + continue; + return 0; } - else + break; + case OMP_TRAIT_IMPLEMENTATION_EXTENSION: + gcc_assert (set == OMP_TRAIT_SET_IMPLEMENTATION); + /* We don't support any extensions right now. */ + return 0; + break; + case OMP_TRAIT_IMPLEMENTATION_ADMO: + gcc_assert (set == OMP_TRAIT_SET_IMPLEMENTATION); + if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0) break; - } - if (p2 == NULL_TREE) - { - int r = pass ? -1 : 1; - if (ret && ret != r) - return 2; - else if (pass) - return r; - else + { - ret = r; - break; + enum omp_memory_order omo + = ((enum omp_memory_order) + (omp_requires_mask + & OMP_REQUIRES_ATOMIC_DEFAULT_MEM_ORDER)); + if (omo == OMP_MEMORY_ORDER_UNSPECIFIED) + { + /* We don't know yet, until end of TU. */ + if (symtab->state == PARSING) + { + ret = -1; + break; + } + else + omo = OMP_MEMORY_ORDER_RELAXED; + } + tree p = OMP_TS_PROPERTIES (ts); + const char *prop = IDENTIFIER_POINTER (OMP_TP_NAME (p)); + if (!strcmp (prop, "relaxed") + && omo != OMP_MEMORY_ORDER_RELAXED) + return 0; + else if (!strcmp (prop, "seq_cst") + && omo != OMP_MEMORY_ORDER_SEQ_CST) + return 0; + else if (!strcmp (prop, "acq_rel") + && omo != OMP_MEMORY_ORDER_ACQ_REL) + return 0; + else if (!strcmp (prop, "acquire") + && omo != OMP_MEMORY_ORDER_ACQUIRE) + return 0; + else if (!strcmp (prop, "release") + && omo != OMP_MEMORY_ORDER_RELEASE) + return 0; } - } - } + break; + case OMP_TRAIT_DEVICE_ARCH: + gcc_assert (set == OMP_TRAIT_SET_DEVICE); + for (tree p = OMP_TS_PROPERTIES (ts); p; p = TREE_CHAIN (p)) + { + const char *arch = omp_context_name_list_prop (p); + if (arch == NULL) + return 0; + int r = 0; + if (targetm.omp.device_kind_arch_isa != NULL) + r = targetm.omp.device_kind_arch_isa (omp_device_arch, + arch); + if (r == 0 || (r == -1 && symtab->state != PARSING)) + { + /* If we are or might be in a target region or + declare target function, need to take into account + also offloading values. + Note that maybe_offloaded is always false in late + resolution; that's handled as native code (the + above case) in the offload compiler instead. */ + if (!maybe_offloaded) + return 0; + if (ENABLE_OFFLOADING) + { + const char *arches = omp_offload_device_arch; + if (omp_offload_device_kind_arch_isa (arches, arch)) + { + ret = -1; + continue; + } + } + return 0; + } + else if (r == -1) + ret = -1; + /* If arch matches on the host, it still might not match + in the offloading region. */ + else if (maybe_offloaded) + ret = -1; + } + break; + case OMP_TRAIT_IMPLEMENTATION_UNIFIED_ADDRESS: + gcc_assert (set == OMP_TRAIT_SET_IMPLEMENTATION); + if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0) + break; + + if ((omp_requires_mask & OMP_REQUIRES_UNIFIED_ADDRESS) == 0) + { + if (symtab->state == PARSING) + ret = -1; + else + return 0; + } + break; + case OMP_TRAIT_IMPLEMENTATION_UNIFIED_SHARED_MEMORY: + gcc_assert (set == OMP_TRAIT_SET_IMPLEMENTATION); + if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0) + break; + + if ((omp_requires_mask + & OMP_REQUIRES_UNIFIED_SHARED_MEMORY) == 0) + { + if (symtab->state == PARSING) + ret = -1; + else + return 0; + } + break; + case OMP_TRAIT_IMPLEMENTATION_SELF_MAPS: + gcc_assert (set == OMP_TRAIT_SET_IMPLEMENTATION); + if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0) + break; + + if ((omp_requires_mask & OMP_REQUIRES_SELF_MAPS) == 0) + { + if (symtab->state == PARSING) + ret = -1; + else + return 0; + } + break; + case OMP_TRAIT_IMPLEMENTATION_DYNAMIC_ALLOCATORS: + gcc_assert (set == OMP_TRAIT_SET_IMPLEMENTATION); + if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0) + break; + + if ((omp_requires_mask + & OMP_REQUIRES_DYNAMIC_ALLOCATORS) == 0) + { + if (symtab->state == PARSING) + ret = -1; + else + return 0; + } + break; + case OMP_TRAIT_IMPLEMENTATION_REVERSE_OFFLOAD: + gcc_assert (set == OMP_TRAIT_SET_IMPLEMENTATION); + if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0) + break; + + if ((omp_requires_mask & OMP_REQUIRES_REVERSE_OFFLOAD) == 0) + { + if (symtab->state == PARSING) + ret = -1; + else + return 0; + } + break; + case OMP_TRAIT_DEVICE_KIND: + gcc_assert (set == OMP_TRAIT_SET_DEVICE); + for (tree p = OMP_TS_PROPERTIES (ts); p; p = TREE_CHAIN (p)) + { + const char *prop = omp_context_name_list_prop (p); + if (prop == NULL) + return 0; + if (!strcmp (prop, "any")) + continue; + if (!strcmp (prop, "host")) + { +#ifdef ACCEL_COMPILER + return 0; +#else + if (maybe_offloaded) + ret = -1; + continue; +#endif + } + if (!strcmp (prop, "nohost")) + { +#ifndef ACCEL_COMPILER + if (maybe_offloaded) + ret = -1; + else + return 0; +#endif + continue; + } + + int r = 0; + if (targetm.omp.device_kind_arch_isa != NULL) + r = targetm.omp.device_kind_arch_isa (omp_device_kind, + prop); + else +#ifndef ACCEL_COMPILER + r = strcmp (prop, "cpu") == 0; +#else + gcc_unreachable (); +#endif + if (r == 0 || (r == -1 && symtab->state != PARSING)) + { + /* If we are or might be in a target region or + declare target function, need to take into account + also offloading values. + Note that maybe_offloaded is always false in late + resolution; that's handled as native code (the + above case) in the offload compiler instead. */ + if (!maybe_offloaded) + return 0; + if (ENABLE_OFFLOADING) + { + const char *kinds = omp_offload_device_kind; + if (omp_offload_device_kind_arch_isa (kinds, prop)) + { + ret = -1; + continue; + } + } + return 0; + } + else if (r == -1) + ret = -1; + /* If kind matches on the host, it still might not match + in the offloading region. */ + else if (maybe_offloaded) + ret = -1; + } + break; + case OMP_TRAIT_DEVICE_ISA: + gcc_assert (set == OMP_TRAIT_SET_DEVICE); + for (tree p = OMP_TS_PROPERTIES (ts); p; p = TREE_CHAIN (p)) + { + const char *isa = omp_context_name_list_prop (p); + if (isa == NULL) + return 0; + int r = 0; + if (targetm.omp.device_kind_arch_isa != NULL) + r = targetm.omp.device_kind_arch_isa (omp_device_isa, + isa); + if (r == 0 || (r == -1 && symtab->state != PARSING)) + { + /* If isa is valid on the target, but not in the + current function and current function has + #pragma omp declare simd on it, some simd clones + might have the isa added later on. */ + if (r == -1 + && targetm.simd_clone.compute_vecsize_and_simdlen + && (cfun == NULL || !cfun->after_inlining)) + { + tree attrs + = DECL_ATTRIBUTES (current_function_decl); + if (lookup_attribute ("omp declare simd", attrs)) + { + ret = -1; + continue; + } + } + /* If we are or might be in a target region or + declare target function, need to take into account + also offloading values. + Note that maybe_offloaded is always false in late + resolution; that's handled as native code (the + above case) in the offload compiler instead. */ + if (!maybe_offloaded) + return 0; + if (ENABLE_OFFLOADING) + { + const char *isas = omp_offload_device_isa; + if (omp_offload_device_kind_arch_isa (isas, isa)) + { + ret = -1; + continue; + } + } + return 0; + } + else if (r == -1) + ret = -1; + /* If isa matches on the host, it still might not match + in the offloading region. */ + else if (maybe_offloaded) + ret = -1; + } + break; + case OMP_TRAIT_USER_CONDITION: + gcc_assert (set == OMP_TRAIT_SET_USER); + for (tree p = OMP_TS_PROPERTIES (ts); p; p = TREE_CHAIN (p)) + if (OMP_TP_NAME (p) == NULL_TREE) + { + /* If the expression is not a constant, the selector + is dynamic. */ + if (!tree_fits_shwi_p (OMP_TP_VALUE (p))) + break; + + if (integer_zerop (OMP_TP_VALUE (p))) + return 0; + if (integer_nonzerop (OMP_TP_VALUE (p))) + break; + ret = -1; + } + break; + default: + break; + } + } + } + return ret; +} + +/* Helper function for resolve_omp_target_device_matches, also used + directly when we know in advance that the device is the host to avoid + the overhead of late resolution. SEL is the selector code and + PROPERTIES are the properties to match. The return value is a + boolean. */ +static bool +omp_target_device_matches_on_host (enum omp_ts_code selector, + tree properties) +{ + bool result = 1; + + if (dump_file) + fprintf (dump_file, "omp_target_device_matches_on_host:\n"); + + switch (selector) + { + case OMP_TRAIT_DEVICE_KIND: + for (tree p = properties; p && result; p = TREE_CHAIN (p)) + { + const char *prop = omp_context_name_list_prop (p); + + if (prop == NULL) + result = 0; + else if (!strcmp (prop, "any")) + ; + else if (!strcmp (prop, "host")) + { +#ifdef ACCEL_COMPILER + result = 0; +#else + ; +#endif + } + else if (!strcmp (prop, "nohost")) + { +#ifdef ACCEL_COMPILER + ; +#else + result = 0; +#endif + } + else if (targetm.omp.device_kind_arch_isa != NULL) + result = targetm.omp.device_kind_arch_isa (omp_device_kind, prop); + else +#ifndef ACCEL_COMPILER + result = strcmp (prop, "cpu") == 0; +#else + gcc_unreachable (); +#endif + if (dump_file) + fprintf (dump_file, "Matching device kind %s = %s\n", + prop, (result ? "true" : "false")); + } + break; + case OMP_TRAIT_DEVICE_ARCH: + if (targetm.omp.device_kind_arch_isa != NULL) + for (tree p = properties; p && result; p = TREE_CHAIN (p)) + { + const char *prop = omp_context_name_list_prop (p); + if (prop == NULL) + result = 0; + else + result = targetm.omp.device_kind_arch_isa (omp_device_arch, + prop); + if (dump_file) + fprintf (dump_file, "Matching device arch %s = %s\n", + prop, (result ? "true" : "false")); + } + else + { + result = 0; + if (dump_file) + fprintf (dump_file, "Cannot match device arch on target\n"); + } + break; + case OMP_TRAIT_DEVICE_ISA: + if (targetm.omp.device_kind_arch_isa != NULL) + for (tree p = properties; p && result; p = TREE_CHAIN (p)) + { + const char *prop = omp_context_name_list_prop (p); + if (prop == NULL) + result = 0; + else + result = targetm.omp.device_kind_arch_isa (omp_device_isa, + prop); + if (dump_file) + fprintf (dump_file, "Matching device isa %s = %s\n", + prop, (result ? "true" : "false")); + } + else + { + result = 0; + if (dump_file) + fprintf (dump_file, "Cannot match device isa on target\n"); + } + break; + default: + gcc_unreachable (); + } + return result; +} + +/* Called for late resolution of the OMP_TARGET_DEVICE_MATCHES tree node to + a constant in omp-offload.cc. This is used in code that is wrapped in a + #pragma omp target construct to execute on the specified device, and + can be reduced to a compile-time constant in the offload compiler. + NODE is an OMP_TARGET_DEVICE_MATCHES tree node and the result is an + INTEGER_CST. */ +tree +resolve_omp_target_device_matches (tree node) +{ + tree sel = OMP_TARGET_DEVICE_MATCHES_SELECTOR (node); + enum omp_ts_code selector = (enum omp_ts_code) tree_to_shwi (sel); + tree properties = OMP_TARGET_DEVICE_MATCHES_PROPERTIES (node); + if (omp_target_device_matches_on_host (selector, properties)) + return integer_one_node; + else + return integer_zero_node; +} + +/* Compare construct={simd} CLAUSES1 with CLAUSES2, return 0/-1/1/2 as + in omp_context_selector_set_compare. If MATCH_P is true, additionally + apply the special matching rules for the "simdlen" and "aligned" clauses + used to determine whether the selector CLAUSES1 is part of matches + the OpenMP context containing CLAUSES2. */ + +static int +omp_construct_simd_compare (tree clauses1, tree clauses2, bool match_p) +{ + if (clauses1 == NULL_TREE) + return clauses2 == NULL_TREE ? 0 : -1; + if (clauses2 == NULL_TREE) + return 1; + + int r = 0; + struct declare_variant_simd_data { + bool inbranch, notinbranch; + tree simdlen; + auto_vec<tree,16> data_sharing; + auto_vec<tree,16> aligned; + declare_variant_simd_data () + : inbranch(false), notinbranch(false), simdlen(NULL_TREE) {} + } data[2]; + unsigned int i; + tree e0, e1; + for (i = 0; i < 2; i++) + for (tree c = i ? clauses2 : clauses1; c; c = OMP_CLAUSE_CHAIN (c)) + { + vec<tree> *v; + switch (OMP_CLAUSE_CODE (c)) + { + case OMP_CLAUSE_INBRANCH: + data[i].inbranch = true; + continue; + case OMP_CLAUSE_NOTINBRANCH: + data[i].notinbranch = true; + continue; + case OMP_CLAUSE_SIMDLEN: + data[i].simdlen = OMP_CLAUSE_SIMDLEN_EXPR (c); + continue; + case OMP_CLAUSE_UNIFORM: + case OMP_CLAUSE_LINEAR: + v = &data[i].data_sharing; + break; + case OMP_CLAUSE_ALIGNED: + v = &data[i].aligned; + break; + default: + gcc_unreachable (); + } + unsigned HOST_WIDE_INT argno = tree_to_uhwi (OMP_CLAUSE_DECL (c)); + if (argno >= v->length ()) + v->safe_grow_cleared (argno + 1, true); + (*v)[argno] = c; + } + /* Here, r is used as a bitmask, 2 is set if CLAUSES1 has something + CLAUSES2 doesn't, 1 is set if CLAUSES2 has something CLAUSES1 + doesn't. Thus, r == 3 implies return value 2, r == 1 implies + -1, r == 2 implies 1 and r == 0 implies 0. */ + if (data[0].inbranch != data[1].inbranch) + r |= data[0].inbranch ? 2 : 1; + if (data[0].notinbranch != data[1].notinbranch) + r |= data[0].notinbranch ? 2 : 1; + e0 = data[0].simdlen; + e1 = data[1].simdlen; + if (!simple_cst_equal (e0, e1)) + { + if (e0 && e1) + { + if (match_p && tree_fits_uhwi_p (e0) && tree_fits_uhwi_p (e1)) + { + /* The two simdlen clauses match if m is a multiple of n. */ + unsigned HOST_WIDE_INT n = tree_to_uhwi (e0); + unsigned HOST_WIDE_INT m = tree_to_uhwi (e1); + if (m % n != 0) + return 2; + } + else + return 2; + } + r |= data[0].simdlen ? 2 : 1; + } + if (data[0].data_sharing.length () < data[1].data_sharing.length () + || data[0].aligned.length () < data[1].aligned.length ()) + r |= 1; + tree c1, c2; + FOR_EACH_VEC_ELT (data[0].data_sharing, i, c1) + { + c2 = (i < data[1].data_sharing.length () + ? data[1].data_sharing[i] : NULL_TREE); + if ((c1 == NULL_TREE) != (c2 == NULL_TREE)) + { + r |= c1 != NULL_TREE ? 2 : 1; + continue; + } + if (c1 == NULL_TREE) + continue; + if (OMP_CLAUSE_CODE (c1) != OMP_CLAUSE_CODE (c2)) + return 2; + if (OMP_CLAUSE_CODE (c1) != OMP_CLAUSE_LINEAR) + continue; + if (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE (c1) + != OMP_CLAUSE_LINEAR_VARIABLE_STRIDE (c2)) + return 2; + if (OMP_CLAUSE_LINEAR_KIND (c1) != OMP_CLAUSE_LINEAR_KIND (c2)) + return 2; + if (!simple_cst_equal (OMP_CLAUSE_LINEAR_STEP (c1), + OMP_CLAUSE_LINEAR_STEP (c2))) + return 2; + } + FOR_EACH_VEC_ELT (data[0].aligned, i, c1) + { + c2 = i < data[1].aligned.length () ? data[1].aligned[i] : NULL_TREE; + if ((c1 == NULL_TREE) != (c2 == NULL_TREE)) + { + r |= c1 != NULL_TREE ? 2 : 1; + continue; + } + if (c1 == NULL_TREE) + continue; + e0 = OMP_CLAUSE_ALIGNED_ALIGNMENT (c1); + e1 = OMP_CLAUSE_ALIGNED_ALIGNMENT (c2); + if (!simple_cst_equal (e0, e1)) + { + if (e0 && e1 + && match_p && tree_fits_uhwi_p (e0) && tree_fits_uhwi_p (e1)) + { + /* The two aligned clauses match if n is a multiple of m. */ + unsigned HOST_WIDE_INT n = tree_to_uhwi (e0); + unsigned HOST_WIDE_INT m = tree_to_uhwi (e1); + if (n % m != 0) + return 2; + } + else + return 2; + } + } + switch (r) + { + case 0: return 0; + case 1: return -1; + case 2: return 1; + case 3: return 2; + default: gcc_unreachable (); + } +} + +/* Compare properties of selectors SEL from SET other than construct. + CTX1 and CTX2 are the lists of properties to compare. + Return 0/-1/1/2 as in omp_context_selector_set_compare. + Unlike set names or selector names, properties can have duplicates. */ + +static int +omp_context_selector_props_compare (enum omp_tss_code set, + enum omp_ts_code sel, + tree ctx1, tree ctx2) +{ + int ret = 0; + for (int pass = 0; pass < 2; pass++) + for (tree p1 = pass ? ctx2 : ctx1; p1; p1 = TREE_CHAIN (p1)) + { + tree p2; + for (p2 = pass ? ctx1 : ctx2; p2; p2 = TREE_CHAIN (p2)) + if (OMP_TP_NAME (p1) == OMP_TP_NAME (p2)) + { + if (OMP_TP_NAME (p1) == NULL_TREE) + { + if (set == OMP_TRAIT_SET_USER + && sel == OMP_TRAIT_USER_CONDITION) + { + if (integer_zerop (OMP_TP_VALUE (p1)) + != integer_zerop (OMP_TP_VALUE (p2))) + return 2; + break; + } + if (simple_cst_equal (OMP_TP_VALUE (p1), OMP_TP_VALUE (p2))) + break; + } + else if (OMP_TP_NAME (p1) == OMP_TP_NAMELIST_NODE) + { + /* Handle string constant vs identifier comparison for + name-list properties. */ + const char *n1 = omp_context_name_list_prop (p1); + const char *n2 = omp_context_name_list_prop (p2); + if (n1 && n2 && !strcmp (n1, n2)) + break; + } + else + break; + } + if (p2 == NULL_TREE) + { + int r = pass ? -1 : 1; + if (ret && ret != r) + return 2; + else if (pass) + return r; + else + { + ret = r; + break; + } + } + } return ret; } @@ -2104,7 +2813,7 @@ omp_context_selector_props_compare (enum omp_tss_code set, 1 if CTX2 is a strict subset of CTX1, or 2 if neither context is a subset of another one. */ -int +static int omp_context_selector_set_compare (enum omp_tss_code set, tree ctx1, tree ctx2) { @@ -2141,7 +2850,8 @@ omp_context_selector_set_compare (enum omp_tss_code set, tree ctx1, tree ctx2) int r = 0; if (OMP_TS_CODE (ts1) == OMP_TRAIT_CONSTRUCT_SIMD) r = omp_construct_simd_compare (OMP_TS_PROPERTIES (ts1), - OMP_TS_PROPERTIES (ts2)); + OMP_TS_PROPERTIES (ts2), + false); if (r == 2 || (ret && r && (ret < 0) != (r < 0))) return 2; if (ret == 0) @@ -2271,42 +2981,333 @@ omp_get_context_selector (tree ctx, enum omp_tss_code set, return NULL_TREE; } -/* Similar, but returns the whole trait-selector list for SET in CTX. */ +/* Similar, but returns the whole trait-selector list for SET in CTX. */ +tree +omp_get_context_selector_list (tree ctx, enum omp_tss_code set) +{ + for (tree tss = ctx; tss; tss = TREE_CHAIN (tss)) + if (OMP_TSS_CODE (tss) == set) + return OMP_TSS_TRAIT_SELECTORS (tss); + return NULL_TREE; +} + +/* Map string S onto a trait selector set code. */ +enum omp_tss_code +omp_lookup_tss_code (const char * s) +{ + for (int i = 0; i < OMP_TRAIT_SET_LAST; i++) + if (strcmp (s, omp_tss_map[i]) == 0) + return (enum omp_tss_code) i; + return OMP_TRAIT_SET_INVALID; +} + +/* Map string S onto a trait selector code for set SET. */ +enum omp_ts_code +omp_lookup_ts_code (enum omp_tss_code set, const char *s) +{ + unsigned int mask = 1 << set; + for (int i = 0; i < OMP_TRAIT_LAST; i++) + if ((mask & omp_ts_map[i].tss_mask) != 0 + && strcmp (s, omp_ts_map[i].name) == 0) + return (enum omp_ts_code) i; + return OMP_TRAIT_INVALID; +} + + +/* Return true if the selector CTX is dynamic. */ +static bool +omp_selector_is_dynamic (tree ctx) +{ + tree user_sel = omp_get_context_selector (ctx, OMP_TRAIT_SET_USER, + OMP_TRAIT_USER_CONDITION); + if (user_sel) + { + tree expr = OMP_TP_VALUE (OMP_TS_PROPERTIES (user_sel)); + + /* The user condition is not dynamic if it is constant. */ + if (!tree_fits_shwi_p (expr)) + return true; + } + + tree target_device_ss + = omp_get_context_selector_list (ctx, OMP_TRAIT_SET_TARGET_DEVICE); + if (target_device_ss) + return true; + + return false; +} + +/* Helper function for omp_dynamic_cond: return a boolean tree expression + that tests whether *DEVICE_NUM is a "conforming device number other + than omp_invalid_device". This may modify *DEVICE_NUM (i.e, to be + a save_expr). *IS_HOST is set to true if the device can be statically + determined to be the host. */ + +static tree +omp_device_num_check (tree *device_num, bool *is_host) +{ + /* First check for some constant values we can treat specially. */ + if (tree_fits_shwi_p (*device_num)) + { + HOST_WIDE_INT num = tree_to_shwi (*device_num); + if (num < -1) + return integer_zero_node; + /* Initial device? */ + if (num == -1) + { + *is_host = true; + return integer_one_node; + } + /* There is always at least one device (the host + offload devices). */ + if (num == 0) + return integer_one_node; + /* If there is no offloading, there is exactly one device. */ + if (!ENABLE_OFFLOADING && num > 0) + return integer_zero_node; + } + + /* Also test for direct calls to OpenMP routines that return valid + device numbers. */ + if (TREE_CODE (*device_num) == CALL_EXPR) + { + tree fndecl = get_callee_fndecl (*device_num); + if (fndecl && omp_runtime_api_call (fndecl)) + { + const char *fnname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); + if (strcmp (fnname, "omp_get_default_device") == 0 + || strcmp (fnname, "omp_get_device_num") == 0) + return integer_one_node; + if (strcmp (fnname, "omp_get_num_devices") == 0 + || strcmp (fnname, "omp_get_initial_device") == 0) + { + *is_host = true; + return integer_one_node; + } + } + } + + /* Otherwise, test that -1 <= *device_num <= omp_get_num_devices (). */ + *device_num = save_expr (*device_num); + tree lotest = build2 (GE_EXPR, integer_type_node, *device_num, + integer_minus_one_node); + tree fndecl = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_DEVICES); + tree hitest = build2 (LE_EXPR, integer_type_node, *device_num, + build_call_expr (fndecl, 0)); + return build2 (TRUTH_ANDIF_EXPR, integer_type_node, lotest, hitest); +} + +/* Return a tree expression representing the dynamic part of the context + selector CTX. SUPERCONTEXT is the surrounding BLOCK, in case we need + to introduce a new BLOCK in the result. */ tree -omp_get_context_selector_list (tree ctx, enum omp_tss_code set) +omp_dynamic_cond (tree ctx, tree supercontext) { - for (tree tss = ctx; tss; tss = TREE_CHAIN (tss)) - if (OMP_TSS_CODE (tss) == set) - return OMP_TSS_TRAIT_SELECTORS (tss); - return NULL_TREE; -} + tree user_cond = NULL_TREE, target_device_cond = NULL_TREE; -/* Map string S onto a trait selector set code. */ -enum omp_tss_code -omp_lookup_tss_code (const char * s) -{ - for (int i = 0; i < OMP_TRAIT_SET_LAST; i++) - if (strcmp (s, omp_tss_map[i]) == 0) - return (enum omp_tss_code) i; - return OMP_TRAIT_SET_INVALID; -} + /* Build the "user" part of the dynamic selector. This is a test + predicate taken directly for the "condition" trait in this set. */ + tree user_sel = omp_get_context_selector (ctx, OMP_TRAIT_SET_USER, + OMP_TRAIT_USER_CONDITION); + if (user_sel) + { + tree expr = OMP_TP_VALUE (OMP_TS_PROPERTIES (user_sel)); -/* Map string S onto a trait selector code for set SET. */ -enum omp_ts_code -omp_lookup_ts_code (enum omp_tss_code set, const char *s) -{ - unsigned int mask = 1 << set; - for (int i = 0; i < OMP_TRAIT_LAST; i++) - if ((mask & omp_ts_map[i].tss_mask) != 0 - && strcmp (s, omp_ts_map[i].name) == 0) - return (enum omp_ts_code) i; - return OMP_TRAIT_INVALID; -} + /* The user condition is not dynamic if it is constant. */ + if (!tree_fits_shwi_p (expr)) + user_cond = expr; + } + + /* Build the "target_device" part of the dynamic selector. In the + most general case this requires building a bit of code that runs + on the specified device_num using the same mechanism as + "#pragma omp target" that uses the OMP_TARGET_DEVICE_MATCHES magic + cookie to represent the kind/arch/isa tests which are and'ed together. + These cookies can be resolved into a constant truth value by the + offload compiler; see resolve_omp_target_device_matches, above. + + In some cases, we can (in)validate the device number in advance. + If it is not valid, the whole selector fails to match. If it is + valid and refers to the host (e.g., constant -1), then we can + resolve the match to a constant truth value now instead of having + to create a OMP_TARGET_DEVICE_MATCHES. */ + + tree target_device_ss + = omp_get_context_selector_list (ctx, OMP_TRAIT_SET_TARGET_DEVICE); + if (target_device_ss) + { + tree device_num = NULL_TREE; + tree kind = NULL_TREE; + tree arch = NULL_TREE; + tree isa = NULL_TREE; + tree device_ok = NULL_TREE; + bool is_host = !ENABLE_OFFLOADING; + + tree device_num_sel + = omp_get_context_selector (ctx, OMP_TRAIT_SET_TARGET_DEVICE, + OMP_TRAIT_DEVICE_NUM); + if (device_num_sel) + { + device_num = OMP_TP_VALUE (OMP_TS_PROPERTIES (device_num_sel)); + device_ok = omp_device_num_check (&device_num, &is_host); + /* If an invalid constant device number was specified, the + whole selector fails to match, and there's no point in + continuing to generate code that would never be executed. */ + if (device_ok == integer_zero_node) + { + target_device_cond = integer_zero_node; + goto wrapup; + } + } + + tree kind_sel + = omp_get_context_selector (ctx, OMP_TRAIT_SET_TARGET_DEVICE, + OMP_TRAIT_DEVICE_KIND); + /* "any" is equivalent to omitting this trait selector. */ + if (kind_sel + && strcmp (omp_context_name_list_prop (OMP_TS_PROPERTIES (kind_sel)), + "any")) + { + tree props = OMP_TS_PROPERTIES (kind_sel); + if (!is_host) + kind = build2 (OMP_TARGET_DEVICE_MATCHES, integer_type_node, + build_int_cst (integer_type_node, + (int) OMP_TRAIT_DEVICE_KIND), + props); + else if (!omp_target_device_matches_on_host (OMP_TRAIT_DEVICE_KIND, + props)) + { + /* The whole selector fails to match. */ + target_device_cond = integer_zero_node; + goto wrapup; + } + /* else it is statically resolved to true and is a no-op. */ + } + tree arch_sel + = omp_get_context_selector (ctx, OMP_TRAIT_SET_TARGET_DEVICE, + OMP_TRAIT_DEVICE_ARCH); + if (arch_sel) + { + tree props = OMP_TS_PROPERTIES (arch_sel); + if (!is_host) + arch = build2 (OMP_TARGET_DEVICE_MATCHES, integer_type_node, + build_int_cst (integer_type_node, + (int) OMP_TRAIT_DEVICE_ARCH), + props); + else if (!omp_target_device_matches_on_host (OMP_TRAIT_DEVICE_ARCH, + props)) + { + /* The whole selector fails to match. */ + target_device_cond = integer_zero_node; + goto wrapup; + } + /* else it is statically resolved to true and is a no-op. */ + } + + tree isa_sel + = omp_get_context_selector (ctx, OMP_TRAIT_SET_TARGET_DEVICE, + OMP_TRAIT_DEVICE_ISA); + if (isa_sel) + { + tree props = OMP_TS_PROPERTIES (isa_sel); + if (!is_host) + isa = build2 (OMP_TARGET_DEVICE_MATCHES, integer_type_node, + build_int_cst (integer_type_node, + (int) OMP_TRAIT_DEVICE_ISA), + props); + else if (!omp_target_device_matches_on_host (OMP_TRAIT_DEVICE_ISA, + props)) + { + /* The whole selector fails to match. */ + target_device_cond = integer_zero_node; + goto wrapup; + } + /* else it is statically resolved to true and is a no-op. */ + } + + /* AND the three possible tests together. */ + tree test_expr = kind ? kind : NULL_TREE; + if (arch && test_expr) + test_expr = build2 (TRUTH_ANDIF_EXPR, integer_type_node, + arch, test_expr); + else if (arch) + test_expr = arch; + if (isa && test_expr) + test_expr = build2 (TRUTH_ANDIF_EXPR, integer_type_node, + isa, test_expr); + else if (isa) + test_expr = isa; + + if (!test_expr) + /* This could happen if the selector includes only kind="any", + or is_host is true and it could be statically determined to + be true. The selector always matches, but we still have to + evaluate the device_num expression. */ + { + if (device_num) + target_device_cond = build2 (COMPOUND_EXPR, integer_type_node, + device_num, integer_one_node); + else + target_device_cond = integer_one_node; + } + else + { + /* Arrange to evaluate test_expr in the offload compiler for + device device_num. */ + tree stmt = make_node (OMP_TARGET); + TREE_TYPE (stmt) = void_type_node; + tree result_var = create_tmp_var (integer_type_node, "td_match"); + tree map = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_MAP); + OMP_CLAUSE_DECL (map) = result_var; + OMP_CLAUSE_SET_MAP_KIND (map, GOMP_MAP_FROM); + OMP_TARGET_CLAUSES (stmt) = map; + if (device_num) + { + tree clause = build_omp_clause (UNKNOWN_LOCATION, + OMP_CLAUSE_DEVICE); + OMP_CLAUSE_CHAIN (clause) = NULL_TREE; + OMP_CLAUSE_DEVICE_ID (clause) = device_num; + OMP_CLAUSE_DEVICE_ANCESTOR (clause) = false; + OMP_CLAUSE_CHAIN (map) = clause; + } + + tree block = make_node (BLOCK); + BLOCK_SUPERCONTEXT (block) = supercontext; + + tree bind = build3 (BIND_EXPR, void_type_node, NULL_TREE, + build2 (MODIFY_EXPR, integer_type_node, + result_var, test_expr), + block); + TREE_SIDE_EFFECTS (bind) = 1; + OMP_TARGET_BODY (stmt) = bind; + target_device_cond = build2 (COMPOUND_EXPR, integer_type_node, + stmt, result_var); + + /* If necessary, "and" target_device_cond with the test to + make sure the device number is valid. */ + if (device_ok && device_ok != integer_one_node) + target_device_cond = build2 (TRUTH_ANDIF_EXPR, integer_type_node, + device_ok, target_device_cond); + + /* Set the bit to trigger resolution of OMP_TARGET_DEVICE_MATCHES + in the ompdevlow pass. */ + if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0) + cgraph_node::get (cfun->decl)->has_omp_variant_constructs = 1; + } + } -/* Needs to be a GC-friendly widest_int variant, but precision is - desirable to be the same on all targets. */ -typedef generic_wide_int <fixed_wide_int_storage <1024> > score_wide_int; + wrapup: + if (user_cond && target_device_cond) + return build2 (TRUTH_ANDIF_EXPR, integer_type_node, + user_cond, target_device_cond); + else if (user_cond) + return user_cond; + else if (target_device_cond) + return target_device_cond; + else + return NULL_TREE; +} +#if 0 /* Compute *SCORE for context selector CTX. Return true if the score would be different depending on whether it is a declare simd clone or not. DECLARE_SIMD should be true for the case when it would be @@ -2378,6 +3379,152 @@ omp_context_compute_score (tree ctx, score_wide_int *score, bool declare_simd) } return ret; } +#endif + +/* Given an omp_variant VARIANT, compute VARIANT->score and + VARIANT->scorable. + CONSTRUCT_CONTEXT is the OpenMP construct context; if this is null or + COMPLETE_P is false (e.g., during parsing or gimplification) then it + may not be possible to compute the score accurately and the scorable + flag is set to false. + + Cited text in the comments is from section 7.2 of the OpenMP 5.2 + specification. */ + +static void +omp_context_compute_score (struct omp_variant *variant, + tree construct_context, bool complete_p) +{ + int l = list_length (construct_context); + tree ctx = variant->selector; + variant->scorable = true; + + /* "the final score is the sum of the values of all specified selectors + plus 1". */ + variant->score = 1; + for (tree tss = ctx; tss; tss = TREE_CHAIN (tss)) + { + if (OMP_TSS_CODE (tss) == OMP_TRAIT_SET_CONSTRUCT) + { + /* "Each trait selector for which the corresponding trait appears + in the context trait set in the OpenMP context..." */ + score_wide_int tss_score = 0; + omp_construct_traits_match (OMP_TSS_TRAIT_SELECTORS (tss), + construct_context, &tss_score); + variant->score += tss_score; + if (!complete_p) + variant->scorable = false; + } + else if (OMP_TSS_CODE (tss) == OMP_TRAIT_SET_DEVICE + || OMP_TSS_CODE (tss) == OMP_TRAIT_SET_TARGET_DEVICE) + { + /* "The kind, arch, and isa selectors, if specified, are given + the values 2**l, 2**(l+1), and 2**(l+2), respectively..." + FIXME: the spec isn't clear what should happen if there are + both "device" and "target_device" selector sets specified. + This implementation adds up the bits rather than ORs them. */ + for (tree ts = OMP_TSS_TRAIT_SELECTORS (tss); ts; + ts = TREE_CHAIN (ts)) + { + enum omp_ts_code code = OMP_TS_CODE (ts); + if (code == OMP_TRAIT_DEVICE_KIND) + variant->score + += wi::shifted_mask <score_wide_int> (l, 1, false); + else if (code == OMP_TRAIT_DEVICE_ARCH) + variant->score + += wi::shifted_mask <score_wide_int> (l + 1, 1, false); + else if (code == OMP_TRAIT_DEVICE_ISA) + variant->score + += wi::shifted_mask <score_wide_int> (l + 2, 1, false); + } + if (!complete_p) + variant->scorable = false; + } + else + { + /* "Trait selectors for which a trait-score is specified..." + Note that there are no implementation-defined selectors, and + "other selectors are given a value of zero". */ + for (tree ts = OMP_TSS_TRAIT_SELECTORS (tss); ts; + ts = TREE_CHAIN (ts)) + { + tree s = OMP_TS_SCORE (ts); + if (s && TREE_CODE (s) == INTEGER_CST) + variant->score + += score_wide_int::from (wi::to_wide (s), + TYPE_SIGN (TREE_TYPE (s))); + } + } + } +} + +/* CONSTRUCT_CONTEXT contains "the directive names, each being a trait, + of all enclosing constructs at that point in the program up to a target + construct", per section 7.1 of the 5.2 specification. The traits are + collected during gimplification and are listed outermost first. + + This function attempts to apply the "if the point in the program is not + enclosed by a target construct, the following rules are applied in order" + requirements that follow in the same paragraph. This may not be possible, + depending on the compilation phase; in particular, "declare simd" clones + are not known until late resolution. + + The augmented context is returned, and *COMPLETEP is set to true if + the context is known to be complete, false otherwise. */ +static tree +omp_complete_construct_context (tree construct_context, bool *completep) +{ + /* The point in the program is enclosed by a target construct. */ + if (construct_context + && OMP_TS_CODE (construct_context) == OMP_TRAIT_CONSTRUCT_TARGET) + *completep = true; + + /* At parse time we have none of the information we need to collect + the missing pieces. */ + else if (symtab->state == PARSING) + *completep = false; + + else + { + tree attributes = DECL_ATTRIBUTES (current_function_decl); + + /* Add simd trait when in a simd clone. This information is only + available during late resolution in the omp_device_lower pass, + however we can also rule out cases where we know earlier that + cfun is not a candidate for cloning. */ + if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0) + { + cgraph_node *node = cgraph_node::get (cfun->decl); + if (node->simdclone) + construct_context = make_trait_selector (OMP_TRAIT_CONSTRUCT_SIMD, + NULL_TREE, NULL_TREE, + construct_context); + *completep = true; + } + else if (lookup_attribute ("omp declare simd", attributes)) + *completep = false; + else + *completep = true; + + /* Add construct selector set within a "declare variant" function. */ + tree variant_attr + = lookup_attribute ("omp declare variant variant", attributes); + if (variant_attr) + { + tree temp = NULL_TREE; + for (tree t = TREE_VALUE (variant_attr); t; t = TREE_CHAIN (t)) + temp = chainon (temp, copy_node (t)); + construct_context = chainon (temp, construct_context); + } + + /* Add target trait when in a target variant. */ + if (lookup_attribute ("omp declare target block", attributes)) + construct_context = make_trait_selector (OMP_TRAIT_CONSTRUCT_TARGET, + NULL_TREE, NULL_TREE, + construct_context); + } + return construct_context; +} /* Class describing a single variant. */ struct GTY(()) omp_declare_variant_entry { @@ -2475,6 +3622,7 @@ omp_declare_variant_alt_hasher::equal (omp_declare_variant_base_entry *x, static GTY(()) hash_table<omp_declare_variant_alt_hasher> *omp_declare_variant_alt; +#if 0 /* Try to resolve declare variant after gimplification. */ static tree @@ -2860,6 +4008,7 @@ omp_resolve_declare_variant (tree base) return ((variant1 && variant1 == variant2) ? TREE_PURPOSE (TREE_VALUE (variant1)) : base); } +#endif void omp_lto_output_declare_variant_alt (lto_simple_output_block *ob, @@ -2981,6 +4130,425 @@ omp_lto_input_declare_variant_alt (lto_input_block *ib, cgraph_node *node, INSERT) = entryp; } +/* Comparison function for sorting routines, to sort OpenMP metadirective + variants by decreasing score. */ + +static int +sort_variant (const void * a, const void *b, void *) +{ + score_wide_int score1 + = ((const struct omp_variant *) a)->score; + score_wide_int score2 + = ((const struct omp_variant *) b)->score; + + if (score1 > score2) + return -1; + else if (score1 < score2) + return 1; + else + return 0; +} + +/* Return a vector of dynamic replacement candidates for the directive + candidates in ALL_VARIANTS. Return an empty vector if the candidates + cannot be resolved. */ + +vec<struct omp_variant> +omp_get_dynamic_candidates (vec <struct omp_variant> &all_variants, + tree construct_context) +{ + auto_vec <struct omp_variant> variants; + struct omp_variant default_variant; + bool default_found = false; + bool complete_p; + + construct_context + = omp_complete_construct_context (construct_context, &complete_p); + + if (dump_file) + { + fprintf (dump_file, "\nIn omp_get_dynamic_candidates:\n"); + if (symtab->state == PARSING) + fprintf (dump_file, "invoked during parsing\n"); + else if (cfun && (cfun->curr_properties & PROP_gimple_any) == 0) + fprintf (dump_file, "invoked during gimplification\n"); + else if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0) + fprintf (dump_file, "invoked during late resolution\n"); + else + fprintf (dump_file, "confused about invocation context?!?\n"); + fprintf (dump_file, "construct_context has %d traits (%s)\n", + (construct_context ? list_length (construct_context) : 0), + (complete_p ? "complete" : "incomplete")); + } + + for (unsigned int i = 0; i < all_variants.length (); i++) + { + struct omp_variant variant = all_variants[i]; + + if (variant.selector == NULL_TREE) + { + gcc_assert (!default_found); + default_found = true; + default_variant = variant; + default_variant.score = 0; + default_variant.scorable = true; + default_variant.matchable = true; + default_variant.dynamic_selector = false; + if (dump_file) + fprintf (dump_file, + "Considering default selector as candidate\n"); + continue; + } + + variant.matchable = true; + variant.scorable = true; + + if (dump_file) + { + fprintf (dump_file, "Considering selector "); + print_omp_context_selector (dump_file, variant.selector, TDF_NONE); + fprintf (dump_file, " as candidate - "); + } + + switch (omp_context_selector_matches (variant.selector, + construct_context, complete_p)) + { + case -1: + if (dump_file) + fprintf (dump_file, "unmatchable\n"); + /* At parse time, just give up if we can't determine whether + things match. */ + if (symtab->state == PARSING) + { + variants.truncate (0); + return variants.copy (); + } + /* Otherwise we must be invoked from the gimplifier. */ + gcc_assert (cfun && (cfun->curr_properties & PROP_gimple_any) == 0); + variant.matchable = false; + /* FALLTHRU */ + case 1: + omp_context_compute_score (&variant, construct_context, complete_p); + variant.dynamic_selector + = omp_selector_is_dynamic (variant.selector); + variants.safe_push (variant); + if (dump_file && variant.matchable) + { + if (variant.dynamic_selector) + fprintf (dump_file, "matched, dynamic"); + else + fprintf (dump_file, "matched, non-dynamic"); + } + break; + case 0: + if (dump_file) + fprintf (dump_file, "no match"); + break; + } + + if (dump_file) + fprintf (dump_file, "\n"); + } + + /* There must be one default variant. */ + gcc_assert (default_found); + + /* If there are no matching selectors, return the default. */ + if (variants.length () == 0) + { + variants.safe_push (default_variant); + return variants.copy (); + } + + /* If there is only one matching selector, use it. */ + if (variants.length () == 1) + { + if (variants[0].matchable) + { + if (variants[0].dynamic_selector) + variants.safe_push (default_variant); + return variants.copy (); + } + else + { + /* We don't know whether the one non-default selector will + actually match. */ + variants.truncate (0); + return variants.copy (); + } + } + + /* A context selector that is a strict subset of another context selector + has a score of zero. This only applies if the selector that is a + superset definitely matches, though. */ + for (unsigned int i = 0; i < variants.length (); i++) + for (unsigned int j = i + 1; j < variants.length (); j++) + { + int r = omp_context_selector_compare (variants[i].selector, + variants[j].selector); + if (r == -1 && variants[j].matchable) + { + /* variant i is a strict subset of variant j. */ + variants[i].score = 0; + variants[i].scorable = true; + break; + } + else if (r == 1 && variants[i].matchable) + /* variant j is a strict subset of variant i. */ + { + variants[j].score = 0; + variants[j].scorable = true; + } + } + + /* Sort the variants by decreasing score, preserving the original order + in case of a tie. */ + variants.stablesort (sort_variant, NULL); + + /* Add the default as a final choice. */ + variants.safe_push (default_variant); + + if (dump_file) + { + fprintf (dump_file, "Sorted variants are:\n"); + for (unsigned i = 0; i < variants.length (); i++) + { + HOST_WIDE_INT score = variants[i].score.to_shwi (); + fprintf (dump_file, "score %d matchable %d scorable %d ", + (int)score, (int)(variants[i].matchable), + (int)(variants[i].scorable)); + if (variants[i].selector) + { + fprintf (dump_file, "selector "); + print_omp_context_selector (dump_file, variants[i].selector, + TDF_NONE); + fprintf (dump_file, "\n"); + } + else + fprintf (dump_file, "default selector\n"); + } + } + + /* Build the dynamic candidate list. */ + for (unsigned i = 0; i < variants.length (); i++) + { + /* If we encounter a candidate that wasn't definitely matched, + give up now. */ + if (!variants[i].matchable) + { + variants.truncate (0); + break; + } + + /* In general, we can't proceed if we can't accurately score any + of the selectors, since the sorting may be incorrect. But, since + the actual score will never be lower than the guessed value, we + can use the first variant if it is not scorable but either the next + one is a subset of the first, is scorable, or we can make a + direct comparison of the high-order isa/arch/kind bits. */ + if (!variants[i].scorable) + { + bool ok = true; + if (i != 0) + ok = false; + else if (variants[i+1].scorable) + /* ok */ + ; + else if (variants[i+1].score > 0) + { + /* To keep comparisons simple, reject selectors that contain + sets other than device, target_device, or construct. */ + for (tree tss = variants[i].selector; + tss && ok; tss = TREE_CHAIN (tss)) + { + enum omp_tss_code code = OMP_TSS_CODE (tss); + if (code != OMP_TRAIT_SET_DEVICE + && code != OMP_TRAIT_SET_TARGET_DEVICE + && code != OMP_TRAIT_SET_CONSTRUCT) + ok = false; + } + for (tree tss = variants[i+1].selector; + tss && ok; tss = TREE_CHAIN (tss)) + { + enum omp_tss_code code = OMP_TSS_CODE (tss); + if (code != OMP_TRAIT_SET_DEVICE + && code != OMP_TRAIT_SET_TARGET_DEVICE + && code != OMP_TRAIT_SET_CONSTRUCT) + ok = false; + } + /* Ignore the construct bits of the score. If the isa/arch/kind + bits are strictly ordered, we're good to go. Since + "the final score is the sum of the values of all specified + selectors plus 1", subtract that 1 from both scores before + getting rid of the low bits. */ + if (ok) + { + size_t l = list_length (construct_context); + gcc_assert (variants[i].score > 0 + && variants[i+1].score > 0); + if ((variants[i].score - 1) >> l + <= (variants[i+1].score - 1) >> l) + ok = false; + } + } + + if (!ok) + { + variants.truncate (0); + break; + } + } + + if (dump_file) + { + fprintf (dump_file, "Adding directive variant with "); + + if (variants[i].selector) + { + fprintf (dump_file, "selector "); + print_omp_context_selector (dump_file, variants[i].selector, + TDF_NONE); + } + else + fprintf (dump_file, "default selector"); + + fprintf (dump_file, " as candidate.\n"); + } + + /* The last of the candidates is ended by a static selector. */ + if (!variants[i].dynamic_selector) + { + variants.truncate (i + 1); + break; + } + } + + return variants.copy (); +} + +/* Two attempts are made to resolve calls to "declare variant" functions: + early resolution in the gimplifier, and late resolution in the + omp_device_lower pass. If early resolution is not possible, the + original function call is gimplified into the same form as metadirective + and goes through the same late resolution code as metadirective. */ + +/* Collect "declare variant" candidates for BASE. CONSTRUCT_CONTEXT + is the un-augmented context, or NULL_TREE if that information is not + available yet. */ +vec<struct omp_variant> +omp_declare_variant_candidates (tree base, tree construct_context) +{ + auto_vec <struct omp_variant> candidates; + bool complete_p; + tree augmented_context + = omp_complete_construct_context (construct_context, &complete_p); + + /* The variants are stored on (possible multiple) "omp declare variant base" + attributes on the base function. */ + for (tree attr = DECL_ATTRIBUTES (base); attr; attr = TREE_CHAIN (attr)) + { + attr = lookup_attribute ("omp declare variant base", attr); + if (attr == NULL_TREE) + break; + + tree fndecl = TREE_PURPOSE (TREE_VALUE (attr)); + tree selector = TREE_VALUE (TREE_VALUE (attr)); + + if (TREE_CODE (fndecl) != FUNCTION_DECL) + continue; + + /* Ignore this variant if its selector is known not to match. */ + if (!omp_context_selector_matches (selector, augmented_context, + complete_p)) + continue; + + struct omp_variant candidate; + candidate.selector = selector; + candidate.dynamic_selector = false; + candidate.alternative = fndecl; + candidate.body = NULL_TREE; + candidates.safe_push (candidate); + } + + /* Add a default that is the base function. */ + struct omp_variant v; + v.selector = NULL_TREE; + v.dynamic_selector = false; + v.alternative = base; + v.body = NULL_TREE; + candidates.safe_push (v); + return candidates.copy (); +} + +/* Collect metadirective candidates for METADIRECTIVE. CONSTRUCT_CONTEXT + is the un-augmented context, or NULL_TREE if that information is not + available yet. */ +vec<struct omp_variant> +omp_metadirective_candidates (tree metadirective, tree construct_context) +{ + auto_vec <struct omp_variant> candidates; + tree variant = OMP_METADIRECTIVE_VARIANTS (metadirective); + bool complete_p; + tree augmented_context + = omp_complete_construct_context (construct_context, &complete_p); + + gcc_assert (variant); + for (; variant; variant = TREE_CHAIN (variant)) + { + tree selector = OMP_METADIRECTIVE_VARIANT_SELECTOR (variant); + + /* Ignore this variant if its selector is known not to match. */ + if (!omp_context_selector_matches (selector, augmented_context, + complete_p)) + continue; + + struct omp_variant candidate; + candidate.selector = selector; + candidate.dynamic_selector = false; + candidate.alternative = OMP_METADIRECTIVE_VARIANT_DIRECTIVE (variant); + candidate.body = OMP_METADIRECTIVE_VARIANT_BODY (variant); + candidates.safe_push (candidate); + } + return candidates.copy (); +} + +/* Return a vector of dynamic replacement candidates for the metadirective + statement in METADIRECTIVE. Return an empty vector if the metadirective + cannot be resolved. This function is intended to be called from the + front ends, prior to gimplification. */ + +vec<struct omp_variant> +omp_early_resolve_metadirective (tree metadirective) +{ + vec <struct omp_variant> candidates + = omp_metadirective_candidates (metadirective, NULL_TREE); + return omp_get_dynamic_candidates (candidates, NULL_TREE); +} + +/* Return a vector of dynamic replacement candidates for the variant construct + with SELECTORS and CONSTRUCT_CONTEXT. This version is called during late + resolution in the ompdevlow pass. */ + +vec<struct omp_variant> +omp_resolve_variant_construct (tree construct_context, tree selectors) +{ + auto_vec <struct omp_variant> variants; + + for (int i = 0; i < TREE_VEC_LENGTH (selectors); i++) + { + struct omp_variant variant; + + variant.selector = TREE_VEC_ELT (selectors, i); + variant.dynamic_selector = false; + variant.alternative = build_int_cst (integer_type_node, i + 1); + variant.body = NULL_TREE; + + variants.safe_push (variant); + } + + return omp_get_dynamic_candidates (variants, construct_context); +} + /* Encode an oacc launch argument. This matches the GOMP_LAUNCH_PACK macro on gomp-constants.h. We do not check for overflow. */ diff --git a/gcc/omp-general.h b/gcc/omp-general.h index f34e75f3f76ecb12c6cbf53cd9978d28f8ffb087..4e143ed586b77cb945fbb70896470712110415e7 100644 --- a/gcc/omp-general.h +++ b/gcc/omp-general.h @@ -91,6 +91,33 @@ struct omp_for_data tree adjn1; }; +/* Needs to be a GC-friendly widest_int variant, but precision is + desirable to be the same on all targets. */ +typedef generic_wide_int <fixed_wide_int_storage <1024> > score_wide_int; + +/* A structure describing a variant alternative in a metadirective or + variant function, used for matching and scoring during resolution. */ +struct GTY(()) omp_variant +{ + /* Context selector. This is NULL_TREE for the default. */ + tree selector; + /* For early resolution of "metadirective", contains the nested directive. + For early resolution of "declare variant", contains the function decl + for this alternative. For late resolution of both, contains the label + that is the branch target for this alternative. */ + tree alternative; + /* Common body, used for metadirective, null otherwise. */ + tree body; + /* The score, or the best guess if scorable is false. */ + score_wide_int score; + /* True if the selector is dynamic. Filled in during resolution. */ + bool dynamic_selector; + /* Whether the selector is known to definitely match. */ + bool matchable; + /* Whether the score for the selector is definitely known. */ + bool scorable; +}; + #define OACC_FN_ATTRIB "oacc function" /* Accessors for OMP context selectors, used by variant directives. @@ -150,6 +177,8 @@ extern tree make_trait_set_selector (enum omp_tss_code, tree, tree); extern tree make_trait_selector (enum omp_ts_code, tree, tree, tree); extern tree make_trait_property (tree, tree, tree); +extern tree make_omp_metadirective_variant (tree, tree, tree); + extern tree omp_find_clause (tree clauses, enum omp_clause_code kind); extern bool omp_is_allocatable_or_ptr (tree decl); extern tree omp_check_optional_argument (tree decl, bool for_present_check); @@ -165,16 +194,22 @@ extern tree find_combined_omp_for (tree *, int *, void *); extern poly_uint64 omp_max_vf (bool); extern int omp_max_simt_vf (void); extern const char *omp_context_name_list_prop (tree); -extern void omp_construct_traits_to_codes (tree, int, enum tree_code *); -extern tree omp_check_context_selector (location_t loc, tree ctx); +extern tree omp_check_context_selector (location_t loc, tree ctx, + bool metadirective_p); extern void omp_mark_declare_variant (location_t loc, tree variant, tree construct); -extern int omp_context_selector_matches (tree); -extern int omp_context_selector_set_compare (enum omp_tss_code, tree, tree); +extern int omp_context_selector_matches (tree, tree, bool); +extern tree resolve_omp_target_device_matches (tree node); extern tree omp_get_context_selector (tree, enum omp_tss_code, enum omp_ts_code); extern tree omp_get_context_selector_list (tree, enum omp_tss_code); -extern tree omp_resolve_declare_variant (tree); +extern vec<struct omp_variant> omp_declare_variant_candidates (tree, tree); +extern vec<struct omp_variant> omp_metadirective_candidates (tree, tree); +extern vec<struct omp_variant> +omp_get_dynamic_candidates (vec<struct omp_variant>&, tree); +extern vec<struct omp_variant> omp_early_resolve_metadirective (tree); +extern vec<struct omp_variant> omp_resolve_variant_construct (tree, tree); +extern tree omp_dynamic_cond (tree, tree); extern tree oacc_launch_pack (unsigned code, tree device, unsigned op); extern tree oacc_replace_fn_attrib_attr (tree attribs, tree dims); extern void oacc_replace_fn_attrib (tree fn, tree dims); diff --git a/gcc/omp-offload.cc b/gcc/omp-offload.cc index da5c224ff651a49afe32904ff143874bcbaf2e3b..b6e7331a236117e6fb99fd08f0363080ae3e4025 100644 --- a/gcc/omp-offload.cc +++ b/gcc/omp-offload.cc @@ -2617,6 +2617,76 @@ find_simtpriv_var_op (tree *tp, int *walk_subtrees, void *) return NULL_TREE; } +/* Helper function for execute_omp_device_lower, invoked via walk_gimple_op. + Resolve any OMP_TARGET_DEVICE_MATCHES and OMP_NEXT_VARIANT exprs to + constants. */ +static tree +resolve_omp_variant_cookies (tree *tp, int *walk_subtrees, + void *data ATTRIBUTE_UNUSED) +{ + if (TREE_CODE (*tp) == OMP_TARGET_DEVICE_MATCHES) + { + *tp = resolve_omp_target_device_matches (*tp); + *walk_subtrees = 0; + return NULL_TREE; + } + + if (TREE_CODE (*tp) != OMP_NEXT_VARIANT) + return NULL_TREE; + tree index = OMP_NEXT_VARIANT_INDEX (*tp); + tree state = OMP_NEXT_VARIANT_STATE (*tp); + + /* State is a triplet of (result-vector, construct_context, selector_vec). + If result-vector has already been computed, just use it. Otherwise we + must resolve the variant and fill in that part of the state object. + All OMP_NEXT_VARIANT exprs for the same variant construct are supposed + to share the same state object, but if something bad happens and we end + up with copies, that is OK, it will just cause the result-vector to be + computed multiple times. */ + tree result_vector = TREE_PURPOSE (state); + if (!result_vector) + { + tree construct_context = TREE_VALUE (state); + tree selectors = TREE_CHAIN (state); + + vec<struct omp_variant> candidates + = omp_resolve_variant_construct (construct_context, selectors); + int n = TREE_VEC_LENGTH (selectors); + TREE_PURPOSE (state) = result_vector = make_tree_vec (n + 1); + /* The result vector maps the index of each element of the original + selectors vector onto the index of the next element of the filtered/ + sorted candidates vector. Since some of the original variants may + have been discarded as non-matching in candidates, initialize the + whole array to zero so that we have a placeholder "next" value for + those elements. Hopefully dead code elimination will take care of + subsequently discarding the unreachable cases in the already-generated + switch statement. */ + for (int i = 1; i <= n; i++) + TREE_VEC_ELT (result_vector, i) = integer_zero_node; + /* Element 0 is the case label of the first variant in the sorted + list. */ + if (dump_file) + fprintf (dump_file, "Computing case map for variant directive\n"); + int j = 0; + for (unsigned int i = 0; i < candidates.length(); i++) + { + if (dump_file) + fprintf (dump_file, " %d -> case %d\n", + j, (int) tree_to_shwi (candidates[i].alternative)); + TREE_VEC_ELT (result_vector, j) = candidates[i].alternative; + j = (int) tree_to_shwi (candidates[i].alternative); + } + } + + /* Now just grab the value out of the precomputed array. */ + gcc_assert (TREE_CODE (index) == INTEGER_CST); + int indexval = (int) tree_to_shwi (index); + *tp = TREE_VEC_ELT (result_vector, indexval); + *walk_subtrees = 0; + return NULL_TREE; +} + + /* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets, VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and LANE is kept to be expanded to RTL later on. Also cleanup all other SIMT @@ -2637,6 +2707,17 @@ execute_omp_device_lower () tree map_ptr_fn = builtin_decl_explicit (BUILT_IN_GOMP_TARGET_MAP_INDIRECT_PTR); #endif + + /* Handle expansion of magic cookies for variant constructs first. */ + if (cgraph_node::get (cfun->decl)->has_omp_variant_constructs) + FOR_EACH_BB_FN (bb, cfun) + { + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + walk_gimple_op (gsi_stmt (gsi), resolve_omp_variant_cookies, NULL); + for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + walk_gimple_op (gsi_stmt (gsi), resolve_omp_variant_cookies, NULL); + } + FOR_EACH_BB_FN (bb, cfun) for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) { @@ -2645,16 +2726,8 @@ execute_omp_device_lower () continue; if (!gimple_call_internal_p (stmt)) { - if (calls_declare_variant_alt) - if (tree fndecl = gimple_call_fndecl (stmt)) - { - tree new_fndecl = omp_resolve_declare_variant (fndecl); - if (new_fndecl != fndecl) - { - gimple_call_set_fndecl (stmt, new_fndecl); - update_stmt (stmt); - } - } + /* FIXME: this is a leftover of obsolete code. */ + gcc_assert (!calls_declare_variant_alt); #ifdef ACCEL_COMPILER if (omp_redirect_indirect_calls && gimple_call_fndecl (stmt) == NULL_TREE) @@ -2821,6 +2894,7 @@ public: /* opt_pass methods: */ bool gate (function *fun) final override { + cgraph_node *node = cgraph_node::get (fun->decl); #ifdef ACCEL_COMPILER bool offload_ind_funcs_p = vec_safe_length (offload_ind_funcs) > 0; #else @@ -2828,7 +2902,8 @@ public: #endif return (!(fun->curr_properties & PROP_gimple_lomp_dev) || (flag_openmp - && (cgraph_node::get (fun->decl)->calls_declare_variant_alt + && (node->calls_declare_variant_alt + || node->has_omp_variant_constructs || offload_ind_funcs_p))); } unsigned int execute (function *) final override diff --git a/gcc/omp-simd-clone.cc b/gcc/omp-simd-clone.cc index f2d6da2d06c10964aad696b10dc9d78c32721709..e30fa318fbae583ab57414d52d093efe601a8b57 100644 --- a/gcc/omp-simd-clone.cc +++ b/gcc/omp-simd-clone.cc @@ -693,6 +693,8 @@ simd_clone_create (struct cgraph_node *old_node, bool force_local) new_node->externally_visible = old_node->externally_visible; new_node->calls_declare_variant_alt = old_node->calls_declare_variant_alt; + new_node->has_omp_variant_constructs + = old_node->has_omp_variant_constructs; } /* Mark clones with internal linkage as gc'able, so they will not be diff --git a/gcc/testsuite/c-c++-common/gomp/declare-variant-12.c b/gcc/testsuite/c-c++-common/gomp/declare-variant-12.c index 3515d9ae44e6acefe220e272d183de5001f89ea0..f9150773b0e26f7b88ba4e4428f81fe73805516e 100644 --- a/gcc/testsuite/c-c++-common/gomp/declare-variant-12.c +++ b/gcc/testsuite/c-c++-common/gomp/declare-variant-12.c @@ -29,29 +29,29 @@ void f13 (void); void f14 (void); void f15 (void); void f16 (void); -#pragma omp declare variant (f14) match (construct={teams,parallel,for}) /* 16+8+4 */ -#pragma omp declare variant (f15) match (construct={parallel},user={condition(score(19):1)}) /* 8+19 */ -#pragma omp declare variant (f16) match (implementation={atomic_default_mem_order(score(27):seq_cst)}) +#pragma omp declare variant (f14) match (construct={teams,parallel,for}) /* 1+8+16 */ +#pragma omp declare variant (f15) match (construct={parallel},user={condition(score(16):1)}) /* 8+16 */ +#pragma omp declare variant (f16) match (implementation={atomic_default_mem_order(score(24):seq_cst)}) void f17 (void); void f18 (void); void f19 (void); void f20 (void); -#pragma omp declare variant (f18) match (construct={teams,parallel,for}) /* 16+8+4 */ +#pragma omp declare variant (f18) match (construct={teams,parallel,for}) /* 1+8+6 */ #pragma omp declare variant (f19) match (construct={for},user={condition(score(25):1)}) /* 4+25 */ #pragma omp declare variant (f20) match (implementation={atomic_default_mem_order(score(28):seq_cst)}) void f21 (void); void f22 (void); void f23 (void); void f24 (void); -#pragma omp declare variant (f22) match (construct={parallel,for}) /* 2+1 */ +#pragma omp declare variant (f22) match (construct={parallel,for}) /* 8+16 */ #pragma omp declare variant (f23) match (construct={for}) /* 0 */ #pragma omp declare variant (f24) match (implementation={atomic_default_mem_order(score(2):seq_cst)}) void f25 (void); void f26 (void); void f27 (void); void f28 (void); -#pragma omp declare variant (f26) match (construct={parallel,for}) /* 2+1 */ -#pragma omp declare variant (f27) match (construct={for},user={condition(1)}) /* 4 */ +#pragma omp declare variant (f26) match (construct={parallel,for}) /* 8+16 */ +#pragma omp declare variant (f27) match (construct={for},user={condition(score(25):1)}) /* 16 + 25 */ #pragma omp declare variant (f28) match (implementation={atomic_default_mem_order(score(3):seq_cst)}) void f29 (void); diff --git a/gcc/testsuite/c-c++-common/gomp/declare-variant-13.c b/gcc/testsuite/c-c++-common/gomp/declare-variant-13.c index 68e6a8979504d7452ab6bda6c109b45db6763089..83c3d85bbf9ca427e48c32a5c18cbee6a55da2b1 100644 --- a/gcc/testsuite/c-c++-common/gomp/declare-variant-13.c +++ b/gcc/testsuite/c-c++-common/gomp/declare-variant-13.c @@ -20,5 +20,7 @@ test1 (int x) isa has score 2^2 or 2^3. We can't decide on whether avx512f will match or not, that also depends on whether it is a declare simd clone or not and which one, but the f03 variant has a higher score anyway. */ - return f05 (x); /* { dg-final { scan-tree-dump-times "f03 \\\(x" 1 "gimple" } } */ + return f05 (x); + /* { dg-final { scan-tree-dump "f03 \\\(x" "gimple" } } */ + /* { dg-final { scan-tree-dump-not "f05 \\\(x" "gimple" } } */ } diff --git a/gcc/testsuite/c-c++-common/gomp/declare-variant-14.c b/gcc/testsuite/c-c++-common/gomp/declare-variant-14.c index 8a6bf09d3cf6b16ca1281283f55e16023d6eab00..8213b1af45087a3b57964c7f33351c1444568de7 100644 --- a/gcc/testsuite/c-c++-common/gomp/declare-variant-14.c +++ b/gcc/testsuite/c-c++-common/gomp/declare-variant-14.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { { i?86-*-* x86_64-*-* } && vect_simd_clones } } } */ -/* { dg-additional-options "-mno-sse3 -fdump-tree-gimple -fdump-tree-optimized" } */ +/* { dg-additional-options "-O -mno-sse3 -fdump-tree-gimple -fdump-tree-optimized" } */ int f01 (int); int f02 (int); diff --git a/gcc/testsuite/gfortran.dg/gomp/declare-variant-12.f90 b/gcc/testsuite/gfortran.dg/gomp/declare-variant-12.f90 index f1b4a2280ec27cb42c416ab91c2c73b6ae24a30d..dd8d7c24d00eff3ef11872308fd675d62549475f 100644 --- a/gcc/testsuite/gfortran.dg/gomp/declare-variant-12.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/declare-variant-12.f90 @@ -64,9 +64,9 @@ contains end subroutine subroutine f17 () - !$omp declare variant (f14) match (construct={teams,parallel,do}) ! 16+8+4 - !$omp declare variant (f15) match (construct={parallel},user={condition(score(19):.true.)}) ! 8+19 - !$omp declare variant (f16) match (implementation={atomic_default_mem_order(score(27):seq_cst)}) + !$omp declare variant (f14) match (construct={teams,parallel,do}) ! 1+8+16 + !$omp declare variant (f15) match (construct={parallel},user={condition(score(16):.true.)}) ! 8+16 + !$omp declare variant (f16) match (implementation={atomic_default_mem_order(score(24):seq_cst)}) end subroutine subroutine f18 () @@ -79,7 +79,7 @@ contains end subroutine subroutine f21 () - !$omp declare variant (f18) match (construct={teams,parallel,do}) ! 16+8+4 + !$omp declare variant (f18) match (construct={teams,parallel,do}) ! 1+8+16 !$omp declare variant (f19) match (construct={do},user={condition(score(25):.true.)}) ! 4+25 !$omp declare variant (f20) match (implementation={atomic_default_mem_order(score(28):seq_cst)}) end subroutine @@ -94,7 +94,7 @@ contains end subroutine subroutine f25 () - !$omp declare variant (f22) match (construct={parallel,do}) ! 2+1 + !$omp declare variant (f22) match (construct={parallel,do}) ! 8+16 !$omp declare variant (f23) match (construct={do}) ! 0 !$omp declare variant (f24) match (implementation={atomic_default_mem_order(score(2):seq_cst)}) end subroutine @@ -109,8 +109,8 @@ contains end subroutine subroutine f29 () - !$omp declare variant (f26) match (construct={parallel,do}) ! 2+1 - !$omp declare variant (f27) match (construct={do},user={condition(.true.)}) ! 4 + !$omp declare variant (f26) match (construct={parallel,do}) ! 8+16 + !$omp declare variant (f27) match (construct={do},user={condition(score(25):.true.)}) ! 16+25 !$omp declare variant (f28) match (implementation={atomic_default_mem_order(score(3):seq_cst)}) end subroutine diff --git a/gcc/testsuite/gfortran.dg/gomp/declare-variant-13.f90 b/gcc/testsuite/gfortran.dg/gomp/declare-variant-13.f90 index 97484a63d0b91a192c04bc170fdf787d4182558e..01268d82959d6bf093b07a06f13bd0a7aabeacd8 100644 --- a/gcc/testsuite/gfortran.dg/gomp/declare-variant-13.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/declare-variant-13.f90 @@ -2,27 +2,25 @@ ! { dg-additional-options "-fdump-tree-gimple" } ! { dg-additional-options "-mno-sse3" { target { i?86-*-* x86_64-*-* } } } -program main - implicit none -contains +module main + +implicit none + +interface integer function f01 (x) integer, intent(in) :: x - f01 = x end function integer function f02 (x) integer, intent(in) :: x - f02 = x end function integer function f03 (x) integer, intent(in) :: x - f03 = x end function integer function f04 (x) integer, intent(in) :: x - f04 = x end function integer function f05 (x) @@ -32,8 +30,10 @@ contains !$omp declare variant (f02) match (implementation={vendor(score(3):gnu)},device={kind(cpu)}) ! (1 or 2) + 3 !$omp declare variant (f03) match (user={condition(score(9):.true.)}) !$omp declare variant (f04) match (implementation={vendor(score(6):gnu)},device={kind(host)}) ! (1 or 2) + 6 - f05 = x end function +end interface + +contains integer function test1 (x) !$omp declare simd @@ -43,6 +43,9 @@ contains ! isa has score 2^2 or 2^3. We can't decide on whether avx512f will match or ! not, that also depends on whether it is a declare simd clone or not and which ! one, but the f03 variant has a higher score anyway. */ - test1 = f05 (x) ! { dg-final { scan-tree-dump-times "f03 \\\(x" 1 "gimple" } } + test1 = f05 (x) + ! { dg-final { scan-tree-dump "f03 \\\(" "gimple" } } + ! { dg-final { scan-tree-dump-not "f05 \\\(" "gimple" } } end function -end program + +end module diff --git a/gcc/testsuite/gfortran.dg/gomp/declare-variant-14.f90 b/gcc/testsuite/gfortran.dg/gomp/declare-variant-14.f90 index e154d93d73a5a881299a9356fb9c2e280051ee90..71b66cb5d0869711d52490ba615f105d5193902d 100644 --- a/gcc/testsuite/gfortran.dg/gomp/declare-variant-14.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/declare-variant-14.f90 @@ -1,22 +1,21 @@ ! { dg-do compile { target { { i?86-*-* x86_64-*-* } && vect_simd_clones } } } */ -! { dg-additional-options "-mno-sse3 -O0 -fdump-tree-gimple -fdump-tree-optimized" } +! { dg-additional-options "-mno-sse3 -O1 -fdump-tree-gimple -fdump-tree-optimized" } module main - implicit none -contains + +implicit none + +interface integer function f01 (x) integer, intent (in) :: x - f01 = x end function integer function f02 (x) integer, intent (in) :: x - f02 = x end function integer function f03 (x) integer, intent (in) :: x - f03 = x end function integer function f04 (x) @@ -25,9 +24,12 @@ contains !$omp declare variant (f01) match (device={isa("avx512f")}) ! 4 or 8 !$omp declare variant (f02) match (implementation={vendor(score(3):gnu)},device={kind(cpu)}) ! (1 or 2) + 3 !$omp declare variant (f03) match (implementation={vendor(score(5):gnu)},device={kind(host)}) ! (1 or 2) + 5 - f04 = x end function +end interface + +contains + integer function test1 (x) !$omp declare simd integer, intent (in) :: x diff --git a/gcc/tree-inline.cc b/gcc/tree-inline.cc index 003e1fb65f412fbfc2d7d2ba845108c7b7313f58..0ac51911c9f31f3080dff9815fadead618db3602 100644 --- a/gcc/tree-inline.cc +++ b/gcc/tree-inline.cc @@ -5050,6 +5050,8 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id, dst_cfun->calls_eh_return |= id->src_cfun->calls_eh_return; id->dst_node->calls_declare_variant_alt |= id->src_node->calls_declare_variant_alt; + id->dst_node->has_omp_variant_constructs + |= id->src_node->has_omp_variant_constructs; gcc_assert (!id->src_cfun->after_inlining); @@ -6352,6 +6354,8 @@ tree_function_versioning (tree old_decl, tree new_decl, new_entry ? new_entry->count : old_entry_block->count); new_version_node->calls_declare_variant_alt = old_version_node->calls_declare_variant_alt; + new_version_node->has_omp_variant_constructs + = old_version_node->has_omp_variant_constructs; if (DECL_STRUCT_FUNCTION (new_decl)->gimple_df) DECL_STRUCT_FUNCTION (new_decl)->gimple_df->ipa_pta = id.src_cfun->gimple_df->ipa_pta;