From 28d8c680aaea46137170fef2bd1c6a98301518dc Mon Sep 17 00:00:00 2001 From: Andrew Burgess <andrew.burgess@embecosm.com> Date: Sat, 5 Aug 2023 14:54:11 +0200 Subject: [PATCH] core: Support heap-based trampolines Generate heap-based nested function trampolines Add support for allocating nested function trampolines on an executable heap rather than on the stack. This is motivated by targets such as AArch64 Darwin, which globally prohibit executing code on the stack. The target-specific routines for allocating and writing trampolines are to be provided in libgcc. The gcc flag -ftrampoline-impl controls whether to generate code that instantiates trampolines on the stack, or to emit calls to __builtin_nested_func_ptr_created and __builtin_nested_func_ptr_deleted. Note that this flag is completely independent of libgcc: If libgcc is for any reason missing those symbols, you will get a link failure. This implementation imposes some implicit restrictions as compared to stack trampolines. longjmp'ing back to a state before a trampoline was created will cause us to skip over the corresponding __builtin_nested_func_ptr_deleted, which will leak trampolines starting from the beginning of the linked list of allocated trampolines. There may be scope for instrumenting longjmp/setjmp to trigger cleanups of trampolines. Co-Authored-By: Maxim Blinov <maxim.blinov@embecosm.com> Co-Authored-By: Iain Sandoe <iain@sandoe.co.uk> Co-Authored-By: Francois-Xavier Coudert <fxcoudert@gcc.gnu.org> gcc/ChangeLog: * builtins.def (BUILT_IN_NESTED_PTR_CREATED): Define. (BUILT_IN_NESTED_PTR_DELETED): Ditto. * common.opt (ftrampoline-impl): Add option to control generation of trampoline instantiation (heap or stack). * coretypes.h: Define enum trampoline_impl. * tree-nested.cc (convert_tramp_reference_op): Don't bother calling __builtin_adjust_trampoline for heap trampolines. 
(finalize_nesting_tree_1): Emit calls to __builtin_nested_...{created,deleted} if we're generating with -ftrampoline-impl=heap. * tree.cc (build_common_builtin_nodes): Build __builtin_nested_...{created,deleted}. * doc/invoke.texi (-ftrampoline-impl): Document. --- gcc/builtins.def | 2 + gcc/common.opt | 17 ++++++- gcc/coretypes.h | 6 +++ gcc/doc/invoke.texi | 17 ++++++- gcc/tree-nested.cc | 121 +++++++++++++++++++++++++++++++++++++------- gcc/tree.cc | 17 +++++++ 6 files changed, 161 insertions(+), 19 deletions(-) diff --git a/gcc/builtins.def b/gcc/builtins.def index eb6f4ec2034c..e989cd814a5d 100644 --- a/gcc/builtins.def +++ b/gcc/builtins.def @@ -1074,6 +1074,8 @@ DEF_BUILTIN_STUB (BUILT_IN_ADJUST_TRAMPOLINE, "__builtin_adjust_trampoline") DEF_BUILTIN_STUB (BUILT_IN_INIT_DESCRIPTOR, "__builtin_init_descriptor") DEF_BUILTIN_STUB (BUILT_IN_ADJUST_DESCRIPTOR, "__builtin_adjust_descriptor") DEF_BUILTIN_STUB (BUILT_IN_NONLOCAL_GOTO, "__builtin_nonlocal_goto") +DEF_BUILTIN_STUB (BUILT_IN_NESTED_PTR_CREATED, "__builtin_nested_func_ptr_created") +DEF_BUILTIN_STUB (BUILT_IN_NESTED_PTR_DELETED, "__builtin_nested_func_ptr_deleted") /* Implementing __builtin_setjmp. */ DEF_BUILTIN_STUB (BUILT_IN_SETJMP_SETUP, "__builtin_setjmp_setup") diff --git a/gcc/common.opt b/gcc/common.opt index ce34075561f9..1cf3bdd3b518 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2927,10 +2927,25 @@ Common Var(flag_tracer) Optimization Perform superblock formation via tail duplication. ftrampolines -Common Var(flag_trampolines) Init(0) +Common Var(flag_trampolines) Init(HEAP_TRAMPOLINES_INIT) For targets that normally need trampolines for nested functions, always generate them instead of using descriptors. +ftrampoline-impl= +Common Joined RejectNegative Enum(trampoline_impl) Var(flag_trampoline_impl) Init(HEAP_TRAMPOLINES_INIT ? TRAMPOLINE_IMPL_HEAP : TRAMPOLINE_IMPL_STACK) +Whether trampolines are generated in executable memory rather than +executable stack. 
+ +Enum +Name(trampoline_impl) Type(enum trampoline_impl) UnknownError(unknown trampoline implementation %qs) + +EnumValue +Enum(trampoline_impl) String(stack) Value(TRAMPOLINE_IMPL_STACK) + +EnumValue +Enum(trampoline_impl) String(heap) Value(TRAMPOLINE_IMPL_HEAP) + + ; Zero means that floating-point math operations cannot generate a ; (user-visible) trap. This is the case, for example, in nonstop ; IEEE 754 arithmetic. diff --git a/gcc/coretypes.h b/gcc/coretypes.h index f86dc169a40b..db7813bdd3d9 100644 --- a/gcc/coretypes.h +++ b/gcc/coretypes.h @@ -204,6 +204,12 @@ enum tls_model { TLS_MODEL_LOCAL_EXEC }; +/* Types of trampoline implementation. */ +enum trampoline_impl { + TRAMPOLINE_IMPL_STACK, + TRAMPOLINE_IMPL_HEAP +}; + /* Types of ABI for an offload compiler. */ enum offload_abi { OFFLOAD_ABI_UNSET, diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index aebe9195ef0f..17aaa8cc058a 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -718,7 +718,8 @@ Objective-C and Objective-C++ Dialects}. -fverbose-asm -fpack-struct[=@var{n}] -fleading-underscore -ftls-model=@var{model} -fstack-reuse=@var{reuse_level} --ftrampolines -ftrapv -fwrapv +-ftrampolines -ftrampoline-impl=@r{[}stack@r{|}heap@r{]} +-ftrapv -fwrapv -fvisibility=@r{[}default@r{|}internal@r{|}hidden@r{|}protected@r{]} -fstrict-volatile-bitfields -fsync-libcalls} @@ -19050,6 +19051,20 @@ For languages other than Ada, the @code{-ftrampolines} and trampolines are always generated on platforms that need them for nested functions. +@opindex ftrampoline-impl +@item -ftrampoline-impl=@r{[}stack@r{|}heap@r{]} +By default, trampolines are generated on the stack. However, certain platforms +(such as the Apple M1) do not permit an executable stack. Compiling with +@option{-ftrampoline-impl=heap} generates calls to +@code{__builtin_nested_func_ptr_created} and +@code{__builtin_nested_func_ptr_deleted} in order to allocate and +deallocate trampoline space on the executable heap. 
These functions are +implemented in libgcc, and will only be provided on specific targets: +x86_64 Darwin, x86_64 and aarch64 Linux. @emph{PLEASE NOTE}: Heap +trampolines are @emph{not} guaranteed to be correctly deallocated if you +@code{setjmp}, instantiate nested functions, and then @code{longjmp} back +to a state prior to having allocated those nested functions. + @opindex fvisibility @item -fvisibility=@r{[}default@r{|}internal@r{|}hidden@r{|}protected@r{]} Set the default ELF image symbol visibility to the specified option---all diff --git a/gcc/tree-nested.cc b/gcc/tree-nested.cc index 31c7b6001bd4..d2fe3fca8af9 100644 --- a/gcc/tree-nested.cc +++ b/gcc/tree-nested.cc @@ -611,6 +611,14 @@ get_trampoline_type (struct nesting_info *info) if (trampoline_type) return trampoline_type; + /* When trampolines are created off-stack then the only thing we need in the + local frame is a single pointer. */ + if (flag_trampoline_impl == TRAMPOLINE_IMPL_HEAP) + { + trampoline_type = build_pointer_type (void_type_node); + return trampoline_type; + } + align = TRAMPOLINE_ALIGNMENT; size = TRAMPOLINE_SIZE; @@ -2790,17 +2798,27 @@ convert_tramp_reference_op (tree *tp, int *walk_subtrees, void *data) /* Compute the address of the field holding the trampoline. */ x = get_frame_field (info, target_context, x, &wi->gsi); - x = build_addr (x); - x = gsi_gimplify_val (info, x, &wi->gsi); - /* Do machine-specific ugliness. Normally this will involve - computing extra alignment, but it can really be anything. */ - if (descr) - builtin = builtin_decl_implicit (BUILT_IN_ADJUST_DESCRIPTOR); + /* APB: We don't need to do the adjustment calls when using off-stack + trampolines, any such adjustment will be done when the off-stack + trampoline is created. 
*/ + if (!descr && flag_trampoline_impl == TRAMPOLINE_IMPL_HEAP) + x = gsi_gimplify_val (info, x, &wi->gsi); else - builtin = builtin_decl_implicit (BUILT_IN_ADJUST_TRAMPOLINE); - call = gimple_build_call (builtin, 1, x); - x = init_tmp_var_with_call (info, &wi->gsi, call); + { + x = build_addr (x); + + x = gsi_gimplify_val (info, x, &wi->gsi); + + /* Do machine-specific ugliness. Normally this will involve + computing extra alignment, but it can really be anything. */ + if (descr) + builtin = builtin_decl_implicit (BUILT_IN_ADJUST_DESCRIPTOR); + else + builtin = builtin_decl_implicit (BUILT_IN_ADJUST_TRAMPOLINE); + call = gimple_build_call (builtin, 1, x); + x = init_tmp_var_with_call (info, &wi->gsi, call); + } /* Cast back to the proper function type. */ x = build1 (NOP_EXPR, TREE_TYPE (t), x); @@ -3380,6 +3398,7 @@ build_init_call_stmt (struct nesting_info *info, tree decl, tree field, static void finalize_nesting_tree_1 (struct nesting_info *root) { + gimple_seq cleanup_list = NULL; gimple_seq stmt_list = NULL; gimple *stmt; tree context = root->context; @@ -3511,9 +3530,48 @@ finalize_nesting_tree_1 (struct nesting_info *root) if (!field) continue; - x = builtin_decl_implicit (BUILT_IN_INIT_TRAMPOLINE); - stmt = build_init_call_stmt (root, i->context, field, x); - gimple_seq_add_stmt (&stmt_list, stmt); + if (flag_trampoline_impl == TRAMPOLINE_IMPL_HEAP) + { + /* We pass a whole bunch of arguments to the builtin function that + creates the off-stack trampoline, these are + 1. The nested function chain value (that must be passed to the + nested function so it can find the function arguments). + 2. A pointer to the nested function implementation, + 3. The address in the local stack frame where we should write + the address of the trampoline. 
+ + When this code was originally written I just kind of threw + everything at the builtin, figuring I'd work out what was + actually needed later. NOTE: an earlier version apparently also + passed the stack pointer (and a fourth argument); the call built + below passes only the three arguments listed above. */ + tree arg1, arg2, arg3; + + gcc_assert (DECL_STATIC_CHAIN (i->context)); + arg1 = build_addr (root->frame_decl); + arg2 = build_addr (i->context); + + x = build3 (COMPONENT_REF, TREE_TYPE (field), + root->frame_decl, field, NULL_TREE); + arg3 = build_addr (x); + + x = builtin_decl_implicit (BUILT_IN_NESTED_PTR_CREATED); + stmt = gimple_build_call (x, 3, arg1, arg2, arg3); + gimple_seq_add_stmt (&stmt_list, stmt); + + /* This call to delete the nested function trampoline is added to + the cleanup list, and called when we exit the current scope. */ + x = builtin_decl_implicit (BUILT_IN_NESTED_PTR_DELETED); + stmt = gimple_build_call (x, 0); + gimple_seq_add_stmt (&cleanup_list, stmt); + } + else + { + /* Original code to initialise the on stack trampoline. */ + x = builtin_decl_implicit (BUILT_IN_INIT_TRAMPOLINE); + stmt = build_init_call_stmt (root, i->context, field, x); + gimple_seq_add_stmt (&stmt_list, stmt); + } } } @@ -3538,11 +3596,40 @@ finalize_nesting_tree_1 (struct nesting_info *root) /* If we created initialization statements, insert them. */ if (stmt_list) { - gbind *bind; - annotate_all_with_location (stmt_list, DECL_SOURCE_LOCATION (context)); - bind = gimple_seq_first_stmt_as_a_bind (gimple_body (context)); - gimple_seq_add_seq (&stmt_list, gimple_bind_body (bind)); - gimple_bind_set_body (bind, stmt_list); + if (flag_trampoline_impl == TRAMPOLINE_IMPL_HEAP) + { + /* Handle off-stack trampolines. 
*/ + gbind *bind; + annotate_all_with_location (stmt_list, DECL_SOURCE_LOCATION (context)); + annotate_all_with_location (cleanup_list, DECL_SOURCE_LOCATION (context)); + bind = gimple_seq_first_stmt_as_a_bind (gimple_body (context)); + gimple_seq_add_seq (&stmt_list, gimple_bind_body (bind)); + + gimple_seq xxx_list = NULL; + + if (cleanup_list != NULL) + { + /* Maybe we shouldn't be creating this try/finally if -fno-exceptions is + in use. If this is the case, then maybe we should, instead, be + inserting the cleanup code onto every path out of this function? Not + yet figured out how we would do this. */ + gtry *t = gimple_build_try (stmt_list, cleanup_list, GIMPLE_TRY_FINALLY); + gimple_seq_add_stmt (&xxx_list, t); + } + else + xxx_list = stmt_list; + + gimple_bind_set_body (bind, xxx_list); + } + else + { + /* The traditional, on stack trampolines. */ + gbind *bind; + annotate_all_with_location (stmt_list, DECL_SOURCE_LOCATION (context)); + bind = gimple_seq_first_stmt_as_a_bind (gimple_body (context)); + gimple_seq_add_seq (&stmt_list, gimple_bind_body (bind)); + gimple_bind_set_body (bind, stmt_list); + } } /* If a chain_decl was created, then it needs to be registered with diff --git a/gcc/tree.cc b/gcc/tree.cc index f7bfd9e3451b..f9fa7b78ffff 100644 --- a/gcc/tree.cc +++ b/gcc/tree.cc @@ -9922,6 +9922,23 @@ build_common_builtin_nodes (void) "__builtin_nonlocal_goto", ECF_NORETURN | ECF_NOTHROW); + tree ptr_ptr_type_node = build_pointer_type (ptr_type_node); + + ftype = build_function_type_list (void_type_node, + ptr_type_node, // void *chain + ptr_type_node, // void *func + ptr_ptr_type_node, // void **dst + NULL_TREE); + local_define_builtin ("__builtin_nested_func_ptr_created", ftype, + BUILT_IN_NESTED_PTR_CREATED, + "__builtin_nested_func_ptr_created", ECF_NOTHROW); + + ftype = build_function_type_list (void_type_node, + NULL_TREE); + local_define_builtin ("__builtin_nested_func_ptr_deleted", ftype, + BUILT_IN_NESTED_PTR_DELETED, + 
"__builtin_nested_func_ptr_deleted", ECF_NOTHROW); + ftype = build_function_type_list (void_type_node, ptr_type_node, ptr_type_node, NULL_TREE); local_define_builtin ("__builtin_setjmp_setup", ftype, -- GitLab