From dffc37deaddf08bd11208a459496b30021700203 Mon Sep 17 00:00:00 2001 From: Jonathan Wakely <jwakely@redhat.com> Date: Thu, 14 Nov 2024 14:25:52 +0000 Subject: [PATCH] libstdc++: Fix invalid casts in unordered container merge functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit François pointed out that static_cast<__node_ptr>(&_M_before_begin) is invalid, because _M_before_begin is only a node-base not a node. Refactor the new merge overloads to only cast when we know we have a valid node. He also pointed out some optimizations to allow reusing hash codes that might be cached in the node. The _M_src_hash_code function already has the right logic to decide when a cached hash code can be reused by a different _Hashtable object. libstdc++-v3/ChangeLog: * include/bits/hashtable.h (_Hashtable::_M_src_hash_code): Improve comments. (_Hashtable::_M_merge_unique(_Hashtable&)): Use pointer_traits to get before-begin pointer. Only use static_cast on valid nodes, not the before-begin pointer. Reuse a hash code cached in the node when possible. (_Hashtable::_M_merge_multi(_Hashtable&)): Likewise. Reviewed-by: François Dumont <fdumont@gcc.gnu.org> --- libstdc++-v3/include/bits/hashtable.h | 43 +++++++++++++++++---------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h index 6a2da121ab98..a704816573ae 100644 --- a/libstdc++-v3/include/bits/hashtable.h +++ b/libstdc++-v3/include/bits/hashtable.h @@ -1204,8 +1204,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return { __n, this->_M_node_allocator() }; } - // Only use the possibly cached node's hash code if its hash function - // _H2 matches _Hash and is stateless. Otherwise recompute it using _Hash. + // Hash code for node __src_n with key __k, using this->hash_function(). + // Will use a hash code cached in the node if safe to do so. This is + // for use in _M_merge_multi where the node comes from another container + // with a hash function that might not match this->hash_function(). template<typename _H2> __hash_code _M_src_hash_code(const _H2&, const key_type& __k, @@ -1213,6 +1215,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { if constexpr (std::is_same_v<_H2, _Hash>) if constexpr (std::is_empty_v<_Hash>) + // If the node has a cached hash code, it's OK to use it. return this->_M_hash_code(__src_n); return this->_M_hash_code(__k); @@ -1246,28 +1249,31 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { __glibcxx_assert(get_allocator() == __src.get_allocator()); + using _PTr = pointer_traits<__node_base_ptr>; + auto __n_elt = __src.size(); size_type __first = 1; - // For a container of identical type we can use its private members. - auto __p = static_cast<__node_ptr>(&__src._M_before_begin); + // For a container of identical type we can use its private members, + // __src._M_before_begin, __src._M_bucket_index etc. + auto __prev = _PTr::pointer_to(__src._M_before_begin); while (__n_elt--) { - const auto __prev = __p; - __p = __p->_M_next(); - const auto& __node = *__p; + const auto __next = __prev->_M_nxt; + const auto& __node = static_cast<__node_type&>(*__next); const key_type& __k = _ExtractKey{}(__node._M_v()); - auto __loc = _M_locate(__k); + const auto __loc = _M_locate(__k); if (__loc) - continue; + { + __prev = __next; + continue; + } - size_type __src_bkt - = __src._M_bucket_index(__src.hash_function()(__k)); + auto __src_bkt = __src._M_bucket_index(__node); auto __nh = __src._M_extract_node(__src_bkt, __prev); _M_insert_unique_node(__loc._M_bucket_index, __loc._M_hash_code, __nh._M_ptr, __first * __n_elt + 1); __nh.release(); __first = 0; - __p = __prev; } } @@ -1311,15 +1317,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (__src.size() == 0) [[__unlikely__]] return; + using _PTr = pointer_traits<__node_base_ptr>; + __node_ptr __hint = nullptr; this->reserve(size() + __src.size()); - // For a container of identical type we can use its private members. - auto __prev = static_cast<__node_ptr>(&__src._M_before_begin); + // For a container of identical type we can use its private members, + // __src._M_before_begin, __src._M_bucket_index etc. + auto __prev = _PTr::pointer_to(__src._M_before_begin); do { - const auto& __node = *__prev->_M_next(); + const auto& __node = static_cast<__node_type&>(*__prev->_M_nxt); const key_type& __k = _ExtractKey{}(__node._M_v()); - __hash_code __code = this->_M_hash_code(__k); + // Hash code from this->hash_function(): + auto __code = _M_src_hash_code(__src.hash_function(), __k, __node); + // Bucket index in __src, using code from __src.hash_function(): size_type __src_bkt = __src._M_bucket_index(__node); auto __nh = __src._M_extract_node(__src_bkt, __prev); __hint = _M_insert_multi_node(__hint, __code, __nh._M_ptr)._M_cur; -- GitLab