#pragma once #include "heap_allocator.h" #include "page_allocator.h" #include "Internal/chunked_allocator.inl.h" namespace baselib { BASELIB_CPP_INTERFACE { // chunked_allocator_flags // Behaviour flags used with `chunked_allocator` parameter `flags`. // enum chunked_allocator_flags { // Disable page based allocation for base allocator. Allocate/deallocate invocation per block instead of page state modification. chunked_allocator_flags_paged_base_allocator_disable = detail::chunked_allocator_flags_paged_base_allocator_disable, // Evict (deallocate) expired blocks, i.e. when all allocations within a block have been deallocated (Note: reserved blocks does not expire). chunked_allocator_flags_evict_expired_blocks = detail::chunked_allocator_flags_evict_expired_blocks, // Disable clamping zero size to 1. Optional optimization if requested size of an re/allocation is guaranteed to be non-zero. chunked_allocator_flags_clamp_zero_size_disable = detail::chunked_allocator_flags_clamp_zero_size_disable }; // chunked_allocator // Lockless chunked memory allocator capable of handling multiple concurrent allocations and deallocations (unless otherwise stated). // The allocator internally contain blocks of data, who's size and max number can be set when creating the allocator. // Allocating will bump an offset in the currently active block. When an allocation no longer fits in the current block, a new block is selected. // When all allocations within a block memory range have been deallocated by the user, the block becomes expired and is available for allocation again. // The exception for this is when the `chunked_allocator_flags_evict_expired_blocks` is set in which case the block is deallocated and must be allocated // if required to be used again (i.e. a tradeoff between performance and memory footprint). // Note that blocks that have been reserved can only be deallocated by invoking `deallocated_all`. // // Allocating memory is lockless with the cost of O(1) except when the current active block capacity is exhausted and // swapping to a new block is required, in which case a lock is taken to prevent greedy allocation. // When a new block is required, it is allocated if there are no expired blocks available. // // // Notes on size and alignment: // // - Alignment is defined by the alignment of `Allocator`. An alignment of 1 will optimise away alignment instructions when compiler optimisation is // enabled i.e. an alignment size of 1 should be used if an inherited allocator is responsible for alignment. // // - Zero size allocations are by default allowed and will return a unique memory address. If size is guaranteed to be non-zero, this behaviour can be // disabled saving the extra instructions (typically shr-add or sub-shr-add) using the `chunked_allocator_flags_clamp_zero_size_disable` flag. // // Notes on performance/memory requirements: // // - Allocating and deallocating synchronisation are decoupled (no false sharing). Allocating does in terms of concurrency execute only one relaxed atomic // fetch_add operation when the allocator isn't exhausted. Deallocating will always emit a release barrier. // // - Except potential alignment, no memory overhead per allocation (no header information). // // - Max block count does not have any more memory overhead unless block memory is actually required (allocated). // // - Allocation only increment blocks internal offset when allocating. Allocating large blocks may lead to "holes" at the end of a block if they do not // fit into the remaining block free space. Be careful to balance block size to frequent allocations of large sizes (use allocator composition to amend). // // - Reallocating will require a new allocation unless the new size is less than the current size or fit within the alignment padding of the old allocation. // If a new memory alloction is required, a memory copy required and the old allocation space is lost until the block is expired. // // - allocate, reallocate and deallocate provides indexed alternatives, which can perform better when using many blocks and the flag // chunked_allocator_flags_paged_base_allocator_disable. It has no effect on paged (default) base allocators which does not gain from this optimisation. // Note that the `owns` method does not provide and indexed alternative and has the max cost of O(max_block_count) as it will need to check // if the memory address is within range of any active buffer. // // Example use-cases: // When utilizing deallocation, allocations with a defined life-time over a certain time span (frames), such as render queues, input data or task processing. // When not utilizing deallocation (reset by using `deallocate_all`), allocations that has equally defined lifetime such as scoped resource loading. // // Parameters: // block_size - size of a block used for allocating a chunk of memory. Must be a pow2 value. Must be zero if constructor alternative is used. // max_block_count - max number of blocks used. Must be in the range 2-64. Must be zero if constructor alternative is used. // Allocator - backing (base) memory allocator. Defaults to baselib `page_allocator`. Alignment of allocations are inherited from this allocator. // flags - behavior flags (see `chunked_allocator_flags`). Default is the value zero, no flags. // concurrent_access_capacity - max amount of concurrent access to `allocate` or `reallocate`. Must be a pow2 value. Defaults to 1024. // Concurrent access above this limit is not allowed and may corrupt the allocator. // max_block_size_factor - size factor of `block_size` to which the allocator can grow blocks for allocations of size larger than `block_size`. // Must be a pow2 value in the range 1-128. Defaults to 1. // template<size_t block_size, size_t max_block_count, class Allocator = baselib::page_allocator<8, baselib::Memory_PageState_Reserved>, uint32_t flags = 0, uint32_t concurrent_access_capacity = 1024, uint32_t max_block_size_factor = 1> class chunked_allocator : protected detail::chunked_allocator<Allocator, flags, concurrent_access_capacity, max_block_size_factor> { friend class detail::chunked_allocator_stats; using Impl = detail::chunked_allocator<Allocator, flags, concurrent_access_capacity, max_block_size_factor>; static constexpr bool ctorConfig = (block_size == 0 && max_block_count == 0); static_assert((block_size == 0) == (max_block_count == 0), "block_size and max_block_count must both be zero or non-zero"); static_assert(baselib::Algorithm::IsPowerOfTwo(block_size), "block_size must be a pow2 value"); static_assert(ctorConfig ? true : Allocator::alignment <= block_size, "Allocator::alignment must be less or equal to block_size"); static_assert(Allocator::alignment != 0, "Allocator::alignment must not be zero"); static_assert(baselib::Algorithm::IsPowerOfTwo(Allocator::alignment), "Allocator::alignment must be a pow2 value"); static_assert(max_block_size_factor != 0, "max_block_size_factor must not be zero"); static_assert(max_block_size_factor <= 128, "max_block_size_factor must be less or equal to 128"); static_assert(baselib::Algorithm::IsPowerOfTwo(max_block_size_factor), "max_block_size_factor must be a pow2 value"); static_assert(concurrent_access_capacity != 0, "concurrent_access_capacity must not be zero"); static_assert(baselib::Algorithm::IsPowerOfTwo(concurrent_access_capacity), "concurrent_access_capacity must be a pow2 value"); public: // non-copyable chunked_allocator(const chunked_allocator& other) = delete; chunked_allocator& operator=(const chunked_allocator& other) = delete; // non-movable (strictly speaking not needed but listed to signal intent) chunked_allocator(chunked_allocator && other) = delete; chunked_allocator& operator=(chunked_allocator&& other) = delete; // Allocated memory is guaranteed to always be aligned to at least the value of `alignment`. static constexpr uint32_t alignment = Impl::alignment; // Creates a new instance. `args` are optional parameters forwarded to the `Allocator` constructor. template<class ... Args, bool value = ctorConfig, typename std::enable_if<(!value), bool>::type = 0> chunked_allocator(Args&& ... args) : Impl(block_size, max_block_count, std::forward<Args>(args)...) { atomic_thread_fence(memory_order_seq_cst); } // Creates a new instance using run-time constructor parameters for block size and count. The same restrictions on parameter values apply. // Template parameters `block_size` and `max_block_count` must be both zero when this constructor is used. // `args` are optional parameters forwarded to the `Allocator` constructor. template<class ... Args, bool value = ctorConfig, typename std::enable_if<(value), bool>::type = 0> chunked_allocator(size_t blockSize, size_t blockCount, Args&& ... args) : Impl(blockSize, blockCount, std::forward<Args>(args)...) { BaselibAssert(blockSize != 0); BaselibAssert(blockCount != 0); BaselibAssert(baselib::Algorithm::IsPowerOfTwo(blockSize)); atomic_thread_fence(memory_order_seq_cst); } // Destroy allocator, deallocates any memory allocated. // // If there are other threads currently accessing the allocator behavior is undefined. ~chunked_allocator() {} // Allocates a memory block large enough to hold `size` number of bytes. // `size` must less or equal to `block_size` multiplied by `max_block_size_factor`. // // \returns Address to memory block of allocated memory or nullptr if failed. void* allocate(size_t size) { return Impl::allocate(size); } // Allocates a memory block large enough to hold `size` number of bytes. // `size` must less or equal to `block_size` multiplied by `max_block_size_factor`. // If operation is successful `block_index` contains the internal block index of the allocation, to be used with subsequent indexed method calls. // // \returns Address to memory block of allocated memory or nullptr if failed. void* allocate(size_t size, uint32_t &block_index) { return Impl::allocate(size, block_index); } // Reallocates previously allocated or reallocated memory pointed to by `ptr` from `old_size` to `new_size` number of bytes. // `new_size` must be less or equal to `block_size` multiplied by `max_block_size_factor`. // Passing `nullptr` in `ptr` yield the same result as calling `allocate`. // // \returns Address to memory block of reallocated memory or nullptr if failed. void* reallocate(void* ptr, size_t old_size, size_t new_size) { return Impl::reallocate(ptr, old_size, new_size); } // Reallocates previously allocated or reallocated memory pointed to by `ptr` from `old_size` to `new_size` number of bytes. // `new_size` must be less or equal to `block_size` multiplied by `max_block_size_factor`. // `block_index` is the internal block index resulting from a previous call to `allocate` or `reallocate`. // If operation is successful `new_block_index` contains the internal block index of the reallocated memory. // Passing `nullptr` in `ptr` yield the same result as calling `allocate`. // // \returns Address to memory block of reallocated memory or nullptr if failed. void* reallocate(void* ptr, size_t old_size, size_t new_size, uint32_t block_index, uint32_t& new_block_index) { return Impl::reallocate(ptr, old_size, new_size, block_index, new_block_index); } // Deallocates memory block previously allocated or reallocated with `size` pointed to by `ptr`. // Passing nullptr in `ptr` result in a no-op. // // \returns Always returns true unless `ptr` is nullptr. bool deallocate(void* ptr, size_t size) { return Impl::deallocate(ptr, size); } // Deallocates memory block previously allocated or reallocated with `size` pointed to by `ptr`. // `block_index` is the internal block index resulting from a previous call to `allocate` or `reallocate` using block index methods. // Passing nullptr in `ptr` result in a no-op. // This function is guaranteed to emit an release barrier. // // \returns Always returns true unless `ptr` is nullptr. bool deallocate(void* ptr, size_t size, uint32_t block_index) { return Impl::deallocate(ptr, size, block_index); } // Release all resources and set capacity to zero // // Calling this function invalidates any currently allocated memory // If there are other threads currently accessing the allocator behavior is undefined. void deallocate_all() { Impl::deallocate_all(); } // Request that the allocator capacity be at least enough to contain `capacity`. // // If `capacity` is less or equal to current capacity, the capacity is not affected. // Note that internally, `capacity` is rounded up to `block_size` which in turn is aligned to optimal allocation size of `Allocator`. // // \returns true if successful. bool reserve(size_t capacity) { return Impl::reserve(capacity); } // Get the current capacity. size_t capacity() const { return Impl::capacity(); } // Calculate optimal allocation size given `size`. // // \returns Optimal size when allocating memory given `size` or zero if `size` is larger than `block_size` multiplied by `max_block_size_factor`. static constexpr size_t optimal_size(size_t size) { return Impl::optimal_size(size); } // Checks for the ownership allocation given `ptr` and `size` // If `size` is valid `ptr` is checked to be in range of allocator memory pool. // Note that this function is not O(1) if the `chunked_allocator_flags_paged_base_allocator_disable` is used. See header documentation for details. // // \returns True if the allocator owns the allocation. bool owns(const void *ptr, size_t size) const { return Impl::owns(ptr, size); } }; // chunked_allocator_stats // Retrieve current state of an allocator. // class chunked_allocator_stats : protected detail::chunked_allocator_stats { using Impl = detail::chunked_allocator_stats; public: // Output data structure used by `block_stats`. struct block_stat : Impl::block_stat { FORCE_INLINE void* memory() { return m_Memory; } // Block memory address FORCE_INLINE size_t size() { return m_Size; } // Allocated bytes (including allocation alignment padding) FORCE_INLINE size_t capacity() { return m_Capacity; } // Capacity of block FORCE_INLINE uint8_t index() { return m_Index; } // Internal index of block (range zero to max block count of allocator) FORCE_INLINE uint8_t generation_id() { return m_GenerationId; } // Generation id of the block (see `set_block_generation_id`) }; // Retrieve an array current state of each active block in `allocator` into `block_stat` given `block_stats_flags`. `block_stat` must be at // least the size of the allocator max block count (64). // This is a lock-less operation, internally invoking atomic operations. // If there are other threads currently accessing the allocator the results of `block_stat` memory, size and capacity functions are approximations. // This method is intended for memory profiling, debugging and statistics. // // \returns Number of active blocks, i.e. the valid size of `block_stat`. template<class Allocator> static uint32_t block_stats(const Allocator& allocator, block_stat block_stat[]) { return Impl::block_stats(allocator, block_stat); } // Set the generation id for currently active and subsequent blocks used by `allocator`. // Default (initial) value is zero. Valid range is 0-255. template<class Allocator> static void set_block_generation_id(Allocator& allocator, uint8_t generationId) { Impl::set_block_generation_id(allocator, generationId); } }; } }