chunked_allocator.h
#pragma once
#include "heap_allocator.h"
#include "page_allocator.h"
#include "Internal/chunked_allocator.inl.h"
namespace baselib
{
BASELIB_CPP_INTERFACE
{
// chunked_allocator_flags
// Behaviour flags used with the `chunked_allocator` parameter `flags`.
//
enum chunked_allocator_flags
{
// Disable page based allocation for the base allocator. One allocate/deallocate invocation per block is used instead of page state modification.
chunked_allocator_flags_paged_base_allocator_disable = detail::chunked_allocator_flags_paged_base_allocator_disable,
// Evict (deallocate) expired blocks, i.e. when all allocations within a block have been deallocated (Note: reserved blocks do not expire).
chunked_allocator_flags_evict_expired_blocks = detail::chunked_allocator_flags_evict_expired_blocks,
// Disable clamping zero size to 1. Optional optimization if the requested size of a re/allocation is guaranteed to be non-zero.
chunked_allocator_flags_clamp_zero_size_disable = detail::chunked_allocator_flags_clamp_zero_size_disable
};
// chunked_allocator
// Lockless chunked memory allocator capable of handling multiple concurrent allocations and deallocations (unless otherwise stated).
// The allocator internally contains blocks of data whose size and max number can be set when creating the allocator.
// Allocating will bump an offset in the currently active block. When an allocation no longer fits in the current block, a new block is selected.
// When all allocations within a block's memory range have been deallocated by the user, the block becomes expired and is available for allocation again.
// The exception to this is when `chunked_allocator_flags_evict_expired_blocks` is set, in which case the block is deallocated and must be allocated
// again before it can be reused (i.e. a tradeoff between performance and memory footprint).
// Note that blocks that have been reserved can only be deallocated by invoking `deallocate_all`.
//
// Allocating memory is lockless with a cost of O(1), except when the current active block capacity is exhausted and
// swapping to a new block is required, in which case a lock is taken to prevent greedy allocation.
// When a new block is required, it is allocated if there are no expired blocks available.
//
//
// Notes on size and alignment:
//
// - Alignment is defined by the alignment of `Allocator`. An alignment of 1 will optimise away alignment instructions when compiler optimisation is
// enabled, i.e. an alignment size of 1 should be used if an inherited allocator is responsible for alignment.
//
// - Zero size allocations are by default allowed and will return a unique memory address. If size is guaranteed to be non-zero, this behaviour can be
// disabled, saving the extra instructions (typically shr-add or sub-shr-add), using the `chunked_allocator_flags_clamp_zero_size_disable` flag.
//
// Notes on performance/memory requirements:
//
// - Allocation and deallocation synchronisation are decoupled (no false sharing). In terms of concurrency, allocating executes only one relaxed atomic
// fetch_add operation when the allocator isn't exhausted. Deallocating will always emit a release barrier.
//
// - Except for potential alignment padding, there is no memory overhead per allocation (no header information).
//
// - A higher max block count does not add any memory overhead unless block memory is actually required (allocated).
//
// - Allocation only increments the block's internal offset. Allocating large sizes may lead to "holes" at the end of a block if they do not
// fit into the remaining block free space. Be careful to balance block size against frequent allocations of large sizes (use allocator composition to amend).
//
// - Reallocating will require a new allocation unless the new size is less than the current size or fits within the alignment padding of the old allocation.
// If a new memory allocation is required, a memory copy is required and the old allocation space is lost until the block is expired.
//
// - allocate, reallocate and deallocate provide indexed alternatives, which can perform better when using many blocks together with the flag
// chunked_allocator_flags_paged_base_allocator_disable. They have no effect on paged (default) base allocators, which do not gain from this optimisation.
// Note that the `owns` method does not provide an indexed alternative and has a max cost of O(max_block_count), as it needs to check
// whether the memory address is within range of any active buffer.
//
// Example use-cases:
// When utilizing deallocation: allocations with a defined lifetime over a certain time span (frames), such as render queues, input data or task processing.
// When not utilizing deallocation (reset by using `deallocate_all`): allocations that have an equally defined lifetime, such as scoped resource loading.
// See the usage sketch below.
//
// Parameters:
// block_size - size of a block used for allocating a chunk of memory. Must be a pow2 value. Must be zero if the constructor alternative is used.
// max_block_count - max number of blocks used. Must be in the range 2-64. Must be zero if the constructor alternative is used.
// Allocator - backing (base) memory allocator. Defaults to baselib `page_allocator`. Alignment of allocations is inherited from this allocator.
// flags - behavior flags (see `chunked_allocator_flags`). Default is the value zero, no flags.
// concurrent_access_capacity - max amount of concurrent access to `allocate` or `reallocate`. Must be a pow2 value. Defaults to 1024.
// Concurrent access above this limit is not allowed and may corrupt the allocator.
// max_block_size_factor - size factor of `block_size` to which the allocator can grow blocks for allocations of size larger than `block_size`.
// Must be a pow2 value in the range 1-128. Defaults to 1.
//
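// Usage example (an illustrative sketch only, not part of this header; parameter values are arbitrary):
//
//  // 64 KiB blocks, up to 8 blocks, default page-backed base allocator.
//  baselib::chunked_allocator<64 * 1024, 8> allocator;
//
//  void* p = allocator.allocate(256); // O(1) bump allocation
//  if (p)
//  {
//      // ... use the memory ...
//      allocator.deallocate(p, 256); // the block expires once all allocations within it are deallocated
//  }
//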
template<size_t block_size, size_t max_block_count, class Allocator = baselib::page_allocator<8, baselib::Memory_PageState_Reserved>, uint32_t flags = 0, uint32_t concurrent_access_capacity = 1024, uint32_t max_block_size_factor = 1>
class chunked_allocator : protected detail::chunked_allocator<Allocator, flags, concurrent_access_capacity, max_block_size_factor>
{
friend class detail::chunked_allocator_stats;
using Impl = detail::chunked_allocator<Allocator, flags, concurrent_access_capacity, max_block_size_factor>;
static constexpr bool ctorConfig = (block_size == 0 && max_block_count == 0);
static_assert((block_size == 0) == (max_block_count == 0), "block_size and max_block_count must both be zero or non-zero");
static_assert(baselib::Algorithm::IsPowerOfTwo(block_size), "block_size must be a pow2 value");
static_assert(ctorConfig ? true : Allocator::alignment <= block_size, "Allocator::alignment must be less or equal to block_size");
static_assert(Allocator::alignment != 0, "Allocator::alignment must not be zero");
static_assert(baselib::Algorithm::IsPowerOfTwo(Allocator::alignment), "Allocator::alignment must be a pow2 value");
static_assert(max_block_size_factor != 0, "max_block_size_factor must not be zero");
static_assert(max_block_size_factor <= 128, "max_block_size_factor must be less or equal to 128");
static_assert(baselib::Algorithm::IsPowerOfTwo(max_block_size_factor), "max_block_size_factor must be a pow2 value");
static_assert(concurrent_access_capacity != 0, "concurrent_access_capacity must not be zero");
static_assert(baselib::Algorithm::IsPowerOfTwo(concurrent_access_capacity), "concurrent_access_capacity must be a pow2 value");
public:
// non-copyable
chunked_allocator(const chunked_allocator& other) = delete;
chunked_allocator& operator=(const chunked_allocator& other) = delete;
// non-movable (strictly speaking not needed but listed to signal intent)
chunked_allocator(chunked_allocator&& other) = delete;
chunked_allocator& operator=(chunked_allocator&& other) = delete;
// Allocated memory is guaranteed to always be aligned to at least the value of `alignment`.
static constexpr uint32_t alignment = Impl::alignment;
// Creates a new instance. `args` are optional parameters forwarded to the `Allocator` constructor.
template<class ... Args, bool value = ctorConfig, typename std::enable_if<(!value), bool>::type = 0>
chunked_allocator(Args&& ... args) : Impl(block_size, max_block_count, std::forward<Args>(args)...)
{
atomic_thread_fence(memory_order_seq_cst);
}
// Creates a new instance using run-time constructor parameters for block size and count. The same restrictions on parameter values apply.
// Template parameters `block_size` and `max_block_count` must both be zero when this constructor is used (see the sketch below).
// `args` are optional parameters forwarded to the `Allocator` constructor.
template<class ... Args, bool value = ctorConfig, typename std::enable_if<(value), bool>::type = 0>
chunked_allocator(size_t blockSize, size_t blockCount, Args&& ... args) : Impl(blockSize, blockCount, std::forward<Args>(args)...)
{
BaselibAssert(blockSize != 0);
BaselibAssert(blockCount != 0);
BaselibAssert(baselib::Algorithm::IsPowerOfTwo(blockSize));
atomic_thread_fence(memory_order_seq_cst);
}
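// Example (an illustrative sketch; values are arbitrary): run-time configuration. Both template
// parameters are zero and block size/count are supplied to the constructor instead.
//
//  baselib::chunked_allocator<0, 0> allocator(64 * 1024, 8);
//  void* p = allocator.allocate(128);
//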
// Destroy allocator, deallocates any memory allocated.
//
// If there are other threads currently accessing the allocator, behavior is undefined.
~chunked_allocator() {}
// Allocates a memory block large enough to hold `size` number of bytes.
// `size` must be less than or equal to `block_size` multiplied by `max_block_size_factor`.
//
// \returns Address to memory block of allocated memory or nullptr if failed.
void* allocate(size_t size)
{
return Impl::allocate(size);
}
// Allocates a memory block large enough to hold `size` number of bytes.
// `size` must be less than or equal to `block_size` multiplied by `max_block_size_factor`.
// If the operation is successful, `block_index` contains the internal block index of the allocation, to be used with subsequent indexed method calls.
//
// \returns Address to memory block of allocated memory or nullptr if failed.
void* allocate(size_t size, uint32_t& block_index)
{
return Impl::allocate(size, block_index);
}
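// Example (an illustrative sketch, assuming a `chunked_allocator` instance named `allocator`): the indexed
// variants thread the block index through subsequent calls, which can perform better when
// `chunked_allocator_flags_paged_base_allocator_disable` is set.
//
//  uint32_t blockIndex;
//  void* p = allocator.allocate(64, blockIndex);
//  if (p)
//      allocator.deallocate(p, 64, blockIndex);
//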
// Reallocates previously allocated or reallocated memory pointed to by `ptr` from `old_size` to `new_size` number of bytes.
// `new_size` must be less than or equal to `block_size` multiplied by `max_block_size_factor`.
// Passing `nullptr` in `ptr` yields the same result as calling `allocate`.
//
// \returns Address to memory block of reallocated memory or nullptr if failed.
void* reallocate(void* ptr, size_t old_size, size_t new_size)
{
return Impl::reallocate(ptr, old_size, new_size);
}
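// Example (an illustrative sketch, assuming an instance named `allocator`): growing an allocation. A new
// allocation and a memory copy may be required, in which case the old space is unavailable until its block expires.
//
//  void* p = allocator.allocate(100);
//  p = allocator.reallocate(p, 100, 200); // may return a new address, or nullptr on failure
//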
// Reallocates previously allocated or reallocated memory pointed to by `ptr` from `old_size` to `new_size` number of bytes.
// `new_size` must be less than or equal to `block_size` multiplied by `max_block_size_factor`.
// `block_index` is the internal block index resulting from a previous call to `allocate` or `reallocate`.
// If the operation is successful, `new_block_index` contains the internal block index of the reallocated memory.
// Passing `nullptr` in `ptr` yields the same result as calling `allocate`.
//
// \returns Address to memory block of reallocated memory or nullptr if failed.
void* reallocate(void* ptr, size_t old_size, size_t new_size, uint32_t block_index, uint32_t& new_block_index)
{
return Impl::reallocate(ptr, old_size, new_size, block_index, new_block_index);
}
// Deallocates memory block previously allocated or reallocated with `size` pointed to by `ptr`.
// Passing nullptr in `ptr` results in a no-op.
//
// \returns Always returns true unless `ptr` is nullptr.
bool deallocate(void* ptr, size_t size)
{
return Impl::deallocate(ptr, size);
}
// Deallocates memory block previously allocated or reallocated with `size` pointed to by `ptr`.
// `block_index` is the internal block index resulting from a previous call to `allocate` or `reallocate` using the block index methods.
// Passing nullptr in `ptr` results in a no-op.
// This function is guaranteed to emit a release barrier.
//
// \returns Always returns true unless `ptr` is nullptr.
bool deallocate(void* ptr, size_t size, uint32_t block_index)
{
return Impl::deallocate(ptr, size, block_index);
}
// Release all resources and set capacity to zero.
//
// Calling this function invalidates any currently allocated memory.
// If there are other threads currently accessing the allocator, behavior is undefined.
void deallocate_all()
{
Impl::deallocate_all();
}
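// Example (an illustrative sketch, assuming an instance named `allocator`): scoped/frame-style reset.
// All memory, including reserved blocks, is released at a point where no other thread accesses the allocator.
//
//  void* a = allocator.allocate(512); // many transient allocations ...
//  void* b = allocator.allocate(1024);
//  // ... end of scope/frame, no concurrent access:
//  allocator.deallocate_all(); // a and b are now invalid
//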
// Request that the allocator capacity be at least enough to contain `capacity`.
//
// If `capacity` is less than or equal to the current capacity, the capacity is not affected.
// Note that internally, `capacity` is rounded up to `block_size`, which in turn is aligned to the optimal allocation size of `Allocator`.
//
// \returns true if successful.
bool reserve(size_t capacity)
{
return Impl::reserve(capacity);
}
// Get the current capacity.
size_t capacity() const
{
return Impl::capacity();
}
// Calculate the optimal allocation size given `size`.
//
// \returns Optimal size when allocating memory given `size`, or zero if `size` is larger than `block_size` multiplied by `max_block_size_factor`.
static constexpr size_t optimal_size(size_t size)
{
return Impl::optimal_size(size);
}
// Checks ownership of the allocation given `ptr` and `size`.
// If `size` is valid, `ptr` is checked to be in range of the allocator memory pool.
// Note that this function is not O(1) if `chunked_allocator_flags_paged_base_allocator_disable` is used. See the header documentation for details.
//
// \returns True if the allocator owns the allocation.
bool owns(const void* ptr, size_t size) const
{
return Impl::owns(ptr, size);
}
};
// chunked_allocator_stats
// Retrieve the current state of an allocator. See the usage sketch below.
//
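// Example (an illustrative sketch, assuming a `chunked_allocator` instance named `allocator`):
//
//  baselib::chunked_allocator_stats::block_stat stats[64]; // max block count is 64
//  const uint32_t count = baselib::chunked_allocator_stats::block_stats(allocator, stats);
//  for (uint32_t i = 0; i < count; ++i)
//      printf("block %u: %zu of %zu bytes used\n", (unsigned)stats[i].index(), stats[i].size(), stats[i].capacity());
//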
class chunked_allocator_stats : protected detail::chunked_allocator_stats
{
using Impl = detail::chunked_allocator_stats;
public:
// Output data structure used by `block_stats`.
struct block_stat : Impl::block_stat
{
FORCE_INLINE void* memory() { return m_Memory; } // Block memory address
FORCE_INLINE size_t size() { return m_Size; } // Allocated bytes (including allocation alignment padding)
FORCE_INLINE size_t capacity() { return m_Capacity; } // Capacity of block
FORCE_INLINE uint8_t index() { return m_Index; } // Internal index of block (range zero to max block count of allocator)
FORCE_INLINE uint8_t generation_id() { return m_GenerationId; } // Generation id of the block (see `set_block_generation_id`)
};
// Retrieve the current state of each active block in `allocator` into the `block_stat` array. `block_stat` must be at
// least the size of the allocator max block count (64).
// This is a lockless operation, internally invoking atomic operations.
// If there are other threads currently accessing the allocator, the results of the `block_stat` memory, size and capacity functions are approximations.
// This method is intended for memory profiling, debugging and statistics.
//
// \returns Number of active blocks, i.e. the valid size of `block_stat`.
template<class Allocator>
static uint32_t block_stats(const Allocator& allocator, block_stat block_stat[]) { return Impl::block_stats(allocator, block_stat); }
// Set the generation id for currently active and subsequent blocks used by `allocator`.
// Default (initial) value is zero. Valid range is 0-255.
template<class Allocator>
static void set_block_generation_id(Allocator& allocator, uint8_t generationId) { Impl::set_block_generation_id(allocator, generationId); }
};
}
}