19 #include <rmm/detail/aligned.hpp>
20 #include <rmm/detail/error.hpp>
21 #include <rmm/detail/logging_assert.hpp>
22 #include <rmm/mr/device/detail/fixed_size_free_list.hpp>
23 #include <rmm/mr/device/detail/stream_ordered_memory_resource.hpp>
25 #include <rmm/detail/thrust_namespace.h>
26 #include <thrust/iterator/counting_iterator.h>
27 #include <thrust/iterator/transform_iterator.h>
29 #include <cuda_runtime_api.h>
50 template <
typename Upstream>
52 :
public detail::stream_ordered_memory_resource<fixed_size_memory_resource<Upstream>,
53 detail::fixed_size_free_list> {
56 detail::fixed_size_free_list>;
62 static constexpr std::size_t default_blocks_to_preallocate = 128;
75 explicit fixed_size_memory_resource(
76 Upstream* upstream_mr,
77 std::size_t block_size = default_block_size,
78 std::size_t blocks_to_preallocate = default_blocks_to_preallocate)
79 : upstream_mr_{upstream_mr},
80 block_size_{rmm::detail::align_up(block_size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT)},
81 upstream_chunk_size_{block_size * blocks_to_preallocate}
84 this->insert_blocks(std::move(blocks_from_upstream(cuda_stream_legacy)), cuda_stream_legacy);
91 ~fixed_size_memory_resource() override { release(); }
93 fixed_size_memory_resource() = delete;
94 fixed_size_memory_resource(fixed_size_memory_resource const&) = delete;
95 fixed_size_memory_resource(fixed_size_memory_resource&&) = delete;
96 fixed_size_memory_resource& operator=(fixed_size_memory_resource const&) = delete;
97 fixed_size_memory_resource& operator=(fixed_size_memory_resource&&) = delete;
105 [[nodiscard]] bool supports_streams() const noexcept override { return true; }
112 [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; }
119 Upstream* get_upstream() const noexcept { return upstream_mr_; }
126 [[nodiscard]] std::size_t get_block_size() const noexcept { return block_size_; }
129 using free_list = detail::fixed_size_free_list;
130 using block_type = free_list::block_type;
131 using typename detail::stream_ordered_memory_resource<fixed_size_memory_resource<Upstream>,
132 detail::fixed_size_free_list>::split_block;
133 using lock_guard = std::lock_guard<std::mutex>;
141 [[nodiscard]] std::size_t get_maximum_allocation_size() const { return get_block_size(); }
154 block_type expand_pool(std::size_t size, free_list& blocks, cuda_stream_view stream)
156 blocks.insert(std::move(blocks_from_upstream(stream)));
157 return blocks.get_block(size);
166 free_list blocks_from_upstream(cuda_stream_view stream)
168 void* ptr = get_upstream()->allocate(upstream_chunk_size_, stream);
170 upstream_blocks_.push_back(block);
172 auto num_blocks = upstream_chunk_size_ / block_size_;
174 auto block_gen = [ptr,
this](
int index) {
176 return block_type{
static_cast<char*
>(ptr) + index * block_size_};
179 thrust::make_transform_iterator(thrust::make_counting_iterator(std::size_t{0}), block_gen);
180 return free_list(first, first + num_blocks);
210 RMM_LOGGING_ASSERT(rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT) <=
226 return std::make_pair(0, 0);
237 for (
auto block : upstream_blocks_) {
238 get_upstream()->deallocate(block.pointer(), upstream_chunk_size_);
240 upstream_blocks_.clear();
243 #ifdef RMM_DEBUG_PRINT
246 lock_guard lock(this->get_mutex());
248 auto const [free, total] = get_upstream()->get_mem_info(rmm::cuda_stream_default);
249 std::cout <<
"GPU free memory: " << free <<
" total: " << total <<
"\n";
251 std::cout <<
"upstream_blocks: " << upstream_blocks_.size() <<
"\n";
252 std::size_t upstream_total{0};
254 for (
auto blocks : upstream_blocks_) {
256 upstream_total += upstream_chunk_size_;
258 std::cout <<
"total upstream: " << upstream_total <<
" B\n";
260 this->print_free_blocks();
274 return blocks.is_empty() ? std::make_pair(std::size_t{0}, std::size_t{0})
275 : std::make_pair(block_size_, blocks.size() * block_size_);
279 Upstream* upstream_mr_;
281 std::size_t
const block_size_;
282 std::size_t
const upstream_chunk_size_;
285 std::vector<block_type> upstream_blocks_;
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:41
A device_memory_resource which allocates memory blocks of a single fixed size.
Definition: fixed_size_memory_resource.hpp:53
std::pair< std::size_t, std::size_t > free_list_summary(free_list const &blocks)
Get the largest available block size and total free size in the specified free list.
Definition: fixed_size_memory_resource.hpp:272
static constexpr std::size_t default_block_size
Default allocation block size.
Definition: fixed_size_memory_resource.hpp:58
std::pair< std::size_t, std::size_t > do_get_mem_info([[maybe_unused]] cuda_stream_view stream) const override
Get free and available memory for memory resource.
Definition: fixed_size_memory_resource.hpp:223
detail::fixed_size_free_list free_list
The free list type.
Definition: fixed_size_memory_resource.hpp:129
block_type free_block(void *ptr, std::size_t size) noexcept
Finds, frees and returns the block associated with pointer.
Definition: fixed_size_memory_resource.hpp:206
free_list::block_type block_type
The type of block managed by the free list.
Definition: fixed_size_memory_resource.hpp:130
std::lock_guard< std::mutex > lock_guard
Type of lock used to synchronize access.
Definition: fixed_size_memory_resource.hpp:133
void release()
free all memory allocated using the upstream resource.
Definition: fixed_size_memory_resource.hpp:233
split_block allocate_from_block(block_type const &block, std::size_t size)
Splits block if necessary to return a pointer to memory of size bytes.
Definition: fixed_size_memory_resource.hpp:193