From ee50c66126d56d4b456046ed533f90abb0075363 Mon Sep 17 00:00:00 2001
From: quou
Date: Sun, 29 Dec 2024 18:24:57 +1100
Subject: vram allocator

---
Review notes (text in this section is not part of the commit message):
 - Buffer_Vk::init keeps its "mt < 0" check, as create_texture does.
 - Vram_Allocator::destroy no longer reads nodes it has already freed.
 - Vram_Allocator::alloc sizes a new page from size + align, which is
   what Page::alloc reserves.
 - Page::init checks the vkMapMemory result.
 - Vram_Allocator::free ignores invalid handles and clears the handle.
 - Line breaks and indentation were reconstructed from a flattened copy.
   Hunk counts and the diffstat match the text below; the index line's
   post-image hash was not recomputed.

 todo.txt  |   3 +-
 video.cpp | 338 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 297 insertions(+), 44 deletions(-)

diff --git a/todo.txt b/todo.txt
index a2f0f56..4d9ea63 100644
--- a/todo.txt
+++ b/todo.txt
@@ -12,8 +12,9 @@ todo list
  - [x] render a model
  - [x] depth buffer
  - [x] seperate framebuffers and renderpasses
- - [ ] VRAM allocator
+ - [x] VRAM allocator
  - [x] don't re-bind pipelines and descriptors if not necessary
+ - [ ] memory debug tool
  - [ ] material system
  - [ ] render a sky box
  - [ ] PBR + IBL
diff --git a/video.cpp b/video.cpp
index f244e57..9e9c810 100644
--- a/video.cpp
+++ b/video.cpp
@@ -192,6 +192,65 @@ struct Hash_Map {
 	}
 };
 
+struct Vram_Allocator {
+	static constexpr int size_alignment = (1024 * 1024 * 32);
+	struct Page;
+	struct Chunk;
+	struct Allocation {
+		VkDeviceMemory mem;
+		Page* page;
+		Chunk* chunk;
+		bool valid() const { return chunk != 0; }
+		VkDeviceSize offset() const { return chunk->get_offset(); }
+		void* map(VkDeviceSize off) {
+			assert(page->mapping != 0);
+			return (char*)page->mapping + offset() + off;
+		}
+	};
+	struct Chunk {
+		VkDeviceSize offset;
+		VkDeviceSize pad;
+		VkDeviceSize size;
+		Chunk* next;
+		bool free;
+
+		VkDeviceSize get_offset() {
+			return offset + pad;
+		}
+	};
+	struct Page {
+		VkDeviceMemory memory;
+		VkDeviceSize size;
+		int type;
+		Chunk* chunks;
+		Page* next;
+		void* mapping;
+		/* need something better for host-writable,
+		 * non-coherent mappings */
+
+		void init(Device_Vk* dev, VkDeviceSize size, int type);
+		void defrag(Device_Vk* dev);
+		Allocation imp_alloc(Device_Vk* dev, VkDeviceSize size);
+		Allocation alloc(
+			Device_Vk* dev,
+			VkDeviceSize size,
+			VkDeviceSize align
+		);
+	};
+	Page* pages;
+	Device_Vk* dev;
+
+	void init(Device_Vk* d);
+	void destroy();
+	Allocation alloc(
+		int type,
+		VkDeviceSize size,
+		VkDeviceSize align
+	);
+	void free(Allocation& alloc);
+};
+
+
 static VkFormat get_vk_format(Texture_Format fmt) {
 	switch (fmt) {
 		case texture_format_r8i: return VK_FORMAT_R8_UNORM;
@@ -510,7 +569,7 @@ struct Context_Vk : public Context {
 struct Texture_Vk : public Texture, public Late_Terminated {
 	VkImage image;
 	VkImageView view;
-	VkDeviceMemory memory;
+	Vram_Allocator::Allocation memory;
 	Resource_State state;
 
 	void destroy(Device_Vk*) override;
@@ -518,8 +577,8 @@ struct Texture_Vk : public Texture, public Late_Terminated {
 
 struct Buffer_Vk : public Buffer, public Late_Terminated {
 	VkBuffer buf;
-	VkDeviceMemory memory;
 	VkDeviceSize size;
+	Vram_Allocator::Allocation memory;
 	int flags;
 
 	void init(Device_Vk* dev, int flags, VkDeviceSize size);
@@ -861,6 +920,7 @@ struct Device_Vk : public Device {
 	Context_Vk* current_ctx;
 	Shader_Loader shader_loader;
 	Texture_Loader texture_loader;
+	Vram_Allocator vrama;
 #ifdef DEBUG
 	VkDebugUtilsMessengerEXT msg;
 #endif
@@ -1266,6 +1326,7 @@ void Device_Vk::init_internal() {
 #endif
 	surf = app_create_vk_surface(app, inst);
 	create_dev(&swap_cap);
+	vrama.init(this);
 	gladLoaderLoadVulkan(inst, phys_dev, dev);
 	vkGetDeviceQueue(dev, (uint32_t)queue_index, 0, &queue);
 	terminators = 0;
@@ -1329,6 +1390,7 @@ void Device_Vk::deinit_internal() {
 	for (i = 0; i < image_count; i++) {
 		terminators[i].execute(this);
 	}
+	vrama.destroy();
 	vkDestroyDevice(dev, &ac);
 #ifdef DEBUG
 	destroy_dmesg(
@@ -3078,30 +3140,22 @@ void Buffer_Vk::init(
 	{
 		VkMemoryPropertyFlags props = get_memory_flags(flags);
 		int mt = dev->find_memory_type(req.memoryTypeBits, props);
-		VkMemoryAllocateInfo ai{};
 		if (mt < 0) {
 			print("Failed to find a satisfying memory type index.\n");
 			pbreak(mt);
 		}
-		ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
-		ai.allocationSize = req.size;
-		ai.memoryTypeIndex = mt;
-		r = vkAllocateMemory(dev->dev, &ai, &dev->ac, &memory);
-		if (r == VK_ERROR_OUT_OF_DEVICE_MEMORY) {
-			print_err("Out of video memory.\n");
-			pbreak(r);
-		}
-		if (r != VK_SUCCESS) {
-			print_err("VRAM allocation failed.\n");
-			pbreak(r);
+		memory = dev->vrama.alloc(mt, req.size, req.alignment);
+		if (!memory.valid()) {
+			print_err("Failed to allocate memory for buffer.\n");
+			pbreak(900);
 		}
 	}
-	vkBindBufferMemory(dev->dev, buf, memory, 0);
+	vkBindBufferMemory(dev->dev, buf, memory.mem, memory.offset());
 }
 
 void Buffer_Vk::destroy(Device_Vk* dev) {
 	vkDestroyBuffer(dev->dev, buf, &dev->ac);
-	vkFreeMemory(dev->dev, memory, &dev->ac);
+	dev->vrama.free(memory);
 	dev->buffers.remove(id);
 }
 
@@ -3124,24 +3178,16 @@ void* Device::map_buffer(
 	size_t offset,
 	size_t size
 ) {
-	Device_Vk* dev = (Device_Vk*)this;
 	Buffer_Vk& buf = *(Buffer_Vk*)&get_buffer(id);
-	void* ptr;
-	vkMapMemory(
-		dev->dev,
-		buf.memory,
-		(VkDeviceSize)offset,
-		(VkDeviceSize)size,
-		0,
-		&ptr
-	);
-	return ptr;
+	(void)size;
+	return buf.memory.map(offset);
 }
 
 void Device::unmap_buffer(Buffer_Id id) {
-	Device_Vk* dev = (Device_Vk*)this;
+	(void)id;
+/*	Device_Vk* dev = (Device_Vk*)this;
 	Buffer_Vk& buf = *(Buffer_Vk*)&get_buffer(id);
-	vkUnmapMemory(dev->dev, buf.memory);
+	vkUnmapMemory(dev->dev, buf.memory.mem);*/
 }
 
 Buffer& Device::get_buffer(Buffer_Id id) {
@@ -3188,25 +3234,22 @@ Texture_Id Device::create_texture(
 		VkMemoryPropertyFlags props =
 			VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
 		int mt = dev->find_memory_type(req.memoryTypeBits, props);
-		VkMemoryAllocateInfo ai{};
 		if (mt < 0) {
 			print("Failed to find a satisfying memory type index.\n");
 			pbreak(mt);
 		}
-		ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
-		ai.allocationSize = req.size;
-		ai.memoryTypeIndex = mt;
-		r = vkAllocateMemory(dev->dev, &ai, &dev->ac, &tex.memory);
-		if (r == VK_ERROR_OUT_OF_DEVICE_MEMORY) {
-			print_err("Out of video memory.\n");
-			pbreak(r);
-		}
-		if (r != VK_SUCCESS) {
-			print_err("VRAM allocation failed.\n");
-			pbreak(r);
+		tex.memory = dev->vrama.alloc(mt, req.size, req.alignment);
+		if (!tex.memory.valid()) {
+			print_err("Failed to allocate memory for texture.\n");
+			pbreak(900);
 		}
 	}
-	vkBindImageMemory(dev->dev, tex.image, tex.memory, 0);
+	vkBindImageMemory(
+		dev->dev,
+		tex.image,
+		tex.memory.mem,
+		tex.memory.offset()
+	);
 	tex.w = w;
 	tex.h = h;
 	tex.fmt = fmt;
@@ -3443,7 +3486,7 @@ void Texture_Loader::unload(Asset* a) {
 void Texture_Vk::destroy(Device_Vk* dev) {
 	if (!alias) {
 		vkDestroyImage(dev->dev, image, &dev->ac);
-		vkFreeMemory(dev->dev, memory, &dev->ac);
+		dev->vrama.free(memory);
 	}
 	vkDestroyImageView(dev->dev, view, &dev->ac);
 	dev->textures.remove(id);
@@ -3519,3 +3562,212 @@ void Sampler_Vk::init(Device_Vk* dev, const Sampler_State& s) {
 void Sampler_Vk::destroy(Device_Vk* dev) {
 	vkDestroySampler(dev->dev, sampler, &dev->ac);
 }
+
+/* Allocates s bytes of device memory of type t and sets the page up
+ * as one free chunk covering all of it.  Host-visible memory is
+ * mapped here and stays mapped until Vram_Allocator::destroy; for
+ * any other type mapping is left null. */
+void Vram_Allocator::Page::init(
+	Device_Vk* dev,
+	VkDeviceSize s,
+	int t
+) {
+	VkMemoryAllocateInfo ai{};
+	Chunk* chunk;
+	VkResult r;
+	const auto& props = dev->mem_props.memoryTypes[t];
+	ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+	ai.allocationSize = s;
+	ai.memoryTypeIndex = t;
+	size = s;
+	type = t;
+	next = 0;
+	mapping = 0;
+	r = vkAllocateMemory(dev->dev, &ai, &dev->ac, &memory);
+	if (r == VK_ERROR_OUT_OF_DEVICE_MEMORY) {
+		print_err("Out of VRAM.\n");
+		pbreak(r);
+	}
+	if (r != VK_SUCCESS) {
+		print_err("vkAllocateMemory failed.\n");
+		pbreak(r);
+	}
+	/* Only host-visible memory may be mapped, and every
+	 * host-coherent type is also host-visible, so this one bit is
+	 * the whole test. */
+	if (props.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
+		r = vkMapMemory(
+			dev->dev,
+			memory,
+			0,
+			size,
+			0,
+			&mapping
+		);
+		if (r != VK_SUCCESS) {
+			print_err("vkMapMemory failed.\n");
+			pbreak(r);
+			mapping = 0;
+		}
+	}
+	chunk = (Chunk*)heap_alloc(dev->heap, sizeof *chunk);
+	chunk->offset = 0;
+	chunk->pad = 0;
+	chunk->size = s;
+	chunk->next = 0;
+	chunk->free = true;
+	chunks = chunk;
+}
+
+/* First-fit search of the chunk list for a free chunk of at least
+ * asize bytes.  An exact fit is taken whole; a larger chunk is split
+ * and its remainder stays free.  Returns an allocation with a null
+ * chunk when nothing fits; page is always left null for the caller
+ * to fill in. */
+Vram_Allocator::Allocation Vram_Allocator::Page::imp_alloc(
+	Device_Vk* dev,
+	VkDeviceSize asize
+) {
+	Chunk* chunk;
+	for (chunk = chunks; chunk; chunk = chunk->next) {
+		if (chunk->free) {
+			if (chunk->size == asize) {
+				chunk->free = false;
+				return { memory, 0, chunk };
+			} else if (chunk->size > asize) {
+				Chunk* nc = (Chunk*)heap_alloc(dev->heap, sizeof *nc);
+				nc->offset = chunk->offset + asize;
+				nc->pad = 0;
+				nc->size = chunk->size - asize;
+				nc->next = chunk->next;
+				nc->free = true;
+				chunk->next = nc;
+				chunk->size = asize;
+				chunk->pad = 0;
+				chunk->free = false;
+				return { memory, 0, chunk };
+			}
+		}
+	}
+	return { 0, 0, 0 };
+}
+
+/* Merges each run of adjacent free chunks into a single chunk. */
+void Vram_Allocator::Page::defrag(Device_Vk* dev) {
+	Chunk* chunk;
+	for (chunk = chunks; chunk;) {
+		if (chunk->free) {
+			Chunk* end = chunk->next;
+			VkDeviceSize csize = chunk->size;
+			for (; end && end->free;) {
+				Chunk* next = end->next;
+				csize += end->size;
+				heap_free(dev->heap, end);
+				end = next;
+			}
+			chunk->next = end;
+			chunk->size = csize;
+			if (end) {
+				chunk = end->next;
+			} else chunk = 0;
+		} else
+			chunk = chunk->next;
+	}
+}
+
+/* Takes asize + align bytes from the chunk list, defragmenting and
+ * retrying once if the first attempt fails.  The extra align bytes
+ * leave room to move the start: the distance from the chunk's offset
+ * to the value align_address returns for it is stored as the chunk's
+ * pad.  Returns an allocation with a null chunk on failure. */
+Vram_Allocator::Allocation Vram_Allocator::Page::alloc(
+	Device_Vk* dev,
+	VkDeviceSize asize,
+	VkDeviceSize align
+) {
+	VkDeviceSize as = asize + align;
+	VkDeviceSize al;
+	Allocation a = imp_alloc(dev, as);
+	if (!a.chunk) {
+		defrag(dev);
+		a = imp_alloc(dev, as);
+	}
+	if (!a.chunk) return a;
+	al = align_address((uintptr_t)a.chunk->offset, (size_t)align);
+	a.chunk->pad = al - a.chunk->offset;
+	return a;
+}
+
+/* Starts with an empty page list and remembers the device. */
+void Vram_Allocator::init(Device_Vk* d) {
+	pages = 0;
+	dev = d;
+}
+
+/* Unmaps and frees every page along with its chunk list. */
+void Vram_Allocator::destroy() {
+	Page* page = pages;
+	while (page) {
+		/* Read the links before the nodes that hold them are
+		 * freed. */
+		Page* next_page = page->next;
+		Chunk* chunk = page->chunks;
+		if (page->mapping)
+			vkUnmapMemory(dev->dev, page->memory);
+		vkFreeMemory(dev->dev, page->memory, &dev->ac);
+		while (chunk) {
+			Chunk* next_chunk = chunk->next;
+			heap_free(dev->heap, chunk);
+			chunk = next_chunk;
+		}
+		heap_free(dev->heap, page);
+		page = next_page;
+	}
+	pages = 0;
+}
+
+/* Allocates size bytes from memory of the given type.  Existing
+ * pages of that type are tried first; if none has room a new page is
+ * created.  Returns an allocation with a null chunk on failure. */
+Vram_Allocator::Allocation Vram_Allocator::alloc(
+	int type,
+	VkDeviceSize size,
+	VkDeviceSize align
+) {
+	Page* page = pages;
+	for (; page; page = page->next) {
+		if (page->type == type) {
+			auto a = page->alloc(dev, size, align);
+			if (a.chunk) {
+				a.page = page;
+				return a;
+			}
+		}
+	}
+	page = (Page*)heap_alloc(dev->heap, sizeof *page);
+	/* Page::alloc reserves size + align bytes, so the page has to
+	 * be sized from that sum or a request that is already close to
+	 * a multiple of size_alignment would not fit its own page. */
+	page->init(
+		dev,
+		(VkDeviceSize)align_address(
+			(uintptr_t)(size + align),
+			(size_t)size_alignment
+		),
+		type
+	);
+	page->next = pages;
+	pages = page;
+	auto a = page->alloc(dev, size, align);
+	if (a.chunk)
+		a.page = page;
+	return a;
+}
+
+/* Marks the allocation's chunk free and clears the handle.  Does
+ * nothing for an allocation that is not valid. */
+void Vram_Allocator::free(Allocation& alloc) {
+	if (!alloc.valid()) return;
+	alloc.chunk->free = true;
+	alloc.chunk = 0;
+}
-- 
cgit v1.2.3-54-g00ecf