From b2ece42822225fd67a1286a2ef51f7b76c634255 Mon Sep 17 00:00:00 2001 From: quou Date: Thu, 26 Dec 2024 16:11:33 +1100 Subject: seperate descriptor sets from pipelines to allow swapping out shader resources without recreating pipelines --- Makefile | 5 +- c2.cpp | 7 +- intermediate/kita.bmp | Bin 0 -> 1758538 bytes pipeline.cpp | 51 ++++++--- qstd/str.c | 9 ++ qstd/str.h | 1 + video.cpp | 302 ++++++++++++++++++++++++++++++++------------------ video.hpp | 60 ++++++++-- 8 files changed, 299 insertions(+), 136 deletions(-) create mode 100644 intermediate/kita.bmp diff --git a/Makefile b/Makefile index d56363b..ac6149c 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ target = c2 data_dir = data shaders = $(data_dir)/triangle.csh -textures = $(data_dir)/22.tex +textures = $(data_dir)/22.tex $(data_dir)/kita.tex packed_files = $(shaders) $(textures) tools = qstd cfg sc objects = app.o c2.o video.o pipeline.o asset.o @@ -40,6 +40,9 @@ data/triangle.csh: intermediate/triangle.glsl | $(data_dir) sc data/22.tex: intermediate/22.bmp | $(data_dir) convtexture ./convtexture intermediate/22.bmp $(data_dir)/22.tex bc1 +data/kita.tex: intermediate/kita.bmp | $(data_dir) convtexture + ./convtexture intermediate/kita.bmp $(data_dir)/kita.tex bc1 + app.o: $(CXX) -c $(cflags) app.cpp -o app.o diff --git a/c2.cpp b/c2.cpp index 37e4876..3b46920 100644 --- a/c2.cpp +++ b/c2.cpp @@ -65,10 +65,12 @@ int main() { Device* dev; Shader* shader; Texture* texture; + Texture* texture2; Buffer_Id vbo; Sampler_Id clamped_linear; C2* app = App::create("c2"); void* per_frame; + int frame = 0; app->running = 1; init_arena( &video_arena, @@ -85,6 +87,7 @@ int main() { app->dev = dev; shader = (Shader*)assets.load("triangle.csh"); texture = (Texture*)assets.load("22.tex"); + texture2 = (Texture*)assets.load("kita.tex"); per_frame = heap_alloc( dev->heap, per_frame_memory_size @@ -110,7 +113,7 @@ int main() { pb.vertex_format(shader->vf); pb.texture( shader->descriptor_index("colour_texture"), - texture->id, + frame % 2? texture->id: texture2->id, clamped_linear ); Pipeline& pip = pb.build(); @@ -127,9 +130,9 @@ int main() { draw.instance_count = 1; dev->get_ctx().submit(draw, pip, pass); - /* dev->get_ctx().submit(*dev, pass);*/ r += 10; + frame++; dev->present(); app->end(); } diff --git a/intermediate/kita.bmp b/intermediate/kita.bmp new file mode 100644 index 0000000..6510f03 Binary files /dev/null and b/intermediate/kita.bmp differ diff --git a/pipeline.cpp b/pipeline.cpp index 1715930..527c1e7 100644 --- a/pipeline.cpp +++ b/pipeline.cpp @@ -1,8 +1,11 @@ #include "video.hpp" +#include + extern "C" { -#include "qstd/memory.h" -#include "qstd/plat.h" +#include "memory.h" +#include "plat.h" +#include "str.h" } Pipeline_Builder::Pipeline_Builder(Arena* arena): @@ -30,9 +33,7 @@ Render_Pass& Pipeline_Builder::build_rp() { void Pipeline_Builder::begin() { pip = (Pipeline*)arena_alloc(arena, sizeof *pip); - pip->vertex_format = 0; - pip->shader = 0; - pip->descriptors = 0; + memset(pip, 0, sizeof *pip); } void Pipeline_Builder::shader(Shader_Id s) { @@ -44,16 +45,15 @@ void Pipeline_Builder::texture( Texture_Id t, Sampler_Id s ) { - Texture_Descriptor* d = (Texture_Descriptor*)arena_alloc( - arena, - sizeof *d - ); + Descriptor* d; + Texture_Descriptor* td; + assert(pip->descriptor_count < pipeline_max_descriptors); + d = &pip->descriptors[pip->descriptor_count++]; + td = (Texture_Descriptor*)d->payload; d->slot = binding; d->type = Descriptor::Type::texture; - d->sampler = s; - d->texture = t; - d->next = pip->descriptors; - pip->descriptors = d; + td->sampler = s; + td->texture = t; } void Pipeline_Builder::vertex_format(Vertex_Format_Id vf) { @@ -61,8 +61,33 @@ void Pipeline_Builder::vertex_format(Vertex_Format_Id vf) { } Pipeline& Pipeline_Builder::build() { +#define h(n, v) \ + n = fnv1a64_2(n, (uint8_t*)&v, sizeof v) validate(); + pip->pipeline_hash = fnv1a64(0, 0); + h(pip->pipeline_hash, pip->vertex_format); + h(pip->pipeline_hash, pip->shader); + h(pip->pipeline_hash, pip->descriptor_count); + { + int i, e = pip->descriptor_count; + pip->descriptor_resource_hash = fnv1a64(0, 0); + for (i = 0; i < e; i++) { + Descriptor* d = &pip->descriptors[i]; + h(pip->pipeline_hash, d->type); + h(pip->pipeline_hash, d->slot); + h(pip->descriptor_resource_hash, d->type); + h(pip->descriptor_resource_hash, d->slot); + switch (d->type) { + case Descriptor::Type::texture: { + auto td = (Texture_Descriptor*)d->payload; + h(pip->descriptor_resource_hash, td->sampler); + h(pip->descriptor_resource_hash, td->texture); + } break; + } + } + } return *pip; +#undef h } void Pipeline_Builder::validate() { diff --git a/qstd/str.c b/qstd/str.c index 49ad396..2c74d4b 100644 --- a/qstd/str.c +++ b/qstd/str.c @@ -11,6 +11,15 @@ uint64_t fnv1a64(uint8_t* buf, size_t size) { return hash; } +uint64_t fnv1a64_2(uint64_t hash, uint8_t* buf, size_t size) { + size_t i; + for (i = 0; i < size; i++, buf++) { + hash ^= *buf; + hash *= 0x100000001b3; + } + return hash; +} + uint32_t hash_string(const char* s) { uint32_t h = 2166136261u; for (; *s; s++) { diff --git a/qstd/str.h b/qstd/str.h index 308e92d..011fc27 100644 --- a/qstd/str.h +++ b/qstd/str.h @@ -7,6 +7,7 @@ struct Arena; uint64_t fnv1a64(uint8_t* buf, size_t size); +uint64_t fnv1a64_2(uint64_t h, uint8_t* buf, size_t size); uint32_t hash_string(const char* s); int string_equal(const char* a, const char* b); diff --git a/video.cpp b/video.cpp index 1ce0cba..57b5c73 100644 --- a/video.cpp +++ b/video.cpp @@ -7,6 +7,7 @@ #define max_vertex_formats 64 #define max_rpos 64 #define max_pipelines 64 +#define max_descriptor_sets 64 #define max_shaders 32 #define max_samplers 16 @@ -515,24 +516,27 @@ struct Renderpass_Vk { }; struct Pso_Key { - Pipeline pip; + Pipeline pso; Render_Pass rpo; bool operator==(const Pso_Key& other) const { - int size = sizeof *this, i; - uint8_t* bba = (uint8_t*)this; - uint8_t* bbb = (uint8_t*)&other; - for (i = 0; i < size; i++, bba++, bbb++) - if (*bba != *bbb) return false; - return true; + return + rpo == other.rpo && + pso.desc_layout_eq(other.pso); + } +}; + +struct Dso_Key { + Pipeline pip; + + bool operator==(const Dso_Key& other) const { + return pip.desc_resources_eq(other.pip); } }; struct Pipeline_Vk { VkPipeline pip; VkPipelineLayout lay; - VkDescriptorPool dp; - VkDescriptorSet dset; VkDescriptorSetLayout dlay; int age; @@ -593,9 +597,25 @@ struct Pipeline_Vk { ); void init_descriptors( Device_Vk* dev, - const Pipeline& desc, - int count + const Pipeline& desc + ); + + void on_submit() { + age = 0; + } +}; + +struct Descriptor_Set_Vk { + VkDescriptorPool dp; + VkDescriptorSet dset; + int age; + + void init( + Device_Vk* dev, + const Pipeline_Vk& pip, + const Pipeline& desc ); + void destroy(Device_Vk* dev); void on_submit() { age = 0; @@ -636,8 +656,20 @@ struct Hash_Function template<> struct Hash_Function { - size_t operator()(const Pso_Key& rp) const { - return (size_t)fnv1a64((uint8_t*)&rp, sizeof rp); + size_t operator()(const Pso_Key& k) const { + return fnv1a64_2( + k.pso.pipeline_hash, + (uint8_t*)&k.rpo, + sizeof k.rpo + ); + } +}; + +template<> +struct Hash_Function +{ + size_t operator()(const Dso_Key& k) const { + return k.pip.descriptor_resource_hash; } }; @@ -764,6 +796,7 @@ struct Device_Vk : public Device { Hash_Map rpo_cache; Hash_Map pso_cache; + Hash_Map dso_cache; Terminator* terminators; uint32_t terminator_index; @@ -793,6 +826,14 @@ struct Device_Vk : public Device { Renderpass_Vk& get_rpo(const Render_Pass& rp); Pipeline_Vk& create_pso(const Pso_Key& pip); Pipeline_Vk& get_pso(const Pso_Key& pop); + Descriptor_Set_Vk& create_dso( + const Pipeline_Vk& pip, + const Dso_Key& k + ); + Descriptor_Set_Vk& get_dso( + const Pipeline_Vk& pip, + const Dso_Key& k + ); void collect_garbage(); void queue_destroy(Late_Terminated* obj); @@ -1113,6 +1154,7 @@ void Device_Vk::init_internal() { sampler_count = 1; rpo_cache.init(); pso_cache.init(); + dso_cache.init(); shader_loader.init(this); texture_loader.init(this); register_asset_loader("CSH2", &shader_loader); @@ -1162,6 +1204,8 @@ void Device_Vk::deinit_internal() { i.second.destroy(this); for (auto i : pso_cache) i.second.destroy(this); + for (auto i : dso_cache) + i.second.destroy(this); for (i = 0; i < max_contexts; i++) { auto& context = contexts[i]; if (context.state & context_state_init) @@ -1226,6 +1270,26 @@ Pipeline_Vk& Device_Vk::get_pso(const Pso_Key& pip) { return *pso; } +Descriptor_Set_Vk& Device_Vk::create_dso( + const Pipeline_Vk& pip, + const Dso_Key& k +) { + Descriptor_Set_Vk dso; + dso.age = 0; + dso.init(this, pip, k.pip); + return dso_cache.set(k, dso); +} + +Descriptor_Set_Vk& Device_Vk::get_dso( + const Pipeline_Vk& pip, + const Dso_Key& k +) { + Descriptor_Set_Vk* dso = dso_cache.get(k); + if (!dso) + return create_dso(pip, k); + return *dso; +} + void Renderpass_Vk::destroy(Device_Vk* dev) { vkDestroyRenderPass(dev->dev, rpo, &dev->ac); vkDestroyFramebuffer(dev->dev, fbo, &dev->ac); @@ -1249,6 +1313,14 @@ void Device_Vk::collect_garbage() { pso_cache.remove(i.first); } } + for (const auto& i: dso_cache) { + auto& dso = i.second; + dso.age++; + if (dso.age > max_age) { + dso.destroy(this); + dso_cache.remove(i.first); + } + } } void Device_Vk::queue_destroy(Late_Terminated* obj) { @@ -1665,6 +1737,7 @@ void Context::submit( Vertex_Buffer_Binding* binding; Pso_Key pso_key = { p, rp }; Pipeline_Vk& pso = dev->get_pso(pso_key); + Descriptor_Set_Vk& dso = dev->get_dso(pso, *(Dso_Key*)&p); auto& rpo = ctx->begin_rp(rp); vkCmdBindPipeline( ctx->cb, @@ -1677,7 +1750,7 @@ void Context::submit( pso.lay, 0, 1, - &pso.dset, + &dso.dset, 0, 0 ); @@ -1695,6 +1768,7 @@ void Context::submit( ); ctx->end_rp(rpo); pso.on_submit(); + dso.on_submit(); } void Context::submit( @@ -2169,51 +2243,13 @@ void Pipeline_Vk::init_blending( void Pipeline_Vk::init_descriptors( Device_Vk* dev, - const Pipeline& desc, - int count + const Pipeline& desc ) { - int sampler_count = 0, cbuffer_count = 0; - const Descriptor* d = desc.descriptors; + const Descriptor* sdescs = desc.descriptors; Shader_Vk& shader = *(Shader_Vk*)&dev->get_shader(desc.shader); - VkDescriptorPoolSize sizes[4]; VkResult r; - int size_count = 0, i; - for (; d; d = d->next) { - switch (d->type) { - case Descriptor::Type::texture: - sampler_count++; - break; - default: - assert(0); - break; - } - } - if (sampler_count) { - int idx = size_count++; - sizes[idx] = { - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = (uint32_t)sampler_count - }; - } - if (cbuffer_count) { - int idx = size_count++; - sizes[idx] = { - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .descriptorCount = (uint32_t)cbuffer_count - }; - } - { - VkDescriptorPoolCreateInfo di{}; - di.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - di.poolSizeCount = (uint32_t)size_count; - di.pPoolSizes = sizes; - di.maxSets = (uint32_t)count; - r = vkCreateDescriptorPool(dev->dev, &di, &dev->ac, &dp); - if (r != VK_SUCCESS) { - print_err("Failed to create a descriptor pool.\n"); - pbreak(r); - } - } + int count = desc.descriptor_count; + int i; { VkDescriptorSetLayoutBinding* descs = (VkDescriptorSetLayoutBinding*)heap_alloc( @@ -2221,13 +2257,12 @@ void Pipeline_Vk::init_descriptors( count * sizeof *descs ); VkDescriptorSetLayoutCreateInfo di{}; - VkDescriptorSetAllocateInfo da{}; memset(descs, 0, count * sizeof *descs); - Descriptor* src = desc.descriptors; - for (i = count - 1; i >= 0; i--) { + for (i = 0; i < count; i++) { int j, stage; auto& dst = descs[i]; - switch (src->type) { + auto& src = sdescs[i]; + switch (src.type) { case Descriptor::Type::texture: dst.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; break; @@ -2235,16 +2270,15 @@ void Pipeline_Vk::init_descriptors( assert(0); break; } - dst.binding = src->slot; + dst.binding = src.slot; dst.descriptorCount = 1; dst.stageFlags = 0; - stage = shader.descriptor_stage(src->slot); + stage = shader.descriptor_stage(src.slot); for (j = 0; j < shader_type_count; j++) { if (stage & (1 << j)) { dst.stageFlags |= Shader_Vk::stage((Shader_Type)j); } } - src = src->next; } di.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; di.bindingCount = (uint32_t)count; @@ -2259,47 +2293,6 @@ void Pipeline_Vk::init_descriptors( print_err("Failed to create descriptor set layout.\n"); pbreak(r); } - da.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - da.descriptorPool = dp; - da.descriptorSetCount = 1; - da.pSetLayouts = &dlay; - r = vkAllocateDescriptorSets( - dev->dev, - &da, - &dset - ); - /* todo this should be refactored to allow - * swapping the resources without recreating pipelines */ - src = desc.descriptors; - for (i = 0; i < count; i++) { - VkDescriptorImageInfo img{}; - VkWriteDescriptorSet wd{}; - wd.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - wd.dstSet = dset; - wd.dstBinding = src->slot; - wd.dstArrayElement = 0; - wd.descriptorCount = 1; - - switch (src->type) { - case Descriptor::Type::texture: { - Texture_Descriptor* td = (Texture_Descriptor*)src; - assert(td->texture); - assert(td->sampler); - Texture_Vk& t = *(Texture_Vk*)&dev->get_texture(td->texture); - Sampler_Vk& s = *(Sampler_Vk*)&dev->samplers[td->sampler]; - img.imageView = t.view; - img.sampler = s.sampler; - img.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - wd.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - wd.pImageInfo = &img; - } break; - default: - assert(0); - break; - } - vkUpdateDescriptorSets(dev->dev, 1, &wd, 0, 0); - src = src->next; - } heap_free(dev->heap, descs); } } @@ -2311,10 +2304,9 @@ void Pipeline_Vk::init_layout( VkResult r; VkPipelineLayoutCreateInfo li{}; (void)desc; - int desc_count = desc.count_descriptors(); - int set_count = desc_count? 1: 0; + int set_count = desc.descriptor_count? 1: 0; if (set_count) { - init_descriptors(dev, desc, desc_count); + init_descriptors(dev, desc); } li.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; li.setLayoutCount = set_count; @@ -2336,7 +2328,7 @@ void Pipeline_Vk::init(Device_Vk* dev, const Pso_Key& key) { char buffer[1024]; Arena scope; VkResult r; - const auto& desc = key.pip; + const auto& desc = key.pso; VkGraphicsPipelineCreateInfo info{}; init_arena(&scope, buffer, sizeof buffer); init_layout(dev, desc); @@ -2368,12 +2360,104 @@ void Pipeline_Vk::init(Device_Vk* dev, const Pso_Key& key) { } void Pipeline_Vk::destroy(Device_Vk* dev) { - vkDestroyDescriptorPool(dev->dev, dp, &dev->ac); vkDestroyDescriptorSetLayout(dev->dev, dlay, &dev->ac); vkDestroyPipelineLayout(dev->dev, lay, &dev->ac); vkDestroyPipeline(dev->dev, pip, &dev->ac); } +void Descriptor_Set_Vk::init( + Device_Vk* dev, + const Pipeline_Vk& pip, + const Pipeline& desc +) { + int count = desc.descriptor_count, i; + int sampler_count = 0, cbuffer_count = 0; + int size_count = 0; + VkDescriptorSetAllocateInfo da{}; + VkDescriptorPoolSize sizes[4]; + VkResult r; + for (i = 0; i < count; i++) { + auto& src = desc.descriptors[i]; + switch (src.type) { + case Descriptor::Type::texture: + sampler_count++; + break; + default: + assert(0); + break; + } + } + if (sampler_count) { + int idx = size_count++; + sizes[idx] = { + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = (uint32_t)sampler_count + }; + } + if (cbuffer_count) { + int idx = size_count++; + sizes[idx] = { + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = (uint32_t)cbuffer_count + }; + } + { + VkDescriptorPoolCreateInfo di{}; + di.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + di.poolSizeCount = (uint32_t)size_count; + di.pPoolSizes = sizes; + di.maxSets = (uint32_t)count; + r = vkCreateDescriptorPool(dev->dev, &di, &dev->ac, &dp); + if (r != VK_SUCCESS) { + print_err("Failed to create a descriptor pool.\n"); + pbreak(r); + } + } + da.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + da.descriptorPool = dp; + da.descriptorSetCount = 1; + da.pSetLayouts = &pip.dlay; + r = vkAllocateDescriptorSets( + dev->dev, + &da, + &dset + ); + if (r != VK_SUCCESS) { + print_err("Failed to allocate descriptor set.\n"); + pbreak(r); + } + for (i = 0; i < count; i++) { + VkDescriptorImageInfo img{}; + VkWriteDescriptorSet wd{}; + auto& src = desc.descriptors[i]; + wd.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + wd.dstSet = dset; + wd.dstBinding = src.slot; + wd.dstArrayElement = 0; + wd.descriptorCount = 1; + + switch (src.type) { + case Descriptor::Type::texture: { + Texture_Descriptor* td = (Texture_Descriptor*)src.payload; + assert(td->texture); + assert(td->sampler); + Texture_Vk& t = *(Texture_Vk*)&dev->get_texture(td->texture); + Sampler_Vk& s = *(Sampler_Vk*)&dev->samplers[td->sampler]; + img.imageView = t.view; + img.sampler = s.sampler; + img.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + wd.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + wd.pImageInfo = &img; + } break; + } + vkUpdateDescriptorSets(dev->dev, 1, &wd, 0, 0); + } +} + +void Descriptor_Set_Vk::destroy(Device_Vk* dev) { + vkDestroyDescriptorPool(dev->dev, dp, &dev->ac); +} + int Vertex_Format_Vk::svariable_type_size(SVariable_Type type) { switch (type) { case svariable_type_float: return 4; diff --git a/video.hpp b/video.hpp index dc0574e..658e37a 100644 --- a/video.hpp +++ b/video.hpp @@ -43,30 +43,68 @@ struct Sampler_Id : public Primitive_Id { using Primitive_Id::Primitive_Id; }; +#define descriptor_payload_size 8 + struct Descriptor { enum class Type { texture } type; int slot; - Descriptor* next; + uint8_t payload[descriptor_payload_size]; }; -struct Texture_Descriptor : public Descriptor { - Sampler_Id sampler; +struct Texture_Descriptor { Texture_Id texture; + Sampler_Id sampler; }; +static_assert(sizeof(Texture_Descriptor) <= descriptor_payload_size); + +#define pipeline_max_descriptors 16 + struct Pipeline { + uint64_t pipeline_hash; + uint64_t descriptor_resource_hash; Vertex_Format_Id vertex_format; Shader_Id shader; - Descriptor* descriptors; - - int count_descriptors() const { - const Descriptor* d = descriptors; - int c = 0; - for (; d; d = d->next) - c++; - return c; + Descriptor descriptors[pipeline_max_descriptors]; + int descriptor_count; + + bool desc_layout_eq(const Pipeline& other) const { + int i, c = descriptor_count; + if (other.pipeline_hash != pipeline_hash) + return false; + if (other.descriptor_count != c) + return false; + for (i = 0; i < c; i++) { + auto& a = descriptors[i]; + auto& b = other.descriptors[i]; + if (a.type != b.type) return false; + if (a.slot != b.slot) return false; + } + return true; + } + + bool desc_resources_eq(const Pipeline& other) const { + int i, c = descriptor_count; + if (other.descriptor_resource_hash != descriptor_resource_hash) + return false; + if (other.descriptor_count != c) return false; + for (i = 0; i < c; i++) { + auto& a = descriptors[i]; + auto& b = other.descriptors[i]; + if (a.type != b.type) return false; + if (a.slot != b.slot) return false; + switch (a.type) { + case Descriptor::Type::texture: { + Texture_Descriptor* ta = (Texture_Descriptor*)a.payload; + Texture_Descriptor* tb = (Texture_Descriptor*)b.payload; + if (ta->texture != tb->texture) return false; + if (ta->sampler != tb->sampler) return false; + } break; + } + } + return true; } }; -- cgit v1.2.3-54-g00ecf