diff options
Diffstat (limited to 'video.cpp')
-rw-r--r-- | video.cpp | 1189 |
1 files changed, 1120 insertions, 69 deletions
@@ -1,8 +1,13 @@ #include "app.hpp" #include "video.hpp" -#define device_heap_size (1024 * 1024 * 4) +#define device_heap_size (1024 * 1024 * 8) #define max_textures 1024 +#define max_buffers 1024 +#define max_vertex_formats 64 +#define max_rpos 64 +#define max_pipelines 64 +#define max_shaders 32 extern "C" { #include "memory.h" @@ -33,42 +38,140 @@ void app_destroy_vk_surface( ); } -template <typename ID, typename T, int size> -struct ID_Map -{ - T storage[size]; - ID keys[size]; +struct Device_Vk; + +template <typename T> +struct Hash_Function {}; + +template <typename Key, typename Value, int size> +struct Hash_Map { + enum { + flags_tombstone = 1 << 0, + flags_null = 1 << 1 + }; + + Key keys[size]; + Value values[size]; + uint8_t flags[size]; void init() { int i; + for (i = 0; i < size; i++) flags[i] = flags_null; + } + + int find(const Key& to_find) { + int tombstone = -1, i; + int bucket = (int)(Hash_Function<Key>{}(to_find) % (size_t)size); for (i = 0; i < size; i++) { - keys[i] = 0; + Key& k = keys[bucket]; + uint8_t flag = flags[bucket]; + if (flag & flags_null) { + if (flag & flags_tombstone) { + if (tombstone < 0) tombstone = bucket; + } else return tombstone >= 0? tombstone: bucket; + } else if (k == to_find) return bucket; + bucket = (bucket + 1) % size; } + if (tombstone >= 0) return tombstone; + return -1; } - std::pair<T*, uint32_t> bucket(ID id) { - int index = (int)id % size, i; - for (i = 0; i < size; i++) { - ID key = keys[index]; - if (!key || key == id) return { &storage[index], index }; - index = (index + 1) % size; + Value& set(const Key& k, const Value& v) { + int bucket = find(k); + assert(bucket >= 0); /* full */ + flags[bucket] = 0; + keys[bucket] = k; + values[bucket] = v; + return values[bucket]; + } + + Value* get(const Key& k) { + int bucket = find(k); + if (bucket < 0 || flags[bucket] & flags_null) return 0; + return &values[bucket]; + } + + Value& operator[](const Key& k) { + int bucket = find(k); + assert(bucket >= 0); + return values[bucket]; + } + + void remove(const Key& k) { + int bucket = find(k); + assert(bucket >= 0); + flags[bucket] = flags_null | flags_tombstone; + } + + int has(const Key& k) { + int bucket = find(k); + return bucket >= 0 && ~flags[bucket] & flags_null; + } + + template <typename Table> + struct iterator { + Table* table; + int bucket; + + void init_begin(Table* t) { + bucket = 0; + table = t; + while ( + bucket < size && + table->flags[bucket] & flags_null + ) bucket++; + } + void init_end(Table* t) { + bucket = size; + table = t; + } + bool equals(const iterator<Table>& other) { + return bucket == other.bucket && table == other.table; + } + bool operator==(const iterator<Table>& other) { + return equals(other); } - return { 0, 0 }; + bool operator!=(const iterator<Table>& other) { + return !equals(other); + } + iterator<Table> operator++() { + bucket++; + while ( + bucket < size && + table->flags[bucket] & flags_null + ) bucket++; + return *this; + } + std::pair<Key&, Value&> operator*() { + return { table->keys[bucket], table->values[bucket] }; + } + std::pair<const Key&, const Value&> operator*() const { + return { table->keys[bucket], table->values[bucket] }; + } + }; + + iterator<Hash_Map<Key, Value, size>> begin() { + iterator<Hash_Map<Key, Value, size>> r; + r.init_begin(this); + return r; } - T& set(ID id, const T& v) { - auto [b, index] = bucket(id); - assert(b != 0); - assert(!keys[index]); - keys[index] = id; - *b = v; - return *b; + iterator<Hash_Map<Key, Value, size>> end() { + iterator<Hash_Map<Key, Value, size>> r; + r.init_end(this); + return r; } - T& operator[](ID id) { - T* b = bucket(id).first; - assert(b != 0); - return *b; + iterator<const Hash_Map<Key, Value, size>> begin() const { + iterator<const Hash_Map<Key, Value, size>> r; + r.init_begin(this); + return r; + } + + iterator<const Hash_Map<Key, Value, size>> end() const { + iterator<const Hash_Map<Key, Value, size>> r; + r.init_end(this); + return r; } }; @@ -191,7 +294,6 @@ static void deinit_swap_cap( if (cap->pms) heap_free(d->heap, cap->pms); } -struct Device_Vk; struct Swapchain { VkSwapchainKHR swapchain; Texture_Id* textures; @@ -215,8 +317,37 @@ enum { }; struct Shader_Vk : public Shader { + struct Attribute { + char name[28]; + SVariable_Type type; + int index; + }; + + struct Binding { + char name[24]; + SBinding_Rate rate; + int attr_count; + int index; + int* attributes; + }; + + struct Vertex_Format { + Binding* bindings; + Attribute* attributes; + int attr_count; + int binding_count; + + bool init(Device_Vk* dev, FILE* f); + void destroy(Device_Vk* dev); + + int find_binding(const char* name); + int find_attribute(const char* name); + }; + SProgram_Type type; VkShaderModule modules[shader_type_count]; + char entrypoints[shader_type_count][24]; + Vertex_Format vfd; bool init(Device_Vk* dev, FILE* f); bool init_module( @@ -225,9 +356,26 @@ struct Shader_Vk : public Shader { char* buf, int size ); + bool init_vertex_format( + Device_Vk* dev, + FILE* f + ); void destroy_internal(Device_Vk* dev); + + static VkShaderStageFlagBits stage(Shader_Type type) { + switch (type) { + case shader_type_vertex: + return VK_SHADER_STAGE_VERTEX_BIT; + case shader_type_fragment: + return VK_SHADER_STAGE_FRAGMENT_BIT; + default: + assert(0); + return (VkShaderStageFlagBits)0; + } + } }; +struct Renderpass_Vk; struct Context_Vk : public Context { int state; VkCommandBuffer cb; @@ -243,6 +391,9 @@ struct Context_Vk : public Context { Context_Vk& acquire(Device_Vk* dev); void release(); void destroy(Device_Vk* dev); + + Renderpass_Vk& begin_rp(Device& d, const Render_Pass& rp); + void end_rp(Renderpass_Vk& rpo); }; struct Texture_Vk : public Texture { @@ -250,6 +401,43 @@ struct Texture_Vk : public Texture { VkImageView view; }; +struct Buffer_Vk : public Buffer { + VkBuffer buf; + VkDeviceMemory memory; + VkDeviceSize size; + int flags; + + void init(Device_Vk* dev, int flags, VkDeviceSize size); + void destroy(Device_Vk* dev); + + static VkBufferUsageFlags get_usage(int flags) { + VkBufferUsageFlags r = 0; + if (flags & Buffer_Flags::index_buffer) + r |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + if (flags & Buffer_Flags::vertex_buffer) + r |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + if (flags & Buffer_Flags::uniform_buffer) + r |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; + if (flags & Buffer_Flags::storage_buffer) + r |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + if (flags & Buffer_Flags::copy_src) + r |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + if (flags & Buffer_Flags::copy_dst) + r |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; + return r; + } + + static VkMemoryPropertyFlags get_memory_flags(int flags) { + VkMemoryPropertyFlags r = 0; + r |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + if (flags & Buffer_Flags::cpu_read) + r |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + if (flags & Buffer_Flags::cpu_readwrite) + r |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + return r; + } +}; + struct Renderpass_Vk { VkRenderPass rpo; VkFramebuffer fbo; @@ -265,11 +453,146 @@ struct Renderpass_Vk { void destroy(Device_Vk* dev); }; +struct Pso_Key { + Pipeline pip; + Render_Pass rpo; + + bool operator==(const Pso_Key& other) const { + int size = sizeof *this, i; + uint8_t* bba = (uint8_t*)this; + uint8_t* bbb = (uint8_t*)&other; + for (i = 0; i < size; i++, bba++, bbb++) + if (*bba != *bbb) return false; + return true; + } +}; + +struct Pipeline_Vk { + VkPipeline pip; + VkPipelineLayout lay; + int age; + + void init(Device_Vk* dev, const Pso_Key& desc); + void destroy(Device_Vk* dev); + + void init_stages( + Arena& scope, + Device_Vk* dev, + VkGraphicsPipelineCreateInfo& info, + const Pipeline& desc + ); + void init_vertex_input( + Arena& scope, + Device_Vk* dev, + VkGraphicsPipelineCreateInfo& info, + const Pipeline& desc + ); + void init_input_assembly( + Arena& scope, + Device_Vk* dev, + VkGraphicsPipelineCreateInfo& info, + const Pipeline& desc + ); + void init_viewport( + Arena& scope, + Device_Vk* dev, + VkGraphicsPipelineCreateInfo& info, + const Render_Pass& desc + ); + void init_rasterisation( + Arena& scope, + Device_Vk* dev, + VkGraphicsPipelineCreateInfo& info, + const Pipeline& desc + ); + void init_msaa( + Arena& scope, + Device_Vk* dev, + VkGraphicsPipelineCreateInfo& info, + const Pipeline& desc + ); + void init_depthstencil( + Arena& scope, + Device_Vk* dev, + VkGraphicsPipelineCreateInfo& info, + const Pipeline& desc + ); + void init_blending( + Arena& scope, + Device_Vk* dev, + VkGraphicsPipelineCreateInfo& info, + const Pipeline& desc + ); + void init_layout( + Device_Vk* dev, + const Pipeline& desc + ); + + void on_submit() { + age = 0; + } +}; + +struct Vertex_Format_Vk { + VkVertexInputBindingDescription* bindings; + int binding_count; + VkVertexInputAttributeDescription* attrs; + int attr_count; + + void from_shader(Device_Vk* dev, Shader_Vk& s); + void destroy(Device_Vk* dev); + static int svariable_type_size(SVariable_Type type); + static VkFormat format_from_svar_type(SVariable_Type type); +}; + template<> -struct std::hash<Render_Pass> +struct Hash_Function<Render_Pass> { size_t operator()(const Render_Pass& rp) const { - return fnv1a64((uint8_t*)&rp, sizeof rp); + return (size_t)fnv1a64((uint8_t*)&rp, sizeof rp); + } +}; + +template<> +struct Hash_Function<Pso_Key> +{ + size_t operator()(const Pso_Key& rp) const { + return (size_t)fnv1a64((uint8_t*)&rp, sizeof rp); + } +}; + +template<> +struct Hash_Function<Texture_Id> { + size_t operator()(Texture_Id id) const { + return id.index; + } +}; + +template<> +struct Hash_Function<Buffer_Id> { + size_t operator()(Buffer_Id id) const { + return id.index; + } +}; + +template<> +struct Hash_Function<Shader_Id> { + size_t operator()(Shader_Id id) const { + return id.index; + } +}; + +template<> +struct Hash_Function<Vertex_Format_Id> { + size_t operator()(Vertex_Format_Id id) const { + return id.index; + } +}; + +template<> +struct std::hash<Render_Pass> { + size_t operator()(const Render_Pass& rp) const { + return (size_t)fnv1a64((uint8_t*)&rp, sizeof rp); } }; @@ -282,6 +605,7 @@ struct Device_Vk : public Device { uint32_t backbuffer_index; Texture_Id backbuffer_id; Swap_Cap swap_cap; + VkPhysicalDeviceMemoryProperties mem_props; int queue_index; VkQueue queue; Swapchain swapchain; @@ -291,16 +615,32 @@ struct Device_Vk : public Device { VkDebugUtilsMessengerEXT msg; #endif - ID_Map<Texture_Id, Texture_Vk, max_textures> textures; - Texture_Id texture_count; - - std::unordered_map<Render_Pass, Renderpass_Vk> rpo_cache; + Hash_Map<Texture_Id, Texture_Vk, max_textures> textures; + Hash_Map<Buffer_Id, Buffer_Vk, max_buffers> buffers; + Hash_Map< + Vertex_Format_Id, + Vertex_Format_Vk, + max_vertex_formats + > vertex_formats; + Hash_Map<Shader_Id, Shader_Vk, max_shaders> shaders; + uint32_t texture_count; + uint32_t buffer_count; + uint32_t vertex_format_count; + uint32_t shader_count; + + Hash_Map<Render_Pass, Renderpass_Vk, max_rpos> rpo_cache; + Hash_Map<Pso_Key, Pipeline_Vk, max_pipelines> pso_cache; Texture_Id alloc_texture( VkImage img, VkImageView view, const Texture& copy ); + Buffer_Id alloc_buffer(); + Vertex_Format_Id alloc_vf(); + Vertex_Format_Id create_vf(Shader_Vk& shader); + void destroy_vf(Vertex_Format_Id id); + Shader_Id alloc_shader(); void init_internal(); void deinit_internal(); @@ -317,8 +657,15 @@ struct Device_Vk : public Device { Renderpass_Vk& create_rpo(const Render_Pass& rp); Renderpass_Vk& get_rpo(const Render_Pass& rp); + Pipeline_Vk& create_pso(const Pso_Key& pip); + Pipeline_Vk& get_pso(const Pso_Key& pop); void collect_garbage(); + + int find_memory_type( + uint32_t filter, + VkMemoryPropertyFlags flags + ); }; #ifdef DEBUG @@ -585,6 +932,7 @@ void Device_Vk::create_dev(Swap_Cap* swap_cap) { VkPhysicalDeviceFeatures pdf{}; VkResult r; phys_dev = get_phys_dev(this, swap_cap); + vkGetPhysicalDeviceMemoryProperties(phys_dev, &mem_props); qi.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; qi.queueFamilyIndex = queue_index; qi.queueCount = 1; @@ -615,6 +963,14 @@ void Device_Vk::init_internal() { gladLoaderLoadVulkan(0, 0, 0); textures.init(); texture_count = 1; + buffers.init(); + buffer_count = 1; + vertex_formats.init(); + vertex_format_count = 1; + shaders.init(); + shader_count = 1; + rpo_cache.init(); + pso_cache.init(); find_exts(exts, ext_count); init_ac(); create_inst(exts, ext_count); @@ -637,7 +993,9 @@ void Device_Vk::deinit_internal() { swapchain.destroy(this); deinit_swap_cap(this, &swap_cap); app_destroy_vk_surface(app, inst, surf); - for (auto& i : rpo_cache) + for (auto i : rpo_cache) + i.second.destroy(this); + for (auto i : pso_cache) i.second.destroy(this); for (i = 0; i < max_contexts; i++) { auto& context = contexts[i]; @@ -675,15 +1033,28 @@ Renderpass_Vk& Device_Vk::create_rpo(const Render_Pass& rp) { rpo.init_fb(this, rp); rpo.age = 0; rpo.clear = clear; - rpo_cache[rp] = rpo; - return rpo_cache[rp]; + return rpo_cache.set(rp, rpo); } Renderpass_Vk& Device_Vk::get_rpo(const Render_Pass& rp) { - auto rpo_index = rpo_cache.find(rp); - if (rpo_index == rpo_cache.end()) + Renderpass_Vk* rpo = rpo_cache.get(rp); + if (!rpo) return create_rpo(rp); - return rpo_index->second; + return *rpo; +} + +Pipeline_Vk& Device_Vk::create_pso(const Pso_Key& pip) { + Pipeline_Vk pso; + pso.age = 0; + pso.init(this, pip); + return pso_cache.set(pip, pso); +} + +Pipeline_Vk& Device_Vk::get_pso(const Pso_Key& pip) { + Pipeline_Vk* pso = pso_cache.get(pip); + if (!pso) + return create_pso(pip); + return *pso; } void Renderpass_Vk::destroy(Device_Vk* dev) { @@ -692,14 +1063,37 @@ void Renderpass_Vk::destroy(Device_Vk* dev) { } void Device_Vk::collect_garbage() { - for (auto i = rpo_cache.begin(); i != rpo_cache.end();) { - auto& rp = i->second; + for (const auto& i: rpo_cache) { + auto& rp = i.second; rp.age++; if (rp.age > 3) { rp.destroy(this); - i = rpo_cache.erase(i); - } else ++i; + rpo_cache.remove(i.first); + } + } + for (const auto& i: pso_cache) { + auto& pip = i.second; + pip.age++; + if (pip.age > 3) { + pip.destroy(this); + pso_cache.remove(i.first); + } + } +} + +int Device_Vk::find_memory_type( + uint32_t filter, + VkMemoryPropertyFlags flags +) { + int i, e = mem_props.memoryTypeCount; + auto* types = mem_props.memoryTypes; + for (i = 0; i < e; i++) { + if ( + (filter & (1 << i)) && + (types[i].propertyFlags & flags) == flags + ) return i; } + return -1; } void Renderpass_Vk::init_rp( @@ -711,6 +1105,7 @@ void Renderpass_Vk::init_rp( VkAttachmentReference ar{}; VkSubpassDescription sd{}; VkResult r; + (void)rp; ad.format = dev->swapchain.format.format; ad.samples = VK_SAMPLE_COUNT_1_BIT; @@ -856,7 +1251,6 @@ void Swapchain::initr(const App& app, Device_Vk* dev) { pbreak(r); } } - textures = (Texture_Id*)heap_alloc(dev->heap, sizeof *textures * image_count); get_images(dev); } @@ -870,16 +1264,22 @@ void Swapchain::recreate(const App& app, Device_Vk* dev) { void Swapchain::get_images(Device_Vk* dev) { unsigned count; int i; - VkImage* images = (VkImage*)heap_alloc( - dev->heap, - sizeof *images * image_count - ); + VkImage* images; Texture info{}; info.w = size.width; info.h = size.height; info.alias = true; + vkGetSwapchainImagesKHR(dev->dev, swapchain, &count, 0); + image_count = count; + images = (VkImage*)heap_alloc( + dev->heap, + sizeof *images * image_count + ); + textures = (Texture_Id*)heap_alloc( + dev->heap, + sizeof *textures * image_count + ); vkGetSwapchainImagesKHR(dev->dev, swapchain, &count, images); - assert(count == (unsigned)image_count); for (i = 0; i < image_count; i++) { VkImageView view = make_view(dev, images[i], @@ -991,9 +1391,8 @@ Texture_Id Device_Vk::alloc_texture( VkImageView view, const Texture& copy ) { - Texture_Id id = texture_count++; - Texture_Vk tex; - assert(id < max_textures); + Texture_Vk tex{}; + Texture_Id id(texture_count++); memcpy(&tex, ©, sizeof(Texture)); tex.image = img; tex.view = view; @@ -1001,12 +1400,46 @@ Texture_Id Device_Vk::alloc_texture( return id; } +Buffer_Id Device_Vk::alloc_buffer() { + Buffer_Vk buf{}; + Buffer_Id id(buffer_count++); + buffers.set(id, buf); + return id; +} + +Vertex_Format_Id Device_Vk::alloc_vf() { + Vertex_Format_Vk vf{}; + Vertex_Format_Id id(vertex_format_count++); + vertex_formats.set(id, vf); + return id; +} +Vertex_Format_Id Device_Vk::create_vf( + Shader_Vk& shader +) { + Vertex_Format_Id id = alloc_vf(); + vertex_formats[id].from_shader(this, shader); + return id; +} +void Device_Vk::destroy_vf(Vertex_Format_Id id) { + Vertex_Format_Vk& vf = vertex_formats[id]; + vf.destroy(this); +} + +Shader_Id Device_Vk::alloc_shader() { + Shader_Vk buf{}; + Shader_Id id(shader_count++); + assert(id.index < max_shaders); + shaders.set(id, buf); + return id; +} + void Device::destroy_texture(Texture_Id id) { Device_Vk* dev = (Device_Vk*)this; Texture_Vk& tex = dev->textures[id]; if (!tex.alias) vkDestroyImage(dev->dev, tex.image, &dev->ac); vkDestroyImageView(dev->dev, tex.view, &dev->ac); + dev->textures.remove(id); } void Context::wait(Device& d) { @@ -1028,12 +1461,30 @@ void Context::submit( const Render_Pass& rp ) { Device_Vk* dev = (Device_Vk*)&d; - (void)draw; - (void)p; - (void)rp; - (void)dev; - assert(0); - /* todo */ + Context_Vk* ctx = (Context_Vk*)this; + Vertex_Buffer_Binding* binding; + Pso_Key pso_key = { p, rp }; + Pipeline_Vk& pso = dev->get_pso(pso_key); + auto& rpo = ctx->begin_rp(d, rp); + vkCmdBindPipeline( + ctx->cb, + VK_PIPELINE_BIND_POINT_GRAPHICS, + pso.pip + ); + for (binding = draw.verts; binding->id; binding++) { + VkBuffer buf = ((Buffer_Vk*)&dev->get_buffer(binding->id))->buf; + VkDeviceSize offset = (VkDeviceSize)binding->offset; + vkCmdBindVertexBuffers(ctx->cb, 0, 1, &buf, &offset); + } + vkCmdDraw( + ctx->cb, + draw.vertex_count, + draw.instance_count, + draw.first_vertex, + draw.first_instance + ); + ctx->end_rp(rpo); + pso.on_submit(); } void Context::submit( @@ -1053,9 +1504,8 @@ void Context::submit( /* todo */ } -void Context::submit(Device& d, const Render_Pass& rp) { +Renderpass_Vk& Context_Vk::begin_rp(Device& d, const Render_Pass& rp) { Device_Vk* dev = (Device_Vk*)&d; - Context_Vk* ctx = (Context_Vk*)this; Renderpass_Vk& rpo = dev->get_rpo(rp); VkRenderPassBeginInfo rpbi{}; rpbi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; @@ -1065,14 +1515,24 @@ void Context::submit(Device& d, const Render_Pass& rp) { rpbi.clearValueCount = 1; rpbi.pClearValues = &rpo.clear; vkCmdBeginRenderPass( - ctx->cb, + cb, &rpbi, VK_SUBPASS_CONTENTS_INLINE ); - vkCmdEndRenderPass(ctx->cb); + return rpo; +} + +void Context_Vk::end_rp(Renderpass_Vk& rpo) { + vkCmdEndRenderPass(cb); rpo.on_submit(); } +void Context::submit(Device& d, const Render_Pass& rp) { + Context_Vk* ctx = (Context_Vk*)this; + auto& rpo = ctx->begin_rp(d, rp); + ctx->end_rp(rpo); +} + void Context_Vk::init_pool(Device_Vk* dev) { VkCommandPoolCreateInfo pi{}; VkResult r; @@ -1171,10 +1631,362 @@ Context& Device::get_ctx() { return *vk->current_ctx; } +void Pipeline_Vk::init_stages( + Arena& scope, + Device_Vk* dev, + VkGraphicsPipelineCreateInfo& info, + const Pipeline& desc +) { + int count = 0, i; + Shader_Vk& shader = *(Shader_Vk*)&dev->get_shader(desc.shader); + for (i = 0; i < shader_type_count; i++) { + if (shader.modules[i]) + count++; + } + VkPipelineShaderStageCreateInfo* sis = + (VkPipelineShaderStageCreateInfo*)arena_alloc( + &scope, + sizeof *sis * count + ); + memset(sis, 0, sizeof *sis * count); + for (i = 0, count = 0; i < shader_type_count; i++) { + if (shader.modules[i]) { + auto& si = sis[i]; + si.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + si.flags = 0; + si.stage = Shader_Vk::stage((Shader_Type)i); + si.module = shader.modules[i]; + si.pName = shader.entrypoints[i]; + count++; + } + } + info.stageCount = count; + info.pStages = sis; +} + +void Pipeline_Vk::init_vertex_input( + Arena& scope, + Device_Vk* dev, + VkGraphicsPipelineCreateInfo& info, + const Pipeline& desc +) { + Vertex_Format_Vk& vf = dev->vertex_formats[desc.vertex_format]; + VkPipelineVertexInputStateCreateInfo& vi = + *(VkPipelineVertexInputStateCreateInfo*)arena_alloc( + &scope, + sizeof vi + ); + memset(&vi, 0, sizeof vi); + vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vi.vertexBindingDescriptionCount = vf.binding_count; + vi.pVertexBindingDescriptions = vf.bindings; + vi.vertexAttributeDescriptionCount = vf.attr_count; + vi.pVertexAttributeDescriptions = vf.attrs; + info.pVertexInputState = &vi; +} + +void Pipeline_Vk::init_input_assembly( + Arena& scope, + Device_Vk* dev, + VkGraphicsPipelineCreateInfo& info, + const Pipeline& desc +) { + VkPipelineInputAssemblyStateCreateInfo& ia = + *(VkPipelineInputAssemblyStateCreateInfo*)arena_alloc( + &scope, + sizeof ia + ); + (void)dev; + (void)desc; + (void)info; + memset(&ia, 0, sizeof ia); + ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + ia.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + info.pInputAssemblyState = &ia; +} + +void Pipeline_Vk::init_viewport( + Arena& scope, + Device_Vk* dev, + VkGraphicsPipelineCreateInfo& info, + const Render_Pass& desc +) { + Texture& texture = dev->get_texture(desc.target); + VkPipelineViewportStateCreateInfo& vi = + *(VkPipelineViewportStateCreateInfo*)arena_alloc( + &scope, + sizeof vi + ); + VkRect2D& scissor = *(VkRect2D*)arena_alloc( + &scope, + sizeof scissor + ); + VkViewport& viewport = *(VkViewport*)arena_alloc( + &scope, + sizeof viewport + ); + memset(&vi, 0, sizeof vi); + memset(&scissor, 0, sizeof scissor); + memset(&viewport, 0, sizeof viewport); + scissor.offset.x = 0; + scissor.offset.y = 0; + scissor.extent.width = texture.w; + scissor.extent.height = texture.h; + viewport.x = 0; + viewport.y = 0; + viewport.width = texture.w; + viewport.height = texture.h; + viewport.minDepth = 0.0f; + viewport.maxDepth = 1.0f; + vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + vi.viewportCount = 1; + vi.pViewports = &viewport; + vi.scissorCount = 1; + vi.pScissors = &scissor; + info.pViewportState = &vi; +} + +void Pipeline_Vk::init_rasterisation( + Arena& scope, + Device_Vk* dev, + VkGraphicsPipelineCreateInfo& info, + const Pipeline& desc +) { + VkPipelineRasterizationStateCreateInfo& ri = + *(VkPipelineRasterizationStateCreateInfo*)arena_alloc( + &scope, + sizeof ri + ); + (void)dev; + (void)desc; + memset(&ri, 0, sizeof ri); + ri.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + ri.depthClampEnable = VK_FALSE; + ri.rasterizerDiscardEnable = VK_FALSE; + ri.polygonMode = VK_POLYGON_MODE_FILL; + ri.lineWidth = 1.0f; + ri.cullMode = VK_CULL_MODE_NONE; + ri.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + ri.depthBiasEnable = VK_FALSE; + info.pRasterizationState = &ri; +} + +void Pipeline_Vk::init_msaa( + Arena& scope, + Device_Vk* dev, + VkGraphicsPipelineCreateInfo& info, + const Pipeline& desc +) { + VkPipelineMultisampleStateCreateInfo& mi = + *(VkPipelineMultisampleStateCreateInfo*)arena_alloc( + &scope, + sizeof mi + ); + (void)dev; + (void)desc; + memset(&mi, 0, sizeof mi); + mi.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + mi.sampleShadingEnable = VK_FALSE; + mi.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + info.pMultisampleState = &mi; +} + +void Pipeline_Vk::init_depthstencil( + Arena& scope, + Device_Vk* dev, + VkGraphicsPipelineCreateInfo& info, + const Pipeline& desc +) { + VkPipelineDepthStencilStateCreateInfo& ds = + *(VkPipelineDepthStencilStateCreateInfo*)arena_alloc( + &scope, + sizeof ds + ); + (void)dev; + (void)desc; + memset(&ds, 0, sizeof ds); + ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + ds.depthTestEnable = VK_FALSE; + ds.depthWriteEnable = VK_FALSE; + ds.depthCompareOp = VK_COMPARE_OP_LESS; + ds.depthBoundsTestEnable = VK_FALSE; + ds.stencilTestEnable = VK_FALSE; + info.pDepthStencilState = &ds; +} + +void Pipeline_Vk::init_blending( + Arena& scope, + Device_Vk* dev, + VkGraphicsPipelineCreateInfo& info, + const Pipeline& desc +) { + VkPipelineColorBlendStateCreateInfo& bi = + *(VkPipelineColorBlendStateCreateInfo*)arena_alloc( + &scope, + sizeof bi + ); + VkPipelineColorBlendAttachmentState& abs = + *(VkPipelineColorBlendAttachmentState*)arena_alloc( + &scope, + sizeof abs + ); + (void)dev; + (void)desc; + memset(&bi, 0, sizeof bi); + memset(&abs, 0, sizeof abs); + abs.colorWriteMask = + VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | + VK_COLOR_COMPONENT_A_BIT; + abs.blendEnable = VK_FALSE; + bi.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + bi.flags = 0; + bi.logicOpEnable = VK_FALSE; + bi.attachmentCount = 1; + bi.pAttachments = &abs; + info.pColorBlendState = &bi; +} + +void Pipeline_Vk::init_layout( + Device_Vk* dev, + const Pipeline& desc +) { + VkResult r; + VkPipelineLayoutCreateInfo li{}; + (void)desc; + li.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + li.setLayoutCount = 0; + li.pushConstantRangeCount = 0; /* todo */ + r = vkCreatePipelineLayout( + dev->dev, + &li, + &dev->ac, + &lay + ); + if (r != VK_SUCCESS) { + print_err("Failed to create a pipeline layout.\n"); + pbreak(r); + } +} + +void Pipeline_Vk::init(Device_Vk* dev, const Pso_Key& key) { + char buffer[1024]; + Arena scope; + VkResult r; + const auto& desc = key.pip; + VkGraphicsPipelineCreateInfo info{}; + init_arena(&scope, buffer, sizeof buffer); + init_layout(dev, desc); + init_stages(scope, dev, info, desc); + init_vertex_input(scope, dev, info, desc); + init_input_assembly(scope, dev, info, desc); + init_viewport(scope, dev, info, key.rpo); + init_rasterisation(scope, dev, info, desc); + init_msaa(scope, dev, info, desc); + init_depthstencil(scope, dev, info, desc); + init_blending(scope, dev, info, desc); + info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + info.flags = 0; + info.renderPass = dev->get_rpo(key.rpo).rpo; + info.subpass = 0; + info.layout = lay; + r = vkCreateGraphicsPipelines( + dev->dev, + VK_NULL_HANDLE, + 1, + &info, + &dev->ac, + &pip + ); + if (r != VK_SUCCESS) { + print_err("Failed to create a pipeline.\n"); + pbreak(r); + } +} + +void Pipeline_Vk::destroy(Device_Vk* dev) { + vkDestroyPipeline(dev->dev, pip, &dev->ac); + vkDestroyPipelineLayout(dev->dev, lay, &dev->ac); +} + +int Vertex_Format_Vk::svariable_type_size(SVariable_Type type) { + switch (type) { + case svariable_type_float: return 4; + case svariable_type_vec2: return 8; + case svariable_type_vec3: return 12; + case svariable_type_vec4: return 16; + default: assert(0); /* todo */ + } + return 0; +} + +VkFormat Vertex_Format_Vk::format_from_svar_type( + SVariable_Type type +) { + switch (type) { + case svariable_type_float: + return VK_FORMAT_R32_SFLOAT; + case svariable_type_vec2: + return VK_FORMAT_R32G32_SFLOAT; + case svariable_type_vec3: + return VK_FORMAT_R32G32B32_SFLOAT; + case svariable_type_vec4: + return VK_FORMAT_R32G32B32A32_SFLOAT; + default: assert(0); /* todo */ + } + return (VkFormat)0; +} + +void Vertex_Format_Vk::from_shader( + Device_Vk* dev, + Shader_Vk& s +) { + Shader_Vk::Vertex_Format& vfd = s.vfd; + int i; + binding_count = vfd.binding_count; + attr_count = vfd.attr_count; + bindings = (VkVertexInputBindingDescription*)heap_alloc( + dev->heap, + vfd.binding_count * sizeof *bindings + ); + attrs = (VkVertexInputAttributeDescription*)heap_alloc( + dev->heap, + vfd.attr_count * sizeof *attrs + ); + memset(bindings, 0, vfd.binding_count * sizeof *bindings); + memset(attrs, 0, vfd.attr_count * sizeof *attrs); + for (i = 0; i < vfd.binding_count; i++) { + int j, stride = 0; + auto& src = vfd.bindings[i]; + auto& dst = bindings[src.index]; + for (j = 0; j < src.attr_count; j++) { + auto& src_attr = vfd.attributes[src.attributes[j]]; + auto& dst_attr = attrs[src.attributes[j]]; + dst_attr.binding = src.index; + dst_attr.location = j; + dst_attr.format = format_from_svar_type(src_attr.type); + dst_attr.offset = stride; + stride += svariable_type_size(src_attr.type); + } + dst.binding = src.index; + dst.stride = stride; + dst.inputRate = + src.rate == sbinding_rate_instance? + VK_VERTEX_INPUT_RATE_INSTANCE: + VK_VERTEX_INPUT_RATE_VERTEX; + } +} + +void Vertex_Format_Vk::destroy(Device_Vk* dev) { + heap_free(dev->heap, attrs); + heap_free(dev->heap, bindings); +} + /* todo proper asset manager which will load this stuff */ bool Shader_Vk::init(Device_Vk* dev, FILE* f) { char magic[4]; - int attr_count, target_count, i; + int binding_count, target_count, i; fread(magic, 4, 1, f); if ( magic[0] != 'C' || @@ -1183,9 +1995,18 @@ bool Shader_Vk::init(Device_Vk* dev, FILE* f) { magic[3] != '2' ) return false; fread(&type, 4, 1, f); - fread(&attr_count, 4, 1, f); + fread(&binding_count, 4, 1, f); fread(&target_count, 4, 1, f); - fseek(f, 32 * attr_count + 32 * target_count, SEEK_CUR); + vfd.binding_count = binding_count; + assert(binding_count); + if (!vfd.init(dev, f)) + return false; + vf = dev->create_vf(*this); + fseek( + f, + 32 * target_count, + SEEK_CUR + ); for (i = 0; i < shader_type_count; i++) { int o, s; fread(&o, 4, 1, f); @@ -1203,6 +2024,7 @@ bool Shader_Vk::init(Device_Vk* dev, FILE* f) { } else { modules[i] = VK_NULL_HANDLE; } + fread(entrypoints[i], 1, 24, f); } return true; } @@ -1224,28 +2046,257 @@ bool Shader_Vk::init_module( return r == VK_SUCCESS; } +int Shader_Vk::Vertex_Format::find_binding(const char* name) { + int i; + int bucket = (int)(hash_string(name) % binding_count); + for (i = 0; i < binding_count; i++) { + Binding& binding = bindings[bucket]; + if ( + !binding.name[0] || + !strcmp(binding.name, name) + ) return bucket; + bucket = (bucket + 1) % binding_count; + } + return -1; +} + +int Shader_Vk::Vertex_Format::find_attribute(const char* name) { + int i; + int bucket = (int)(hash_string(name) % attr_count); + for (i = 0; i < attr_count; i++) { + Attribute& attr = attributes[bucket]; + if ( + !attr.name[0] || + !strcmp(attr.name, name) + ) return bucket; + bucket = (bucket + 1) % attr_count; + } + return -1; +} + +bool Shader_Vk::Vertex_Format::init( + Device_Vk* dev, + FILE* f +) { + int i, attr_index = 0; + int start = ftell(f); + attr_count = 0; + for (i = 0; i < binding_count; i++) { + char name[24]; + int count, j; + SBinding_Rate rate; + fread(name, 1, sizeof name, f); + fread(&rate, 4, 1, f); + fread(&count, 4, 1, f); + for (j = 0; j < count; j++) { + char aname[28]; + SVariable_Type type; + fread(aname, 1, sizeof aname, f); + fread(&type, 4, 1, f); + attr_count++; + } + } + fseek(f, start, SEEK_SET); + bindings = (Binding*)heap_alloc( + dev->heap, + binding_count * sizeof *bindings + ); + attributes = (Attribute*)heap_alloc( + dev->heap, + attr_count * sizeof *attributes + ); + for (i = 0; i < binding_count; i++) + bindings[i].name[0] = 0; + for (i = 0; i < attr_count; i++) + attributes[i].name[0] = 0; + for (i = 0; i < binding_count; i++) { + Binding* binding; + char name[24]; + int count, j; + SBinding_Rate rate; + fread(name, 1, sizeof name, f); + fread(&rate, 4, 1, f); + fread(&count, 4, 1, f); + binding = &bindings[find_binding(name)]; + strcpy(binding->name, name); + binding->rate = rate; + binding->attr_count = count; + binding->attributes = (int*)heap_alloc( + dev->heap, + count * sizeof *binding->attributes + ); + binding->index = i; + for (j = 0; j < count; j++, attr_index++) { + int bucket; + Attribute* attr; + char aname[28]; + SVariable_Type type; + fread(aname, 1, sizeof aname, f); + fread(&type, 4, 1, f); + bucket = find_attribute(aname); + binding->attributes[j] = bucket; + attr = &attributes[bucket]; + strcpy(attr->name, aname); + attr->index = j; + attr->type = type; + } + } + return true; +} + +void Shader_Vk::Vertex_Format::destroy(Device_Vk* dev) { + int i; + for (i = 0; i < binding_count; i++) + heap_free(dev->heap, bindings[i].attributes); + heap_free(dev->heap, bindings); + heap_free(dev->heap, attributes); +} + void Shader_Vk::destroy_internal(Device_Vk* dev) { int i; for (i = 0; i < shader_type_count; i++) if (modules[i]) vkDestroyShaderModule(dev->dev, modules[i], &dev->ac); + vfd.destroy(dev); + dev->destroy_vf(vf); } void Shader::destroy(Device* dev) { ((Shader_Vk*)this)->destroy_internal((Device_Vk*)dev); } -Shader* Device::load_shader(const char* fname) { +int Shader::binding_index(const char* name) { + int idx; + Shader_Vk* sh = (Shader_Vk*)this; + idx = sh->vfd.find_binding(name); + if (idx < 0 || !sh->vfd.bindings[idx].name[0]) return -1; + return idx; +} + +int Shader::attribute_index(const char* name) { + int idx; + Shader_Vk* sh = (Shader_Vk*)this; + idx = sh->vfd.find_attribute(name); + if (idx < 0 || !sh->vfd.attributes[idx].name[0]) return -1; + return idx; +} + +void Buffer_Vk::init( + Device_Vk* dev, + int flags, + VkDeviceSize size +) { + VkBufferCreateInfo bi{}; + VkMemoryRequirements req; + VkResult r; + bi.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + bi.size = size; + bi.usage = get_usage(flags); + bi.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + r = vkCreateBuffer(dev->dev, &bi, &dev->ac, &buf); + if (r != VK_SUCCESS) { + print_err("Failed to create a buffer.\n"); + pbreak(r); + } + vkGetBufferMemoryRequirements(dev->dev, buf, &req); + { + VkMemoryPropertyFlags props = get_memory_flags(flags); + int mt = dev->find_memory_type(req.memoryTypeBits, props); + VkMemoryAllocateInfo ai{}; + if (mt < 0) { + print("Failed to find a satisfying memory type index.\n"); + pbreak(mt); + } + ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + ai.allocationSize = req.size; + ai.memoryTypeIndex = mt; + r = vkAllocateMemory(dev->dev, &ai, &dev->ac, &memory); + if (r == VK_ERROR_OUT_OF_DEVICE_MEMORY) { + print_err("Out of video memory.\n"); + pbreak(r); + } + if (r != VK_SUCCESS) { + print_err("VRAM allocation failed.\n"); + pbreak(r); + } + } + vkBindBufferMemory(dev->dev, buf, memory, 0); +} + +void Buffer_Vk::destroy(Device_Vk* dev) { + vkDestroyBuffer(dev->dev, buf, &dev->ac); + vkFreeMemory(dev->dev, memory, &dev->ac); +} + +Buffer_Id Device::create_buffer(size_t size, int flags) { + Device_Vk* dev = (Device_Vk*)this; + Buffer_Id id = dev->alloc_buffer(); + Buffer_Vk& buf = *(Buffer_Vk*)&get_buffer(id); + buf.init(dev, flags, (VkDeviceSize)size); + return id; +} + +void Device::destroy_buffer(Buffer_Id id) { + Device_Vk* dev = (Device_Vk*)this; + Buffer_Vk& buf = *(Buffer_Vk*)&get_buffer(id); + buf.destroy(dev); + dev->buffers.remove(id); +} + +void* Device::map_buffer( + Buffer_Id id, + size_t offset, + size_t size +) { + Device_Vk* dev = (Device_Vk*)this; + Buffer_Vk& buf = *(Buffer_Vk*)&get_buffer(id); + void* ptr; + vkMapMemory( + dev->dev, + buf.memory, + (VkDeviceSize)offset, + (VkDeviceSize)size, + 0, + &ptr + ); + return ptr; +} + +void Device::unmap_buffer(Buffer_Id id) { + Device_Vk* dev = (Device_Vk*)this; + Buffer_Vk& buf = *(Buffer_Vk*)&get_buffer(id); + vkUnmapMemory(dev->dev, buf.memory); +} + +Buffer& Device::get_buffer(Buffer_Id id) { + return ((Device_Vk*)this)->buffers[id]; +} + +Shader& Device::get_shader(Shader_Id id) { + return ((Device_Vk*)this)->shaders[id]; +} + +Shader_Id Device::load_shader(const char* fname) { FILE* f = fopen(fname, "rb"); Shader_Vk* s; + Device_Vk* dev = (Device_Vk*)this; + Shader_Id id; bool r; - if (!f) return 0; - s = (Shader_Vk*)heap_alloc(heap, sizeof *s); - r = s->init((Device_Vk*)this, f); + if (!f) return Shader_Id(0); + id = dev->alloc_shader(); + s = (Shader_Vk*)&get_shader(id); + r = s->init(dev, f); fclose(f); if (!r) { heap_free(heap, s); return 0; } - return s; + return id; +} + +void Device::destroy_shader(Shader_Id id) { + Device_Vk* dev = (Device_Vk*)this; + Shader_Vk& buf = *(Shader_Vk*)&get_shader(id); + buf.destroy(dev); + dev->shaders.remove(id); } |