#include "app.hpp" #include "video.hpp" #define device_heap_size (1024 * 1024 * 8) #define max_textures 1024 #define max_buffers 1024 #define max_vertex_formats 64 #define max_rpos 64 #define max_fbos 64 #define max_pipelines 64 #define max_descriptor_sets 64 #define max_shaders 32 #define max_samplers 16 extern "C" { #include "memory.h" #include "pack.h" #include "plat.h" #include "sc/sh_enums.h" #include "sc/sh_helpers.h" #include "str.h" } #include #include #include #include #include "glad_vk.h" #include #ifdef min /* use std::min and max instead */ #undef min #endif #ifdef max #undef max #endif #if !defined(plat_win) #define __stdcall #endif const char* device_exts[] = { VK_KHR_SWAPCHAIN_EXTENSION_NAME, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME }; extern "C" { VkSurfaceKHR app_create_vk_surface(App* app, VkInstance inst); void app_destroy_vk_surface( App* app, VkInstance inst, VkSurfaceKHR surf ); } struct Device_Vk; template struct Hash_Function {}; template struct Hash_Map { enum { flags_tombstone = 1 << 0, flags_null = 1 << 1 }; Key keys[size]; Value values[size]; uint8_t flags[size]; void init() { int i; for (i = 0; i < size; i++) flags[i] = flags_null; } int find(const Key& to_find) { int tombstone = -1, i; int bucket = (int)(Hash_Function{}(to_find) % (size_t)size); for (i = 0; i < size; i++) { Key& k = keys[bucket]; uint8_t flag = flags[bucket]; if (flag & flags_null) { if (flag & flags_tombstone) { if (tombstone < 0) tombstone = bucket; } else return tombstone >= 0? 
tombstone: bucket; } else if (k == to_find) return bucket; bucket = (bucket + 1) % size; } if (tombstone >= 0) return tombstone; return -1; } Value& set(const Key& k, const Value& v) { int bucket = find(k); assert(bucket >= 0); /* full */ flags[bucket] = 0; keys[bucket] = k; values[bucket] = v; return values[bucket]; } Value* get(const Key& k) { int bucket = find(k); if (bucket < 0 || flags[bucket] & flags_null) return 0; return &values[bucket]; } Value& operator[](const Key& k) { int bucket = find(k); assert(bucket >= 0); return values[bucket]; } Key* kaddr(const Key& k) { int bucket = find(k); if (bucket < 0 || flags[bucket] & flags_null) return 0; return &keys[bucket]; } void remove(const Key& k) { int bucket = find(k); assert(bucket >= 0); flags[bucket] = flags_null | flags_tombstone; } int has(const Key& k) { int bucket = find(k); return bucket >= 0 && ~flags[bucket] & flags_null; } template struct iterator { Table* table; int bucket; void init_begin(Table* t) { bucket = 0; table = t; while ( bucket < size && table->flags[bucket] & flags_null ) bucket++; } void init_end(Table* t) { bucket = size; table = t; } bool equals(const iterator& other) { return bucket == other.bucket && table == other.table; } bool operator==(const iterator
& other) { return equals(other); } bool operator!=(const iterator
& other) { return !equals(other); } iterator
operator++() { bucket++; while ( bucket < size && table->flags[bucket] & flags_null ) bucket++; return *this; } std::pair operator*() { return { table->keys[bucket], table->values[bucket] }; } std::pair operator*() const { return { table->keys[bucket], table->values[bucket] }; } }; iterator> begin() { iterator> r; r.init_begin(this); return r; } iterator> end() { iterator> r; r.init_end(this); return r; } iterator> begin() const { iterator> r; r.init_begin(this); return r; } iterator> end() const { iterator> r; r.init_end(this); return r; } }; struct Vram_Allocator { static constexpr int size_alignment = (1024 * 1024 * 32); struct Page; struct Chunk; struct Allocation { VkDeviceMemory mem; Page* page; Chunk* chunk; bool valid() const { return chunk != 0; } VkDeviceSize offset() const { return chunk->get_offset(); } void* map(VkDeviceSize off) { assert(page->mapping != 0); return (char*)page->mapping + offset() + off; } }; struct Chunk { VkDeviceSize offset; VkDeviceSize pad; VkDeviceSize size; Chunk* next; bool free; VkDeviceSize get_offset() { return offset + pad; } }; struct Page { VkDeviceMemory memory; VkDeviceSize size; int type; Chunk* chunks; Page* next; void* mapping; /* need something better for host-writable, * non-coherent mappings */ void init(Device_Vk* dev, VkDeviceSize size, int type); void defrag(Device_Vk* dev); Allocation imp_alloc(Device_Vk* dev, VkDeviceSize size); Allocation alloc( Device_Vk* dev, VkDeviceSize size, VkDeviceSize align ); }; Page* pages; Device_Vk* dev; void init(Device_Vk* d); void destroy(); Allocation alloc( int type, VkDeviceSize size, VkDeviceSize align ); void free(Allocation& alloc); }; static VkCullModeFlags get_vk_cull_mode(Cull_Mode mode) { switch (mode) { case Cull_Mode::none: return VK_CULL_MODE_NONE; case Cull_Mode::back: return VK_CULL_MODE_BACK_BIT; case Cull_Mode::front: return VK_CULL_MODE_FRONT_BIT; } assert(0); return VK_CULL_MODE_NONE; } static VkFormat get_vk_format(Texture_Format fmt) { switch (fmt) { case 
texture_format_r8i: return VK_FORMAT_R8_UNORM; case texture_format_r16f: return VK_FORMAT_R16_SFLOAT; case texture_format_r32f: return VK_FORMAT_R32_SFLOAT; case texture_format_rg8i: return VK_FORMAT_R8G8_UNORM; case texture_format_rg16f: return VK_FORMAT_R16G16_SFLOAT; case texture_format_rg32f: return VK_FORMAT_R32G32_SFLOAT; case texture_format_rgb8i: return VK_FORMAT_R8G8B8_UNORM; case texture_format_rgb16f: return VK_FORMAT_R16G16B16_SFLOAT; case texture_format_rgb32f: return VK_FORMAT_R32G32B32_SFLOAT; case texture_format_rgba8i: return VK_FORMAT_R8G8B8A8_UNORM; case texture_format_rgba8i_srgb: return VK_FORMAT_R8G8B8A8_SRGB; case texture_format_bgra8i_srgb: return VK_FORMAT_B8G8R8A8_SRGB; case texture_format_rgba16f: return VK_FORMAT_R16G16B16A16_SFLOAT; case texture_format_rgba32f: return VK_FORMAT_R32G32B32A32_SFLOAT; case texture_format_bc1: return VK_FORMAT_BC1_RGB_UNORM_BLOCK; case texture_format_bc4: return VK_FORMAT_BC4_UNORM_BLOCK; case texture_format_bc5: return VK_FORMAT_BC5_UNORM_BLOCK; case texture_format_d16: return VK_FORMAT_D16_UNORM; case texture_format_d24s8: return VK_FORMAT_D24_UNORM_S8_UINT; case texture_format_d32: return VK_FORMAT_D32_SFLOAT; case texture_format_count: break; } assert(0); return VK_FORMAT_UNDEFINED; } static VkBlendFactor get_vk_blend_factor(Blend_Factor mode) { switch (mode) { case Blend_Factor::zero: return VK_BLEND_FACTOR_ZERO; case Blend_Factor::one: return VK_BLEND_FACTOR_ONE; case Blend_Factor::src_colour: return VK_BLEND_FACTOR_SRC_COLOR; case Blend_Factor::inv_src_colour: return VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR; case Blend_Factor::dst_colour: return VK_BLEND_FACTOR_DST_COLOR; case Blend_Factor::inv_dst_colour: return VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR; case Blend_Factor::src_alpha: return VK_BLEND_FACTOR_SRC_ALPHA; case Blend_Factor::inv_src_alpha: return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; case Blend_Factor::dst_alpha: return VK_BLEND_FACTOR_DST_ALPHA; case Blend_Factor::inv_dst_alpha: return 
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; } assert(0); return VK_BLEND_FACTOR_ONE; } static VkBlendOp get_vk_blend_op(Blend_Mode mode) { switch (mode) { case Blend_Mode::add: return VK_BLEND_OP_ADD; case Blend_Mode::subtract: return VK_BLEND_OP_SUBTRACT; case Blend_Mode::reverse_subtract: return VK_BLEND_OP_SUBTRACT; case Blend_Mode::min: return VK_BLEND_OP_MIN; case Blend_Mode::max: return VK_BLEND_OP_MAX; } assert(0); return VK_BLEND_OP_ADD; } static VkImageUsageFlags get_texture_usage(int flags) { VkImageUsageFlags f = 0; if (flags & Texture_Flags::sampleable) f |= VK_IMAGE_USAGE_SAMPLED_BIT; if (flags & Texture_Flags::colour_target) f |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; if (flags & Texture_Flags::depth_stencil_target) f |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; if (flags & Texture_Flags::copy_src) f |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; if (flags & Texture_Flags::copy_dst) f |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; return f; } static VkImageAspectFlags get_image_aspect(int flags) { VkImageUsageFlags f = 0; if (flags & Texture_Flags::depth_stencil_target) f |= VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; else f |= VK_IMAGE_ASPECT_COLOR_BIT; return f; } VkImageLayout state_to_image_layout(Resource_State s) { switch (s) { case undefined: return VK_IMAGE_LAYOUT_UNDEFINED; case copy_dst: return VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; case copy_src: return VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; case shader_read: return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; case render_target: return VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; case presentable: return VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; } assert(0); return VK_IMAGE_LAYOUT_UNDEFINED; } static void* __stdcall vk_alloc( void* uptr, size_t size, size_t alignment, VkSystemAllocationScope scope ) { Device* d = (Device*)uptr; void* r; (void)scope; if (!size) return 0; r = heap_alloc_aligned( d->heap, size, alignment ); if (!r) { print_err("Out of memory."); pbreak(4096); } return r; } static void __stdcall vk_free( 
void* uptr, void* ptr ) { Device* d = (Device*)uptr; if (!ptr) return; heap_free(d->heap, ptr); } static void* __stdcall vk_realloc( void* uptr, void* old, size_t size, size_t alignment, VkSystemAllocationScope scope ) { int os; void* na; (void)scope; if (!old) return vk_alloc(uptr, size, alignment, scope); if (!size) { vk_free(uptr, old); return 0; } os = heap_block_size(old); na = vk_alloc(uptr, size, alignment, scope); memcpy(na, old, std::min(os, (int)size)); vk_free(uptr, old); return na; } typedef struct { VkSurfaceCapabilitiesKHR cap; unsigned fmt_count, pm_count; VkSurfaceFormatKHR* fmts; VkPresentModeKHR* pms; } Swap_Cap; static void get_swap_cap( Device* d, VkPhysicalDevice dev, VkSurfaceKHR surf, Swap_Cap* cap ) { cap->fmts = 0; cap->pms = 0; vkGetPhysicalDeviceSurfaceCapabilitiesKHR( dev, surf, &cap->cap ); vkGetPhysicalDeviceSurfaceFormatsKHR( dev, surf, &cap->fmt_count, 0 ); if (cap->fmt_count) { cap->fmts = (VkSurfaceFormatKHR*)heap_alloc( d->heap, sizeof *cap->fmts * cap->fmt_count ); vkGetPhysicalDeviceSurfaceFormatsKHR( dev, surf, &cap->fmt_count, cap->fmts ); } vkGetPhysicalDeviceSurfacePresentModesKHR( dev, surf, &cap->pm_count, 0 ); if (cap->pm_count) { cap->pms = (VkPresentModeKHR*)heap_alloc( d->heap, sizeof *cap->pms * cap->pm_count ); vkGetPhysicalDeviceSurfacePresentModesKHR( dev, surf, &cap->pm_count, cap->pms ); } } static void deinit_swap_cap( Device* d, Swap_Cap* cap ) { if (cap->fmts) heap_free(d->heap, cap->fmts); if (cap->pms) heap_free(d->heap, cap->pms); } struct Late_Terminated { Late_Terminated* next; virtual void destroy(Device_Vk* dev) = 0; }; struct Swapchain { VkSwapchainKHR swapchain; Texture_Id* textures; VkSurfaceFormatKHR format; VkExtent2D size; VkPresentModeKHR mode; VkSemaphore image_avail; int image_count; void init(const App& app, Device_Vk* dev); void initr(const App& app, Device_Vk* dev); void recreate(const App& app, Device_Vk* dev); void get_images(Device_Vk* dev); void destroy(Device_Vk* dev); Texture_Id 
create_image( Device_Vk* dev, VkImage image, VkImageView view, int w, int h ); }; #define max_contexts 16 enum { context_state_avail = 1 << 0, context_state_init = 1 << 1 }; struct Shader_Vk : public Shader, public Late_Terminated { struct Attribute { char name[28]; SVariable_Type type; int index; }; struct Binding { char name[24]; SBinding_Rate rate; int attr_count; int index; int* attributes; }; struct Vertex_Format { Binding* bindings; Attribute* attributes; int attr_count; int binding_count; bool init(Device_Vk* dev, Pack_File* f); void destroy(Device_Vk* dev); int find_binding(const char* name); int find_attribute(const char* name); }; struct Desc { char name[24]; int slot; int stage; }; SProgram_Type type; VkShaderModule modules[shader_type_count]; char entrypoints[shader_type_count][24]; Vertex_Format vfd; Desc* descs; int desc_count; bool init(Device_Vk* dev, Pack_File* f); bool init_module( Device_Vk* dev, int stage, char* buf, int size ); void destroy(Device_Vk* dev) override; int find_descriptor(const char* name); static VkShaderStageFlagBits stage(Shader_Type type) { switch (type) { case shader_type_vertex: return VK_SHADER_STAGE_VERTEX_BIT; case shader_type_fragment: return VK_SHADER_STAGE_FRAGMENT_BIT; default: assert(0); return (VkShaderStageFlagBits)0; } } }; struct Renderpass_Vk; struct Framebuffer_Vk; struct Context_Vk : public Context { int state; Device_Vk* dev; VkCommandBuffer cb; VkCommandPool pool; VkFence fence; VkSemaphore semaphore; VkPipeline last_pso; VkDescriptorSet last_dso; VkRenderPass last_rpo; VkFramebuffer last_fbo; void init_pool(); void init_cb(); void init_sync(); void init(Device_Vk* device); void begin_record(); Context_Vk& acquire(Device_Vk* device); void release(); void destroy(); std::pair begin_rp( const Render_Pass& rp ); void end_rp(Renderpass_Vk& rpo, Framebuffer_Vk& fbo); void check_end_rp(); }; struct Texture_Vk : public Texture, public Late_Terminated { VkImage image; VkImageView view; Vram_Allocator::Allocation 
memory; Resource_State state; void destroy(Device_Vk*) override; void set_name(Device_Vk* dev, const char* name); }; struct Buffer_Vk : public Buffer, public Late_Terminated { VkBuffer buf; VkDeviceSize size; Vram_Allocator::Allocation memory; int flags; void init(Device_Vk* dev, int flags, VkDeviceSize size); void destroy(Device_Vk* dev) override; void set_name(Device_Vk* dev, const char* name); static VkBufferUsageFlags get_usage(int flags) { VkBufferUsageFlags r = 0; if (flags & Buffer_Flags::index_buffer) r |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT; if (flags & Buffer_Flags::vertex_buffer) r |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; if (flags & Buffer_Flags::constant_buffer) r |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; if (flags & Buffer_Flags::storage_buffer) r |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; if (flags & Buffer_Flags::copy_src) r |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; if (flags & Buffer_Flags::copy_dst) r |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; return r; } static VkMemoryPropertyFlags get_memory_flags(int flags) { VkMemoryPropertyFlags r = 0; r |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; if (flags & Buffer_Flags::cpu_read) r |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; if (flags & Buffer_Flags::cpu_readwrite) r |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; return r; } }; struct Rpo_Key { bool is_first; Render_Pass rpo; bool operator==(const Rpo_Key& other) const { return is_first == other.is_first && rpo.layout_eq(other.rpo); } }; struct Fbo_Key { Render_Pass rpo; bool operator==(const Fbo_Key& other) const { return rpo.resources_eq(other.rpo); } }; struct Pso_Key { Pipeline pso; Rpo_Key rpo; bool operator==(const Pso_Key& other) const { return rpo == other.rpo && pso.pipeline_eq(other.pso) && pso.desc_layout_eq(other.pso); } }; struct Dso_Key { Pipeline pip; bool operator==(const Dso_Key& other) const { return pip.desc_resources_eq(other.pip); } }; struct Renderpass_Vk { VkRenderPass rpo; int age; void on_submit() { age = 0; } VkAttachmentLoadOp load_op_from_mode(Clear_Mode m); void 
init(Device_Vk* dev, const Rpo_Key& rp); void destroy(Device_Vk* dev); }; struct Framebuffer_Vk { VkFramebuffer fbo; int age; void on_submit() { age = 0; } void init( Device_Vk* dev, const Renderpass_Vk& rpo, const Render_Pass& rp ); void destroy(Device_Vk* dev); }; struct Pipeline_Vk { VkPipeline pip; VkPipelineLayout lay; VkDescriptorSetLayout dlay; int age; void init(Device_Vk* dev, const Pso_Key& desc); void destroy(Device_Vk* dev); void init_stages( Arena& scope, Device_Vk* dev, VkGraphicsPipelineCreateInfo& info, const Pipeline& desc ); void init_vertex_input( Arena& scope, Device_Vk* dev, VkGraphicsPipelineCreateInfo& info, const Pipeline& desc ); void init_input_assembly( Arena& scope, Device_Vk* dev, VkGraphicsPipelineCreateInfo& info, const Pipeline& desc ); void init_viewport( Arena& scope, VkGraphicsPipelineCreateInfo& info, const Pipeline& desc ); void init_rasterisation( Arena& scope, Device_Vk* dev, VkGraphicsPipelineCreateInfo& info, const Pipeline& desc ); void init_msaa( Arena& scope, Device_Vk* dev, VkGraphicsPipelineCreateInfo& info, const Pipeline& desc ); void init_depthstencil( Arena& scope, Device_Vk* dev, VkGraphicsPipelineCreateInfo& info, const Pipeline& desc ); void init_blending( Arena& scope, Device_Vk* dev, VkGraphicsPipelineCreateInfo& info, const Render_Pass& rp, const Pipeline& desc ); void init_layout( Device_Vk* dev, const Pipeline& desc ); void init_descriptors( Device_Vk* dev, const Pipeline& desc ); static VkCompareOp get_compare_op(Depth_Mode m); void on_submit() { age = 0; } }; struct Descriptor_Set_Vk { VkDescriptorPool dp; VkDescriptorSet dset; int age; void init( Device_Vk* dev, const Pipeline_Vk& pip, const Pipeline& desc ); void destroy(Device_Vk* dev); void on_submit() { age = 0; } }; struct Vertex_Format_Vk { VkVertexInputBindingDescription* bindings; int binding_count; VkVertexInputAttributeDescription* attrs; int attr_count; void init(Device_Vk* dev, const Vertex_Format_Desc& desc); void destroy(Device_Vk* dev); 
static VkFormat format_from_svar_type(SVariable_Type type); void clone(Arena* arena); void optimise(const Vertex_Format_Vk* shadervf); }; struct Sampler_Vk : public Late_Terminated { VkSampler sampler; Sampler_Id id; void init(Device_Vk* dev, const Sampler_State& s); void destroy(Device_Vk* dev) override; void set_name(Device_Vk* dev, const char* name); static VkFilter get_filter(Filter_Mode mode); static VkSamplerMipmapMode get_mipmap_mode(Filter_Mode mode); static VkSamplerAddressMode get_mode(Address_Mode mode); }; template<> struct Hash_Function { size_t operator()(const Rpo_Key& k) const { return (size_t)fnv1a64_2( k.rpo.layout_hash, (uint8_t*)&k.is_first, 1 ); } }; template<> struct Hash_Function { size_t operator()(const Fbo_Key& k) const { return k.rpo.resource_hash; } }; template<> struct Hash_Function { size_t operator()(const Pso_Key& k) const { uint64_t rpoh = Hash_Function{}(k.rpo); return fnv1a64_2( k.pso.pipeline_hash, (uint8_t*)&rpoh, sizeof rpoh ); } }; template<> struct Hash_Function { size_t operator()(const Dso_Key& k) const { return k.pip.descriptor_resource_hash; } }; template<> struct Hash_Function { size_t operator()(Texture_Id id) const { return id.index; } }; template<> struct Hash_Function { size_t operator()(Buffer_Id id) const { return id.index; } }; template<> struct Hash_Function { size_t operator()(Shader_Id id) const { return id.index; } }; template<> struct Hash_Function { size_t operator()(Vertex_Format_Id id) const { return id.index; } }; template<> struct Hash_Function { size_t operator()(Sampler_Id id) const { return id.index; } }; struct Shader_Loader : public Asset_Loader { Device_Vk* dev; void init(Device_Vk* d); Asset* load( Arena* a, Arena* s, const char* filename, Pack_File* f ) override; void unload(Asset* a) override; }; struct Texture_Loader : public Asset_Loader { Device_Vk* dev; static size_t calc_size(Texture_Format fmt, int w, int h); void init(Device_Vk* d); Asset* load( Arena* a, Arena* s, const char* filename, 
Pack_File* f ) override; void unload(Asset* a) override; }; struct Terminator { Late_Terminated* queue; void execute(Device_Vk* dev) { Late_Terminated* obj = queue; for (; obj; obj = obj->next) obj->destroy(dev); queue = 0; } void add(Late_Terminated* obj) { if (queue) { obj->next = queue; queue = obj; } else { obj->next = 0; queue = obj; } } }; struct Device_Vk : public Device { VkAllocationCallbacks ac; VkInstance inst; VkDevice dev; VkPhysicalDevice phys_dev; VkSurfaceKHR surf; uint32_t backbuffer_index; Texture_Id backbuffer_id; Swap_Cap swap_cap; VkPhysicalDeviceMemoryProperties mem_props; int queue_index; VkQueue queue; Swapchain swapchain; Context_Vk contexts[max_contexts]; Context_Vk* current_ctx; Shader_Loader shader_loader; Texture_Loader texture_loader; Vram_Allocator vrama; #ifdef DEBUG VkDebugUtilsMessengerEXT msg; #endif Hash_Map textures; Hash_Map buffers; Hash_Map< Vertex_Format_Id, Vertex_Format_Vk, max_vertex_formats > vertex_formats; Hash_Map shaders; Hash_Map samplers; uint32_t texture_count; uint32_t buffer_count; uint32_t vertex_format_count; uint32_t shader_count; uint32_t sampler_count; Hash_Map rpo_cache; Hash_Map fbo_cache; Hash_Map pso_cache; Hash_Map dso_cache; Terminator* terminators; uint32_t terminator_index; bool first_rp; Texture_Id depth; Texture_Id alloc_texture(); Buffer_Id alloc_buffer(); Vertex_Format_Id alloc_vf(); Shader_Id alloc_shader(); Sampler_Id alloc_sampler(); void init_internal(); void deinit_internal(); void init_ac(); void create_inst(const char** exts, int count); void create_dev(Swap_Cap* swap_cap); void find_exts(const char** exts, int& count); bool has_validation(); void init_validation(); void create_surf(); void on_resize_internal(int w, int h); Renderpass_Vk& create_rpo(const Rpo_Key& rp); Renderpass_Vk& get_rpo(const Rpo_Key& rp); Framebuffer_Vk& create_fbo( const Renderpass_Vk& rpo, const Fbo_Key& fb ); Framebuffer_Vk& get_fbo( const Renderpass_Vk& rpo, const Fbo_Key& fb ); Pipeline_Vk& create_pso(const 
Pso_Key& pip); Pipeline_Vk& get_pso(const Pso_Key& pop); Descriptor_Set_Vk& create_dso( const Pipeline_Vk& pip, const Dso_Key& k ); Descriptor_Set_Vk& get_dso( const Pipeline_Vk& pip, const Dso_Key& k ); template void collect_objects(List& list, int max_age, F f); void collect_garbage(); void queue_destroy(Late_Terminated* obj); void create_terminators(); void create_depth(int w, int h); int find_memory_type( uint32_t filter, VkMemoryPropertyFlags flags ); }; #ifdef DEBUG static VkBool32 debug_callback( VkDebugUtilsMessageSeverityFlagBitsEXT sev, VkDebugUtilsMessageTypeFlagsEXT type, const VkDebugUtilsMessengerCallbackDataEXT* data, void* uptr ) { (void)sev; (void)uptr; if (sev <= VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) return 0; switch (sev) { case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: print("%s\n", data->pMessage); break; case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: print_war("%s\n", data->pMessage); break; case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: print_err("%s\n", data->pMessage); pbreak((int)type); break; default: break; } return 0; } static VkResult create_dmesg( Device_Vk* d, const VkDebugUtilsMessengerCreateInfoEXT* information, const VkAllocationCallbacks* allocator, VkDebugUtilsMessengerEXT* messenger ) { PFN_vkCreateDebugUtilsMessengerEXT f; f = (PFN_vkCreateDebugUtilsMessengerEXT) vkGetInstanceProcAddr( d->inst, "vkCreateDebugUtilsMessengerEXT" ); return f? 
f(d->inst, information, allocator, messenger): VK_ERROR_EXTENSION_NOT_PRESENT; } static void destroy_dmesg( VkInstance instance, VkDebugUtilsMessengerEXT messenger, const VkAllocationCallbacks* allocator ) { PFN_vkDestroyDebugUtilsMessengerEXT f; f = (PFN_vkDestroyDebugUtilsMessengerEXT) vkGetInstanceProcAddr( instance, "vkDestroyDebugUtilsMessengerEXT" ); if (f) f(instance, messenger, allocator); } void Device_Vk::init_validation() { VkDebugUtilsMessengerCreateInfoEXT mi{}; VkResult r; mi.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; mi.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; mi.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; mi.pfnUserCallback = debug_callback; r = create_dmesg( this, &mi, &ac, &msg ); if (r != VK_SUCCESS) { print_err("Failed to create debug messenger.\n"); pbreak(r); } } #endif bool Device_Vk::has_validation() { unsigned count, i; int f; VkLayerProperties* props; VkResult r; r = vkEnumerateInstanceLayerProperties(&count, 0); if (!count || r != VK_SUCCESS) return 0; props = (VkLayerProperties*)heap_alloc(heap, count * sizeof *props); vkEnumerateInstanceLayerProperties(&count, props); for (f = 0, i = 0; i < count; i++) { if (strcmp( props[i].layerName, "VK_LAYER_KHRONOS_validation" )) { f = 1; break; } } heap_free(heap, props); return f; } void Device_Vk::find_exts(const char** exts, int& count) { app->get_vk_exts(exts, count); exts[count++] = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME; #ifdef DEBUG exts[count++] = VK_EXT_DEBUG_UTILS_EXTENSION_NAME; #endif } void Device_Vk::init_ac() { ac.pUserData = this; ac.pfnAllocation = vk_alloc; ac.pfnReallocation = vk_realloc; ac.pfnFree = vk_free; ac.pfnInternalAllocation = 0; ac.pfnInternalFree = 0; } void Device_Vk::create_inst(const char** 
exts, int ext_count) { VkInstanceCreateInfo ci{}; VkApplicationInfo ai{}; VkResult r; #ifdef DEBUG const char* vln = "VK_LAYER_KHRONOS_validation"; #endif ai.apiVersion = VK_API_VERSION_1_0; ai.pApplicationName = "C2"; ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; ci.pApplicationInfo = &ai; ci.enabledExtensionCount = (unsigned)ext_count; ci.ppEnabledExtensionNames = exts; #ifdef DEBUG ci.enabledLayerCount = has_validation(); ci.ppEnabledLayerNames = &vln; if (!ci.enabledLayerCount) print_war("No validation layers."); #endif r = vkCreateInstance(&ci, &ac, &inst); if (r != VK_SUCCESS) { print_err("Failed to create a Vulkan instance\n"); pbreak(r); } } static int proc_swap( Device_Vk* d, VkPhysicalDevice dev, Swap_Cap* sc ) { get_swap_cap(d, dev, d->surf, sc); return sc->fmt_count > 0 && sc->pm_count > 0; } int proc_qf(Device_Vk* d, VkPhysicalDevice dev) { unsigned fc, i; int r = 0; VkBool32 press; VkQueueFamilyProperties* fs, * p; vkGetPhysicalDeviceQueueFamilyProperties( dev, &fc, 0 ); fs = (VkQueueFamilyProperties*)heap_alloc(d->heap, (int)fc * sizeof *fs); vkGetPhysicalDeviceQueueFamilyProperties( dev, &fc, fs ); for (i = 0; i < fc; i++) { p = &fs[i]; vkGetPhysicalDeviceSurfaceSupportKHR( dev, i, d->surf, &press ); if ( p->queueFlags & VK_QUEUE_GRAPHICS_BIT && press ) { d->queue_index = (int)i; r = 1; goto fin; } } fin: heap_free(d->heap, fs); return r; } static int sup_exts(Device_Vk* d, VkPhysicalDevice dev) { int r = 0, i, f; unsigned c, j; int extc = sizeof *device_exts / sizeof *device_exts; VkExtensionProperties* avail; vkEnumerateDeviceExtensionProperties(dev, 0, &c, 0); avail = (VkExtensionProperties*)heap_alloc(d->heap, c * sizeof *avail); vkEnumerateDeviceExtensionProperties( dev, 0, &c, avail ); for (i = 0; i < extc; i++) { f = 0; for (j = 0; j < c; j++) { if (!strcmp(device_exts[i], avail[j].extensionName)) { f = 1; break; } } if (!f) goto fin; } r = 1; fin: heap_free(d->heap, avail); return r; } VkPhysicalDevice get_phys_dev(Device_Vk* d, 
Swap_Cap* sc) { unsigned dc, i; VkPhysicalDevice* devs, dev; vkEnumeratePhysicalDevices(d->inst, &dc, 0); if (!dc) { print_err( "Couldn't find any vulkan-capable graphics hardware.\n" ); pbreak(400); } devs = (VkPhysicalDevice*)heap_alloc(d->heap, (int)dc * sizeof *devs); vkEnumeratePhysicalDevices(d->inst, &dc, devs); for (i = 0; i < dc; i++) { dev = devs[i]; if ( proc_swap(d, dev, sc) && proc_qf(d, dev) && sup_exts(d, dev) ) { heap_free(d->heap, devs); return dev; } deinit_swap_cap(d, sc); } print_err("Couldn't find a suitable GPU.\n"); pbreak(401); heap_free(d->heap, devs); return 0; } void Device_Vk::create_dev(Swap_Cap* swap_cap) { const float priority = 0.0f; VkDeviceQueueCreateInfo qi{}; VkPhysicalDeviceCustomBorderColorFeaturesEXT border{}; VkDeviceCreateInfo di{}; VkPhysicalDeviceFeatures pdf{}; VkResult r; phys_dev = get_phys_dev(this, swap_cap); vkGetPhysicalDeviceMemoryProperties(phys_dev, &mem_props); border.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT; border.customBorderColors = true; qi.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; qi.queueFamilyIndex = queue_index; qi.queueCount = 1; qi.pQueuePriorities = &priority; di.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; di.pQueueCreateInfos = &qi; di.queueCreateInfoCount = 1; di.pEnabledFeatures = &pdf; di.enabledExtensionCount = sizeof device_exts / sizeof *device_exts; di.ppEnabledExtensionNames = device_exts; di.pNext = &border; r = vkCreateDevice( phys_dev, &di, &ac, &dev ); if (r != VK_SUCCESS) { print_err("Failed to create a Vulkan device.\n"); pbreak(r); } } void Device_Vk::init_internal() { const char* exts[16]; int ext_count = 0, i; gladLoaderLoadVulkan(0, 0, 0); textures.init(); texture_count = 1; buffers.init(); buffer_count = 1; vertex_formats.init(); vertex_format_count = 1; shaders.init(); shader_count = 1; samplers.init(); sampler_count = 1; rpo_cache.init(); fbo_cache.init(); pso_cache.init(); dso_cache.init(); shader_loader.init(this); 
texture_loader.init(this); register_asset_loader("CSH2", &shader_loader); register_asset_loader("TXTR", &texture_loader); find_exts(exts, ext_count); init_ac(); create_inst(exts, ext_count); #ifdef DEBUG if (has_validation()) init_validation(); #endif surf = app_create_vk_surface(app, inst); create_dev(&swap_cap); vrama.init(this); gladLoaderLoadVulkan(inst, phys_dev, dev); vkGetDeviceQueue(dev, (uint32_t)queue_index, 0, &queue); terminators = 0; terminator_index = 0; for (i = 0; i < max_contexts; i++) contexts[i].state = context_state_avail; swapchain.init(*app, this); create_terminators(); depth = 0; create_depth(swapchain.size.width, swapchain.size.height); } void Device_Vk::create_terminators() { int i, count = swapchain.image_count; if (terminators) { for (i = 0; i < count; i++) terminators[i].execute(this); heap_free(heap, terminators); } terminators = (Terminator*)heap_alloc( heap, count * sizeof *terminators ); for (i = 0; i < count; i++) { terminators[i].queue = 0; } } void Device_Vk::create_depth(int w, int h) { if (depth) destroy_texture(depth); depth = create_texture( "default depth", texture_format_d24s8, Texture_Flags::sampleable | Texture_Flags::depth_stencil_target, w, h, 0 ); } void Device_Vk::deinit_internal() { int i, image_count = swapchain.image_count; vkDeviceWaitIdle(dev); destroy_texture(depth); swapchain.destroy(this); deinit_swap_cap(this, &swap_cap); app_destroy_vk_surface(app, inst, surf); for (auto i : rpo_cache) i.second.destroy(this); for (auto i : fbo_cache) i.second.destroy(this); for (auto i : pso_cache) i.second.destroy(this); for (auto i : dso_cache) i.second.destroy(this); for (i = 0; i < max_contexts; i++) { auto& context = contexts[i]; if (context.state & context_state_init) context.destroy(); } for (i = 0; i < image_count; i++) { terminators[i].execute(this); } vrama.destroy(); vkDestroyDevice(dev, &ac); #ifdef DEBUG destroy_dmesg( inst, msg, &ac ); #endif vkDestroyInstance(inst, &ac); } void Device_Vk::on_resize_internal(int 
w, int h) { (void)w; (void)h; vkDeviceWaitIdle(dev); deinit_swap_cap(this, &swap_cap); get_swap_cap(this, phys_dev, surf, &swap_cap); swapchain.recreate(*app, this); create_terminators(); create_depth(swapchain.size.width, swapchain.size.height); } Renderpass_Vk& Device_Vk::create_rpo(const Rpo_Key& k) { Renderpass_Vk rpo; rpo.init(this, k); rpo.age = 0; auto& r = rpo_cache.set(k, rpo); #ifdef DEBUG if (hooks) hooks->on_rpo_create(rpo_cache.kaddr(k)->rpo); #endif return r; } Renderpass_Vk& Device_Vk::get_rpo(const Rpo_Key& rp) { Renderpass_Vk* rpo = rpo_cache.get(rp); if (!rpo) return create_rpo(rp); return *rpo; } Framebuffer_Vk& Device_Vk::create_fbo( const Renderpass_Vk& rpo, const Fbo_Key& k ) { auto& fb = k.rpo; Framebuffer_Vk fbo; fbo.init(this, rpo, fb); fbo.age = 0; auto& r = fbo_cache.set(k, fbo); #ifdef DEBUG if (hooks) hooks->on_fbo_create(fbo_cache.kaddr(k)->rpo); #endif return r; } Framebuffer_Vk& Device_Vk::get_fbo( const Renderpass_Vk& rpo, const Fbo_Key& fb ) { Framebuffer_Vk* fbo = fbo_cache.get(fb); if (!fbo) return create_fbo(rpo, fb); return *fbo; } Pipeline_Vk& Device_Vk::create_pso(const Pso_Key& pip) { Pipeline_Vk pso; pso.age = 0; pso.init(this, pip); auto& r = pso_cache.set(pip, pso); #ifdef DEBUG if (hooks) hooks->on_pso_create(pso_cache.kaddr(pip)->pso); #endif return r; } Pipeline_Vk& Device_Vk::get_pso(const Pso_Key& pip) { Pipeline_Vk* pso = pso_cache.get(pip); if (!pso) return create_pso(pip); return *pso; } Descriptor_Set_Vk& Device_Vk::create_dso( const Pipeline_Vk& pip, const Dso_Key& k ) { Descriptor_Set_Vk dso; dso.age = 0; dso.init(this, pip, k.pip); auto& r = dso_cache.set(k, dso); #ifdef DEBUG if (hooks) hooks->on_dso_create(dso_cache.kaddr(k)->pip); #endif return r; } Descriptor_Set_Vk& Device_Vk::get_dso( const Pipeline_Vk& pip, const Dso_Key& k ) { Descriptor_Set_Vk* dso = dso_cache.get(k); if (!dso) return create_dso(pip, k); return *dso; } template void Device_Vk::collect_objects(List& list, int max_age, F f) { for (auto 
i : list) { auto& obj = i.second; obj.age++; if (obj.age > max_age) { f(i); obj.destroy(this); list.remove(i.first); } } } void Device_Vk::collect_garbage() { int max_age = swapchain.image_count + 3; collect_objects(rpo_cache, max_age, [this](auto i){ #ifdef DEBUG if (hooks) hooks->on_rpo_destroy(rpo_cache.kaddr(i.first)->rpo); #endif }); collect_objects(fbo_cache, max_age, [this](auto i){ #ifdef DEBUG if (hooks) hooks->on_fbo_destroy(fbo_cache.kaddr(i.first)->rpo); #endif }); collect_objects(pso_cache, max_age, [this](auto i){ #ifdef DEBUG if (hooks) hooks->on_pso_destroy(pso_cache.kaddr(i.first)->pso); #endif }); collect_objects(dso_cache, max_age, [this](auto i){ #ifdef DEBUG if (hooks) hooks->on_dso_destroy(dso_cache.kaddr(i.first)->pip); #endif }); } void Device_Vk::queue_destroy(Late_Terminated* obj) { terminators[terminator_index].add(obj); } int Device_Vk::find_memory_type( uint32_t filter, VkMemoryPropertyFlags flags ) { int i, e = mem_props.memoryTypeCount; auto* types = mem_props.memoryTypes; for (i = 0; i < e; i++) { if ( (filter & (1 << i)) && (types[i].propertyFlags & flags) == flags ) return i; } return -1; } VkAttachmentLoadOp Renderpass_Vk::load_op_from_mode( Clear_Mode m ) { switch (m) { case Clear_Mode::discard: return VK_ATTACHMENT_LOAD_OP_DONT_CARE; case Clear_Mode::clear: return VK_ATTACHMENT_LOAD_OP_CLEAR; case Clear_Mode::restore: return VK_ATTACHMENT_LOAD_OP_LOAD; } assert(0); return VK_ATTACHMENT_LOAD_OP_DONT_CARE; } void Renderpass_Vk::init( Device_Vk* dev, const Rpo_Key& rpk ) { VkRenderPassCreateInfo ri{}; VkAttachmentDescription ads[2]; VkAttachmentReference car, dar; VkSubpassDescription sd{}; VkResult r; auto& rp = rpk.rpo; bool has_depth = rp.depth.id; int count = 0, i, c = rp.colour_count; zero(ads, sizeof ads); for (i = 0; i < c; i++) { int index = count++; auto& colour = rp.colours[i]; auto& ad = ads[index]; ad.format = get_vk_format(colour.fmt); ad.samples = VK_SAMPLE_COUNT_1_BIT; ad.loadOp = load_op_from_mode(colour.mode); 
ad.storeOp = VK_ATTACHMENT_STORE_OP_STORE; ad.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; ad.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; if (rpk.is_first) ad.initialLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; else ad.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; ad.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; car.attachment = index; car.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; } if (has_depth) { int i = count++; auto& ad = ads[i]; ad.format = get_vk_format(dev->get_texture(rp.depth.id).fmt); ad.samples = VK_SAMPLE_COUNT_1_BIT; ad.loadOp = load_op_from_mode(rp.depth.mode); ad.storeOp = VK_ATTACHMENT_STORE_OP_STORE; ad.stencilLoadOp = ad.loadOp; ad.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; if (rp.depth.mode != Clear_Mode::restore) ad.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; else ad.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; ad.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; dar.attachment = i; dar.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; } sd.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; sd.colorAttachmentCount = rp.colour_count; sd.pColorAttachments = &car; sd.pDepthStencilAttachment = has_depth? 
&dar: 0; ri.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; ri.attachmentCount = count; ri.pAttachments = ads; ri.subpassCount = 1; ri.pSubpasses = &sd; r = vkCreateRenderPass(dev->dev, &ri, &dev->ac, &rpo); if (r != VK_SUCCESS) { print_err("Failed to create a render pass\n"); pbreak(r); } } void Renderpass_Vk::destroy(Device_Vk* dev) { vkDestroyRenderPass(dev->dev, rpo, &dev->ac); } void Framebuffer_Vk::init( Device_Vk* dev, const Renderpass_Vk& rpo, const Render_Pass& rp ) { bool has_depth = rp.depth.id; int i, count = 0, w, h; VkImageView atts[2]; VkResult r; VkFramebufferCreateInfo fbi{}; for (i = 0; i < rp.colour_count; i++) { const auto& tar = rp.colours[i]; const Texture_Vk& texture = *(const Texture_Vk*)&dev->get_texture(tar.id); atts[count++] = texture.view; w = texture.w; h = texture.h; } if (has_depth) { const Texture_Vk& texture = *(const Texture_Vk*)&dev->get_texture(rp.depth.id); atts[count++] = texture.view; w = texture.w; h = texture.h; } fbi.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; fbi.renderPass = rpo.rpo; fbi.width = w; fbi.height = h; fbi.layers = 1; fbi.attachmentCount = count; fbi.pAttachments = atts; r = vkCreateFramebuffer(dev->dev, &fbi, &dev->ac, &fbo); if (r != VK_SUCCESS) { print_err("Failed to create a framebuffer.\n"); pbreak(r); } } void Framebuffer_Vk::destroy(Device_Vk* dev) { vkDestroyFramebuffer(dev->dev, fbo, &dev->ac); } static int get_image_count(const Swap_Cap& s) { const VkSurfaceCapabilitiesKHR& cap = s.cap; return cap.minImageCount + (cap.minImageCount < cap.maxImageCount); } static VkExtent2D choose_swap_extent(const App& app, const VkSurfaceCapabilitiesKHR& cap) { VkExtent2D r = { (uint32_t)app.w, (uint32_t)app.h }; r.width = std::min(r.width, cap.maxImageExtent.width); r.height = std::min(r.height, cap.maxImageExtent.height); r.width = std::max(r.width, cap.minImageExtent.width); r.height = std::max(r.height, cap.minImageExtent.height); return r; } static VkSurfaceFormatKHR choose_swap_format(const Swap_Cap& 
cap) {
	unsigned i;
	/* look for 8-bit BGRA sRGB in the sRGB non-linear colour space */
	for (i = 0; i < cap.fmt_count; i++) {
		const auto& fmt = cap.fmts[i];
		if (
			fmt.format == VK_FORMAT_B8G8R8A8_SRGB &&
			fmt.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR
		) return fmt;
	}
	print_err("Failed to find a surface that supports VK_FORMAT_B8G8R8A8_SRGB.\n");
	/* fall back to the first format the surface lists */
	return cap.fmts[0];
}

/* Picks the present mode; currently always FIFO, both arguments are
 * ignored. */
static VkPresentModeKHR choose_swap_mode(const Swap_Cap& cap, bool vsync) {
	(void)vsync;
	(void)cap;
	/* todo */
	return VK_PRESENT_MODE_FIFO_KHR;
}

/* Creates a 2D image view over mip level 0 / array layer 0 of
 * `image` with the given format and aspect flags. */
static VkImageView make_view(
	Device_Vk* dev,
	VkImage image,
	VkFormat fmt,
	VkImageAspectFlags flags
) {
	VkImageViewCreateInfo vi{};
	VkResult r;
	VkImageView view;
	vi.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
	vi.image = image;
	vi.viewType = VK_IMAGE_VIEW_TYPE_2D;
	vi.format = fmt;
	vi.subresourceRange.aspectMask = flags;
	vi.subresourceRange.baseMipLevel = 0;
	vi.subresourceRange.levelCount = 1;
	vi.subresourceRange.baseArrayLayer = 0;
	vi.subresourceRange.layerCount = 1;
	r = vkCreateImageView(dev->dev, &vi, &dev->ac, &view);
	if (r != VK_SUCCESS) {
		print_err("Failed to make image view.\n");
		pbreak((int)r);
	}
	return view;
}

/* First-time initialisation: clears the handles that initr() reads
 * (the old swapchain handle and the texture list) and then builds
 * the swapchain. */
void Swapchain::init(const App& app, Device_Vk* dev) {
	swapchain = (VkSwapchainKHR)0;
	textures = 0;
	initr(app, dev);
}

/* (Re)creates the swapchain, its image-available semaphore and the
 * backbuffer textures. The current value of `swapchain` is passed
 * as oldSwapchain. */
void Swapchain::initr(const App& app, Device_Vk* dev) {
	image_count = get_image_count(dev->swap_cap);
	size = choose_swap_extent(app, dev->swap_cap.cap);
	format = choose_swap_format(dev->swap_cap);
	mode = choose_swap_mode(dev->swap_cap, false);
	{
		VkResult r;
		VkSwapchainCreateInfoKHR si{};
		/* NOTE(review): the comma after sType acts as the comma
		 * operator; both assignments still execute */
		si.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
		si.surface = dev->surf;
		si.minImageCount = image_count;
		si.imageFormat = format.format;
		si.imageColorSpace = format.colorSpace;
		si.imageExtent = size;
		si.imageArrayLayers = 1;
		si.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
		si.preTransform = dev->swap_cap.cap.currentTransform;
		si.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
		si.presentMode = mode;
		si.clipped = VK_TRUE;
		si.oldSwapchain = swapchain;
		si.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
		r = vkCreateSwapchainKHR(dev->dev, &si, &dev->ac, &swapchain);
		if (r != VK_SUCCESS) {
			print_err("Failed to create swapchain.\n");
			pbreak(r);
		}
	}
	{
		VkResult r;
		VkSemaphoreCreateInfo si{};
		si.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
		r = vkCreateSemaphore(dev->dev, &si, &dev->ac, &image_avail);
		if (r != VK_SUCCESS) {
			print_err("Failed to create a semaphore.\n");
			pbreak(r);
		}
	}
	get_images(dev);
}

/* Rebuilds the swapchain: keeps a copy of the current state, waits
 * for the device to go idle, creates the new objects and then
 * destroys the ones held by the copy. */
void Swapchain::recreate(const App& app, Device_Vk* dev) {
	Swapchain old = *this;
	vkDeviceWaitIdle(dev->dev);
	initr(app, dev);
	old.destroy(dev);
}

/* Allocates a texture slot and fills it in as an alias of an
 * existing swapchain image and view. */
Texture_Id Swapchain::create_image(
	Device_Vk* dev,
	VkImage image,
	VkImageView view,
	int w,
	int h
) {
	Texture_Id id = dev->alloc_texture();
	Texture_Vk& tex = *(Texture_Vk*)&dev->get_texture(id);
	tex.image = image;
	tex.view = view;
	tex.w = w;
	tex.h = h;
	tex.fmt = texture_format_bgra8i_srgb;
	tex.alias = true;
	return id;
}

/* Queries the swapchain images, wraps each one in a texture and
 * records a transition of every image to the presentable state on
 * a context that is submitted before returning. */
void Swapchain::get_images(Device_Vk* dev) {
	unsigned count;
	int i;
	VkImage* images;
	/* first call only fetches the image count */
	vkGetSwapchainImagesKHR(dev->dev, swapchain, &count, 0);
	Context& ctx = dev->acquire();
	image_count = count;
	images = (VkImage*)heap_alloc(
		dev->heap,
		sizeof *images * image_count
	);
	textures = (Texture_Id*)heap_alloc(
		dev->heap,
		sizeof *textures * image_count
	);
	vkGetSwapchainImagesKHR(dev->dev, swapchain, &count, images);
	for (i = 0; i < image_count; i++) {
		VkImageView view = make_view(dev,
			images[i],
			format.format,
			VK_IMAGE_ASPECT_COLOR_BIT
		);
		textures[i] = create_image(
			dev,
			images[i],
			view,
			size.width,
			size.height
		);
		/* needs to be presentable since the first renderpass
		 * will expect it to be presentable from "last" frame */
		ctx.transition(textures[i], Resource_State::presentable);
	}
	dev->submit(ctx);
	heap_free(dev->heap, images);
}

/* Hands every backbuffer texture to dev->destroy_texture(), then
 * destroys the semaphore and the swapchain and frees the texture
 * id array. */
void Swapchain::destroy(Device_Vk* dev) {
	int i;
	for (i = 0; i < image_count; i++)
		dev->destroy_texture(textures[i]);
	vkDestroySemaphore(dev->dev, image_avail, &dev->ac);
	vkDestroySwapchainKHR(dev->dev, swapchain, &dev->ac);
	heap_free(dev->heap, textures);
	textures
= 0;
}

/* Allocates a Device_Vk from the arena, placement-constructs it and
 * initialises it. */
Device* Device::create(Arena* a, App* ap) {
	Device_Vk* d = (Device_Vk*)arena_alloc(a, sizeof *d);
	new(d) Device_Vk();
	d->init(a, ap);
	return d;
}

/* Stores the arena and app, carves the device heap out of the arena
 * and runs the Vulkan-specific initialisation. */
void Device::init(Arena* a, App* ap) {
	void* hm;
	arena = a;
	app = ap;
	hm = arena_alloc(a, device_heap_size);
	heap = (Heap*)arena_alloc(a, sizeof *heap);
	init_heap(heap, hm, device_heap_size);
	hooks = 0;
	((Device_Vk*)this)->init_internal();
}

/* Forwards to the Vulkan-specific teardown. */
void Device::destroy() {
	((Device_Vk*)this)->deinit_internal();
}

/* Installs `h` as the debug hook receiver and points it at this
 * device. */
void Device::register_hooks(Device_Debug_Hooks* h) {
	h->dev = this;
	hooks = h;
}

/* Forwards the current app size to the Vulkan resize handler. */
void Device::on_resize() {
	((Device_Vk*)this)->on_resize_internal(app->w, app->h);
}

/* Starts a frame: collects stale cached objects, acquires the
 * frame's context, advances to the next terminator slot and executes
 * it, acquires the next swapchain image and marks the next render
 * pass as the first of the frame. */
void Device::begin_frame() {
	Device_Vk* dev = (Device_Vk*)this;
	dev->collect_garbage();
	dev->current_ctx = (Context_Vk*)&acquire();
	dev->terminator_index++;
	dev->terminator_index %= dev->swapchain.image_count;
	dev->terminators[dev->terminator_index].execute(dev);
	/* NOTE(review): the VkResult of the acquire is not checked */
	vkAcquireNextImageKHR(
		dev->dev,
		dev->swapchain.swapchain,
		UINT64_MAX,
		dev->swapchain.image_avail,
		VK_NULL_HANDLE,
		&dev->backbuffer_index
	);
	dev->backbuffer_id = dev->swapchain.textures[dev->backbuffer_index];
	dev->first_rp = true;
}

/* Ends any open render pass, submits the context's command buffer
 * signalling its fence, waits for the fence and releases the
 * context. */
void Device::submit(Context& ctx_) {
	Context_Vk* ctx = (Context_Vk*)&ctx_;
	Device_Vk* dev = (Device_Vk*)this;
	VkSubmitInfo si{};
	si.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
/*	si.waitSemaphoreCount = 1;
	si.pWaitSemaphores = &ctx->semaphore;
	si.pWaitDstStageMask = &stage;
	si.signalSemaphoreCount = 1;
	si.pSignalSemaphores = &ctx->semaphore;*/
	si.commandBufferCount = 1;
	si.pCommandBuffers = &ctx->cb;
	ctx->check_end_rp();
	vkEndCommandBuffer(ctx->cb);
	vkQueueSubmit(dev->queue, 1, &si, ctx->fence);
	ctx->wait();
#ifdef DEBUG
	if (dev->hooks)
		dev->hooks->on_submit(*ctx);
#endif
	ctx->release();
}

/* Finishes the frame context: transitions the backbuffer to the
 * presentable state, submits the command buffer waiting on the
 * swapchain's image-available semaphore and signalling the context
 * semaphore, then queues the present waiting on that semaphore. */
void Device::present() {
	Device_Vk* dev = (Device_Vk*)this;
	Context_Vk* ctx = dev->current_ctx;
	VkPresentInfoKHR pi{};
	VkSubmitInfo si{};
	VkPipelineStageFlags stage =
		VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
	ctx->check_end_rp();
	ctx->transition(
		dev->get_backbuffer(),
		Resource_State::presentable
	);
	si.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
	si.waitSemaphoreCount = 1;
	si.pWaitSemaphores = &dev->swapchain.image_avail;
	si.pWaitDstStageMask = &stage;
	si.signalSemaphoreCount = 1;
	si.pSignalSemaphores = &ctx->semaphore;
	si.commandBufferCount = 1;
	si.pCommandBuffers = &ctx->cb;
	vkEndCommandBuffer(ctx->cb);
	vkQueueSubmit(dev->queue, 1, &si, ctx->fence);
	pi.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
	pi.waitSemaphoreCount = 1;
	pi.pWaitSemaphores = &ctx->semaphore;
	pi.swapchainCount = 1;
	pi.pSwapchains = &dev->swapchain.swapchain;
	pi.pImageIndices = &dev->backbuffer_index;
	vkQueuePresentKHR(dev->queue, &pi);
#ifdef DEBUG
	if (hooks) {
		hooks->on_submit(*ctx);
		hooks->on_present(*ctx);
	}
#endif
	ctx->release();
}

/* Texture id of the swapchain image acquired for this frame. */
Texture_Id Device::get_backbuffer() {
	return ((Device_Vk*)this)->backbuffer_id;
}

/* Texture id of the device's depth target. */
Texture_Id Device::get_depth_target() {
	return ((Device_Vk*)this)->depth;
}

/* Looks up a texture by id; the id's index must be non-zero. */
Texture& Device::get_texture(Texture_Id id) {
	assert(id.index);
	return ((Device_Vk*)this)->textures[id];
}

/* Reserves the next texture id and stores a zero-initialised
 * texture under it. */
Texture_Id Device_Vk::alloc_texture() {
	Texture_Vk tex{};
	Texture_Id id(texture_count++);
	tex.id = id;
	textures.set(id, tex);
	return id;
}

/* Reserves the next buffer id and stores a zero-initialised buffer
 * under it. */
Buffer_Id Device_Vk::alloc_buffer() {
	Buffer_Vk buf{};
	Buffer_Id id(buffer_count++);
	buf.id = id;
	buffers.set(id, buf);
	return id;
}

/* Reserves the next vertex format id and stores a zero-initialised
 * vertex format under it. */
Vertex_Format_Id Device_Vk::alloc_vf() {
	Vertex_Format_Vk vf{};
	Vertex_Format_Id id(vertex_format_count++);
	vertex_formats.set(id, vf);
	return id;
}

/* Allocates a vertex format slot and initialises it from `desc`. */
Vertex_Format_Id Device::create_vertex_format(
	const Vertex_Format_Desc& desc
) {
	Device_Vk* dev = (Device_Vk*)this;
	Vertex_Format_Id id = dev->alloc_vf();
	dev->vertex_formats[id].init(dev, desc);
	return id;
}

/* Destroys the vertex format stored under `id`. */
void Device::destroy_vertex_format(Vertex_Format_Id id) {
	Device_Vk* dev = (Device_Vk*)this;
	Vertex_Format_Vk& vf = dev->vertex_formats[id];
	vf.destroy(dev);
}

/* Reserves the next shader id and stores a zero-initialised shader
 * under it. */
Shader_Id Device_Vk::alloc_shader() {
	Shader_Vk s{};
	Shader_Id id(shader_count++);
	s.id = id;
	shaders.set(id, s);
	return id;
}

/* Reserves the next sampler id and stores a zero-initialised
 * sampler under it. */
Sampler_Id Device_Vk::alloc_sampler() {
	Sampler_Vk s{};
	Sampler_Id id(sampler_count++);
	samplers.set(id, s);
	return id;
}

/* Queues the texture for destruction via queue_destroy(). */
void Device::destroy_texture(Texture_Id id) {
	Device_Vk* dev = (Device_Vk*)this;
	dev->queue_destroy((Texture_Vk*)&dev->get_texture(id));
}

/* Destroys the texture immediately. */
void Device::destroy_texturei(Texture_Id id) {
	Device_Vk* dev = (Device_Vk*)this;
	((Texture_Vk*)&dev->get_texture(id))->destroy(dev);
}

/* Blocks until the context's fence is signalled. */
void Context::wait() {
	Context_Vk* ctx = (Context_Vk*)this;
	Device_Vk* dev = ctx->dev;
	vkWaitForFences(
		dev->dev,
		1,
		&ctx->fence,
		VK_TRUE,
		UINT64_MAX
	);
}

/* Records one draw: resolves the cached pipeline and (if the
 * pipeline has descriptors) descriptor set, begins the render pass,
 * binds state that differs from the last recorded state, binds the
 * vertex/index buffers and issues the draw call. */
void Context::submit(
	const Draw& draw,
	const Pipeline& p,
	const Render_Pass& rp
) {
	Context_Vk* ctx = (Context_Vk*)this;
	Device_Vk* dev = ctx->dev;
	Vertex_Buffer_Binding* binding;
	Rpo_Key rpo_key = { dev->first_rp, rp };
	Pso_Key pso_key = { p, rpo_key };
	Pipeline_Vk& pso = dev->get_pso(pso_key);
	Descriptor_Set_Vk* dso = 0;
	if (p.descriptor_count)
		dso = &dev->get_dso(pso, *(Dso_Key*)&p);
	auto [rpo, fbo] = ctx->begin_rp(rp);
	Texture_Vk& target = *(Texture_Vk*)&dev->get_texture(
		dev->get_backbuffer()
	);
	target.state = Resource_State::render_target;
	if (pso.pip != ctx->last_pso)
		vkCmdBindPipeline(
			ctx->cb,
			VK_PIPELINE_BIND_POINT_GRAPHICS,
			pso.pip
		);
	ctx->last_pso = pso.pip;
	if (dso && dso->dset != ctx->last_dso) {
		vkCmdBindDescriptorSets(
			ctx->cb,
			VK_PIPELINE_BIND_POINT_GRAPHICS,
			pso.lay,
			0,
			1,
			&dso->dset,
			0,
			0
		);
		ctx->last_dso = dso->dset;
	}
	/* the binding list ends at the first entry with a null id;
	 * NOTE(review): every entry is bound to binding slot 0 */
	for (binding = draw.verts; binding->id; binding++) {
		VkBuffer buf = ((Buffer_Vk*)&dev->get_buffer(binding->id))->buf;
		VkDeviceSize offset = (VkDeviceSize)binding->offset;
		vkCmdBindVertexBuffers(ctx->cb, 0, 1, &buf, &offset);
	}
	if (draw.inds.id) {
		const Index_Buffer_Binding& inds = draw.inds;
		VkBuffer buf = ((Buffer_Vk*)&dev->get_buffer(inds.id))->buf;
		VkDeviceSize offset = (VkDeviceSize)inds.offset;
		/* indices are always read as 16-bit */
		vkCmdBindIndexBuffer(
			ctx->cb,
			buf,
			offset,
			VK_INDEX_TYPE_UINT16
		);
		vkCmdDrawIndexed(
			ctx->cb,
			draw.vertex_count,
			draw.instance_count,
			draw.first_vertex,
			draw.vertex_offset,
			draw.first_instance
		);
	} else {
		vkCmdDraw(
			ctx->cb,
			draw.vertex_count,
draw.instance_count, draw.first_vertex, draw.first_instance ); } ctx->end_rp(rpo, fbo); pso.on_submit(); if (dso) dso->on_submit(); } void Context::submit( const Draw* draws, int count, const Pipeline& p, const Render_Pass& rp ) { Context_Vk* ctx = (Context_Vk*)this; Device_Vk* dev = ctx->dev; (void)draws; (void)count; (void)p; (void)rp; (void)dev; assert(0); /* todo */ } void Context::copy(Buffer_Id dst, Buffer_Id src) { Context_Vk* ctx = (Context_Vk*)this; Device_Vk* dev = ctx->dev; Buffer_Vk& a = *(Buffer_Vk*)&dev->get_buffer(dst); Buffer_Vk& b = *(Buffer_Vk*)&dev->get_buffer(src); VkBufferCopy region{}; region.srcOffset = 0; region.dstOffset = 0; region.size = b.size; ctx->check_end_rp(); vkCmdCopyBuffer( ctx->cb, b.buf, a.buf, 1, ®ion ); } void Context::copy(Texture_Id dst, Buffer_Id src) { Context_Vk* ctx = (Context_Vk*)this; Device_Vk* dev = ctx->dev; Texture_Vk& a = *(Texture_Vk*)&dev->get_texture(dst); Buffer_Vk& b = *(Buffer_Vk*)&dev->get_buffer(src); VkBufferImageCopy c{}; c.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; c.imageSubresource.layerCount = 1; c.imageExtent.width = a.w; c.imageExtent.height = a.h; c.imageExtent.depth = 1; ctx->check_end_rp(); vkCmdCopyBufferToImage( ctx->cb, b.buf, a.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &c ); } void Context::transition(Texture_Id id, Resource_State state) { Context_Vk* ctx = (Context_Vk*)this; Device_Vk* dev = ctx->dev; Texture_Vk& tex = *(Texture_Vk*)&dev->get_texture(id); VkImageMemoryBarrier b{}; VkImageLayout src_layout = state_to_image_layout(tex.state); VkImageLayout dst_layout = state_to_image_layout(state); VkPipelineStageFlags src_stage, dst_stage; if (tex.state == state) return; tex.state = state; b.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; b.oldLayout = src_layout; b.newLayout = dst_layout; b.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; b.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; b.image = tex.image; b.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; 
b.subresourceRange.baseMipLevel = 0; b.subresourceRange.levelCount = 1; b.subresourceRange.baseArrayLayer = 0; b.subresourceRange.layerCount = 1; if ( tex.fmt == texture_format_d16 || tex.fmt == texture_format_d24s8 || tex.fmt == texture_format_d32 ) { if (src_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) src_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; if (dst_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) dst_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; b.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; b.oldLayout = src_layout; b.newLayout = dst_layout; } if ( src_layout == VK_IMAGE_LAYOUT_UNDEFINED && dst_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL ) { b.srcAccessMask = 0; b.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; src_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; dst_stage = VK_PIPELINE_STAGE_TRANSFER_BIT; } else if ( src_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && dst_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL ) { b.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; b.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; src_stage = VK_PIPELINE_STAGE_TRANSFER_BIT; dst_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; } else if ( src_layout == VK_IMAGE_LAYOUT_UNDEFINED && dst_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL ) { b.srcAccessMask = 0; b.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; dst_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; } else if ( src_layout == VK_IMAGE_LAYOUT_UNDEFINED && dst_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL ) { b.srcAccessMask = 0; b.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; src_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; dst_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; } else if ( src_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && dst_layout == VK_IMAGE_LAYOUT_PRESENT_SRC_KHR ) { b.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; 
b.dstAccessMask = 0; src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; dst_stage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; } else if ( src_layout == VK_IMAGE_LAYOUT_UNDEFINED && dst_layout == VK_IMAGE_LAYOUT_PRESENT_SRC_KHR ) { b.srcAccessMask = 0; b.dstAccessMask = 0; src_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; dst_stage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; } else if ( src_layout == VK_IMAGE_LAYOUT_UNDEFINED && dst_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ) { b.srcAccessMask = 0; b.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; src_stage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; dst_stage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; } else { print_err("Bad resource transition.\n"); pbreak(389); } vkCmdPipelineBarrier( ctx->cb, src_stage, dst_stage, 0, 0, 0, 0, 0, 1, &b ); } void Context::debug_push(const char* name) { #ifdef DEBUG VkDebugUtilsLabelEXT l{}; Context_Vk* ctx = (Context_Vk*)this; l.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; l.pLabelName = name; vkCmdBeginDebugUtilsLabelEXT(ctx->cb, &l); #else (void)name; #endif } void Context::debug_pop() { #ifdef DEBUG Context_Vk* ctx = (Context_Vk*)this; vkCmdEndDebugUtilsLabelEXT(ctx->cb); #endif } std::pair Context_Vk::begin_rp( const Render_Pass& rp ) { Renderpass_Vk& rpo = dev->get_rpo({ dev->first_rp, rp}); Framebuffer_Vk& fbo = dev->get_fbo(rpo, { rp }); VkRenderPassBeginInfo rpbi{}; VkClearValue clears[max_colour_attachments + 1]; int i, c = rp.colour_count, clear_count = 0; bool has_depth = rp.depth.id; if (last_rpo == rpo.rpo && last_fbo == fbo.fbo) return { rpo, fbo }; check_end_rp(); last_rpo = rpo.rpo; last_fbo = fbo.fbo; for (i = 0; i < c; i++) { VkClearValue clear{}; const auto& tar = rp.colours[i]; const auto col = tar.clear.colour; clear.color.float32[0] = (float)col.r / 255.0f; clear.color.float32[1] = 
(float)col.g / 255.0f; clear.color.float32[2] = (float)col.b / 255.0f; clear.color.float32[3] = (float)col.a / 255.0f; clears[clear_count++] = clear; } if (has_depth) { VkClearValue dc{}; dc.depthStencil.depth = rp.depth.clear.depth; dc.depthStencil.stencil = 0; /* todo */ clears[clear_count++] = dc; if (rp.depth.mode == Clear_Mode::restore) transition(rp.depth.id, Resource_State::render_target); } rpbi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; rpbi.renderPass = rpo.rpo; rpbi.framebuffer = fbo.fbo; rpbi.renderArea.extent = dev->swapchain.size; rpbi.clearValueCount = clear_count; rpbi.pClearValues = clears; vkCmdBeginRenderPass( cb, &rpbi, VK_SUBPASS_CONTENTS_INLINE ); dev->first_rp = false; return { rpo, fbo }; } void Context_Vk::end_rp(Renderpass_Vk& rpo, Framebuffer_Vk& fbo) { rpo.on_submit(); fbo.on_submit(); } void Context_Vk::check_end_rp() { if (last_rpo) { vkCmdEndRenderPass(cb); last_rpo = 0; } } void Context::submit(const Render_Pass& rp) { Context_Vk* ctx = (Context_Vk*)this; auto [rpo, fbo] = ctx->begin_rp(rp); ctx->end_rp(rpo, fbo); } void Context_Vk::init_pool() { VkCommandPoolCreateInfo pi{}; VkResult r; pi.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; pi.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; pi.queueFamilyIndex = (uint32_t)dev->queue_index; r = vkCreateCommandPool(dev->dev, &pi, &dev->ac, &pool); if (r != VK_SUCCESS) { print_err("Failed to create a command pool.\n"); pbreak(r); } } void Context_Vk::init_cb() { VkCommandBufferAllocateInfo ci{}; VkResult r; ci.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; ci.commandPool = pool; ci.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; ci.commandBufferCount = 1; r = vkAllocateCommandBuffers(dev->dev, &ci, &cb); if (r != VK_SUCCESS) { print_err("Failed to allocate a command buffer.\n"); pbreak(r); } } void Context_Vk::init_sync() { VkFenceCreateInfo fi{}; VkSemaphoreCreateInfo si{}; VkResult r; fi.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; fi.flags = 
VK_FENCE_CREATE_SIGNALED_BIT; r = vkCreateFence(dev->dev, &fi, &dev->ac, &fence); if (r != VK_SUCCESS) { print_err("Failed to create a fence.\n"); pbreak(r); } si.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; r = vkCreateSemaphore(dev->dev, &si, &dev->ac, &semaphore); if (r != VK_SUCCESS) { print_err("Failed to create a semaphore.\n"); pbreak(r); } } void Context_Vk::init(Device_Vk* device) { dev = device; init_pool(); init_cb(); init_sync(); state |= context_state_init; } void Context_Vk::begin_record() { VkCommandBufferBeginInfo bi{}; bi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; wait(); vkResetFences(dev->dev, 1, &fence); vkResetCommandBuffer(cb, 0); vkBeginCommandBuffer(cb, &bi); last_pso = 0; last_dso = 0; last_rpo = 0; last_fbo = 0; } Context_Vk& Context_Vk::acquire(Device_Vk* device) { if (~state & context_state_init) init(device); state &= ~context_state_avail; begin_record(); #ifdef DEBUG if (device->hooks) device->hooks->on_acquire(*this); #endif return *this; } void Context_Vk::release() { state |= context_state_avail; } void Context_Vk::destroy() { state &= ~context_state_init; vkDestroyCommandPool(dev->dev, pool, &dev->ac); vkDestroySemaphore(dev->dev, semaphore, &dev->ac); vkDestroyFence(dev->dev, fence, &dev->ac); } Context& Device::acquire() { Device_Vk* vk = (Device_Vk*)this; int i; for (i = 0; i < max_contexts; i++) { if (vk->contexts[i].state & context_state_avail) return vk->contexts[i].acquire(vk); } print_err("Too many active contexts!\n"); print("Probably a submit was missed.\n"); pbreak(10000); return vk->contexts[0]; } Context& Device::get_ctx() { Device_Vk* vk = (Device_Vk*)this; return *vk->current_ctx; } void Pipeline_Vk::init_stages( Arena& scope, Device_Vk* dev, VkGraphicsPipelineCreateInfo& info, const Pipeline& desc ) { int count = 0, i; Shader_Vk& shader = *(Shader_Vk*)&dev->get_shader(desc.shader); for (i = 0; i < shader_type_count; i++) { if (shader.modules[i]) count++; } VkPipelineShaderStageCreateInfo* sis = 
(VkPipelineShaderStageCreateInfo*)arena_alloc( &scope, sizeof *sis * count ); zero(sis, sizeof *sis * count); for (i = 0, count = 0; i < shader_type_count; i++) { if (shader.modules[i]) { auto& si = sis[i]; si.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; si.flags = 0; si.stage = Shader_Vk::stage((Shader_Type)i); si.module = shader.modules[i]; si.pName = shader.entrypoints[i]; count++; } } info.stageCount = count; info.pStages = sis; } void Pipeline_Vk::init_vertex_input( Arena& scope, Device_Vk* dev, VkGraphicsPipelineCreateInfo& info, const Pipeline& desc ) { Vertex_Format_Vk vf = dev->vertex_formats[desc.vertex_format]; VkPipelineVertexInputStateCreateInfo& vi = *(VkPipelineVertexInputStateCreateInfo*)arena_alloc( &scope, sizeof vi ); zero(&vi, sizeof vi); auto& shader = dev->get_shader(desc.shader); if (shader.vf != desc.vertex_format) { auto shader_vf = (Vertex_Format_Vk*)&dev->vertex_formats[shader.vf]; vf.clone(&scope); vf.optimise(shader_vf); } vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; vi.vertexBindingDescriptionCount = vf.binding_count; vi.pVertexBindingDescriptions = vf.bindings; vi.vertexAttributeDescriptionCount = vf.attr_count; vi.pVertexAttributeDescriptions = vf.attrs; info.pVertexInputState = &vi; } void Pipeline_Vk::init_input_assembly( Arena& scope, Device_Vk* dev, VkGraphicsPipelineCreateInfo& info, const Pipeline& desc ) { VkPipelineInputAssemblyStateCreateInfo& ia = *(VkPipelineInputAssemblyStateCreateInfo*)arena_alloc( &scope, sizeof ia ); (void)dev; (void)desc; (void)info; zero(&ia, sizeof ia); ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; ia.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; info.pInputAssemblyState = &ia; } void Pipeline_Vk::init_viewport( Arena& scope, VkGraphicsPipelineCreateInfo& info, const Pipeline& desc ) { VkPipelineViewportStateCreateInfo& vi = *(VkPipelineViewportStateCreateInfo*)arena_alloc( &scope, sizeof vi ); VkRect2D& scissor = 
*(VkRect2D*)arena_alloc(
		&scope,
		sizeof scissor
	);
	VkViewport& viewport = *(VkViewport*)arena_alloc(
		&scope,
		sizeof viewport
	);
	zero(&vi, sizeof vi);
	zero(&scissor, sizeof scissor);
	zero(&viewport, sizeof viewport);
	/* desc.scissor and desc.viewport are read as x, y, w, h */
	scissor.offset.x = desc.scissor[0];
	scissor.offset.y = desc.scissor[1];
	scissor.extent.width = desc.scissor[2];
	scissor.extent.height = desc.scissor[3];
	viewport.x = desc.viewport[0];
	viewport.y = desc.viewport[1];
	viewport.width = desc.viewport[2];
	viewport.height = desc.viewport[3];
	viewport.minDepth = 0.0f;
	viewport.maxDepth = 1.0f;
	vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
	vi.viewportCount = 1;
	vi.pViewports = &viewport;
	vi.scissorCount = 1;
	vi.pScissors = &scissor;
	info.pViewportState = &vi;
}

/* Fills info.pRasterizationState: filled polygons, counter-clockwise
 * front faces, cull mode from the pipeline description, no depth
 * clamp or bias. */
void Pipeline_Vk::init_rasterisation(
	Arena& scope,
	Device_Vk* dev,
	VkGraphicsPipelineCreateInfo& info,
	const Pipeline& desc
) {
	VkPipelineRasterizationStateCreateInfo& ri =
		*(VkPipelineRasterizationStateCreateInfo*)arena_alloc(
			&scope,
			sizeof ri
		);
	(void)dev;
	zero(&ri, sizeof ri);
	ri.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
	ri.depthClampEnable = VK_FALSE;
	ri.rasterizerDiscardEnable = VK_FALSE;
	ri.polygonMode = VK_POLYGON_MODE_FILL;
	ri.lineWidth = 1.0f;
	ri.cullMode = get_vk_cull_mode(desc.cull_mode);
	ri.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
	ri.depthBiasEnable = VK_FALSE;
	info.pRasterizationState = &ri;
}

/* Fills info.pMultisampleState: a single sample per pixel, no
 * sample shading. */
void Pipeline_Vk::init_msaa(
	Arena& scope,
	Device_Vk* dev,
	VkGraphicsPipelineCreateInfo& info,
	const Pipeline& desc
) {
	VkPipelineMultisampleStateCreateInfo& mi =
		*(VkPipelineMultisampleStateCreateInfo*)arena_alloc(
			&scope,
			sizeof mi
		);
	(void)dev;
	(void)desc;
	zero(&mi, sizeof mi);
	mi.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
	mi.sampleShadingEnable = VK_FALSE;
	mi.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
	info.pMultisampleState = &mi;
}

/* Fills info.pDepthStencilState from the depth settings of the
 * pipeline description; bounds and stencil tests are disabled. */
void Pipeline_Vk::init_depthstencil(
	Arena& scope,
	Device_Vk* dev,
	VkGraphicsPipelineCreateInfo& info,
	const Pipeline& desc
) {
VkPipelineDepthStencilStateCreateInfo& ds = *(VkPipelineDepthStencilStateCreateInfo*)arena_alloc( &scope, sizeof ds ); (void)dev; (void)desc; zero(&ds, sizeof ds); ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; ds.depthTestEnable = desc.depth_test; ds.depthWriteEnable = desc.depth_write; ds.depthCompareOp = get_compare_op(desc.depth_mode); ds.depthBoundsTestEnable = VK_FALSE; ds.stencilTestEnable = VK_FALSE; info.pDepthStencilState = &ds; } void Pipeline_Vk::init_blending( Arena& scope, Device_Vk* dev, VkGraphicsPipelineCreateInfo& info, const Render_Pass& rp, const Pipeline& desc ) { VkPipelineColorBlendStateCreateInfo& bi = *(VkPipelineColorBlendStateCreateInfo*)arena_alloc( &scope, sizeof bi ); VkPipelineColorBlendAttachmentState* abs; (void)dev; (void)desc; zero(&bi, sizeof bi); if (rp.colour_count) { int i, c = rp.colour_count; abs = (VkPipelineColorBlendAttachmentState*)arena_alloc( &scope, sizeof abs * c ); zero(abs, sizeof *abs); for (i = 0; i < c; i++) { auto& ab = abs[i]; ab.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; ab.blendEnable = desc.blend_enable; if (desc.blend_enable) { ab.srcColorBlendFactor = get_vk_blend_factor(desc.blend_src); ab.dstColorBlendFactor = get_vk_blend_factor(desc.blend_dst); ab.srcAlphaBlendFactor = get_vk_blend_factor(desc.blend_src_alpha); ab.dstAlphaBlendFactor = get_vk_blend_factor(desc.blend_dst_alpha); ab.colorBlendOp = get_vk_blend_op(desc.blend_mode); ab.alphaBlendOp = get_vk_blend_op(desc.blend_mode_alpha); } } } bi.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; bi.flags = 0; bi.logicOpEnable = VK_FALSE; bi.attachmentCount = rp.colour_count; bi.pAttachments = abs; info.pColorBlendState = &bi; } void Pipeline_Vk::init_descriptors( Device_Vk* dev, const Pipeline& desc ) { const Descriptor* sdescs = desc.descriptors; Shader_Vk& shader = *(Shader_Vk*)&dev->get_shader(desc.shader); VkResult r; int count = 
desc.descriptor_count;
	int i;
	{
		/* temporary binding array, freed once the layout exists */
		VkDescriptorSetLayoutBinding* descs =
			(VkDescriptorSetLayoutBinding*)heap_alloc(
				dev->heap,
				count * sizeof *descs
			);
		VkDescriptorSetLayoutCreateInfo di{};
		zero(descs, count * sizeof *descs);
		for (i = 0; i < count; i++) {
			int j, stage;
			auto& dst = descs[i];
			auto& src = sdescs[i];
			switch (src.type) {
				case Descriptor::Type::texture:
					dst.descriptorType =
						VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
					break;
				case Descriptor::Type::constant_buffer:
					dst.descriptorType =
						VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
					break;
			}
			dst.binding = src.slot;
			dst.descriptorCount = 1;
			dst.stageFlags = 0;
			/* `stage` is a bit mask indexed by shader type; turn
			 * every set bit into the matching Vulkan stage flag */
			stage = shader.descriptor_stage(src.slot);
			for (j = 0; j < shader_type_count; j++) {
				if (stage & (1 << j)) {
					dst.stageFlags |= Shader_Vk::stage((Shader_Type)j);
				}
			}
		}
		di.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
		di.bindingCount = (uint32_t)count;
		di.pBindings = descs;
		r = vkCreateDescriptorSetLayout(
			dev->dev,
			&di,
			&dev->ac,
			&dlay
		);
		if (r != VK_SUCCESS) {
			print_err("Failed to create descriptor set layout.\n");
			pbreak(r);
		}
		heap_free(dev->heap, descs);
	}
}

/* Maps a Depth_Mode onto the matching Vulkan compare op. */
VkCompareOp Pipeline_Vk::get_compare_op(Depth_Mode m) {
	switch (m) {
		case Depth_Mode::less:
			return VK_COMPARE_OP_LESS;
		case Depth_Mode::less_equal:
			return VK_COMPARE_OP_LESS_OR_EQUAL;
		case Depth_Mode::equal:
			return VK_COMPARE_OP_EQUAL;
		case Depth_Mode::greater:
			return VK_COMPARE_OP_GREATER;
		case Depth_Mode::greater_equal:
			return VK_COMPARE_OP_GREATER_OR_EQUAL;
		case Depth_Mode::always:
			return VK_COMPARE_OP_ALWAYS;
		case Depth_Mode::never:
			return VK_COMPARE_OP_NEVER;
	}
	assert(0);
	return VK_COMPARE_OP_LESS;
}

/* Creates the pipeline layout (lay). A descriptor set layout is
 * created and referenced only when the pipeline description has
 * descriptors; otherwise dlay is set to a null handle. */
void Pipeline_Vk::init_layout(
	Device_Vk* dev,
	const Pipeline& desc
) {
	VkResult r;
	VkPipelineLayoutCreateInfo li{};
	(void)desc;
	int set_count = desc.descriptor_count? 1: 0;
	if (set_count)
		init_descriptors(dev, desc);
	else
		dlay = VK_NULL_HANDLE;
	li.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
	li.setLayoutCount = set_count;
	li.pSetLayouts = &dlay;
	li.pushConstantRangeCount = 0;
	r = vkCreatePipelineLayout(
		dev->dev,
		&li,
		&dev->ac,
		&lay
	);
	if (r != VK_SUCCESS) {
		print_err("Failed to create a pipeline layout.\n");
		pbreak(r);
	}
}

/* Builds the graphics pipeline for `key`. All create-info structs
 * are allocated from a 1 KiB stack-backed arena that only lives for
 * the duration of this call. */
void Pipeline_Vk::init(Device_Vk* dev, const Pso_Key& key) {
	char buffer[1024];
	Arena scope;
	VkResult r;
	const auto& desc = key.pso;
	VkGraphicsPipelineCreateInfo info{};
	init_arena(&scope, buffer, sizeof buffer);
	init_layout(dev, desc);
	init_stages(scope, dev, info, desc);
	init_vertex_input(scope, dev, info, desc);
	init_input_assembly(scope, dev, info, desc);
	init_viewport(scope, info, desc);
	init_rasterisation(scope, dev, info, desc);
	init_msaa(scope, dev, info, desc);
	init_depthstencil(scope, dev, info, desc);
	init_blending(scope, dev, info, key.rpo.rpo, desc);
	info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
	info.flags = 0;
	info.renderPass = dev->get_rpo(key.rpo).rpo;
	info.subpass = 0;
	info.layout = lay;
	r = vkCreateGraphicsPipelines(
		dev->dev,
		VK_NULL_HANDLE,
		1,
		&info,
		&dev->ac,
		&pip
	);
	if (r != VK_SUCCESS) {
		print_err("Failed to create a pipeline.\n");
		pbreak(r);
	}
}

/* Destroys the descriptor set layout (if one was created), the
 * pipeline layout and the pipeline. */
void Pipeline_Vk::destroy(Device_Vk* dev) {
	if (dlay)
		vkDestroyDescriptorSetLayout(dev->dev, dlay, &dev->ac);
	vkDestroyPipelineLayout(dev->dev, lay, &dev->ac);
	vkDestroyPipeline(dev->dev, pip, &dev->ac);
}

void Descriptor_Set_Vk::init(
	Device_Vk* dev,
	const Pipeline_Vk& pip,
	const Pipeline& desc
) {
	int count = desc.descriptor_count, i;
	int sampler_count = 0, cbuffer_count = 0;
	int size_count = 0;
	VkDescriptorSetAllocateInfo da{};
	VkDescriptorPoolSize sizes[4];
	VkResult r;
	for (i = 0; i < count; i++) {
		auto& src = desc.descriptors[i];
		switch (src.type) {
			case Descriptor::Type::texture:
				sampler_count++;
				break;
			case Descriptor::Type::constant_buffer:
				cbuffer_count++;
				break;
		}
	}
	if (sampler_count) {
		int idx =
size_count++; sizes[idx] = { .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .descriptorCount = (uint32_t)sampler_count }; } if (cbuffer_count) { int idx = size_count++; sizes[idx] = { .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, .descriptorCount = (uint32_t)cbuffer_count }; } { VkDescriptorPoolCreateInfo di{}; di.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, di.poolSizeCount = (uint32_t)size_count; di.pPoolSizes = sizes; di.maxSets = (uint32_t)count; r = vkCreateDescriptorPool(dev->dev, &di, &dev->ac, &dp); if (r != VK_SUCCESS) { print_err("Failed to create a descriptor pool.\n"); pbreak(r); } } da.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; da.descriptorPool = dp; da.descriptorSetCount = 1; da.pSetLayouts = &pip.dlay; r = vkAllocateDescriptorSets( dev->dev, &da, &dset ); if (r != VK_SUCCESS) { print_err("Failed to allocate descriptor set.\n"); pbreak(r); } for (i = 0; i < count; i++) { VkDescriptorImageInfo img{}; VkDescriptorBufferInfo buf{}; VkWriteDescriptorSet wd{}; auto& src = desc.descriptors[i]; wd.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; wd.dstSet = dset; wd.dstBinding = src.slot; wd.dstArrayElement = 0; wd.descriptorCount = 1; switch (src.type) { case Descriptor::Type::texture: { Texture_Descriptor* td = (Texture_Descriptor*)src.payload; Texture_Vk& t = *(Texture_Vk*)&dev->get_texture(td->texture); Sampler_Vk& s = *(Sampler_Vk*)&dev->samplers[td->sampler]; assert(td->texture); assert(td->sampler); img.imageView = t.view; img.sampler = s.sampler; img.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; wd.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; wd.pImageInfo = &img; } break; case Descriptor::Type::constant_buffer: { Constant_Buffer_Descriptor* cd = (Constant_Buffer_Descriptor*)src.payload; Buffer_Vk& b = *(Buffer_Vk*)&dev->get_buffer(cd->buffer); assert(cd->buffer); buf.buffer = b.buf; buf.offset = cd->offset; buf.range = cd->size? 
cd->size: b.size; wd.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; wd.pBufferInfo = &buf; } break; } vkUpdateDescriptorSets(dev->dev, 1, &wd, 0, 0); } } void Descriptor_Set_Vk::destroy(Device_Vk* dev) { vkDestroyDescriptorPool(dev->dev, dp, &dev->ac); } VkFormat Vertex_Format_Vk::format_from_svar_type( SVariable_Type type ) { switch (type) { case svariable_type_float: return VK_FORMAT_R32_SFLOAT; case svariable_type_vec2: return VK_FORMAT_R32G32_SFLOAT; case svariable_type_vec3: return VK_FORMAT_R32G32B32_SFLOAT; case svariable_type_vec4: return VK_FORMAT_R32G32B32A32_SFLOAT; default: assert(0); /* todo */ } return (VkFormat)0; } void Vertex_Format_Vk::init( Device_Vk* dev, const Vertex_Format_Desc& desc ) { int i; binding_count = desc.binding_count; attr_count = desc.attribute_count; bindings = (VkVertexInputBindingDescription*)heap_alloc( dev->heap, binding_count * sizeof *bindings ); attrs = (VkVertexInputAttributeDescription*)heap_alloc( dev->heap, attr_count * sizeof *attrs ); zero(bindings, binding_count * sizeof *bindings); zero(attrs, attr_count * sizeof *attrs); for (i = 0; i < binding_count; i++) { auto& dst = bindings[i]; const auto& src = desc.bindings[i]; dst.binding = src.binding; dst.stride = src.stride; dst.inputRate = src.rate == sbinding_rate_instance? 
VK_VERTEX_INPUT_RATE_INSTANCE: VK_VERTEX_INPUT_RATE_VERTEX; } for (i = 0; i < attr_count; i++) { auto& dst = attrs[i]; auto& src = desc.attributes[i]; dst.binding = src.binding; dst.location = src.index; dst.format = format_from_svar_type(src.type); dst.offset = src.offset; } } void Vertex_Format_Vk::destroy(Device_Vk* dev) { heap_free(dev->heap, attrs); heap_free(dev->heap, bindings); } void Vertex_Format_Vk::clone(Arena* arena) { int bc = binding_count * sizeof *bindings; int ac = attr_count * sizeof *attrs; auto nb = (VkVertexInputBindingDescription*)arena_alloc( arena, bc ); auto na = (VkVertexInputAttributeDescription*)arena_alloc( arena, ac ); memcpy(nb, bindings, bc); memcpy(na, attrs, ac); bindings = nb; attrs = na; } void Vertex_Format_Vk::optimise(const Vertex_Format_Vk* shadervf) { int i; if (shadervf->attr_count >= attr_count) return; for (i = attr_count - 1; i >= 0; i--) { auto& a = attrs[i]; int j, idx = -1; for (j = 0; j < shadervf->attr_count; j++) { auto& b = shadervf->attrs[j]; if (b.binding == a.binding && b.location == a.location) { idx = j; break; } } if (idx == -1) { int last = attr_count - 1; attrs[i] = attrs[last]; attr_count = last; } } } bool Shader_Vk::init_module( Device_Vk* dev, int stage, char* buf, int size ) { VkResult r; VkShaderModule m; VkShaderModuleCreateInfo mi{}; mi.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; mi.codeSize = size; mi.pCode = (uint32_t*)buf; r = vkCreateShaderModule(dev->dev, &mi, &dev->ac, &m); modules[stage] = m; return r == VK_SUCCESS; } int Shader_Vk::Vertex_Format::find_binding(const char* name) { int i; int bucket = (int)(hash_string(name) % binding_count); for (i = 0; i < binding_count; i++) { Binding& binding = bindings[bucket]; if ( !binding.name[0] || !strcmp(binding.name, name) ) return bucket; bucket = (bucket + 1) % binding_count; } return -1; } int Shader_Vk::Vertex_Format::find_attribute(const char* name) { int i; int bucket = (int)(hash_string(name) % attr_count); for (i = 0; i < 
attr_count; i++) { Attribute& attr = attributes[bucket]; if ( !attr.name[0] || !strcmp(attr.name, name) ) return bucket; bucket = (bucket + 1) % attr_count; } return -1; } bool Shader_Vk::Vertex_Format::init( Device_Vk* dev, Pack_File* f ) { int i, attr_index = 0; int start = pack_tell(f); attr_count = 0; for (i = 0; i < binding_count; i++) { char name[24]; int count, j; SBinding_Rate rate; pack_read(f, name, sizeof name); pack_read(f, &rate, 4); pack_read(f, &count, 4); for (j = 0; j < count; j++) { char aname[28]; SVariable_Type type; pack_read(f, aname, sizeof aname); pack_read(f, &type, 4); attr_count++; } } pack_seek(f, start, seek_rel_start); bindings = (Binding*)heap_alloc( dev->heap, binding_count * sizeof *bindings ); attributes = (Attribute*)heap_alloc( dev->heap, attr_count * sizeof *attributes ); for (i = 0; i < binding_count; i++) bindings[i].name[0] = 0; for (i = 0; i < attr_count; i++) attributes[i].name[0] = 0; for (i = 0; i < binding_count; i++) { Binding* binding; char name[24]; int count, j; SBinding_Rate rate; pack_read(f, name, sizeof name); pack_read(f, &rate, 4); pack_read(f, &count, 4); binding = &bindings[find_binding(name)]; strcpy(binding->name, name); binding->rate = rate; binding->attr_count = count; binding->attributes = (int*)heap_alloc( dev->heap, count * sizeof *binding->attributes ); binding->index = i; for (j = 0; j < count; j++, attr_index++) { int bucket; Attribute* attr; char aname[28]; SVariable_Type type; pack_read(f, aname, sizeof aname); pack_read(f, &type, 4); bucket = find_attribute(aname); binding->attributes[j] = bucket; attr = &attributes[bucket]; strcpy(attr->name, aname); attr->index = j; attr->type = type; } } return true; } void Shader_Vk::Vertex_Format::destroy(Device_Vk* dev) { int i; for (i = 0; i < binding_count; i++) heap_free(dev->heap, bindings[i].attributes); heap_free(dev->heap, bindings); heap_free(dev->heap, attributes); } void Shader_Vk::destroy(Device_Vk* dev) { int i; for (i = 0; i < 
shader_type_count; i++) if (modules[i]) vkDestroyShaderModule(dev->dev, modules[i], &dev->ac); vfd.destroy(dev); heap_free(dev->heap, descs); dev->destroy_vertex_format(vf); dev->shaders.remove(id); } int Shader::binding_index(const char* name) { int idx; Shader_Vk* sh = (Shader_Vk*)this; idx = sh->vfd.find_binding(name); if (idx < 0 || !sh->vfd.bindings[idx].name[0]) return -1; return sh->vfd.bindings[idx].index; } int Shader::attribute_index(const char* name) { int idx; Shader_Vk* sh = (Shader_Vk*)this; idx = sh->vfd.find_attribute(name); if (idx < 0 || !sh->vfd.attributes[idx].name[0]) return -1; return sh->vfd.attributes[idx].index; } int Shader::descriptor_binding(const char* name) { int idx; Shader_Vk* sh = (Shader_Vk*)this; idx = sh->find_descriptor(name); if (idx < 0 || !sh->descs[idx].name[0]) return -1; return sh->descs[idx].slot; } int Shader::descriptor_stage(int slot) { Shader_Vk* sh = (Shader_Vk*)this; int i; for (i = 0; i < sh->desc_count; i++) { if (sh->descs[i].slot == slot) { return sh->descs[i].stage; } } return 0; } void Buffer_Vk::init( Device_Vk* dev, int flags, VkDeviceSize s ) { VkBufferCreateInfo bi{}; VkMemoryRequirements req; VkResult r; size = s; bi.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; bi.size = size; bi.usage = get_usage(flags); bi.sharingMode = VK_SHARING_MODE_EXCLUSIVE; r = vkCreateBuffer(dev->dev, &bi, &dev->ac, &buf); if (r != VK_SUCCESS) { print_err("Failed to create a buffer.\n"); pbreak(r); } vkGetBufferMemoryRequirements(dev->dev, buf, &req); { VkMemoryPropertyFlags props = get_memory_flags(flags); int mt = dev->find_memory_type(req.memoryTypeBits, props); memory = dev->vrama.alloc(mt, req.size, req.alignment); if (!memory.valid()) { print_err("Failed to allocate memory for buffer.\n"); pbreak(900); } } vkBindBufferMemory(dev->dev, buf, memory.mem, memory.offset()); } void Buffer_Vk::destroy(Device_Vk* dev) { vkDestroyBuffer(dev->dev, buf, &dev->ac); dev->vrama.free(memory); dev->buffers.remove(id); } void 
Buffer_Vk::set_name(Device_Vk* dev, const char* name) { #ifdef DEBUG VkDebugUtilsObjectNameInfoEXT i{}; i.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT; i.pObjectName = name; i.objectType = VK_OBJECT_TYPE_BUFFER; i.objectHandle = (uint64_t)buf; vkSetDebugUtilsObjectNameEXT(dev->dev, &i); #else (void)dev; (void)name; #endif } Buffer_Id Device::create_buffer( const char* name, size_t size, int flags ) { Device_Vk* dev = (Device_Vk*)this; Buffer_Id id = dev->alloc_buffer(); Buffer_Vk& buf = *(Buffer_Vk*)&get_buffer(id); buf.init(dev, flags, (VkDeviceSize)size); buf.set_name(dev, name); return id; } void Device::destroy_buffer(Buffer_Id id) { Device_Vk* dev = (Device_Vk*)this; Buffer_Vk* buf = (Buffer_Vk*)&get_buffer(id); dev->queue_destroy(buf); } void Device::destroy_bufferi(Buffer_Id id) { Device_Vk* dev = (Device_Vk*)this; Buffer_Vk* buf = (Buffer_Vk*)&get_buffer(id); buf->destroy(dev); } void* Device::map_buffer( Buffer_Id id, size_t offset, size_t size ) { Buffer_Vk& buf = *(Buffer_Vk*)&get_buffer(id); (void)size; return buf.memory.map(offset); } void Device::unmap_buffer(Buffer_Id id) { (void)id; /* Device_Vk* dev = (Device_Vk*)this; Buffer_Vk& buf = *(Buffer_Vk*)&get_buffer(id); vkUnmapMemory(dev->dev, buf.memory.mem);*/ } Buffer& Device::get_buffer(Buffer_Id id) { Device_Vk* dev = (Device_Vk*)this; assert(id.index); assert(dev->buffers.has(id)); return dev->buffers[id]; } Texture_Id Device::create_texture( const char* name, Texture_Format fmt, int flags, int w, int h, Buffer_Id init ) { VkImageCreateInfo ii{}; VkResult r; Device_Vk* dev = (Device_Vk*)this; Texture_Id id = dev->alloc_texture(); Texture_Vk& tex = *(Texture_Vk*)&dev->get_texture(id); VkMemoryRequirements req; VkImageAspectFlags aspect = get_image_aspect(flags); tex.state = Resource_State::undefined; ii.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; ii.imageType = VK_IMAGE_TYPE_2D; ii.extent.width = w; ii.extent.height = h; ii.extent.depth = 1; ii.mipLevels = 1; ii.arrayLayers = 1; 
ii.format = get_vk_format(fmt); ii.tiling = VK_IMAGE_TILING_OPTIMAL; ii.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; ii.usage = get_texture_usage(flags); ii.sharingMode = VK_SHARING_MODE_EXCLUSIVE; ii.samples = VK_SAMPLE_COUNT_1_BIT; r = vkCreateImage(dev->dev, &ii, &dev->ac, &tex.image); if (r != VK_SUCCESS) { print_err("Failed to create an image.\n"); } vkGetImageMemoryRequirements(dev->dev, tex.image, &req); { VkMemoryPropertyFlags props = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; int mt = dev->find_memory_type(req.memoryTypeBits, props); if (mt < 0) { print("Failed to find a satisfying memory type index.\n"); pbreak(mt); } tex.memory = dev->vrama.alloc(mt, req.size, req.alignment); if (!tex.memory.valid()) { print_err("Failed to allocate memory for texture.\n"); pbreak(900); } } vkBindImageMemory( dev->dev, tex.image, tex.memory.mem, tex.memory.offset() ); tex.w = w; tex.h = h; tex.fmt = fmt; tex.alias = false; tex.view = make_view( dev, tex.image, ii.format, aspect ); if (init) { Context& ctx = dev->acquire(); ctx.transition(id, Resource_State::copy_dst); ctx.copy(id, init); ctx.transition(id, Resource_State::shader_read); dev->submit(ctx); } tex.set_name(dev, name); return id; } void Texture_Vk::set_name(Device_Vk* dev, const char* name) { #ifdef DEBUG VkDebugUtilsObjectNameInfoEXT i{}; i.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT; i.pObjectName = name; i.objectType = VK_OBJECT_TYPE_IMAGE; i.objectHandle = (uint64_t)image; vkSetDebugUtilsObjectNameEXT(dev->dev, &i); i.objectType = VK_OBJECT_TYPE_IMAGE_VIEW; i.objectHandle = (uint64_t)view; vkSetDebugUtilsObjectNameEXT(dev->dev, &i); #else (void)dev; (void)name; #endif } Shader& Device::get_shader(Shader_Id id) { Device_Vk* dev = (Device_Vk*)this; assert(id.index); assert(dev->shaders.has(id)); return dev->shaders[id]; } Sampler_Id Device::create_sampler( const char* name, const Sampler_State& state ) { Device_Vk* dev = (Device_Vk*)this; Sampler_Id id = dev->alloc_sampler(); Sampler_Vk& s = 
dev->samplers[id]; s.init(dev, state); s.set_name(dev, name); return id; } void Device::destroy_sampler(Sampler_Id id) { Device_Vk* dev = (Device_Vk*)this; Sampler_Vk& s = dev->samplers[id]; dev->queue_destroy(&s); } void Device::destroy_sampleri(Sampler_Id id) { Device_Vk* dev = (Device_Vk*)this; Sampler_Vk& s = dev->samplers[id]; s.destroy(dev); } void Shader_Loader::init(Device_Vk* d) { dev = d; } Asset* Shader_Loader::load( Arena* a, Arena* s, const char* filename, Pack_File* f ) { Shader_Vk* shader; Shader_Id id; (void)s; (void)a; (void)filename; id = dev->alloc_shader(); shader = (Shader_Vk*)&dev->get_shader(id); if (!shader->init(dev, f)) { dev->shaders.remove(id); return 0; } return shader; } void Shader_Loader::unload(Asset* a) { Shader_Vk* sh = (Shader_Vk*)a; dev->queue_destroy(sh); } int Shader_Vk::find_descriptor(const char* name) { int i; int bucket = (int)(hash_string(name) % desc_count); for (i = 0; i < desc_count; i++) { Desc& desc = descs[bucket]; if ( !desc.name[0] || !strcmp(desc.name, name) ) return bucket; bucket = (bucket + 1) % desc_count; } return -1; } bool Shader_Vk::init(Device_Vk* dev, Pack_File* f) { char magic[4]; int binding_count, target_count, i; pack_read(f, magic, 4); if ( magic[0] != 'C' || magic[1] != 'S' || magic[2] != 'H' || magic[3] != '2' ) return false; pack_read(f, &type, 4); pack_read(f, &binding_count, 4); pack_read(f, &target_count, 4); pack_read(f, &desc_count, 4); assert(binding_count); vfd.binding_count = binding_count; if (!vfd.init(dev, f)) return false; { Vertex_Format_Desc desc{}; desc.binding_count = vfd.binding_count; desc.attribute_count = vfd.attr_count; desc.bindings = (Vertex_Format_Desc::Binding*)heap_alloc( dev->heap, sizeof *desc.bindings ); desc.attributes = (Vertex_Format_Desc::Attribute*)heap_alloc( dev->heap, sizeof *desc.attributes * desc.attribute_count ); for (i = 0; i < vfd.binding_count; i++) { int j, stride = 0; auto& src = vfd.bindings[i]; auto& dst = desc.bindings[src.index]; for (j = 0; j < 
src.attr_count; j++) { auto& src_attr = vfd.attributes[src.attributes[j]]; auto& dst_attr = desc.attributes[src.attributes[j]]; dst_attr.binding = src.index; dst_attr.index = j; dst_attr.type = src_attr.type; dst_attr.offset = stride; stride += svariable_type_size(src_attr.type); } dst.binding = src.index; dst.stride = stride; dst.rate = src.rate; } vf = dev->create_vertex_format(desc); heap_free(dev->heap, desc.attributes); heap_free(dev->heap, desc.bindings); } pack_seek( f, 32 * target_count, seek_rel_cur ); descs = (Desc*)heap_alloc( dev->heap, desc_count * sizeof *descs ); pack_read(f, descs, desc_count * sizeof *descs); for (i = 0; i < shader_type_count; i++) { int o, s; pack_read(f, &o, 4); pack_read(f, &s, 4); if (o) { bool r; int before = pack_tell(f); char* buf = (char*)heap_alloc(dev->heap, s); pack_seek(f, o, seek_rel_start); pack_read(f, buf, s); r = init_module(dev, i, buf, s); heap_free(dev->heap, buf); pack_seek(f, before, seek_rel_start); if (!r) return false; } else { modules[i] = VK_NULL_HANDLE; } pack_read(f, entrypoints[i], 24); } return true; } void Texture_Loader::init(Device_Vk* d) { dev = d; } size_t Texture_Loader::calc_size( Texture_Format fmt, int w, int h ) { switch (fmt) { case texture_format_bc1: case texture_format_bc4: return (w / 4) * (h / 4) * 8; case texture_format_bc5: return (w / 4) * (h / 4) * 16; case texture_format_r8i: return w * h; default: print_err("Can't load this texture format.\n"); pbreak(45498); return 0; } } Asset* Texture_Loader::load( Arena* a, Arena* s, const char* filename, Pack_File* f ) { char magic[4]; int w, h; size_t size; Texture_Format fmt; (void)a; (void)s; pack_read(f, magic, 4); pack_read(f, &w, 4); pack_read(f, &h, 4); pack_read(f, &fmt, 4); size = calc_size(fmt, w, h); { Buffer_Id buf = dev->create_buffer( "texture stage", size, Buffer_Flags::copy_src | Buffer_Flags::cpu_readwrite ); void* mem = dev->map_buffer(buf, 0, size); pack_read(f, mem, size); dev->unmap_buffer(buf); Texture_Id tex = 
dev->create_texture( filename, fmt, Texture_Flags::sampleable | Texture_Flags::copy_dst, w, h, buf ); dev->destroy_buffer(buf); return &dev->get_texture(tex); } } void Texture_Loader::unload(Asset* a) { Texture_Vk* tex = (Texture_Vk*)a; dev->destroy_texture(tex->id); } void Texture_Vk::destroy(Device_Vk* dev) { if (!alias) { vkDestroyImage(dev->dev, image, &dev->ac); dev->vrama.free(memory); } vkDestroyImageView(dev->dev, view, &dev->ac); dev->textures.remove(id); } VkFilter Sampler_Vk::get_filter(Filter_Mode mode) { switch (mode) { case Filter_Mode::point: return VK_FILTER_NEAREST; case Filter_Mode::linear: return VK_FILTER_LINEAR; } assert(0); return (VkFilter)0; } VkSamplerMipmapMode Sampler_Vk::get_mipmap_mode( Filter_Mode mode ) { switch (mode) { case Filter_Mode::point: return VK_SAMPLER_MIPMAP_MODE_NEAREST; case Filter_Mode::linear: return VK_SAMPLER_MIPMAP_MODE_LINEAR; } assert(0); return (VkSamplerMipmapMode)0; } VkSamplerAddressMode Sampler_Vk::get_mode( Address_Mode mode ) { switch (mode) { case Address_Mode::repeat: return VK_SAMPLER_ADDRESS_MODE_REPEAT; case Address_Mode::mirror: return VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; case Address_Mode::clamp: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; case Address_Mode::border: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; } assert(0); return (VkSamplerAddressMode)0; } void Sampler_Vk::init(Device_Vk* dev, const Sampler_State& s) { VkSamplerCreateInfo si{}; VkSamplerCustomBorderColorCreateInfoEXT bi{}; VkClearColorValue col{}; VkResult r; col.float32[0] = s.border[0]; col.float32[1] = s.border[1]; col.float32[2] = s.border[2]; col.float32[3] = s.border[3]; si.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; si.magFilter = get_filter(s.mag); si.minFilter = get_filter(s.min); si.mipmapMode = get_mipmap_mode(s.mip); si.addressModeU = get_mode(s.address_u); si.addressModeV = get_mode(s.address_v); si.addressModeW = get_mode(s.address_w); si.borderColor = VK_BORDER_COLOR_FLOAT_CUSTOM_EXT; si.pNext = &bi; 
bi.sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT; bi.customBorderColor = col; bi.format = VK_FORMAT_R32G32B32A32_SFLOAT; r = vkCreateSampler(dev->dev, &si, &dev->ac, &sampler); if (r != VK_SUCCESS) { print_err("Failed to create a sampler.\n"); pbreak(r); } } void Sampler_Vk::destroy(Device_Vk* dev) { vkDestroySampler(dev->dev, sampler, &dev->ac); dev->samplers.remove(id); } void Sampler_Vk::set_name(Device_Vk* dev, const char* name) { #ifdef DEBUG VkDebugUtilsObjectNameInfoEXT i{}; i.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT; i.pObjectName = name; i.objectType = VK_OBJECT_TYPE_SAMPLER; i.objectHandle = (uint64_t)sampler; vkSetDebugUtilsObjectNameEXT(dev->dev, &i); #else (void)dev; (void)name; #endif } void Vram_Allocator::Page::init( Device_Vk* dev, VkDeviceSize s, int t ) { VkMemoryAllocateInfo ai{}; Chunk* chunk; VkResult r; const auto& props = dev->mem_props.memoryTypes[t]; ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; ai.allocationSize = s; ai.memoryTypeIndex = t; size = s; type = t; next = 0; r = vkAllocateMemory(dev->dev, &ai, &dev->ac, &memory); if (r == VK_ERROR_OUT_OF_DEVICE_MEMORY) { print_err("Out of VRAM.\n"); pbreak(r); } if (r != VK_SUCCESS) { print_err("vkAllocateMemory failed.\n"); pbreak(r); } if ( props.propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT || props.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT ) { vkMapMemory( dev->dev, memory, 0, size, 0, &mapping ); } else mapping = 0; chunk = (Chunk*)heap_alloc(dev->heap, sizeof *chunk); chunk->offset = 0; chunk->pad = 0; chunk->size = s; chunk->next = 0; chunk->free = true; chunks = chunk; #ifdef DEBUG if (dev->hooks) dev->hooks->on_page_alloc(s); #endif } Vram_Allocator::Allocation Vram_Allocator::Page::imp_alloc( Device_Vk* dev, VkDeviceSize asize ) { Chunk* chunk; for (chunk = chunks; chunk; chunk = chunk->next) { if (chunk->free) { if (chunk->size == asize) { chunk->free = false; return { memory, 0, chunk }; } else if (chunk->size > asize) 
{ Chunk* nc = (Chunk*)heap_alloc(dev->heap, sizeof *nc); nc->offset = chunk->offset + asize; nc->pad = 0; nc->size = chunk->size - asize; nc->next = chunk->next; nc->free = true; chunk->next = nc; chunk->size = asize; chunk->pad = 0; chunk->free = false; return { memory, 0, chunk }; } } } return { 0, 0, 0 }; } void Vram_Allocator::Page::defrag(Device_Vk* dev) { Chunk* chunk; for (chunk = chunks; chunk;) { if (chunk->free) { Chunk* end = chunk->next; VkDeviceSize csize = chunk->size; for (; end && end->free;) { Chunk* next = end->next; csize += end->size; heap_free(dev->heap, end); end = next; } chunk->next = end; chunk->size = csize; if (end) { chunk = end->next; } else chunk = 0; } else chunk = chunk->next; } } Vram_Allocator::Allocation Vram_Allocator::Page::alloc( Device_Vk* dev, VkDeviceSize asize, VkDeviceSize align ) { VkDeviceSize as = asize + align; VkDeviceSize al; Allocation a = imp_alloc(dev, as); if (!a.chunk) { defrag(dev); a = imp_alloc(dev, as); } if (!a.chunk) return a; al = align_address((uintptr_t)a.chunk->offset, (size_t)align); a.chunk->pad = al - a.chunk->offset; #if DEBUG if (dev->hooks) dev->hooks->on_vram_alloc(as, align); #endif return a; } void Vram_Allocator::init(Device_Vk* d) { pages = 0; dev = d; } void Vram_Allocator::destroy() { Page* page = pages; for (; page; page = page->next) { Chunk* chunk = page->chunks; if (page->mapping) vkUnmapMemory(dev->dev, page->memory); vkFreeMemory(dev->dev, page->memory, &dev->ac); for (; chunk; chunk = chunk->next) heap_free(dev->heap, chunk); heap_free(dev->heap, page); } } Vram_Allocator::Allocation Vram_Allocator::alloc( int type, VkDeviceSize size, VkDeviceSize align ) { Page* page = pages; for (; page; page = page->next) { if (page->type == type) { auto a = page->alloc(dev, size, align); if (a.chunk) { a.page = page; return a; } } } page = (Page*)heap_alloc(dev->heap, sizeof *page); page->init( dev, (VkDeviceSize)align_address( (uintptr_t)size + 1, (size_t)size_alignment ), type ); page->next = 
pages; pages = page; auto a = page->alloc(dev, size, align); if (a.chunk) a.page = page; return a; } void Vram_Allocator::free(Allocation& alloc) { alloc.chunk->free = true; } void Staged_Buffer::init( Device* dev, const char* name, int s, int flags ) { size = s; stage = dev->create_buffer( name, size, Buffer_Flags::copy_src | Buffer_Flags::cpu_readwrite ); gpuonly = dev->create_buffer( name, size, Buffer_Flags::copy_dst | flags ); } void Staged_Buffer::destroy(Device* dev) { dev->destroy_buffer(stage); dev->destroy_buffer(gpuonly); } void* Staged_Buffer::map(Device* dev) { return dev->map_buffer(stage, 0, size); } void Staged_Buffer::unmap(Device* dev) { dev->unmap_buffer(stage); } void Staged_Buffer::update(Context& ctx) { ctx.copy(gpuonly, stage); } void Device_Debug_Hooks::on_rpo_create(const Render_Pass& rpo) { (void)rpo; } void Device_Debug_Hooks::on_rpo_destroy(const Render_Pass& rpo) { (void)rpo; } void Device_Debug_Hooks::on_fbo_create(const Render_Pass& pass) { (void)pass; } void Device_Debug_Hooks::on_fbo_destroy(const Render_Pass& pass) { (void)pass; } void Device_Debug_Hooks::on_pso_create(const Pipeline& pso) { (void)pso; } void Device_Debug_Hooks::on_pso_destroy(const Pipeline& pso) { (void)pso; } void Device_Debug_Hooks::on_dso_create(const Pipeline& pso) { (void)pso; } void Device_Debug_Hooks::on_dso_destroy(const Pipeline& pso) { (void)pso; } void Device_Debug_Hooks::on_acquire(Context& ctx) { (void)ctx; } void Device_Debug_Hooks::on_submit(Context& ctx) { (void)ctx; } void Device_Debug_Hooks::on_present(Context& ctx) { (void)ctx; } void Device_Debug_Hooks::on_page_alloc(size_t size) { (void)size; } void Device_Debug_Hooks::on_vram_alloc(size_t size, size_t align) { (void)size; (void)align; }