#include "app.hpp" #include "video.hpp" #define device_heap_size (1024 * 1024 * 8) #define max_textures 1024 #define max_buffers 1024 #define max_vertex_formats 64 #define max_rpos 64 #define max_pipelines 64 #define max_descriptor_sets 64 #define max_shaders 32 #define max_samplers 16 extern "C" { #include "memory.h" #include "pack.h" #include "plat.h" #include "sc/sh_enums.h" #include "str.h" } #include #include #include #include #define VK_USE_PLATFORM_XLIB_KHR #define GLAD_VULKAN_IMPLEMENTATION #include "glad_vk.h" const char* device_exts[] = { VK_KHR_SWAPCHAIN_EXTENSION_NAME, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME }; extern "C" { VkSurfaceKHR app_create_vk_surface(App* app, VkInstance inst); void app_destroy_vk_surface( App* app, VkInstance inst, VkSurfaceKHR surf ); } struct Device_Vk; template struct Hash_Function {}; template struct Hash_Map { enum { flags_tombstone = 1 << 0, flags_null = 1 << 1 }; Key keys[size]; Value values[size]; uint8_t flags[size]; void init() { int i; for (i = 0; i < size; i++) flags[i] = flags_null; } int find(const Key& to_find) { int tombstone = -1, i; int bucket = (int)(Hash_Function{}(to_find) % (size_t)size); for (i = 0; i < size; i++) { Key& k = keys[bucket]; uint8_t flag = flags[bucket]; if (flag & flags_null) { if (flag & flags_tombstone) { if (tombstone < 0) tombstone = bucket; } else return tombstone >= 0? 
tombstone: bucket; } else if (k == to_find) return bucket; bucket = (bucket + 1) % size; } if (tombstone >= 0) return tombstone; return -1; } Value& set(const Key& k, const Value& v) { int bucket = find(k); assert(bucket >= 0); /* full */ flags[bucket] = 0; keys[bucket] = k; values[bucket] = v; return values[bucket]; } Value* get(const Key& k) { int bucket = find(k); if (bucket < 0 || flags[bucket] & flags_null) return 0; return &values[bucket]; } Value& operator[](const Key& k) { int bucket = find(k); assert(bucket >= 0); return values[bucket]; } void remove(const Key& k) { int bucket = find(k); assert(bucket >= 0); flags[bucket] = flags_null | flags_tombstone; } int has(const Key& k) { int bucket = find(k); return bucket >= 0 && ~flags[bucket] & flags_null; } template struct iterator { Table* table; int bucket; void init_begin(Table* t) { bucket = 0; table = t; while ( bucket < size && table->flags[bucket] & flags_null ) bucket++; } void init_end(Table* t) { bucket = size; table = t; } bool equals(const iterator& other) { return bucket == other.bucket && table == other.table; } bool operator==(const iterator
& other) { return equals(other); } bool operator!=(const iterator
& other) { return !equals(other); } iterator
operator++() { bucket++; while ( bucket < size && table->flags[bucket] & flags_null ) bucket++; return *this; } std::pair operator*() { return { table->keys[bucket], table->values[bucket] }; } std::pair operator*() const { return { table->keys[bucket], table->values[bucket] }; } }; iterator> begin() { iterator> r; r.init_begin(this); return r; } iterator> end() { iterator> r; r.init_end(this); return r; } iterator> begin() const { iterator> r; r.init_begin(this); return r; } iterator> end() const { iterator> r; r.init_end(this); return r; } }; static VkFormat get_vk_format(Texture_Format fmt) { switch (fmt) { case texture_format_r8i: return VK_FORMAT_R8_UNORM; case texture_format_r16f: return VK_FORMAT_R16_SFLOAT; case texture_format_r32f: return VK_FORMAT_R32_SFLOAT; case texture_format_rg8i: return VK_FORMAT_R8G8_UNORM; case texture_format_rg16f: return VK_FORMAT_R16G16_SFLOAT; case texture_format_rg32f: return VK_FORMAT_R32G32_SFLOAT; case texture_format_rgb8i: return VK_FORMAT_R8G8B8_UNORM; case texture_format_rgb16f: return VK_FORMAT_R16G16B16_SFLOAT; case texture_format_rgb32f: return VK_FORMAT_R32G32B32_SFLOAT; case texture_format_rgba8i: return VK_FORMAT_R8G8B8A8_UNORM; case texture_format_rgba16f: return VK_FORMAT_R16G16B16A16_SFLOAT; case texture_format_rgba32f: return VK_FORMAT_R32G32B32A32_SFLOAT; case texture_format_bc1: return VK_FORMAT_BC1_RGB_UNORM_BLOCK; default: assert(0); return VK_FORMAT_UNDEFINED; } } VkImageLayout state_to_image_layout(Resource_State s) { switch (s) { case undefined: return VK_IMAGE_LAYOUT_UNDEFINED; case copy_dst: return VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; case copy_src: return VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; case shader_read: return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; case render_target: return VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; case presentable: return VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; } assert(0); return VK_IMAGE_LAYOUT_UNDEFINED; } static void* vk_alloc( void* uptr, size_t size, size_t alignment, 
VkSystemAllocationScope scope ) { Device* d = (Device*)uptr; void* r; (void)scope; if (!size) return 0; r = heap_alloc_aligned( d->heap, size, alignment ); if (!r) { print_err("Out of memory."); pbreak(4096); } return r; } static void vk_free( void* uptr, void* ptr ) { Device* d = (Device*)uptr; heap_free(d->heap, ptr); } static void* vk_realloc( void* uptr, void* old, size_t size, size_t alignment, VkSystemAllocationScope scope ) { int os; void* na; (void)scope; if (!old) return vk_alloc(uptr, size, alignment, scope); if (!size) { vk_free(uptr, old); return 0; } os = heap_block_size(old); na = vk_alloc(uptr, size, alignment, scope); memcpy(na, old, std::min(os, (int)size)); vk_free(uptr, old); return na; } typedef struct { VkSurfaceCapabilitiesKHR cap; unsigned fmt_count, pm_count; VkSurfaceFormatKHR* fmts; VkPresentModeKHR* pms; } Swap_Cap; static void get_swap_cap( Device* d, VkPhysicalDevice dev, VkSurfaceKHR surf, Swap_Cap* cap ) { cap->fmts = 0; cap->pms = 0; vkGetPhysicalDeviceSurfaceCapabilitiesKHR( dev, surf, &cap->cap ); vkGetPhysicalDeviceSurfaceFormatsKHR( dev, surf, &cap->fmt_count, 0 ); if (cap->fmt_count) { cap->fmts = (VkSurfaceFormatKHR*)heap_alloc( d->heap, sizeof *cap->fmts * cap->fmt_count ); vkGetPhysicalDeviceSurfaceFormatsKHR( dev, surf, &cap->fmt_count, cap->fmts ); } vkGetPhysicalDeviceSurfacePresentModesKHR( dev, surf, &cap->pm_count, 0 ); if (cap->pm_count) { cap->pms = (VkPresentModeKHR*)heap_alloc( d->heap, sizeof *cap->pms * cap->pm_count ); vkGetPhysicalDeviceSurfacePresentModesKHR( dev, surf, &cap->pm_count, cap->pms ); } } static void deinit_swap_cap( Device* d, Swap_Cap* cap ) { if (cap->fmts) heap_free(d->heap, cap->fmts); if (cap->pms) heap_free(d->heap, cap->pms); } struct Late_Terminated { Late_Terminated* next; virtual void destroy(Device_Vk* dev) = 0; }; struct Swapchain { VkSwapchainKHR swapchain; Texture_Id* textures; VkSurfaceFormatKHR format; VkExtent2D size; VkPresentModeKHR mode; int image_count; void init(const App& 
app, Device_Vk* dev); void initr(const App& app, Device_Vk* dev); void recreate(const App& app, Device_Vk* dev); void get_images(Device_Vk* dev); void destroy(Device_Vk* dev); Texture_Id create_image( Device_Vk* dev, VkImage image, VkImageView view, int w, int h ); }; #define max_contexts 16 enum { context_state_avail = 1 << 0, context_state_init = 1 << 1 }; struct Shader_Vk : public Shader, public Late_Terminated { struct Attribute { char name[28]; SVariable_Type type; int index; }; struct Binding { char name[24]; SBinding_Rate rate; int attr_count; int index; int* attributes; }; struct Vertex_Format { Binding* bindings; Attribute* attributes; int attr_count; int binding_count; bool init(Device_Vk* dev, Pack_File* f); void destroy(Device_Vk* dev); int find_binding(const char* name); int find_attribute(const char* name); }; struct Desc { char name[24]; int slot; int stage; }; SProgram_Type type; VkShaderModule modules[shader_type_count]; char entrypoints[shader_type_count][24]; Vertex_Format vfd; Desc* descs; int desc_count; bool init(Device_Vk* dev, Pack_File* f); bool init_module( Device_Vk* dev, int stage, char* buf, int size ); bool init_vertex_format( Device_Vk* dev, FILE* f ); void destroy(Device_Vk* dev) override; int find_descriptor(const char* name); static VkShaderStageFlagBits stage(Shader_Type type) { switch (type) { case shader_type_vertex: return VK_SHADER_STAGE_VERTEX_BIT; case shader_type_fragment: return VK_SHADER_STAGE_FRAGMENT_BIT; default: assert(0); return (VkShaderStageFlagBits)0; } } static int svariable_type_size(SVariable_Type type); }; struct Renderpass_Vk; struct Context_Vk : public Context { int state; Device_Vk* dev; VkCommandBuffer cb; VkCommandPool pool; VkFence fence; VkSemaphore semaphore; void init_pool(); void init_cb(); void init_sync(); void init(Device_Vk* device); void begin_record(); Context_Vk& acquire(Device_Vk* device); void release(); void destroy(); Renderpass_Vk& begin_rp(const Render_Pass& rp); void 
end_rp(Renderpass_Vk& rpo); }; struct Texture_Vk : public Texture, public Late_Terminated { VkImage image; VkImageView view; VkDeviceMemory memory; Resource_State state; void destroy(Device_Vk*) override; }; struct Buffer_Vk : public Buffer, public Late_Terminated { VkBuffer buf; VkDeviceMemory memory; VkDeviceSize size; int flags; void init(Device_Vk* dev, int flags, VkDeviceSize size); void destroy(Device_Vk* dev) override; static VkBufferUsageFlags get_usage(int flags) { VkBufferUsageFlags r = 0; if (flags & Buffer_Flags::index_buffer) r |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT; if (flags & Buffer_Flags::vertex_buffer) r |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; if (flags & Buffer_Flags::constant_buffer) r |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; if (flags & Buffer_Flags::storage_buffer) r |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; if (flags & Buffer_Flags::copy_src) r |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; if (flags & Buffer_Flags::copy_dst) r |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; return r; } static VkMemoryPropertyFlags get_memory_flags(int flags) { VkMemoryPropertyFlags r = 0; r |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; if (flags & Buffer_Flags::cpu_read) r |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; if (flags & Buffer_Flags::cpu_readwrite) r |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; return r; } }; struct Rpo_Key { bool is_first; Render_Pass rpo; bool operator==(const Rpo_Key& other) const { return is_first == other.is_first && rpo == other.rpo; } }; struct Pso_Key { Pipeline pso; Rpo_Key rpo; bool operator==(const Pso_Key& other) const { return rpo == other.rpo && pso.pipeline_eq(other.pso) && pso.desc_layout_eq(other.pso); } }; struct Dso_Key { Pipeline pip; bool operator==(const Dso_Key& other) const { return pip.desc_resources_eq(other.pip); } }; struct Renderpass_Vk { VkRenderPass rpo; VkFramebuffer fbo; VkClearValue clear; int age; void on_submit() { age = 0; } VkAttachmentLoadOp load_op_from_mode(Clear_Mode m); void init_rp(Device_Vk* dev, const Rpo_Key& rp); void 
init_fb(Device_Vk* dev, const Render_Pass& rp); void destroy(Device_Vk* dev); }; struct Pipeline_Vk { VkPipeline pip; VkPipelineLayout lay; VkDescriptorSetLayout dlay; int age; void init(Device_Vk* dev, const Pso_Key& desc); void destroy(Device_Vk* dev); void init_stages( Arena& scope, Device_Vk* dev, VkGraphicsPipelineCreateInfo& info, const Pipeline& desc ); void init_vertex_input( Arena& scope, Device_Vk* dev, VkGraphicsPipelineCreateInfo& info, const Pipeline& desc ); void init_input_assembly( Arena& scope, Device_Vk* dev, VkGraphicsPipelineCreateInfo& info, const Pipeline& desc ); void init_viewport( Arena& scope, Device_Vk* dev, VkGraphicsPipelineCreateInfo& info, const Pipeline& desc ); void init_rasterisation( Arena& scope, Device_Vk* dev, VkGraphicsPipelineCreateInfo& info, const Pipeline& desc ); void init_msaa( Arena& scope, Device_Vk* dev, VkGraphicsPipelineCreateInfo& info, const Pipeline& desc ); void init_depthstencil( Arena& scope, Device_Vk* dev, VkGraphicsPipelineCreateInfo& info, const Pipeline& desc ); void init_blending( Arena& scope, Device_Vk* dev, VkGraphicsPipelineCreateInfo& info, const Pipeline& desc ); void init_layout( Device_Vk* dev, const Pipeline& desc ); void init_descriptors( Device_Vk* dev, const Pipeline& desc ); void on_submit() { age = 0; } }; struct Descriptor_Set_Vk { VkDescriptorPool dp; VkDescriptorSet dset; int age; void init( Device_Vk* dev, const Pipeline_Vk& pip, const Pipeline& desc ); void destroy(Device_Vk* dev); void on_submit() { age = 0; } }; struct Vertex_Format_Vk { VkVertexInputBindingDescription* bindings; int binding_count; VkVertexInputAttributeDescription* attrs; int attr_count; void init(Device_Vk* dev, const Vertex_Format_Desc& desc); void destroy(Device_Vk* dev); static VkFormat format_from_svar_type(SVariable_Type type); }; struct Sampler_Vk : public Late_Terminated { VkSampler sampler; void init(Device_Vk* dev, const Sampler_State& s); void destroy(Device_Vk* dev) override; static VkFilter 
get_filter(Filter_Mode mode); static VkSamplerMipmapMode get_mipmap_mode(Filter_Mode mode); static VkSamplerAddressMode get_mode(Address_Mode mode); }; template<> struct Hash_Function { size_t operator()(const Rpo_Key& k) const { return (size_t)fnv1a64_2(fnv1a64( (uint8_t*)&k.rpo, sizeof k.rpo ), (uint8_t*)&k.is_first, 1); } }; template<> struct Hash_Function { size_t operator()(const Pso_Key& k) const { uint64_t rpoh = Hash_Function{}(k.rpo); return fnv1a64_2( k.pso.pipeline_hash, (uint8_t*)&rpoh, sizeof rpoh ); } }; template<> struct Hash_Function { size_t operator()(const Dso_Key& k) const { return k.pip.descriptor_resource_hash; } }; template<> struct Hash_Function { size_t operator()(Texture_Id id) const { return id.index; } }; template<> struct Hash_Function { size_t operator()(Buffer_Id id) const { return id.index; } }; template<> struct Hash_Function { size_t operator()(Shader_Id id) const { return id.index; } }; template<> struct Hash_Function { size_t operator()(Vertex_Format_Id id) const { return id.index; } }; template<> struct Hash_Function { size_t operator()(Sampler_Id id) const { return id.index; } }; template<> struct std::hash { size_t operator()(const Render_Pass& rp) const { return (size_t)fnv1a64((uint8_t*)&rp, sizeof rp); } }; struct Shader_Loader : public Asset_Loader { Device_Vk* dev; void init(Device_Vk* d); Asset* load(Arena* a, Arena* s, Pack_File* f) override; void unload(Asset* a) override; }; struct Texture_Loader : public Asset_Loader { Device_Vk* dev; static size_t calc_size(Texture_Format fmt, int w, int h); void init(Device_Vk* d); Asset* load(Arena* a, Arena* s, Pack_File* f) override; void unload(Asset* a) override; Buffer_Id upload(void* buf, size_t size); }; struct Terminator { Late_Terminated* queue; void execute(Device_Vk* dev) { Late_Terminated* obj = queue; for (; obj; obj = obj->next) obj->destroy(dev); queue = 0; } void add(Late_Terminated* obj) { if (queue) { obj->next = queue; queue = obj; } else { obj->next = 0; queue 
= obj; } } }; struct Device_Vk : public Device { VkAllocationCallbacks ac; VkInstance inst; VkDevice dev; VkPhysicalDevice phys_dev; VkSurfaceKHR surf; uint32_t backbuffer_index; Texture_Id backbuffer_id; Swap_Cap swap_cap; VkPhysicalDeviceMemoryProperties mem_props; int queue_index; VkQueue queue; Swapchain swapchain; Context_Vk contexts[max_contexts]; Context_Vk* current_ctx; Shader_Loader shader_loader; Texture_Loader texture_loader; #ifdef DEBUG VkDebugUtilsMessengerEXT msg; #endif Hash_Map textures; Hash_Map buffers; Hash_Map< Vertex_Format_Id, Vertex_Format_Vk, max_vertex_formats > vertex_formats; Hash_Map shaders; Hash_Map samplers; uint32_t texture_count; uint32_t buffer_count; uint32_t vertex_format_count; uint32_t shader_count; uint32_t sampler_count; Hash_Map rpo_cache; Hash_Map pso_cache; Hash_Map dso_cache; Terminator* terminators; uint32_t terminator_index; bool first_rp; Texture_Id alloc_texture(); Buffer_Id alloc_buffer(); Vertex_Format_Id alloc_vf(); Shader_Id alloc_shader(); Sampler_Id alloc_sampler(); void init_internal(); void deinit_internal(); void init_ac(); void create_inst(const char** exts, int count); void create_dev(Swap_Cap* swap_cap); void find_exts(const char** exts, int& count); bool has_validation(); void init_validation(); void create_surf(); void on_resize_internal(int w, int h); Renderpass_Vk& create_rpo(const Rpo_Key& rp); Renderpass_Vk& get_rpo(const Rpo_Key& rp); Pipeline_Vk& create_pso(const Pso_Key& pip); Pipeline_Vk& get_pso(const Pso_Key& pop); Descriptor_Set_Vk& create_dso( const Pipeline_Vk& pip, const Dso_Key& k ); Descriptor_Set_Vk& get_dso( const Pipeline_Vk& pip, const Dso_Key& k ); void collect_garbage(); void queue_destroy(Late_Terminated* obj); void create_terminators(); int find_memory_type( uint32_t filter, VkMemoryPropertyFlags flags ); }; #ifdef DEBUG static VkBool32 debug_callback( VkDebugUtilsMessageSeverityFlagBitsEXT sev, VkDebugUtilsMessageTypeFlagsEXT type, const VkDebugUtilsMessengerCallbackDataEXT* 
data, void* uptr ) { (void)sev; (void)uptr; if (sev <= VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) return 0; switch (sev) { case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: print("%s\n", data->pMessage); break; case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: print_war("%s\n", data->pMessage); break; case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: print_err("%s\n", data->pMessage); break; default: break; } pbreak((int)type); return 0; } static VkResult create_dmesg( Device_Vk* d, const VkDebugUtilsMessengerCreateInfoEXT* information, const VkAllocationCallbacks* allocator, VkDebugUtilsMessengerEXT* messenger ) { PFN_vkCreateDebugUtilsMessengerEXT f; f = (PFN_vkCreateDebugUtilsMessengerEXT) vkGetInstanceProcAddr( d->inst, "vkCreateDebugUtilsMessengerEXT" ); return f? f(d->inst, information, allocator, messenger): VK_ERROR_EXTENSION_NOT_PRESENT; } static void destroy_dmesg( VkInstance instance, VkDebugUtilsMessengerEXT messenger, const VkAllocationCallbacks* allocator ) { PFN_vkDestroyDebugUtilsMessengerEXT f; f = (PFN_vkDestroyDebugUtilsMessengerEXT) vkGetInstanceProcAddr( instance, "vkDestroyDebugUtilsMessengerEXT" ); if (f) f(instance, messenger, allocator); } void Device_Vk::init_validation() { VkDebugUtilsMessengerCreateInfoEXT mi{}; VkResult r; mi.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; mi.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; mi.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; mi.pfnUserCallback = debug_callback; r = create_dmesg( this, &mi, &ac, &msg ); if (r != VK_SUCCESS) { print_err("Failed to create debug messenger.\n"); pbreak(r); } } #endif bool Device_Vk::has_validation() { unsigned count, i; int f; VkLayerProperties* props; VkResult r; r = 
vkEnumerateInstanceLayerProperties(&count, 0); if (!count || r != VK_SUCCESS) return 0; props = (VkLayerProperties*)heap_alloc(heap, count * sizeof *props); vkEnumerateInstanceLayerProperties(&count, props); for (f = 0, i = 0; i < count; i++) { if (strcmp( props[i].layerName, "VK_LAYER_KHRONOS_validation" )) { f = 1; break; } } heap_free(heap, props); return f; } void Device_Vk::find_exts(const char** exts, int& count) { app->get_vk_exts(exts, count); exts[count++] = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME; #ifdef DEBUG exts[count++] = VK_EXT_DEBUG_UTILS_EXTENSION_NAME; #endif } void Device_Vk::init_ac() { ac.pUserData = this; ac.pfnAllocation = vk_alloc; ac.pfnReallocation = vk_realloc; ac.pfnFree = vk_free; ac.pfnInternalAllocation = 0; ac.pfnInternalFree = 0; } void Device_Vk::create_inst(const char** exts, int ext_count) { VkInstanceCreateInfo ci{}; VkApplicationInfo ai{}; VkResult r; #ifdef DEBUG const char* vln = "VK_LAYER_KHRONOS_validation"; #endif ai.apiVersion = VK_API_VERSION_1_0; ai.pApplicationName = "C2"; ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; ci.pApplicationInfo = &ai; ci.enabledExtensionCount = (unsigned)ext_count; ci.ppEnabledExtensionNames = exts; #ifdef DEBUG ci.enabledLayerCount = has_validation(); ci.ppEnabledLayerNames = &vln; if (!ci.enabledLayerCount) print_war("No validation layers."); #endif r = vkCreateInstance(&ci, &ac, &inst); if (r != VK_SUCCESS) { print_err("Failed to create a Vulkan instance\n"); pbreak(r); } } static int proc_swap( Device_Vk* d, VkPhysicalDevice dev, Swap_Cap* sc ) { get_swap_cap(d, dev, d->surf, sc); return sc->fmt_count > 0 && sc->pm_count > 0; } int proc_qf(Device_Vk* d, VkPhysicalDevice dev) { unsigned fc, i; int r = 0; VkBool32 press; VkQueueFamilyProperties* fs, * p; vkGetPhysicalDeviceQueueFamilyProperties( dev, &fc, 0 ); fs = (VkQueueFamilyProperties*)heap_alloc(d->heap, (int)fc * sizeof *fs); vkGetPhysicalDeviceQueueFamilyProperties( dev, &fc, fs ); for (i = 0; i < fc; i++) { p = 
&fs[i]; vkGetPhysicalDeviceSurfaceSupportKHR( dev, i, d->surf, &press ); if ( p->queueFlags & VK_QUEUE_GRAPHICS_BIT && press ) { d->queue_index = (int)i; r = 1; goto fin; } } fin: heap_free(d->heap, fs); return r; } static int sup_exts(Device_Vk* d, VkPhysicalDevice dev) { int r = 0, i, f; unsigned c, j; int extc = sizeof *device_exts / sizeof *device_exts; VkExtensionProperties* avail; vkEnumerateDeviceExtensionProperties(dev, 0, &c, 0); avail = (VkExtensionProperties*)heap_alloc(d->heap, c * sizeof *avail); vkEnumerateDeviceExtensionProperties( dev, 0, &c, avail ); for (i = 0; i < extc; i++) { f = 0; for (j = 0; j < c; j++) { if (!strcmp(device_exts[i], avail[j].extensionName)) { f = 1; break; } } if (!f) goto fin; } r = 1; fin: heap_free(d->heap, avail); return r; } VkPhysicalDevice get_phys_dev(Device_Vk* d, Swap_Cap* sc) { unsigned dc, i; VkPhysicalDevice* devs, dev; vkEnumeratePhysicalDevices(d->inst, &dc, 0); if (!dc) { print_err( "Couldn't find any vulkan-capable graphics hardware.\n" ); pbreak(400); } devs = (VkPhysicalDevice*)heap_alloc(d->heap, (int)dc * sizeof *devs); vkEnumeratePhysicalDevices(d->inst, &dc, devs); for (i = 0; i < dc; i++) { dev = devs[i]; if ( proc_swap(d, dev, sc) && proc_qf(d, dev) && sup_exts(d, dev) ) { heap_free(d->heap, devs); return dev; } deinit_swap_cap(d, sc); } print_err("Couldn't find a suitable GPU.\n"); pbreak(401); heap_free(d->heap, devs); return 0; } void Device_Vk::create_dev(Swap_Cap* swap_cap) { const float priority = 0.0f; VkDeviceQueueCreateInfo qi{}; VkPhysicalDeviceCustomBorderColorFeaturesEXT border{}; VkDeviceCreateInfo di{}; VkPhysicalDeviceFeatures pdf{}; VkResult r; phys_dev = get_phys_dev(this, swap_cap); vkGetPhysicalDeviceMemoryProperties(phys_dev, &mem_props); border.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT; border.customBorderColors = true; qi.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; qi.queueFamilyIndex = queue_index; qi.queueCount = 1; qi.pQueuePriorities = 
&priority; di.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; di.pQueueCreateInfos = &qi; di.queueCreateInfoCount = 1; di.pEnabledFeatures = &pdf; di.enabledExtensionCount = sizeof device_exts / sizeof *device_exts; di.ppEnabledExtensionNames = device_exts; di.pNext = &border; r = vkCreateDevice( phys_dev, &di, &ac, &dev ); if (r != VK_SUCCESS) { print_err("Failed to create a Vulkan device.\n"); pbreak(r); } } void Device_Vk::init_internal() { const char* exts[16]; int ext_count = 0, i; gladLoaderLoadVulkan(0, 0, 0); textures.init(); texture_count = 1; buffers.init(); buffer_count = 1; vertex_formats.init(); vertex_format_count = 1; shaders.init(); shader_count = 1; samplers.init(); sampler_count = 1; rpo_cache.init(); pso_cache.init(); dso_cache.init(); shader_loader.init(this); texture_loader.init(this); register_asset_loader("CSH2", &shader_loader); register_asset_loader("TXTR", &texture_loader); find_exts(exts, ext_count); init_ac(); create_inst(exts, ext_count); #ifdef DEBUG if (has_validation()) init_validation(); #endif surf = app_create_vk_surface(app, inst); create_dev(&swap_cap); gladLoaderLoadVulkan(inst, phys_dev, dev); vkGetDeviceQueue(dev, (uint32_t)queue_index, 0, &queue); terminators = 0; terminator_index = 0; for (i = 0; i < max_contexts; i++) contexts[i].state = context_state_avail; swapchain.init(*app, this); create_terminators(); } void Device_Vk::create_terminators() { int i, count = swapchain.image_count; if (terminators) { for (i = 0; i < count; i++) terminators[i].execute(this); heap_free(heap, terminators); } terminators = (Terminator*)heap_alloc( heap, count * sizeof *terminators ); for (i = 0; i < count; i++) { terminators[i].queue = 0; } } void Device_Vk::deinit_internal() { int i, image_count = swapchain.image_count; vkDeviceWaitIdle(dev); swapchain.destroy(this); deinit_swap_cap(this, &swap_cap); app_destroy_vk_surface(app, inst, surf); for (auto i : rpo_cache) i.second.destroy(this); for (auto i : pso_cache) i.second.destroy(this); for 
(auto i : dso_cache) i.second.destroy(this); for (i = 0; i < max_contexts; i++) { auto& context = contexts[i]; if (context.state & context_state_init) context.destroy(); } for (i = 0; i < image_count; i++) { terminators[i].execute(this); } vkDestroyDevice(dev, &ac); #ifdef DEBUG destroy_dmesg( inst, msg, &ac ); #endif vkDestroyInstance(inst, &ac); } void Device_Vk::on_resize_internal(int w, int h) { (void)w; (void)h; vkDeviceWaitIdle(dev); deinit_swap_cap(this, &swap_cap); get_swap_cap(this, phys_dev, surf, &swap_cap); swapchain.recreate(*app, this); create_terminators(); } Renderpass_Vk& Device_Vk::create_rpo(const Rpo_Key& k) { VkClearValue clear{}; auto& rp = k.rpo; clear.color.float32[0] = (float)rp.clear.r / 255.0f; clear.color.float32[1] = (float)rp.clear.g / 255.0f; clear.color.float32[2] = (float)rp.clear.b / 255.0f; clear.color.float32[3] = (float)rp.clear.a / 255.0f; Renderpass_Vk rpo; rpo.init_rp(this, k); rpo.init_fb(this, rp); rpo.age = 0; rpo.clear = clear; return rpo_cache.set(k, rpo); } Renderpass_Vk& Device_Vk::get_rpo(const Rpo_Key& rp) { Renderpass_Vk* rpo = rpo_cache.get(rp); if (!rpo) return create_rpo(rp); return *rpo; } Pipeline_Vk& Device_Vk::create_pso(const Pso_Key& pip) { Pipeline_Vk pso; pso.age = 0; pso.init(this, pip); return pso_cache.set(pip, pso); } Pipeline_Vk& Device_Vk::get_pso(const Pso_Key& pip) { Pipeline_Vk* pso = pso_cache.get(pip); if (!pso) return create_pso(pip); return *pso; } Descriptor_Set_Vk& Device_Vk::create_dso( const Pipeline_Vk& pip, const Dso_Key& k ) { Descriptor_Set_Vk dso; dso.age = 0; dso.init(this, pip, k.pip); return dso_cache.set(k, dso); } Descriptor_Set_Vk& Device_Vk::get_dso( const Pipeline_Vk& pip, const Dso_Key& k ) { Descriptor_Set_Vk* dso = dso_cache.get(k); if (!dso) return create_dso(pip, k); return *dso; } void Renderpass_Vk::destroy(Device_Vk* dev) { vkDestroyRenderPass(dev->dev, rpo, &dev->ac); vkDestroyFramebuffer(dev->dev, fbo, &dev->ac); } void Device_Vk::collect_garbage() { int max_age = 
swapchain.image_count + 3; for (const auto& i: rpo_cache) { auto& rp = i.second; rp.age++; if (rp.age > max_age) { rp.destroy(this); rpo_cache.remove(i.first); } } for (const auto& i: pso_cache) { auto& pip = i.second; pip.age++; if (pip.age > max_age) { pip.destroy(this); pso_cache.remove(i.first); } } for (const auto& i: dso_cache) { auto& dso = i.second; dso.age++; if (dso.age > max_age) { dso.destroy(this); dso_cache.remove(i.first); } } } void Device_Vk::queue_destroy(Late_Terminated* obj) { terminators[terminator_index].add(obj); } int Device_Vk::find_memory_type( uint32_t filter, VkMemoryPropertyFlags flags ) { int i, e = mem_props.memoryTypeCount; auto* types = mem_props.memoryTypes; for (i = 0; i < e; i++) { if ( (filter & (1 << i)) && (types[i].propertyFlags & flags) == flags ) return i; } return -1; } VkAttachmentLoadOp Renderpass_Vk::load_op_from_mode( Clear_Mode m ) { switch (m) { case Clear_Mode::discard: return VK_ATTACHMENT_LOAD_OP_DONT_CARE; case Clear_Mode::clear: return VK_ATTACHMENT_LOAD_OP_CLEAR; case Clear_Mode::restore: return VK_ATTACHMENT_LOAD_OP_LOAD; } assert(0); return VK_ATTACHMENT_LOAD_OP_DONT_CARE; } void Renderpass_Vk::init_rp( Device_Vk* dev, const Rpo_Key& rpk ) { VkRenderPassCreateInfo ri{}; VkAttachmentDescription ad{}; VkAttachmentReference ar{}; VkSubpassDescription sd{}; VkResult r; auto& rp = rpk.rpo; ad.format = dev->swapchain.format.format; ad.samples = VK_SAMPLE_COUNT_1_BIT; ad.loadOp = load_op_from_mode(rp.mode); ad.storeOp = VK_ATTACHMENT_STORE_OP_STORE; ad.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; ad.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; if (rpk.is_first) ad.initialLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; else ad.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; ad.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; ar.attachment = 0; ar.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; sd.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; sd.colorAttachmentCount = 1; 
sd.pColorAttachments = &ar; ri.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; ri.attachmentCount = 1; ri.pAttachments = &ad; ri.subpassCount = 1; ri.pSubpasses = &sd; r = vkCreateRenderPass(dev->dev, &ri, &dev->ac, &rpo); if (r != VK_SUCCESS) { print_err("Failed to create a render pass\n"); pbreak(r); } } void Renderpass_Vk::init_fb( Device_Vk* dev, const Render_Pass& rp ) { const Texture_Vk& texture = *(const Texture_Vk*)&dev->get_texture(rp.target); VkResult r; VkFramebufferCreateInfo fbi{}; fbi.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; fbi.renderPass = rpo; fbi.width = texture.w; fbi.height = texture.h; fbi.layers = 1; fbi.attachmentCount = 1; fbi.pAttachments = &texture.view; r = vkCreateFramebuffer(dev->dev, &fbi, &dev->ac, &fbo); if (r != VK_SUCCESS) { print_err("Failed to create a framebuffer.\n"); pbreak(r); } } static int get_image_count(const Swap_Cap& s) { const VkSurfaceCapabilitiesKHR& cap = s.cap; return cap.minImageCount + (cap.minImageCount < cap.maxImageCount); } static VkExtent2D choose_swap_extent(const App& app, const VkSurfaceCapabilitiesKHR& cap) { VkExtent2D r = { (uint32_t)app.w, (uint32_t)app.h }; r.width = std::min(r.width, cap.maxImageExtent.width); r.height = std::min(r.height, cap.maxImageExtent.height); r.width = std::max(r.width, cap.minImageExtent.width); r.height = std::max(r.height, cap.minImageExtent.height); return r; } static VkSurfaceFormatKHR choose_swap_format(const Swap_Cap& cap) { unsigned i; for (i = 0; i < cap.fmt_count; i++) { const auto& fmt = cap.fmts[i]; if ( fmt.format == VK_FORMAT_B8G8R8A8_SRGB && fmt.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR ) return fmt; } print_err("Failed to find a surface that supports VK_FORMAT_B8G8R8A8_SRGB.\n"); return cap.fmts[0]; } static VkPresentModeKHR choose_swap_mode(const Swap_Cap& cap, bool vsync) { (void)vsync; (void)cap; /* todo */ return VK_PRESENT_MODE_FIFO_KHR; } static VkImageView make_view( Device_Vk* dev, VkImage image, VkFormat fmt, VkImageAspectFlagBits 
flags ) { VkImageViewCreateInfo vi{}; VkResult r; VkImageView view; vi.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; vi.image = image; vi.viewType = VK_IMAGE_VIEW_TYPE_2D; vi.format = fmt; vi.subresourceRange.aspectMask = flags; vi.subresourceRange.baseMipLevel = 0; vi.subresourceRange.levelCount = 1; vi.subresourceRange.baseArrayLayer = 0; vi.subresourceRange.layerCount = 1; r = vkCreateImageView(dev->dev, &vi, &dev->ac, &view); if (r != VK_SUCCESS) { print_err("Failed to make image view.\n"); pbreak((int)r); } return view; } void Swapchain::init(const App& app, Device_Vk* dev) { swapchain = (VkSwapchainKHR)0; textures = 0; initr(app, dev); } void Swapchain::initr(const App& app, Device_Vk* dev) { image_count = get_image_count(dev->swap_cap); size = choose_swap_extent(app, dev->swap_cap.cap); format = choose_swap_format(dev->swap_cap); mode = choose_swap_mode(dev->swap_cap, false); { VkResult r; VkSwapchainCreateInfoKHR si{}; si.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, si.surface = dev->surf; si.minImageCount = image_count; si.imageFormat = format.format; si.imageColorSpace = format.colorSpace; si.imageExtent = size; si.imageArrayLayers = 1; si.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; si.preTransform = dev->swap_cap.cap.currentTransform; si.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; si.presentMode = mode; si.clipped = VK_TRUE; si.oldSwapchain = swapchain; si.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; r = vkCreateSwapchainKHR(dev->dev, &si, &dev->ac, &swapchain); if (r != VK_SUCCESS) { print_err("Failed to create swapchain.\n"); pbreak(r); } } get_images(dev); } void Swapchain::recreate(const App& app, Device_Vk* dev) { Swapchain old = *this; vkDeviceWaitIdle(dev->dev); initr(app, dev); old.destroy(dev); } Texture_Id Swapchain::create_image( Device_Vk* dev, VkImage image, VkImageView view, int w, int h ) { Texture_Id id = dev->alloc_texture(); Texture_Vk& tex = *(Texture_Vk*)&dev->get_texture(id); tex.image = image; tex.view = 
view;
	tex.w = w;
	tex.h = h;
	tex.alias = true;
	return id;
}

/* Fetches the swapchain's images, wraps each one in a view and a
 * device texture (stored in textures), and records a transition of
 * every image to the presentable state on a freshly acquired context
 * that is submitted before returning. image_count is overwritten with
 * the count reported by Vulkan. */
void Swapchain::get_images(Device_Vk* dev) {
	unsigned count;
	int i;
	VkImage* images;
	/* first call only queries the number of images */
	vkGetSwapchainImagesKHR(dev->dev, swapchain, &count, 0);
	Context& ctx = dev->acquire();
	image_count = count;
	images = (VkImage*)heap_alloc(
		dev->heap,
		sizeof *images * image_count
	);
	textures = (Texture_Id*)heap_alloc(
		dev->heap,
		sizeof *textures * image_count
	);
	vkGetSwapchainImagesKHR(dev->dev, swapchain, &count, images);
	for (i = 0; i < image_count; i++) {
		VkImageView view = make_view(dev,
			images[i],
			format.format,
			VK_IMAGE_ASPECT_COLOR_BIT
		);
		textures[i] = create_image(
			dev,
			images[i],
			view,
			size.width,
			size.height
		);
		/* needs to be presentable since the first renderpass
		 * will expect it to be presentable from "last" frame */
		ctx.transition(textures[i], Resource_State::presentable);
	}
	dev->submit(ctx);
	/* the raw image handles are only needed while building textures */
	heap_free(dev->heap, images);
}

/* Queues every swapchain texture for destruction, destroys the
 * swapchain handle and frees the texture id array. */
void Swapchain::destroy(Device_Vk* dev) {
	int i;
	for (i = 0; i < image_count; i++)
		dev->destroy_texture(textures[i]);
	vkDestroySwapchainKHR(dev->dev, swapchain, &dev->ac);
	heap_free(dev->heap, textures);
	textures = 0;
}

/* Allocates a Device_Vk from the arena, constructs it in place and
 * initialises it. */
Device* Device::create(Arena* a, App* ap) {
	Device_Vk* d = (Device_Vk*)arena_alloc(a, sizeof *d);
	new(d) Device_Vk();
	d->init(a, ap);
	return d;
}

/* Stores the arena and app, carves device_heap_size bytes out of the
 * arena for the device heap, then runs the Vulkan-specific init. */
void Device::init(Arena* a, App* ap) {
	void* hm;
	arena = a;
	app = ap;
	hm = arena_alloc(a, device_heap_size);
	heap = (Heap*)arena_alloc(a, sizeof *heap);
	init_heap(heap, hm, device_heap_size);
	((Device_Vk*)this)->init_internal();
}

/* Forwards to the Vulkan-specific teardown. */
void Device::destroy() {
	((Device_Vk*)this)->deinit_internal();
}

/* Forwards the app's current size to the Vulkan-specific resize
 * handler. */
void Device::on_resize() {
	((Device_Vk*)this)->on_resize_internal(app->w, app->h);
}

/* Starts a frame: collects garbage, acquires the frame's context,
 * advances terminator_index (wrapping at the swapchain image count)
 * and executes that terminator, then acquires the next swapchain
 * image. The acquire signals the context's semaphore.
 * NOTE(review): the result of vkAcquireNextImageKHR is not checked. */
void Device::begin_frame() {
	Device_Vk* dev = (Device_Vk*)this;
	dev->collect_garbage();
	dev->current_ctx = (Context_Vk*)&acquire();
	dev->terminator_index++;
	dev->terminator_index %= dev->swapchain.image_count;
	dev->terminators[dev->terminator_index].execute(dev);
	vkAcquireNextImageKHR(
		dev->dev,
		dev->swapchain.swapchain,
		UINT64_MAX,
dev->current_ctx->semaphore,
		VK_NULL_HANDLE,
		&dev->backbuffer_index
	);
	dev->backbuffer_id = dev->swapchain.textures[dev->backbuffer_index];
	/* the next render pass begun is the first of this frame */
	dev->first_rp = true;
}

/* Ends recording on the context, submits its command buffer with the
 * context's fence, blocks until that fence signals, then releases the
 * context. No semaphores are used on this path. */
void Device::submit(Context& ctx_) {
	Context_Vk* ctx = (Context_Vk*)&ctx_;
	Device_Vk* dev = (Device_Vk*)this;
	VkSubmitInfo si{};
	si.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
	/* disabled semaphore wait/signal:
	si.waitSemaphoreCount = 1;
	si.pWaitSemaphores = &ctx->semaphore;
	si.pWaitDstStageMask = &stage;
	si.signalSemaphoreCount = 1;
	si.pSignalSemaphores = &ctx->semaphore;*/
	si.commandBufferCount = 1;
	si.pCommandBuffers = &ctx->cb;
	vkEndCommandBuffer(ctx->cb);
	vkQueueSubmit(dev->queue, 1, &si, ctx->fence);
	ctx->wait();
	ctx->release();
}

/* Finishes the frame: transitions the backbuffer to presentable,
 * submits the frame context's command buffer and presents the acquired
 * image. The submit both waits on and signals the context's semaphore,
 * and the present waits on that same semaphore. The context is
 * released without waiting on its fence here. */
void Device::present() {
	Device_Vk* dev = (Device_Vk*)this;
	Context_Vk* ctx = dev->current_ctx;
	VkPresentInfoKHR pi{};
	VkSubmitInfo si{};
	VkPipelineStageFlags stage =
		VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
	ctx->transition(
		dev->get_backbuffer(),
		Resource_State::presentable
	);
	si.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
	si.waitSemaphoreCount = 1;
	si.pWaitSemaphores = &ctx->semaphore;
	si.pWaitDstStageMask = &stage;
	si.signalSemaphoreCount = 1;
	si.pSignalSemaphores = &ctx->semaphore;
	si.commandBufferCount = 1;
	si.pCommandBuffers = &ctx->cb;
	vkEndCommandBuffer(ctx->cb);
	vkQueueSubmit(dev->queue, 1, &si, ctx->fence);
	pi.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
	pi.waitSemaphoreCount = 1;
	pi.pWaitSemaphores = &ctx->semaphore;
	pi.swapchainCount = 1;
	pi.pSwapchains = &dev->swapchain.swapchain;
	pi.pImageIndices = &dev->backbuffer_index;
	vkQueuePresentKHR(dev->queue, &pi);
	ctx->release();
}

/* Returns the texture id of the swapchain image acquired for the
 * current frame. */
Texture_Id Device::get_backbuffer() {
	return ((Device_Vk*)this)->backbuffer_id;
}

/* Looks up a texture by id in the device's texture table. */
Texture& Device::get_texture(Texture_Id id) {
	return ((Device_Vk*)this)->textures[id];
}

/* Reserves the next texture id and stores a zero-initialised
 * Texture_Vk under it. */
Texture_Id Device_Vk::alloc_texture() {
	Texture_Vk tex{};
	Texture_Id id(texture_count++);
	tex.id = id;
	textures.set(id, tex);
	return id;
}

/* Reserves the next buffer id and stores a zero-initialised Buffer_Vk
 * under it. */
Buffer_Id Device_Vk::alloc_buffer() {
	Buffer_Vk buf{};
	Buffer_Id id(buffer_count++);
	buf.id = id;
	buffers.set(id,
buf);
	return id;
}

/* Reserves the next vertex format id and stores a zero-initialised
 * Vertex_Format_Vk under it. */
Vertex_Format_Id Device_Vk::alloc_vf() {
	Vertex_Format_Vk vf{};
	Vertex_Format_Id id(vertex_format_count++);
	vertex_formats.set(id, vf);
	return id;
}

/* Allocates a vertex format slot and initialises it from desc. */
Vertex_Format_Id Device::create_vertex_format(
	const Vertex_Format_Desc& desc
) {
	Device_Vk* dev = (Device_Vk*)this;
	Vertex_Format_Id id = dev->alloc_vf();
	dev->vertex_formats[id].init(dev, desc);
	return id;
}

/* Destroys the vertex format immediately. The entry itself is not
 * removed from the vertex_formats table here. */
void Device::destroy_vertex_format(Vertex_Format_Id id) {
	Device_Vk* dev = (Device_Vk*)this;
	Vertex_Format_Vk& vf = dev->vertex_formats[id];
	vf.destroy(dev);
}

/* Reserves the next shader id and stores a zero-initialised Shader_Vk
 * under it. */
Shader_Id Device_Vk::alloc_shader() {
	Shader_Vk s{};
	Shader_Id id(shader_count++);
	s.id = id;
	shaders.set(id, s);
	return id;
}

/* Reserves the next sampler id and stores a zero-initialised
 * Sampler_Vk under it. */
Sampler_Id Device_Vk::alloc_sampler() {
	Sampler_Vk s{};
	Sampler_Id id(sampler_count++);
	samplers.set(id, s);
	return id;
}

/* Hands the texture to the device's destruction queue rather than
 * destroying it on the spot. */
void Device::destroy_texture(Texture_Id id) {
	Device_Vk* dev = (Device_Vk*)this;
	dev->queue_destroy((Texture_Vk*)&dev->get_texture(id));
}

/* Blocks (no timeout) until this context's fence is signalled. */
void Context::wait() {
	Context_Vk* ctx = (Context_Vk*)this;
	Device_Vk* dev = ctx->dev;
	vkWaitForFences(
		dev->dev,
		1,
		&ctx->fence,
		VK_TRUE,
		UINT64_MAX
	);
}

/* Records a single draw in its own render pass: looks up the pipeline
 * and descriptor set for (p, rp), begins the pass, binds the pipeline,
 * descriptor set and vertex buffers, draws, then ends the pass. */
void Context::submit(
	const Draw& draw,
	const Pipeline& p,
	const Render_Pass& rp
) {
	Context_Vk* ctx = (Context_Vk*)this;
	Device_Vk* dev = ctx->dev;
	Vertex_Buffer_Binding* binding;
	/* the keys must be built before begin_rp(), which clears first_rp */
	Rpo_Key rpo_key = { dev->first_rp, rp };
	Pso_Key pso_key = { p, rpo_key };
	Pipeline_Vk& pso = dev->get_pso(pso_key);
	Descriptor_Set_Vk& dso = dev->get_dso(pso, *(Dso_Key*)&p);
	auto& rpo = ctx->begin_rp(rp);
	/* the tracked state of the backbuffer is updated by hand here */
	Texture_Vk& target = *(Texture_Vk*)&dev->get_texture(
		dev->get_backbuffer()
	);
	target.state = Resource_State::render_target;
	vkCmdBindPipeline(
		ctx->cb,
		VK_PIPELINE_BIND_POINT_GRAPHICS,
		pso.pip
	);
	vkCmdBindDescriptorSets(
		ctx->cb,
		VK_PIPELINE_BIND_POINT_GRAPHICS,
		pso.lay,
		0,
		1,
		&dso.dset,
		0,
		0
	);
	/* draw.verts is walked until an entry with a zero id.
	 * NOTE(review): every entry is bound at firstBinding 0, so a later
	 * entry replaces an earlier one - confirm this is intended for
	 * draws with more than one vertex buffer. */
	for (binding = draw.verts; binding->id; binding++) {
		VkBuffer buf = ((Buffer_Vk*)&dev->get_buffer(binding->id))->buf;
		VkDeviceSize offset = (VkDeviceSize)binding->offset;
		vkCmdBindVertexBuffers(ctx->cb, 0, 1, &buf, &offset);
	}
vkCmdDraw( ctx->cb, draw.vertex_count, draw.instance_count, draw.first_vertex, draw.first_instance ); ctx->end_rp(rpo); pso.on_submit(); dso.on_submit(); } void Context::submit( const Draw* draws, int count, const Pipeline& p, const Render_Pass& rp ) { Context_Vk* ctx = (Context_Vk*)this; Device_Vk* dev = ctx->dev; (void)draws; (void)count; (void)p; (void)rp; (void)dev; assert(0); /* todo */ } void Context::copy(Buffer_Id dst, Buffer_Id src) { Context_Vk* ctx = (Context_Vk*)this; Device_Vk* dev = ctx->dev; Buffer_Vk& a = *(Buffer_Vk*)&dev->get_buffer(dst); Buffer_Vk& b = *(Buffer_Vk*)&dev->get_buffer(src); VkBufferCopy region{}; region.srcOffset = 0; region.dstOffset = 0; region.size = b.size; vkCmdCopyBuffer( ctx->cb, b.buf, a.buf, 1, ®ion ); } void Context::copy(Texture_Id dst, Buffer_Id src) { Context_Vk* ctx = (Context_Vk*)this; Device_Vk* dev = ctx->dev; Texture_Vk& a = *(Texture_Vk*)&dev->get_texture(dst); Buffer_Vk& b = *(Buffer_Vk*)&dev->get_buffer(src); VkBufferImageCopy c{}; c.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; c.imageSubresource.layerCount = 1; c.imageExtent.width = a.w; c.imageExtent.height = a.h; c.imageExtent.depth = 1; vkCmdCopyBufferToImage( ctx->cb, b.buf, a.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &c ); } void Context::transition(Texture_Id id, Resource_State state) { Context_Vk* ctx = (Context_Vk*)this; Device_Vk* dev = ctx->dev; Texture_Vk& tex = *(Texture_Vk*)&dev->get_texture(id); VkImageMemoryBarrier b{}; VkImageLayout src_layout = state_to_image_layout(tex.state); VkImageLayout dst_layout = state_to_image_layout(state); VkPipelineStageFlags src_stage, dst_stage; tex.state = state; b.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; b.oldLayout = src_layout; b.newLayout = dst_layout; b.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; b.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; b.image = tex.image; b.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; b.subresourceRange.baseMipLevel = 0; 
b.subresourceRange.levelCount = 1; b.subresourceRange.baseArrayLayer = 0; b.subresourceRange.layerCount = 1; if ( src_layout == VK_IMAGE_LAYOUT_UNDEFINED && dst_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL ) { b.srcAccessMask = 0; b.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; src_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; dst_stage = VK_PIPELINE_STAGE_TRANSFER_BIT; } else if ( src_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && dst_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL ) { b.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; b.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; src_stage = VK_PIPELINE_STAGE_TRANSFER_BIT; dst_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; } else if ( src_layout == VK_IMAGE_LAYOUT_UNDEFINED && dst_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL ) { b.srcAccessMask = 0; b.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; dst_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; } else if ( src_layout == VK_IMAGE_LAYOUT_UNDEFINED && dst_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL ) { b.srcAccessMask = 0; b.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; src_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; dst_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; } else if ( src_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && dst_layout == VK_IMAGE_LAYOUT_PRESENT_SRC_KHR ) { b.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; b.dstAccessMask = 0; src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; dst_stage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; } else if ( src_layout == VK_IMAGE_LAYOUT_UNDEFINED && dst_layout == VK_IMAGE_LAYOUT_PRESENT_SRC_KHR ) { b.srcAccessMask = 0; b.dstAccessMask = 0; src_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; dst_stage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; } else { print_err("Bad resource transition.\n"); pbreak(389); } vkCmdPipelineBarrier( ctx->cb, src_stage, dst_stage, 0, 0, 0, 0, 0, 1, &b ); } Renderpass_Vk& 
Context_Vk::begin_rp(const Render_Pass& rp) {
	Renderpass_Vk& rpo = dev->get_rpo({ dev->first_rp, rp});
	VkRenderPassBeginInfo rpbi{};
	rpbi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
	rpbi.renderPass = rpo.rpo;
	rpbi.framebuffer = rpo.fbo;
	/* the render area always uses the swapchain extent */
	rpbi.renderArea.extent = dev->swapchain.size;
	/* a clear value is only supplied when the pass clears */
	rpbi.clearValueCount = rp.mode == Clear_Mode::clear? 1: 0;
	rpbi.pClearValues = &rpo.clear;
	vkCmdBeginRenderPass(
		cb,
		&rpbi,
		VK_SUBPASS_CONTENTS_INLINE
	);
	/* any further pass this frame is no longer the first */
	dev->first_rp = false;
	return rpo;
}

/* Ends the current render pass and notifies the render pass object
 * that it was used. */
void Context_Vk::end_rp(Renderpass_Vk& rpo) {
	vkCmdEndRenderPass(cb);
	rpo.on_submit();
}

/* Records an empty render pass: begins and immediately ends it. */
void Context::submit(const Render_Pass& rp) {
	Context_Vk* ctx = (Context_Vk*)this;
	auto& rpo = ctx->begin_rp(rp);
	ctx->end_rp(rpo);
}

/* Creates this context's command pool on the device's queue family,
 * with per-command-buffer reset enabled. */
void Context_Vk::init_pool() {
	VkCommandPoolCreateInfo pi{};
	VkResult r;
	pi.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
	pi.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
	pi.queueFamilyIndex = (uint32_t)dev->queue_index;
	r = vkCreateCommandPool(dev->dev, &pi, &dev->ac, &pool);
	if (r != VK_SUCCESS) {
		print_err("Failed to create a command pool.\n");
		pbreak(r);
	}
}

/* Allocates the context's single primary command buffer from its
 * pool. */
void Context_Vk::init_cb() {
	VkCommandBufferAllocateInfo ci{};
	VkResult r;
	ci.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
	ci.commandPool = pool;
	ci.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
	ci.commandBufferCount = 1;
	r = vkAllocateCommandBuffers(dev->dev, &ci, &cb);
	if (r != VK_SUCCESS) {
		print_err("Failed to allocate a command buffer.\n");
		pbreak(r);
	}
}

/* Creates the context's fence and semaphore. The fence starts
 * signalled, so the wait in begin_record() does not block the first
 * time the context is used. */
void Context_Vk::init_sync() {
	VkFenceCreateInfo fi{};
	VkSemaphoreCreateInfo si{};
	VkResult r;
	fi.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
	fi.flags = VK_FENCE_CREATE_SIGNALED_BIT;
	r = vkCreateFence(dev->dev, &fi, &dev->ac, &fence);
	if (r != VK_SUCCESS) {
		print_err("Failed to create a fence.\n");
		pbreak(r);
	}
	si.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
	r = vkCreateSemaphore(dev->dev, &si, &dev->ac, &semaphore);
	if (r != VK_SUCCESS) {
		print_err("Failed to create a semaphore.\n");
		pbreak(r);
	}
}

/* Creates the context's pool, command buffer and sync objects and
 * marks it initialised. */
void Context_Vk::init(Device_Vk*
device) { dev = device; init_pool(); init_cb(); init_sync(); state |= context_state_init; } void Context_Vk::begin_record() { VkCommandBufferBeginInfo bi{}; bi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; wait(); vkResetFences(dev->dev, 1, &fence); vkResetCommandBuffer(cb, 0); vkBeginCommandBuffer(cb, &bi); } Context_Vk& Context_Vk::acquire(Device_Vk* device) { if (~state & context_state_init) init(device); state &= ~context_state_avail; begin_record(); return *this; } void Context_Vk::release() { state |= context_state_avail; } void Context_Vk::destroy() { state &= ~context_state_init; vkDestroyCommandPool(dev->dev, pool, &dev->ac); vkDestroySemaphore(dev->dev, semaphore, &dev->ac); vkDestroyFence(dev->dev, fence, &dev->ac); } Context& Device::acquire() { Device_Vk* vk = (Device_Vk*)this; int i; for (i = 0; i < max_contexts; i++) { if (vk->contexts[i].state & context_state_avail) return vk->contexts[i].acquire(vk); } print_err("Too many active contexts!\n"); print("Probably a submit was missed.\n"); pbreak(10000); return vk->contexts[0]; } Context& Device::get_ctx() { Device_Vk* vk = (Device_Vk*)this; return *vk->current_ctx; } void Pipeline_Vk::init_stages( Arena& scope, Device_Vk* dev, VkGraphicsPipelineCreateInfo& info, const Pipeline& desc ) { int count = 0, i; Shader_Vk& shader = *(Shader_Vk*)&dev->get_shader(desc.shader); for (i = 0; i < shader_type_count; i++) { if (shader.modules[i]) count++; } VkPipelineShaderStageCreateInfo* sis = (VkPipelineShaderStageCreateInfo*)arena_alloc( &scope, sizeof *sis * count ); memset(sis, 0, sizeof *sis * count); for (i = 0, count = 0; i < shader_type_count; i++) { if (shader.modules[i]) { auto& si = sis[i]; si.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; si.flags = 0; si.stage = Shader_Vk::stage((Shader_Type)i); si.module = shader.modules[i]; si.pName = shader.entrypoints[i]; count++; } } info.stageCount = count; info.pStages = sis; } void Pipeline_Vk::init_vertex_input( Arena& scope, Device_Vk* 
dev,
	VkGraphicsPipelineCreateInfo& info,
	const Pipeline& desc
) {
	Vertex_Format_Vk& vf = dev->vertex_formats[desc.vertex_format];
	/* allocated from scope so it stays alive until the pipeline is
	 * created */
	VkPipelineVertexInputStateCreateInfo& vi =
		*(VkPipelineVertexInputStateCreateInfo*)arena_alloc(
			&scope,
			sizeof vi
		);
	memset(&vi, 0, sizeof vi);
	vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
	vi.vertexBindingDescriptionCount = vf.binding_count;
	vi.pVertexBindingDescriptions = vf.bindings;
	vi.vertexAttributeDescriptionCount = vf.attr_count;
	vi.pVertexAttributeDescriptions = vf.attrs;
	info.pVertexInputState = &vi;
}

/* Sets info.pInputAssemblyState to a fixed triangle-list topology;
 * dev and desc are not consulted. */
void Pipeline_Vk::init_input_assembly(
	Arena& scope,
	Device_Vk* dev,
	VkGraphicsPipelineCreateInfo& info,
	const Pipeline& desc
) {
	VkPipelineInputAssemblyStateCreateInfo& ia =
		*(VkPipelineInputAssemblyStateCreateInfo*)arena_alloc(
			&scope,
			sizeof ia
		);
	(void)dev;
	(void)desc;
	(void)info;
	memset(&ia, 0, sizeof ia);
	ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
	ia.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
	info.pInputAssemblyState = &ia;
}

/* Sets info.pViewportState to one static viewport and one scissor
 * rectangle. Both are read from desc as four values each: indices 0
 * and 1 are the x/y origin, indices 2 and 3 the width/height. Depth
 * range is fixed at 0..1. */
void Pipeline_Vk::init_viewport(
	Arena& scope,
	Device_Vk* dev,
	VkGraphicsPipelineCreateInfo& info,
	const Pipeline& desc
) {
	VkPipelineViewportStateCreateInfo& vi =
		*(VkPipelineViewportStateCreateInfo*)arena_alloc(
			&scope,
			sizeof vi
		);
	VkRect2D& scissor = *(VkRect2D*)arena_alloc(
		&scope,
		sizeof scissor
	);
	VkViewport& viewport = *(VkViewport*)arena_alloc(
		&scope,
		sizeof viewport
	);
	memset(&vi, 0, sizeof vi);
	memset(&scissor, 0, sizeof scissor);
	memset(&viewport, 0, sizeof viewport);
	scissor.offset.x = desc.scissor[0];
	scissor.offset.y = desc.scissor[1];
	scissor.extent.width = desc.scissor[2];
	scissor.extent.height = desc.scissor[3];
	viewport.x = desc.viewport[0];
	viewport.y = desc.viewport[1];
	viewport.width = desc.viewport[2];
	viewport.height = desc.viewport[3];
	viewport.minDepth = 0.0f;
	viewport.maxDepth = 1.0f;
	vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
	vi.viewportCount = 1;
	vi.pViewports = &viewport;
	vi.scissorCount = 1;
vi.pScissors = &scissor;
	info.pViewportState = &vi;
}

/* Sets info.pRasterizationState to fixed state: filled polygons, no
 * culling, counter-clockwise front faces, line width 1, no depth clamp
 * or bias. dev and desc are not consulted. */
void Pipeline_Vk::init_rasterisation(
	Arena& scope,
	Device_Vk* dev,
	VkGraphicsPipelineCreateInfo& info,
	const Pipeline& desc
) {
	VkPipelineRasterizationStateCreateInfo& ri =
		*(VkPipelineRasterizationStateCreateInfo*)arena_alloc(
			&scope,
			sizeof ri
		);
	(void)dev;
	(void)desc;
	memset(&ri, 0, sizeof ri);
	ri.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
	ri.depthClampEnable = VK_FALSE;
	ri.rasterizerDiscardEnable = VK_FALSE;
	ri.polygonMode = VK_POLYGON_MODE_FILL;
	ri.lineWidth = 1.0f;
	ri.cullMode = VK_CULL_MODE_NONE;
	ri.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
	ri.depthBiasEnable = VK_FALSE;
	info.pRasterizationState = &ri;
}

/* Sets info.pMultisampleState to single-sample rasterisation with
 * sample shading disabled. */
void Pipeline_Vk::init_msaa(
	Arena& scope,
	Device_Vk* dev,
	VkGraphicsPipelineCreateInfo& info,
	const Pipeline& desc
) {
	VkPipelineMultisampleStateCreateInfo& mi =
		*(VkPipelineMultisampleStateCreateInfo*)arena_alloc(
			&scope,
			sizeof mi
		);
	(void)dev;
	(void)desc;
	memset(&mi, 0, sizeof mi);
	mi.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
	mi.sampleShadingEnable = VK_FALSE;
	mi.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
	info.pMultisampleState = &mi;
}

/* Sets info.pDepthStencilState with depth test, depth write, depth
 * bounds test and stencil test all disabled. */
void Pipeline_Vk::init_depthstencil(
	Arena& scope,
	Device_Vk* dev,
	VkGraphicsPipelineCreateInfo& info,
	const Pipeline& desc
) {
	VkPipelineDepthStencilStateCreateInfo& ds =
		*(VkPipelineDepthStencilStateCreateInfo*)arena_alloc(
			&scope,
			sizeof ds
		);
	(void)dev;
	(void)desc;
	memset(&ds, 0, sizeof ds);
	ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
	ds.depthTestEnable = VK_FALSE;
	ds.depthWriteEnable = VK_FALSE;
	ds.depthCompareOp = VK_COMPARE_OP_LESS;
	ds.depthBoundsTestEnable = VK_FALSE;
	ds.stencilTestEnable = VK_FALSE;
	info.pDepthStencilState = &ds;
}

/* Sets info.pColorBlendState for a single colour attachment that
 * writes all four channels with blending disabled. */
void Pipeline_Vk::init_blending(
	Arena& scope,
	Device_Vk* dev,
	VkGraphicsPipelineCreateInfo& info,
	const Pipeline& desc
) {
	VkPipelineColorBlendStateCreateInfo& bi =
		*(VkPipelineColorBlendStateCreateInfo*)arena_alloc(
			&scope,
			sizeof bi
		);
VkPipelineColorBlendAttachmentState& abs =
		*(VkPipelineColorBlendAttachmentState*)arena_alloc(
			&scope,
			sizeof abs
		);
	(void)dev;
	(void)desc;
	memset(&bi, 0, sizeof bi);
	memset(&abs, 0, sizeof abs);
	abs.colorWriteMask =
		VK_COLOR_COMPONENT_R_BIT |
		VK_COLOR_COMPONENT_G_BIT |
		VK_COLOR_COMPONENT_B_BIT |
		VK_COLOR_COMPONENT_A_BIT;
	abs.blendEnable = VK_FALSE;
	bi.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
	bi.flags = 0;
	bi.logicOpEnable = VK_FALSE;
	bi.attachmentCount = 1;
	bi.pAttachments = &abs;
	info.pColorBlendState = &bi;
}

/* Builds the descriptor set layout (dlay) from the pipeline's
 * descriptor list. Each descriptor becomes one binding at its slot.
 * The stage flags are derived from the shader's per-slot stage mask,
 * where bit j set means shader type j uses the slot. */
void Pipeline_Vk::init_descriptors(
	Device_Vk* dev,
	const Pipeline& desc
) {
	const Descriptor* sdescs = desc.descriptors;
	Shader_Vk& shader = *(Shader_Vk*)&dev->get_shader(desc.shader);
	VkResult r;
	int count = desc.descriptor_count;
	int i;
	{
		/* temporary binding array, freed once the layout exists */
		VkDescriptorSetLayoutBinding* descs =
			(VkDescriptorSetLayoutBinding*)heap_alloc(
				dev->heap,
				count * sizeof *descs
			);
		VkDescriptorSetLayoutCreateInfo di{};
		memset(descs, 0, count * sizeof *descs);
		for (i = 0; i < count; i++) {
			int j, stage;
			auto& dst = descs[i];
			auto& src = sdescs[i];
			switch (src.type) {
				case Descriptor::Type::texture:
					dst.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
					break;
				case Descriptor::Type::constant_buffer:
					dst.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
					break;
			}
			dst.binding = src.slot;
			dst.descriptorCount = 1;
			dst.stageFlags = 0;
			stage = shader.descriptor_stage(src.slot);
			for (j = 0; j < shader_type_count; j++) {
				if (stage & (1 << j)) {
					dst.stageFlags |= Shader_Vk::stage((Shader_Type)j);
				}
			}
		}
		di.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
		di.bindingCount = (uint32_t)count;
		di.pBindings = descs;
		r = vkCreateDescriptorSetLayout(
			dev->dev,
			&di,
			&dev->ac,
			&dlay
		);
		if (r != VK_SUCCESS) {
			print_err("Failed to create descriptor set layout.\n");
			pbreak(r);
		}
		heap_free(dev->heap, descs);
	}
}

/* Creates the pipeline layout (lay). A descriptor set layout is built
 * and referenced only when the pipeline declares at least one
 * descriptor; no push constant ranges are used. */
void Pipeline_Vk::init_layout(
	Device_Vk* dev,
	const Pipeline& desc
) {
	VkResult r;
	VkPipelineLayoutCreateInfo li{};
	(void)desc;
	int set_count =
desc.descriptor_count? 1: 0;
	if (set_count) {
		init_descriptors(dev, desc);
	}
	li.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
	li.setLayoutCount = set_count;
	li.pSetLayouts = &dlay;
	li.pushConstantRangeCount = 0;
	r = vkCreatePipelineLayout(
		dev->dev,
		&li,
		&dev->ac,
		&lay
	);
	if (r != VK_SUCCESS) {
		print_err("Failed to create a pipeline layout.\n");
		pbreak(r);
	}
}

/* Builds the graphics pipeline for key. All intermediate create-info
 * structures are allocated from a 1 KiB arena on the stack, which
 * keeps them alive until vkCreateGraphicsPipelines has been called. */
void Pipeline_Vk::init(Device_Vk* dev, const Pso_Key& key) {
	char buffer[1024];
	Arena scope;
	VkResult r;
	const auto& desc = key.pso;
	VkGraphicsPipelineCreateInfo info{};
	init_arena(&scope, buffer, sizeof buffer);
	init_layout(dev, desc);
	init_stages(scope, dev, info, desc);
	init_vertex_input(scope, dev, info, desc);
	init_input_assembly(scope, dev, info, desc);
	init_viewport(scope, dev, info, desc);
	init_rasterisation(scope, dev, info, desc);
	init_msaa(scope, dev, info, desc);
	init_depthstencil(scope, dev, info, desc);
	init_blending(scope, dev, info, desc);
	info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
	info.flags = 0;
	info.renderPass = dev->get_rpo(key.rpo).rpo;
	info.subpass = 0;
	info.layout = lay;
	r = vkCreateGraphicsPipelines(
		dev->dev,
		VK_NULL_HANDLE,
		1,
		&info,
		&dev->ac,
		&pip
	);
	if (r != VK_SUCCESS) {
		print_err("Failed to create a pipeline.\n");
		pbreak(r);
	}
}

/* Destroys the descriptor set layout, pipeline layout and pipeline.
 * NOTE(review): dlay is only created when the pipeline has
 * descriptors - confirm it is a null handle otherwise. */
void Pipeline_Vk::destroy(Device_Vk* dev) {
	vkDestroyDescriptorSetLayout(dev->dev, dlay, &dev->ac);
	vkDestroyPipelineLayout(dev->dev, lay, &dev->ac);
	vkDestroyPipeline(dev->dev, pip, &dev->ac);
}

/* Creates a dedicated descriptor pool (dp), allocates one descriptor
 * set (dset) with the pipeline's layout from it, and writes every
 * descriptor listed in desc into that set. */
void Descriptor_Set_Vk::init(
	Device_Vk* dev,
	const Pipeline_Vk& pip,
	const Pipeline& desc
) {
	int count = desc.descriptor_count, i;
	int sampler_count = 0, cbuffer_count = 0;
	int size_count = 0;
	VkDescriptorSetAllocateInfo da{};
	VkDescriptorPoolSize sizes[4];
	VkResult r;
	/* count descriptors per Vulkan descriptor type to size the pool */
	for (i = 0; i < count; i++) {
		auto& src = desc.descriptors[i];
		switch (src.type) {
			case Descriptor::Type::texture:
				sampler_count++;
				break;
			case Descriptor::Type::constant_buffer:
				cbuffer_count++;
				break;
		}
	}
	if (sampler_count) {
		int idx = size_count++;
		sizes[idx] = {
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .descriptorCount = (uint32_t)sampler_count }; } if (cbuffer_count) { int idx = size_count++; sizes[idx] = { .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, .descriptorCount = (uint32_t)cbuffer_count }; } { VkDescriptorPoolCreateInfo di{}; di.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, di.poolSizeCount = (uint32_t)size_count; di.pPoolSizes = sizes; di.maxSets = (uint32_t)count; r = vkCreateDescriptorPool(dev->dev, &di, &dev->ac, &dp); if (r != VK_SUCCESS) { print_err("Failed to create a descriptor pool.\n"); pbreak(r); } } da.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; da.descriptorPool = dp; da.descriptorSetCount = 1; da.pSetLayouts = &pip.dlay; r = vkAllocateDescriptorSets( dev->dev, &da, &dset ); if (r != VK_SUCCESS) { print_err("Failed to allocate descriptor set.\n"); pbreak(r); } for (i = 0; i < count; i++) { VkDescriptorImageInfo img{}; VkDescriptorBufferInfo buf{}; VkWriteDescriptorSet wd{}; auto& src = desc.descriptors[i]; wd.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; wd.dstSet = dset; wd.dstBinding = src.slot; wd.dstArrayElement = 0; wd.descriptorCount = 1; switch (src.type) { case Descriptor::Type::texture: { Texture_Descriptor* td = (Texture_Descriptor*)src.payload; Texture_Vk& t = *(Texture_Vk*)&dev->get_texture(td->texture); Sampler_Vk& s = *(Sampler_Vk*)&dev->samplers[td->sampler]; assert(td->texture); assert(td->sampler); img.imageView = t.view; img.sampler = s.sampler; img.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; wd.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; wd.pImageInfo = &img; } break; case Descriptor::Type::constant_buffer: { Constant_Buffer_Descriptor* cd = (Constant_Buffer_Descriptor*)src.payload; Buffer_Vk& b = *(Buffer_Vk*)&dev->get_buffer(cd->buffer); assert(cd->buffer); buf.buffer = b.buf; buf.offset = 0; buf.range = b.size; wd.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; wd.pBufferInfo = &buf; } break; } 
vkUpdateDescriptorSets(dev->dev, 1, &wd, 0, 0);
	}
}

/* Destroys the descriptor pool that backs this set. */
void Descriptor_Set_Vk::destroy(Device_Vk* dev) {
	vkDestroyDescriptorPool(dev->dev, dp, &dev->ac);
}

/* Returns the size in bytes of a float / vec2 / vec3 / vec4 shader
 * variable. Other types are not handled yet: they assert and yield 0. */
int Shader_Vk::svariable_type_size(SVariable_Type type) {
	switch (type) {
		case svariable_type_float: return 4;
		case svariable_type_vec2: return 8;
		case svariable_type_vec3: return 12;
		case svariable_type_vec4: return 16;
		default: assert(0); /* todo */
	}
	return 0;
}

/* Maps a float / vec2 / vec3 / vec4 shader variable type to the
 * matching 32-bit float vertex attribute format. Other types are not
 * handled yet: they assert and yield format 0. */
VkFormat Vertex_Format_Vk::format_from_svar_type(
	SVariable_Type type
) {
	switch (type) {
		case svariable_type_float:
			return VK_FORMAT_R32_SFLOAT;
		case svariable_type_vec2:
			return VK_FORMAT_R32G32_SFLOAT;
		case svariable_type_vec3:
			return VK_FORMAT_R32G32B32_SFLOAT;
		case svariable_type_vec4:
			return VK_FORMAT_R32G32B32A32_SFLOAT;
		default: assert(0); /* todo */
	}
	return (VkFormat)0;
}

/* Converts desc into heap-allocated Vulkan binding and attribute
 * description arrays (bindings, attrs). */
void Vertex_Format_Vk::init(
	Device_Vk* dev,
	const Vertex_Format_Desc& desc
) {
	int i;
	binding_count = desc.binding_count;
	attr_count = desc.attribute_count;
	bindings = (VkVertexInputBindingDescription*)heap_alloc(
		dev->heap,
		binding_count * sizeof *bindings
	);
	attrs = (VkVertexInputAttributeDescription*)heap_alloc(
		dev->heap,
		attr_count * sizeof *attrs
	);
	memset(bindings, 0, binding_count * sizeof *bindings);
	memset(attrs, 0, attr_count * sizeof *attrs);
	for (i = 0; i < binding_count; i++) {
		auto& dst = bindings[i];
		const auto& src = desc.bindings[i];
		dst.binding = src.binding;
		dst.stride = src.stride;
		/* any rate other than per-instance is treated as per-vertex */
		dst.inputRate =
			src.rate == sbinding_rate_instance?
VK_VERTEX_INPUT_RATE_INSTANCE: VK_VERTEX_INPUT_RATE_VERTEX; } for (i = 0; i < attr_count; i++) { auto& dst = attrs[i]; auto& src = desc.attributes[i]; dst.binding = src.binding; dst.location = src.index; dst.format = format_from_svar_type(src.type); dst.offset = src.offset; } } void Vertex_Format_Vk::destroy(Device_Vk* dev) { heap_free(dev->heap, attrs); heap_free(dev->heap, bindings); } bool Shader_Vk::init_module( Device_Vk* dev, int stage, char* buf, int size ) { VkResult r; VkShaderModule m; VkShaderModuleCreateInfo mi{}; mi.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; mi.codeSize = size; mi.pCode = (uint32_t*)buf; r = vkCreateShaderModule(dev->dev, &mi, &dev->ac, &m); modules[stage] = m; return r == VK_SUCCESS; } int Shader_Vk::Vertex_Format::find_binding(const char* name) { int i; int bucket = (int)(hash_string(name) % binding_count); for (i = 0; i < binding_count; i++) { Binding& binding = bindings[bucket]; if ( !binding.name[0] || !strcmp(binding.name, name) ) return bucket; bucket = (bucket + 1) % binding_count; } return -1; } int Shader_Vk::Vertex_Format::find_attribute(const char* name) { int i; int bucket = (int)(hash_string(name) % attr_count); for (i = 0; i < attr_count; i++) { Attribute& attr = attributes[bucket]; if ( !attr.name[0] || !strcmp(attr.name, name) ) return bucket; bucket = (bucket + 1) % attr_count; } return -1; } bool Shader_Vk::Vertex_Format::init( Device_Vk* dev, Pack_File* f ) { int i, attr_index = 0; int start = pack_tell(f); attr_count = 0; for (i = 0; i < binding_count; i++) { char name[24]; int count, j; SBinding_Rate rate; pack_read(f, name, sizeof name); pack_read(f, &rate, 4); pack_read(f, &count, 4); for (j = 0; j < count; j++) { char aname[28]; SVariable_Type type; pack_read(f, aname, sizeof aname); pack_read(f, &type, 4); attr_count++; } } pack_seek(f, start, seek_rel_start); bindings = (Binding*)heap_alloc( dev->heap, binding_count * sizeof *bindings ); attributes = (Attribute*)heap_alloc( dev->heap, attr_count * 
sizeof *attributes ); for (i = 0; i < binding_count; i++) bindings[i].name[0] = 0; for (i = 0; i < attr_count; i++) attributes[i].name[0] = 0; for (i = 0; i < binding_count; i++) { Binding* binding; char name[24]; int count, j; SBinding_Rate rate; pack_read(f, name, sizeof name); pack_read(f, &rate, 4); pack_read(f, &count, 4); binding = &bindings[find_binding(name)]; strcpy(binding->name, name); binding->rate = rate; binding->attr_count = count; binding->attributes = (int*)heap_alloc( dev->heap, count * sizeof *binding->attributes ); binding->index = i; for (j = 0; j < count; j++, attr_index++) { int bucket; Attribute* attr; char aname[28]; SVariable_Type type; pack_read(f, aname, sizeof aname); pack_read(f, &type, 4); bucket = find_attribute(aname); binding->attributes[j] = bucket; attr = &attributes[bucket]; strcpy(attr->name, aname); attr->index = j; attr->type = type; } } return true; } void Shader_Vk::Vertex_Format::destroy(Device_Vk* dev) { int i; for (i = 0; i < binding_count; i++) heap_free(dev->heap, bindings[i].attributes); heap_free(dev->heap, bindings); heap_free(dev->heap, attributes); } void Shader_Vk::destroy(Device_Vk* dev) { int i; for (i = 0; i < shader_type_count; i++) if (modules[i]) vkDestroyShaderModule(dev->dev, modules[i], &dev->ac); vfd.destroy(dev); heap_free(dev->heap, descs); dev->destroy_vertex_format(vf); dev->shaders.remove(id); } int Shader::binding_index(const char* name) { int idx; Shader_Vk* sh = (Shader_Vk*)this; idx = sh->vfd.find_binding(name); if (idx < 0 || !sh->vfd.bindings[idx].name[0]) return -1; return sh->vfd.bindings[idx].index; } int Shader::attribute_index(const char* name) { int idx; Shader_Vk* sh = (Shader_Vk*)this; idx = sh->vfd.find_attribute(name); if (idx < 0 || !sh->vfd.attributes[idx].name[0]) return -1; return sh->vfd.attributes[idx].index; } int Shader::descriptor_binding(const char* name) { int idx; Shader_Vk* sh = (Shader_Vk*)this; idx = sh->find_descriptor(name); if (idx < 0 || !sh->descs[idx].name[0]) 
return -1;
	return sh->descs[idx].slot;
}

/* Returns the stage mask recorded for the descriptor at slot, or 0 if
 * no descriptor uses that slot. */
int Shader::descriptor_stage(int slot) {
	Shader_Vk* sh = (Shader_Vk*)this;
	int i;
	for (i = 0; i < sh->desc_count; i++) {
		if (sh->descs[i].slot == slot) {
			return sh->descs[i].stage;
		}
	}
	return 0;
}

/* Creates a buffer of s bytes whose usage and memory properties are
 * derived from flags, allocates dedicated memory for it and binds the
 * memory at offset 0. */
void Buffer_Vk::init(
	Device_Vk* dev,
	int flags,
	VkDeviceSize s
) {
	VkBufferCreateInfo bi{};
	VkMemoryRequirements req;
	VkResult r;
	size = s;
	bi.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
	bi.size = size;
	bi.usage = get_usage(flags);
	bi.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
	r = vkCreateBuffer(dev->dev, &bi, &dev->ac, &buf);
	if (r != VK_SUCCESS) {
		print_err("Failed to create a buffer.\n");
		pbreak(r);
	}
	vkGetBufferMemoryRequirements(dev->dev, buf, &req);
	{
		VkMemoryPropertyFlags props = get_memory_flags(flags);
		int mt = dev->find_memory_type(req.memoryTypeBits, props);
		VkMemoryAllocateInfo ai{};
		/* a negative index means no memory type satisfied the request */
		if (mt < 0) {
			print("Failed to find a satisfying memory type index.\n");
			pbreak(mt);
		}
		ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
		ai.allocationSize = req.size;
		ai.memoryTypeIndex = mt;
		r = vkAllocateMemory(dev->dev, &ai, &dev->ac, &memory);
		if (r == VK_ERROR_OUT_OF_DEVICE_MEMORY) {
			print_err("Out of video memory.\n");
			pbreak(r);
		}
		if (r != VK_SUCCESS) {
			print_err("VRAM allocation failed.\n");
			pbreak(r);
		}
	}
	vkBindBufferMemory(dev->dev, buf, memory, 0);
}

/* Destroys the buffer, frees its memory and removes it from the
 * device's buffer table. */
void Buffer_Vk::destroy(Device_Vk* dev) {
	vkDestroyBuffer(dev->dev, buf, &dev->ac);
	vkFreeMemory(dev->dev, memory, &dev->ac);
	dev->buffers.remove(id);
}

/* Allocates a buffer id and initialises the buffer with the given size
 * and flags. */
Buffer_Id Device::create_buffer(size_t size, int flags) {
	Device_Vk* dev = (Device_Vk*)this;
	Buffer_Id id = dev->alloc_buffer();
	Buffer_Vk& buf = *(Buffer_Vk*)&get_buffer(id);
	buf.init(dev, flags, (VkDeviceSize)size);
	return id;
}

/* Hands the buffer to the device's destruction queue rather than
 * destroying it on the spot. */
void Device::destroy_buffer(Buffer_Id id) {
	Device_Vk* dev = (Device_Vk*)this;
	Buffer_Vk* buf = (Buffer_Vk*)&get_buffer(id);
	dev->queue_destroy(buf);
}

/* Maps size bytes of the buffer's memory starting at offset and
 * returns the host pointer. */
void* Device::map_buffer(
	Buffer_Id id,
	size_t offset,
	size_t size
) {
	Device_Vk* dev = (Device_Vk*)this;
	Buffer_Vk& buf = *(Buffer_Vk*)&get_buffer(id);
void* ptr; vkMapMemory( dev->dev, buf.memory, (VkDeviceSize)offset, (VkDeviceSize)size, 0, &ptr ); return ptr; } void Device::unmap_buffer(Buffer_Id id) { Device_Vk* dev = (Device_Vk*)this; Buffer_Vk& buf = *(Buffer_Vk*)&get_buffer(id); vkUnmapMemory(dev->dev, buf.memory); } Buffer& Device::get_buffer(Buffer_Id id) { Device_Vk* dev = (Device_Vk*)this; assert(id.index); assert(dev->buffers.has(id)); return dev->buffers[id]; } Texture_Id Device::create_texture( Texture_Format fmt, int w, int h, Buffer_Id init ) { VkImageCreateInfo ii{}; VkResult r; Device_Vk* dev = (Device_Vk*)this; Texture_Id id = dev->alloc_texture(); Texture_Vk& tex = *(Texture_Vk*)&dev->get_texture(id); VkMemoryRequirements req; tex.state = Resource_State::undefined; ii.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; ii.imageType = VK_IMAGE_TYPE_2D; ii.extent.width = w; ii.extent.height = h; ii.extent.depth = 1; ii.mipLevels = 1; ii.arrayLayers = 1; ii.format = get_vk_format(fmt); ii.tiling = VK_IMAGE_TILING_OPTIMAL; ii.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; ii.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; ii.sharingMode = VK_SHARING_MODE_EXCLUSIVE; ii.samples = VK_SAMPLE_COUNT_1_BIT; r = vkCreateImage(dev->dev, &ii, &dev->ac, &tex.image); if (r != VK_SUCCESS) { print_err("Failed to create an image.\n"); } vkGetImageMemoryRequirements(dev->dev, tex.image, &req); { VkMemoryPropertyFlags props = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; int mt = dev->find_memory_type(req.memoryTypeBits, props); VkMemoryAllocateInfo ai{}; if (mt < 0) { print("Failed to find a satisfying memory type index.\n"); pbreak(mt); } ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; ai.allocationSize = req.size; ai.memoryTypeIndex = mt; r = vkAllocateMemory(dev->dev, &ai, &dev->ac, &tex.memory); if (r == VK_ERROR_OUT_OF_DEVICE_MEMORY) { print_err("Out of video memory.\n"); pbreak(r); } if (r != VK_SUCCESS) { print_err("VRAM allocation failed.\n"); pbreak(r); } } vkBindImageMemory(dev->dev, tex.image, 
tex.memory, 0); tex.w = w; tex.h = h; tex.alias = false; tex.view = make_view( dev, tex.image, ii.format, VK_IMAGE_ASPECT_COLOR_BIT ); if (init) { Context& ctx = dev->acquire(); ctx.transition(id, Resource_State::copy_dst); ctx.copy(id, init); ctx.transition(id, Resource_State::shader_read); dev->submit(ctx); } return id; } Shader& Device::get_shader(Shader_Id id) { Device_Vk* dev = (Device_Vk*)this; assert(id.index); assert(dev->shaders.has(id)); return dev->shaders[id]; } Sampler_Id Device::create_sampler(const Sampler_State& state) { Device_Vk* dev = (Device_Vk*)this; Sampler_Id id = dev->alloc_sampler(); Sampler_Vk& s = dev->samplers[id]; s.init(dev, state); return id; } void Device::destroy_sampler(Sampler_Id id) { Device_Vk* dev = (Device_Vk*)this; Sampler_Vk& s = dev->samplers[id]; dev->queue_destroy(&s); } void Shader_Loader::init(Device_Vk* d) { dev = d; } Asset* Shader_Loader::load( Arena* a, Arena* s, Pack_File* f ) { Shader_Vk* shader; Shader_Id id; (void)s; (void)a; id = dev->alloc_shader(); shader = (Shader_Vk*)&dev->get_shader(id); if (!shader->init(dev, f)) { dev->shaders.remove(id); return 0; } return shader; } void Shader_Loader::unload(Asset* a) { Shader_Vk* sh = (Shader_Vk*)a; dev->queue_destroy(sh); } int Shader_Vk::find_descriptor(const char* name) { int i; int bucket = (int)(hash_string(name) % desc_count); for (i = 0; i < desc_count; i++) { Desc& desc = descs[bucket]; if ( !desc.name[0] || !strcmp(desc.name, name) ) return bucket; bucket = (bucket + 1) % desc_count; } return -1; } bool Shader_Vk::init(Device_Vk* dev, Pack_File* f) { char magic[4]; int binding_count, target_count, i; pack_read(f, magic, 4); if ( magic[0] != 'C' || magic[1] != 'S' || magic[2] != 'H' || magic[3] != '2' ) return false; pack_read(f, &type, 4); pack_read(f, &binding_count, 4); pack_read(f, &target_count, 4); pack_read(f, &desc_count, 4); assert(binding_count); vfd.binding_count = binding_count; if (!vfd.init(dev, f)) return false; { Vertex_Format_Desc desc{}; 
desc.binding_count = vfd.binding_count; desc.attribute_count = vfd.attr_count; desc.bindings = (Vertex_Format_Desc::Binding*)heap_alloc( dev->heap, sizeof *desc.bindings ); desc.attributes = (Vertex_Format_Desc::Attribute*)heap_alloc( dev->heap, sizeof *desc.attributes * desc.attribute_count ); for (i = 0; i < vfd.binding_count; i++) { int j, stride = 0; auto& src = vfd.bindings[i]; auto& dst = desc.bindings[src.index]; for (j = 0; j < src.attr_count; j++) { auto& src_attr = vfd.attributes[src.attributes[j]]; auto& dst_attr = desc.attributes[src.attributes[j]]; dst_attr.binding = src.index; dst_attr.index = j; dst_attr.type = src_attr.type; dst_attr.offset = stride; stride += svariable_type_size(src_attr.type); } dst.binding = src.index; dst.stride = stride; dst.rate = src.rate; } vf = dev->create_vertex_format(desc); heap_free(dev->heap, desc.attributes); heap_free(dev->heap, desc.bindings); } pack_seek( f, 32 * target_count, seek_rel_cur ); descs = (Desc*)heap_alloc( dev->heap, desc_count * sizeof *descs ); pack_read(f, descs, desc_count * sizeof *descs); for (i = 0; i < shader_type_count; i++) { int o, s; pack_read(f, &o, 4); pack_read(f, &s, 4); if (o) { bool r; int before = pack_tell(f); char* buf = (char*)heap_alloc(dev->heap, s); pack_seek(f, o, seek_rel_start); pack_read(f, buf, s); r = init_module(dev, i, buf, s); heap_free(dev->heap, buf); pack_seek(f, before, seek_rel_start); if (!r) return false; } else { modules[i] = VK_NULL_HANDLE; } pack_read(f, entrypoints[i], 24); } return true; } void Texture_Loader::init(Device_Vk* d) { dev = d; } size_t Texture_Loader::calc_size( Texture_Format fmt, int w, int h ) { switch (fmt) { case texture_format_bc1: return (w / 4) * (h / 4) * 8; case texture_format_r8i: return w * h; default: print_err("Can't load this texture format.\n"); pbreak(45498); return 0; } } Buffer_Id Texture_Loader::upload(void* buf, size_t size) { void* mem; Buffer_Id id = dev->create_buffer( size, Buffer_Flags::copy_src | 
Buffer_Flags::cpu_readwrite ); mem = dev->map_buffer(id, 0, size); memcpy(mem, buf, size); dev->unmap_buffer(id); return id; } Asset* Texture_Loader::load(Arena* a, Arena* s, Pack_File* f) { char magic[4]; void* data; int w, h; size_t size; Texture_Format fmt; (void)a; pack_read(f, magic, 4); pack_read(f, &w, 4); pack_read(f, &h, 4); pack_read(f, &fmt, 4); size = calc_size(fmt, w, h); data = arena_alloc(s, size); pack_read(f, data, size); { Buffer_Id buf = upload(data, size); Texture_Id tex = dev->create_texture(fmt, w, h, buf); dev->destroy_buffer(buf); return &dev->get_texture(tex); } } void Texture_Loader::unload(Asset* a) { Texture_Vk* tex = (Texture_Vk*)a; dev->destroy_texture(tex->id); } void Texture_Vk::destroy(Device_Vk* dev) { if (!alias) { vkDestroyImage(dev->dev, image, &dev->ac); vkFreeMemory(dev->dev, memory, &dev->ac); } vkDestroyImageView(dev->dev, view, &dev->ac); dev->textures.remove(id); } VkFilter Sampler_Vk::get_filter(Filter_Mode mode) { switch (mode) { case Filter_Mode::point: return VK_FILTER_NEAREST; case Filter_Mode::linear: return VK_FILTER_LINEAR; } assert(0); return (VkFilter)0; } VkSamplerMipmapMode Sampler_Vk::get_mipmap_mode( Filter_Mode mode ) { switch (mode) { case Filter_Mode::point: return VK_SAMPLER_MIPMAP_MODE_NEAREST; case Filter_Mode::linear: return VK_SAMPLER_MIPMAP_MODE_LINEAR; } assert(0); return (VkSamplerMipmapMode)0; } VkSamplerAddressMode Sampler_Vk::get_mode( Address_Mode mode ) { switch (mode) { case Address_Mode::repeat: return VK_SAMPLER_ADDRESS_MODE_REPEAT; case Address_Mode::mirror: return VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; case Address_Mode::clamp: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; case Address_Mode::border: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; } assert(0); return (VkSamplerAddressMode)0; } void Sampler_Vk::init(Device_Vk* dev, const Sampler_State& s) { VkSamplerCreateInfo si{}; VkSamplerCustomBorderColorCreateInfoEXT bi{}; VkClearColorValue col{}; VkResult r; col.float32[0] = 
s.border[0]; col.float32[1] = s.border[1]; col.float32[2] = s.border[2]; col.float32[3] = s.border[3]; si.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; si.magFilter = get_filter(s.mag); si.minFilter = get_filter(s.min); si.mipmapMode = get_mipmap_mode(s.mip); si.addressModeU = get_mode(s.address_u); si.addressModeV = get_mode(s.address_v); si.addressModeW = get_mode(s.address_w); si.borderColor = VK_BORDER_COLOR_FLOAT_CUSTOM_EXT; si.pNext = &bi; bi.sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT; bi.customBorderColor = col; bi.format = VK_FORMAT_R32G32B32A32_SFLOAT; r = vkCreateSampler(dev->dev, &si, &dev->ac, &sampler); if (r != VK_SUCCESS) { print_err("Failed to create a sampler.\n"); pbreak(r); } } void Sampler_Vk::destroy(Device_Vk* dev) { vkDestroySampler(dev->dev, sampler, &dev->ac); }