diff options
Diffstat (limited to 'video.cpp')
-rw-r--r-- | video.cpp | 355 |
1 files changed, 299 insertions, 56 deletions
@@ -430,6 +430,12 @@ enum { context_state_init = 1 << 1 }; + +struct Shader_Module { + int mask; + VkShaderModule mod; +}; + struct Shader_Vk : public Shader, public Late_Terminated { struct Attribute { char name[28]; @@ -464,23 +470,31 @@ struct Shader_Vk : public Shader, public Late_Terminated { int stage; }; + struct Option { + char name[24]; + int mask; + int stage; + }; + SProgram_Type type; - VkShaderModule modules[shader_type_count]; - char entrypoints[shader_type_count][24]; Vertex_Format vfd; Desc* descs; - int desc_count; + Shader_Module* modules[shader_type_count]; + Option* options; + int desc_count, opt_count; + int module_count[shader_type_count]; - bool init(Device_Vk* dev, Pack_File* f); - bool init_module( + bool init(Device_Vk* dev, Arena* a, Pack_File* f); + VkShaderModule make_module( Device_Vk* dev, - int stage, char* buf, int size ); void destroy(Device_Vk* dev) override; int find_descriptor(const char* name); + int find_module(Shader_Type type, int mask); + int find_opt(Shader_Type, const char* name); static VkShaderStageFlagBits stage(Shader_Type type) { switch (type) { @@ -561,7 +575,8 @@ struct Texture_Vk : public Texture, public Late_Terminated { int array_size, int start_mip, int start_array, - bool alias + bool alias, + int samples ); void destroy(Device_Vk*) override; void set_name(Device_Vk* dev, const char* name); @@ -732,7 +747,8 @@ struct Pipeline_Vk { Arena& scope, Device_Vk* dev, VkGraphicsPipelineCreateInfo& info, - const Pipeline& desc + const Pipeline& desc, + const Render_Pass& rpo ); void init_depthstencil( Arena& scope, @@ -934,6 +950,7 @@ struct Device_Vk : public Device { VkPhysicalDevice phys_dev; VkSurfaceKHR surf; uint32_t backbuffer_index; + VkSampleCountFlagBits max_samples; Texture_Id backbuffer_id; Swap_Cap swap_cap; VkPhysicalDeviceMemoryProperties mem_props; @@ -1021,6 +1038,9 @@ struct Device_Vk : public Device { void create_terminators(); void create_depth(int w, int h); + VkSampleCountFlagBits get_max_samples(); + VkSampleCountFlagBits get_samples(int); + int find_memory_type( uint32_t filter, VkMemoryPropertyFlags flags @@ -1114,6 +1134,37 @@ void Device_Vk::init_validation() { #endif +VkSampleCountFlagBits Device_Vk::get_max_samples() { + VkPhysicalDeviceProperties p; + VkSampleCountFlagBits + i = VK_SAMPLE_COUNT_64_BIT, + e = VK_SAMPLE_COUNT_1_BIT; + VkSampleCountFlags c; + vkGetPhysicalDeviceProperties(phys_dev, &p); + c = + p.limits.framebufferColorSampleCounts & + p.limits.framebufferDepthSampleCounts; + for (; i >= e; i = (VkSampleCountFlagBits)(i >> 1)) + if (c & i) return i; + return VK_SAMPLE_COUNT_1_BIT; +} + +VkSampleCountFlagBits Device_Vk::get_samples( + int c +) { + VkSampleCountFlagBits b = VK_SAMPLE_COUNT_1_BIT; + switch (c) { + case 1: b = VK_SAMPLE_COUNT_1_BIT; break; + case 2: b = VK_SAMPLE_COUNT_2_BIT; break; + case 4: b = VK_SAMPLE_COUNT_4_BIT; break; + case 8: b = VK_SAMPLE_COUNT_8_BIT; break; + case 16: b = VK_SAMPLE_COUNT_16_BIT; break; + case 32: b = VK_SAMPLE_COUNT_32_BIT; break; + case 64: b = VK_SAMPLE_COUNT_64_BIT; break; + default: break; + } + return std::min(max_samples, b); +} bool Device_Vk::has_validation() { unsigned count, i; @@ -1352,6 +1403,7 @@ void Device_Vk::init_internal() { #endif surf = app_create_vk_surface(app, inst); create_dev(&swap_cap); + max_samples = get_max_samples(); vrama.init(this); gladLoaderLoadVulkan(inst, phys_dev, dev); vkGetDeviceQueue(dev, (uint32_t)queue_index, 0, &queue); @@ -1386,7 +1438,7 @@ void Device_Vk::create_depth(int w, int h) { destroy_texture(depth); depth = create_texture( "default depth", - texture_format_d32, + texture_format_d24s8, Texture_Flags::sampleable | Texture_Flags::depth_stencil_target, w, h, @@ -1642,7 +1694,7 @@ void Renderpass_Vk::init( auto& colour = rp.colours[i]; auto& ad = ads[index]; ad.format = get_vk_format(colour.fmt); - ad.samples = VK_SAMPLE_COUNT_1_BIT; + ad.samples = dev->get_samples(colour.samples); ad.loadOp = load_op_from_mode(colour.mode); ad.storeOp = VK_ATTACHMENT_STORE_OP_STORE; ad.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; @@ -1659,9 +1711,10 @@ void Renderpass_Vk::init( if (has_depth) { int i = count++; auto& ad = ads[i]; - ad.format = get_vk_format(dev->get_texture(rp.depth.id).fmt); - ad.samples = VK_SAMPLE_COUNT_1_BIT; - ad.loadOp = load_op_from_mode(rp.depth.mode); + auto& depth = rp.depth; + ad.format = get_vk_format(dev->get_texture(depth.id).fmt); + ad.samples = dev->get_samples(depth.samples); + ad.loadOp = load_op_from_mode(depth.mode); ad.storeOp = VK_ATTACHMENT_STORE_OP_STORE; ad.stencilLoadOp = ad.loadOp; ad.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; @@ -1887,7 +1940,8 @@ Texture_Id Swapchain::create_image( 1, 0, 0, - true + true, + 1 ); return id; } @@ -2016,6 +2070,8 @@ void Device::present() { VkSubmitInfo si{}; VkPipelineStageFlags stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + heap_defrag(dev->heap); + // ^ this makes it >4x the speed ctx->check_end_rp(); ctx->transition( dev->get_backbuffer(), @@ -2252,6 +2308,71 @@ void Context::copy(Texture_Id dst, Buffer_Id src) { ); } +void Context::copy( + Texture_Id dst, + Buffer_Id src, + int mip, + int x, + int y, + int w, + int h +) { + Context_Vk* ctx = (Context_Vk*)this; + Device_Vk* dev = ctx->dev; + Texture_Vk& a = *(Texture_Vk*)&dev->get_texture(dst); + Buffer_Vk& b = *(Buffer_Vk*)&dev->get_buffer(src); + VkBufferImageCopy c{}; + transition(dst, Resource_State::copy_dst); + c.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + c.imageSubresource.layerCount = 1; + c.imageSubresource.mipLevel = mip; + c.imageExtent.width = w; + c.imageExtent.height = h; + c.imageExtent.depth = 1; + c.imageOffset.x = x; + c.imageOffset.y = y; + ctx->check_end_rp(); + vkCmdCopyBufferToImage( + ctx->cb, + b.buf, + a.image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1, + &c + ); +} + +void Context::resolve(Texture_Id dst, Texture_Id src) { + Context_Vk* ctx = (Context_Vk*)this; + Device_Vk* dev = ctx->dev; + Texture_Vk& d = *(Texture_Vk*)&dev->get_texture(dst); + Texture_Vk& s = *(Texture_Vk*)&dev->get_texture(src); + VkImageResolve r{}; + r.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + r.srcSubresource.layerCount = 1; + r.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + r.dstSubresource.layerCount = 1; + r.extent.width = d.w; + r.extent.height = d.h; + r.extent.depth = 1; + assert(d.w == s.w); + assert(d.h == s.h); + assert(d.d == 1 && s.d == 1); + assert(d.samples == 1 && s.samples > 1); + ctx->check_end_rp(); + transition(src, Resource_State::copy_src); + transition(dst, Resource_State::copy_dst); + vkCmdResolveImage( + ctx->cb, + s.image, + state_to_image_layout(s.state), + d.image, + state_to_image_layout(d.state), + 1, + &r + ); +} + void Context::transition(Texture_Id id, Resource_State state) { Context_Vk* ctx = (Context_Vk*)this; Device_Vk* dev = ctx->dev; @@ -2400,6 +2521,14 @@ void Context::transition(Texture_Id id, Resource_State state) { b.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; src_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; dst_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + } else if ( + src_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && + dst_layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL + ) { + b.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + b.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dst_stage = VK_PIPELINE_STAGE_TRANSFER_BIT; } else { print_err("Bad resource transition.\n"); assert(0); @@ -2698,12 +2827,24 @@ void Pipeline_Vk::init_stages( zero(sis, sizeof *sis * count); for (i = 0, count = 0; i < shader_type_count; i++) { if (shader.modules[i]) { + int idx = shader.find_module( + (Shader_Type)i, + desc.shader_masks[i] + ); + VkShaderModule mod; + if (idx < 0) { + mod = shader.modules[i][0].mod; + print_war("Shader variant not found; using the default >~<\n"); + print(" ^ mask was 0x%x\n", desc.shader_masks[i]); + } else + mod = shader.modules[i][idx].mod; + assert(idx >= 0); auto& si = sis[i]; si.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; si.flags = 0; si.stage = Shader_Vk::stage((Shader_Type)i); - si.module = shader.modules[i]; - si.pName = shader.entrypoints[i]; + si.module = mod; + si.pName = "main"; count++; } } @@ -2823,7 +2964,8 @@ void Pipeline_Vk::init_msaa( Arena& scope, Device_Vk* dev, VkGraphicsPipelineCreateInfo& info, - const Pipeline& desc + const Pipeline& desc, + const Render_Pass& rpo ) { VkPipelineMultisampleStateCreateInfo& mi = *(VkPipelineMultisampleStateCreateInfo*)arena_alloc( @@ -2835,7 +2977,7 @@ void Pipeline_Vk::init_msaa( zero(&mi, sizeof mi); mi.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; mi.sampleShadingEnable = VK_FALSE; - mi.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + mi.rasterizationSamples = dev->get_samples(rpo.get_samples()); info.pMultisampleState = &mi; } @@ -3030,7 +3172,7 @@ void Pipeline_Vk::init(Device_Vk* dev, const Pso_Key& key) { init_input_assembly(scope, dev, info, desc); init_viewport(scope, info, desc); init_rasterisation(scope, dev, info, desc); - init_msaa(scope, dev, info, desc); + init_msaa(scope, dev, info, desc, key.rpo.rpo); init_depthstencil(scope, dev, info, desc); init_blending(scope, dev, info, key.rpo.rpo, desc); info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; @@ -3282,9 +3424,8 @@ void Vertex_Format_Vk::optimise(const Vertex_Format_Vk* shadervf) { } } -bool Shader_Vk::init_module( +VkShaderModule Shader_Vk::make_module( Device_Vk* dev, - int stage, char* buf, int size ) { @@ -3295,8 +3436,9 @@ bool Shader_Vk::init_module( mi.codeSize = size; mi.pCode = (uint32_t*)buf; r = vkCreateShaderModule(dev->dev, &mi, &dev->ac, &m); - modules[stage] = m; - return r == VK_SUCCESS; + if (r == VK_SUCCESS) + return m; + return 0; } int Shader_Vk::Vertex_Format::find_binding(const char* name) { @@ -3408,8 +3550,11 @@ void Shader_Vk::Vertex_Format::destroy(Device_Vk* dev) { void Shader_Vk::destroy(Device_Vk* dev) { int i; for (i = 0; i < shader_type_count; i++) - if (modules[i]) - vkDestroyShaderModule(dev->dev, modules[i], &dev->ac); + if (modules[i]) { + int j, e = module_count[i]; + for (j = 0; j < e; j++) + vkDestroyShaderModule(dev->dev, modules[i][j].mod, &dev->ac); + } vfd.destroy(dev); heap_free(dev->heap, descs); dev->destroy_vertex_format(vf); @@ -3440,6 +3585,11 @@ int Shader::descriptor_binding(const char* name) { return sh->descs[idx].slot; } +int Shader::opt_mask(Shader_Type type, const char* name) { + Shader_Vk* sh = (Shader_Vk*)this; + return sh->find_opt(type, name); +} + int Shader::descriptor_stage(int slot) { Shader_Vk* sh = (Shader_Vk*)this; int i; @@ -3560,7 +3710,8 @@ Texture_Id Device::create_texture( int d, int mip_count, int array_size, - Buffer_Id init + Buffer_Id init, + int samples ) { VkImageCreateInfo ii{}; VkResult r; @@ -3592,7 +3743,7 @@ Texture_Id Device::create_texture( ii.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; ii.usage = get_texture_usage(flags); ii.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ii.samples = VK_SAMPLE_COUNT_1_BIT; + ii.samples = dev->get_samples(samples); ii.flags = image_flags; r = vkCreateImage(dev->dev, &ii, &dev->ac, &image); if (r != VK_SUCCESS) { @@ -3650,7 +3801,8 @@ Texture_Id Device::create_texture( array_size, 0, 0, - false + false, + samples ); if (init) { Context& ctx = dev->acquire(); @@ -3713,7 +3865,8 @@ Texture_Id Device::alias_texture( array_size, start_mip, start_array, - true + true, + texture.samples ); nt.set_name(dev, name); return ntid; @@ -3792,11 +3945,10 @@ Asset* Shader_Loader::load( Shader_Vk* shader; Shader_Id id; (void)s; - (void)a; (void)filename; id = dev->alloc_shader(); shader = (Shader_Vk*)&dev->get_shader(id); - if (!shader->init(dev, f)) { + if (!shader->init(dev, a, f)) { dev->shaders.remove(id); return 0; } @@ -3822,7 +3974,46 @@ int Shader_Vk::find_descriptor(const char* name) { return -1; } -bool Shader_Vk::init(Device_Vk* dev, Pack_File* f) { +int Shader_Vk::find_module( + Shader_Type type, + int mask +) { + int i; + int count = module_count[type]; + Shader_Module* arr = modules[type]; + int bucket = (int)( + fnv1a64((uint8_t*)&mask, sizeof mask) % + count + ); + for (i = 0; i < count; i++) { + Shader_Module& mod = arr[bucket]; + if (mod.mask == mask) + return bucket; + bucket = (bucket + 1) % count; + } + return -1; +} + +int Shader_Vk::find_opt( + Shader_Type type, + const char* name +) { + int count = opt_count, i; + int bucket = (int)( + hash_string(name) % + count + ); + int stage = 1 << type; + for (i = 0; i < count; i++) { + Option& o = options[bucket]; + if (string_equal(name, o.name) && (o.stage & stage)) + return o.mask; + bucket = (bucket + 1) % count; + } + return 0; +} + +bool Shader_Vk::init(Device_Vk* dev, Arena* a, Pack_File* f) { char magic[4]; int binding_count, target_count, i; pack_read(f, magic, 4); @@ -3836,6 +4027,7 @@ bool Shader_Vk::init(Device_Vk* dev, Pack_File* f) { pack_read(f, &binding_count, 4); pack_read(f, &target_count, 4); pack_read(f, &desc_count, 4); + pack_read(f, &opt_count, 4); assert(binding_count); vfd.binding_count = binding_count; if (!vfd.init(dev, f)) @@ -3883,24 +4075,60 @@ bool Shader_Vk::init(Device_Vk* dev, Pack_File* f) { desc_count * sizeof *descs ); pack_read(f, descs, desc_count * sizeof *descs); + options = (Option*)arena_alloc( + a, + opt_count * sizeof *options + ); + pack_read(f, options, opt_count * sizeof *options); for (i = 0; i < shader_type_count; i++) { - int o, s; - pack_read(f, &o, 4); - pack_read(f, &s, 4); - if (o) { - bool r; - int before = pack_tell(f); - char* buf = (char*)heap_alloc(dev->heap, s); - pack_seek(f, o, seek_rel_start); - pack_read(f, buf, s); - r = init_module(dev, i, buf, s); - heap_free(dev->heap, buf); - pack_seek(f, before, seek_rel_start); - if (!r) return false; + int c; + pack_read(f, &c, 4); + module_count[i] = c; + if (c) { + int o, s, mask; + int bucket, j; + Shader_Module* m = (Shader_Module*)arena_alloc( + a, + c * sizeof *m + ); + for (j = 0; j < c; j++) { + m[j].mask = -1; + } + for (j = 0; j < c; j++) { + int k; + pack_read(f, &o, 4); /* H_Variant */ + pack_read(f, &s, 4); + pack_read(f, &mask, 4); + pack_seek(f, 4, seek_rel_cur); + bucket = (int)( + fnv1a64((uint8_t*)&m, sizeof m) % + c + ); + for (k = 0; k < c; k++) { + Shader_Module& mod = m[bucket]; + if (mod.mask == -1) + goto found; + bucket = (bucket + 1) % c; + } + assert(0); + { + found: + char* buf = (char*)heap_alloc(dev->heap, s); + VkShaderModule r; + int before = pack_tell(f); + pack_seek(f, o, seek_rel_start); + pack_read(f, buf, s); + r = make_module(dev, buf, s); + heap_free(dev->heap, buf); + pack_seek(f, before, seek_rel_start); + if (!r) return false; + m[bucket] = Shader_Module { mask, r }; + } + } + modules[i] = m; } else { - modules[i] = VK_NULL_HANDLE; + modules[i] = 0; } - pack_read(f, entrypoints[i], 24); } return true; } @@ -3946,13 +4174,15 @@ Asset* Texture_Loader::load( char magic[4]; int w, h; size_t size; - Texture_Format fmt; + Texture_Format fmt = texture_format_r8i; + int mips = 0; (void)a; (void)s; pack_read(f, magic, 4); pack_read(f, &w, 4); pack_read(f, &h, 4); - pack_read(f, &fmt, 4); + pack_read(f, &fmt, 2); + pack_read(f, &mips, 2); size = calc_size(fmt, w, h); { Buffer_Id buf = dev->create_buffer( @@ -3961,9 +4191,6 @@ Asset* Texture_Loader::load( Buffer_Flags::copy_src | Buffer_Flags::cpu_readwrite ); - void* mem = dev->map_buffer(buf, 0, size); - pack_read(f, mem, size); - dev->unmap_buffer(buf); Texture_Id tex = dev->create_texture( filename, fmt, @@ -3971,11 +4198,25 @@ Asset* Texture_Loader::load( w, h, 1, + mips, 1, - 1, - buf + 0 ); - dev->destroy_buffer(buf); + { + int i; + for (i = 0; i < mips; i++) { + size = calc_size(fmt, w, h); + void* mem = dev->map_buffer(buf, 0, size); + pack_read(f, mem, size); + dev->unmap_buffer(buf); + auto& ctx = dev->acquire(); + ctx.copy(tex, buf, i, 0, 0, w, h); + dev->submit(ctx); + w >>= 1; + h >>= 1; + } + } + dev->destroy_bufferi(buf); return &dev->get_texture(tex); } } @@ -4002,7 +4243,8 @@ void Texture_Vk::init( int array_size, int start_mip, int start_array, - bool alias + bool alias, + int samples ) { t->id = id; t->parent = parent; @@ -4022,6 +4264,7 @@ void Texture_Vk::init( t->start_mip = start_mip; t->start_array = start_array; t->alias = alias; + t->samples = samples; } void Texture_Vk::destroy(Device_Vk* dev) { |