diff options
-rw-r--r-- | c2.cpp | 32 | ||||
-rw-r--r-- | convmodel.c | 3 | ||||
-rw-r--r-- | entity.hpp | 8 | ||||
-rw-r--r-- | intermediate/forward.h | 13 | ||||
-rw-r--r-- | intermediate/surface.glsl | 146 | ||||
-rw-r--r-- | lighting.cpp | 9 | ||||
-rw-r--r-- | lighting.hpp | 7 | ||||
-rw-r--r-- | model.cpp | 28 | ||||
-rw-r--r-- | model.hpp | 2 | ||||
-rw-r--r-- | pipeline.cpp | 13 | ||||
-rw-r--r-- | qstd/memory.c | 2 | ||||
-rw-r--r-- | renderer.cpp | 7 | ||||
-rw-r--r-- | sc/sc.cpp | 219 | ||||
-rw-r--r-- | video.cpp | 175 | ||||
-rw-r--r-- | video.hpp | 8 | ||||
-rw-r--r-- | world.cpp | 132 | ||||
-rw-r--r-- | world.hpp | 63 |
17 files changed, 646 insertions, 221 deletions
@@ -60,33 +60,6 @@ static Buffer_Id upload_verts(Device* dev) { return vbo; } -Texture_Id make_default_texture(Device* dev) { - unsigned* mem; - Texture_Id tex; - Buffer_Id buf = dev->create_buffer( - "default texture stage", - 4, - Buffer_Flags::copy_src | - Buffer_Flags::cpu_readwrite - ); - mem = (unsigned*)dev->map_buffer(buf, 0, 4); - mem[0] = 0xffffffff; - dev->unmap_buffer(buf); - tex = dev->create_texture( - "default PBR texture", - texture_format_rgba8i, - Texture_Flags::sampleable | Texture_Flags::copy_dst, - 1, - 1, - 1, - 1, - 1, - buf - ); - dev->destroy_bufferi(buf); - return tex; -} - static Sampler_Id create_clamped_linear(Device* dev) { Sampler_State s{}; s.min = Filter_Mode::linear; @@ -642,7 +615,6 @@ struct C2 : public App { Collider* box_col, * floor_col; Texture* texture; Texture* texture2; - Texture_Id default_texture; Entity_Id monkey, monkey2, box, floor; Model_Scene scene; Renderer renderer; @@ -694,11 +666,10 @@ struct C2 : public App { ); assets.init(&asset_arena, "pack", 128); dev = Device::create(&video_arena, this); - default_texture = make_default_texture(dev); make_hdr_target(); make_ui_texture(); model_loader.init(dev, &assets); - mat_loader.init(&assets, default_texture); + mat_loader.init(&assets); register_asset_loader("MODL", &model_loader); register_asset_loader("MTRL", &mat_loader); shader = (Shader*)assets.load("triangle.csh"); @@ -1035,7 +1006,6 @@ struct C2 : public App { ui->destroy(); deinit_editor(); assets.destroy(); - dev->destroy_texture(default_texture); dev->destroy_texture(hdr_target); dev->destroy_texture(hdr_resolved); dev->destroy_texture(ms_depth); diff --git a/convmodel.c b/convmodel.c index 31a062d..89f65c8 100644 --- a/convmodel.c +++ b/convmodel.c @@ -332,7 +332,7 @@ Shader_Attrib* parse_shader_attribs(const char* fname) { FILE* f; char magic[4]; int type, i; - int binding_count, target_count, desc_count; + int binding_count, target_count, desc_count, opt_count; char* fpath = arena_alloc( &arena, string_len(fname) + @@ -362,6 +362,7 @@ Shader_Attrib* parse_shader_attribs(const char* fname) { fread(&binding_count, 4, 1, f); fread(&target_count, 4, 1, f); fread(&desc_count, 4, 1, f); + fread(&opt_count, 4, 1, f); assert(binding_count); for (i = 0; i < binding_count; i++) { char name[24]; diff --git a/entity.hpp b/entity.hpp new file mode 100644 index 0000000..34cc84d --- /dev/null +++ b/entity.hpp @@ -0,0 +1,8 @@ +#ifndef entity_hpp +#define entity_hpp + +#include <stdint.h> + +using Entity_Id = uint32_t; + +#endif diff --git a/intermediate/forward.h b/intermediate/forward.h index 377a3cb..3bc2d50 100644 --- a/intermediate/forward.h +++ b/intermediate/forward.h @@ -17,9 +17,6 @@ type: vec3 name: caster_id type: int [variable] -name: type -type: int -[variable] name: range type: float @@ -40,6 +37,12 @@ type: int [variable] name: frame type: int +[variable] +name: sun_irange +type: ivec2 +[variable] +name: point_irange +type: ivec2 [cbuffer] name: globals @@ -48,8 +51,4 @@ stage: fragment #endif -/* match Light::Type in lighting.hpp */ -#define LT_SUN 0 -#define LT_POINT 1 - #endif diff --git a/intermediate/surface.glsl b/intermediate/surface.glsl index 79f5f65..cc287e4 100644 --- a/intermediate/surface.glsl +++ b/intermediate/surface.glsl @@ -9,6 +9,22 @@ fragment: main #ifdef DESC +[option] +name: albedomap +stage: fragment +[option] +name: aomap +stage: fragment +[option] +name: metalmap +stage: fragment +[option] +name: roughmap +stage: fragment +[option] +name: normalmap +stage: fragment + [binding] name: mesh rate: vertex @@ -142,8 +158,8 @@ void main() { #define pi 3.14159265358979323846 -vec3 diffuse_brdf(vec2 uv) { - vec3 a = material.albedo * texture(albedo, uv).rgb; +vec3 diffuse_brdf(vec2 uv, vec3 base) { + vec3 a = base; return a / pi; } @@ -157,7 +173,11 @@ float specular_G1(float a, vec3 v, vec3 n) { float specular_brdf(vec2 uv, vec3 ref, vec3 l, vec3 v, vec3 n) { float ndl = max(dot(n, l), 0.0); float ndv = max(dot(n, v), 0.0); +#if OPT_roughmap float a = texture(rough, uv).r * material.roughness; +#else + float a = material.roughness; +#endif float a2 = a * a; float ndr = max(dot(n, ref), 0.0); float b = ((ndr * ndr) * (a2 - 1) + 1); @@ -215,61 +235,115 @@ float get_shadow(Light l, vec3 wpos) { return d; } +vec3 apply_light( + Light l, + vec3 p, + vec2 uv, + vec3 base_diffuse, + vec3 spec_col, + vec3 ref, + vec3 light_dir, + vec3 view_dir, + vec3 nrm, + float cos_theta_i, + float atten +) { + vec3 diffuse = base_diffuse * cos_theta_i; + vec3 spec = + spec_col * + specular_brdf(uv, ref, light_dir, view_dir, nrm) * + cos_theta_i; + float shadow = 1.0f; + if (l.caster_id >= 0) + shadow = get_shadow(l, p); + return (diffuse + spec) * atten * l.brightness * l.colour * shadow; +} + void main() { - int i; + int i, e; vec2 uv = interpolator.uv; vec3 p = interpolator.position.xyz; +#if OPT_normalmap vec3 nrmsample = texture(normal, uv).rgb; vec2 nrmxy = nrmsample.xy * 2.0 - 1.0; vec3 nrm = normalize(vec3(nrmxy, 1.0)); - if (nrmsample.b == 1.0) /* default texture */ - nrm = normalize(interpolator.tbn[2]); - else - nrm = normalize(interpolator.tbn * nrm); + nrm = normalize(interpolator.tbn * nrm); +#else + vec3 nrm = normalize(interpolator.tbn[2]); +#endif +#if OPT_albedomap vec3 col = texture(albedo, uv).rgb * material.albedo; - vec3 view_dir = normalize(globals.camera_pos - p); +#else + vec3 col = material.albedo; +#endif + +#if OPT_metalmap float met = texture(metal, uv).r * material.metalness; +#else + float met = material.metalness; +#endif + + vec3 view_dir = normalize(globals.camera_pos - p); vec3 ref = reflect(-view_dir, nrm); vec3 ref_col = texture(env_cube, ref, material.roughness * 8.0).rgb; vec3 spec_col = mix(ref_col, ref_col * col, met); vec3 amb_col = min(textureLod(env_cube, nrm, 8.0).rgb, 0.05); +#if OPT_aomap vec3 ambient = amb_col * texture(ao, uv).r * material.ao; +#else + vec3 ambient = amb_col * material.ao; +#endif - vec3 base_diffuse = diffuse_brdf(uv) * (1.0 - met); + vec3 base_diffuse = diffuse_brdf(uv, col) * (1.0 - met); vec3 light = 0.0.xxx; - for (i = 0; i < globals.light_count; i++) { + e = globals.sun_irange.y; + for (i = globals.sun_irange.x; i < e; i++) { + Light l = lights[i]; + vec3 light_dir = l.pos; + float cos_theta_i = max(dot(nrm, light_dir), 0.0); + float atten = 1.0; + light += apply_light( + l, + p, + uv, + base_diffuse, + spec_col, + ref, + light_dir, + view_dir, + nrm, + cos_theta_i, + atten + ); + } + e = globals.point_irange.y; + for (i = globals.point_irange.x; i < e; i++) { Light l = lights[i]; - vec3 light_dir; - float cos_theta_i, atten; - switch (l.type) { - case LT_SUN: - light_dir = l.pos; - cos_theta_i = max(dot(nrm, light_dir), 0.0); - atten = 1.0; - break; - case LT_POINT: { - float d; - light_dir = p - l.pos; - d = length(light_dir); - atten = max(d, 0.01); - atten = 1.0 / (atten * atten); - light_dir /= d; - } break; - } - vec3 diffuse = base_diffuse * cos_theta_i; - vec3 spec = - spec_col * - specular_brdf(uv, ref, light_dir, view_dir, nrm) * - cos_theta_i; - float shadow = 1.0f; - if (l.caster_id >= 0) - shadow = get_shadow(l, p); - light += (diffuse + spec) * atten * l.brightness * l.colour * shadow; + float cos_theta_i; + vec3 light_dir = l.pos - p; + float d = length(light_dir); + float atten = max(d, 0.01); + atten = 1.0 / (atten * atten); + light_dir /= d; + cos_theta_i = max(dot(nrm, light_dir), 0.0); + light += apply_light( + l, + p, + uv, + base_diffuse, + spec_col, + ref, + light_dir, + view_dir, + nrm, + cos_theta_i, + atten + ); } colour = vec4(ambient + light, 1.0); diff --git a/lighting.cpp b/lighting.cpp index bf01831..b140611 100644 --- a/lighting.cpp +++ b/lighting.cpp @@ -15,9 +15,8 @@ struct GPU_Light { float brightness; v3f colour; int caster_id; - int type; float range; - int pad[2]; + int pad[3]; }; struct GPU_Caster { @@ -101,6 +100,7 @@ void Lighting::write_bufs( GPU_Light* ldst = (GPU_Light*)lptr; GPU_Caster* cdst = (GPU_Caster*)cptr; int count = 0, ccount = 0; + sun_range[0] = count; for (auto v : w.view<Sun_Light>()) { GPU_Light gl; Sun_Light& l = v.get<Sun_Light>(); @@ -108,7 +108,6 @@ void Lighting::write_bufs( print_war("Over light limit.\n"); goto cancel; } - gl.type = (int)Light::Type::sun; gl.brightness = l.brightness; gl.colour = l.colour; gl.pos = l.dir; @@ -127,6 +126,8 @@ void Lighting::write_bufs( gl.caster_id = -1; ldst[count++] = gl; } + sun_range[1] = count; + point_range[0] = count; for (auto v : w.view<Transform, Point_Light>()) { GPU_Light gl; Transform& t = v.get<Transform>(); @@ -135,7 +136,6 @@ void Lighting::write_bufs( print_war("Over light limit.\n"); goto cancel; } - gl.type = (int)Light::Type::point; gl.brightness = l.brightness; gl.colour = l.colour; gl.pos = v3f( @@ -147,6 +147,7 @@ void Lighting::write_bufs( gl.range = l.range; ldst[count++] = gl; } + point_range[1] = count; cancel: light_count = count; caster_count = ccount; diff --git a/lighting.hpp b/lighting.hpp index 38e95ae..7c57bab 100644 --- a/lighting.hpp +++ b/lighting.hpp @@ -21,6 +21,8 @@ struct Lighting { Texture_Id shadow_slices[max_shadows]; Sampler_Id shadow_sampler; Camera_Id cameras[max_shadows]; + int sun_range[2]; + int point_range[2]; int light_count, caster_count; void init(Device* dev); void destroy(Device* dev, Renderer& r); @@ -41,11 +43,6 @@ struct Lighting { }; struct Light { - enum class Type { - sun, - point - }; - v3f colour; float brightness; bool caster; @@ -32,20 +32,26 @@ void Material::use( Sampler_Id sampler, Shader& shader ) { - auto bind = [&shader, &pb, sampler]( + int opt = 0; + auto bind = [&shader, &pb, &opt, sampler]( const char* name, + const char* optname, Texture_Id t ) { int loc = shader.descriptor_binding(name); if (loc >= 0) { - pb.texture(loc, t, sampler); + if (t) { + pb.texture(loc, t, sampler); + opt |= shader.opt_mask(shader_type_fragment, optname); + } } }; - bind("albedo", tex.albedo); - bind("ao", tex.ao); - bind("metal", tex.metal); - bind("rough", tex.rough); - bind("normal", tex.normal); + bind("albedo", "albedomap", tex.albedo); + bind("ao", "aomap", tex.ao); + bind("metal", "metalmap", tex.metal); + bind("rough", "roughmap", tex.rough); + bind("normal", "normalmap", tex.normal); + pb.option(shader_type_fragment, opt); } void Model_Loader::init(Device* device, Asset_Arena* shader_arena) { @@ -214,10 +220,8 @@ void Model::update_transforms() { } void Material_Loader::init( - Asset_Arena* texture_arena, - Texture_Id dt + Asset_Arena* texture_arena ) { - default_tex = dt; textures = texture_arena; } @@ -235,11 +239,11 @@ Asset* Material_Loader::load( return r; }; auto read_tex = [&](int len) { - if (!len) return default_tex; + if (!len) return Texture_Id(0); const char* name = read_name(len); auto t = (Texture*)textures->load(name); if (!t) - return default_tex; + return Texture_Id(0); return t->id; }; (void)filename; @@ -85,7 +85,7 @@ struct Model_Loader : public Asset_Loader { struct Material_Loader : public Asset_Loader { Asset_Arena* textures; Texture_Id default_tex; - void init(Asset_Arena* texture_arena, Texture_Id dt); + void init(Asset_Arena* texture_arena); Asset* load( Arena* a, Arena* s, diff --git a/pipeline.cpp b/pipeline.cpp index 374dad8..4d34ebd 100644 --- a/pipeline.cpp +++ b/pipeline.cpp @@ -254,6 +254,14 @@ Pipeline_Builder& Pipeline_Builder::shader(Shader_Id s) { return *this; } +Pipeline_Builder& Pipeline_Builder::option( + Shader_Type type, + int mask +) { + pip->shader_masks[type] = mask; + return *this; +} + Pipeline_Builder& Pipeline_Builder::texture( int binding, Texture_Id t, @@ -321,6 +329,7 @@ Pipeline& Pipeline_Builder::build() { } void Pipeline::hash() { + int i; #define h(n, v) \ n = fnv1a64_2(n, (uint8_t*)&v, sizeof v) pipeline_hash = fnv1a64(0, 0); @@ -347,8 +356,10 @@ void Pipeline::hash() { h(pipeline_hash, blend_src_alpha); h(pipeline_hash, blend_dst_alpha); h(pipeline_hash, cull_mode); + for (i = 0; i < shader_type_count; i++) + h(pipeline_hash, shader_masks[i]); { - int i, e = descriptor_count; + int e = descriptor_count; descriptor_resource_hash = fnv1a64(0, 0); for (i = 0; i < e; i++) { Descriptor* d = &descriptors[i]; diff --git a/qstd/memory.c b/qstd/memory.c index 87618f9..6db9ad9 100644 --- a/qstd/memory.c +++ b/qstd/memory.c @@ -86,7 +86,7 @@ void arena_push(Arena* a) { void arena_pop(Arena* a) { assert(a->last_push); - a->ptr -= a->last_push; + a->ptr = a->last_push; a->last_push = *(int*)&a->buf[a->ptr]; } diff --git a/renderer.cpp b/renderer.cpp index 64c3795..6208d3c 100644 --- a/renderer.cpp +++ b/renderer.cpp @@ -15,6 +15,9 @@ struct Global_Cbuffer { v3f camera_pos; int light_count; int frame; + int pad; + int sun_irange[2]; + int point_irange[2]; }; void init_drawlist( @@ -119,6 +122,10 @@ void Renderer::update_globals( cb->camera_pos = cp; cb->frame = frame; cb->light_count = l->light_count; + cb->sun_irange[0] = l->sun_range[0]; + cb->sun_irange[1] = l->sun_range[1]; + cb->point_irange[0] = l->point_range[0]; + cb->point_irange[1] = l->point_range[1]; globals.unmap(d); globals.update(ctx); } @@ -225,6 +225,10 @@ struct Desc { int stage; std::string strct; }; + struct Option { + int stage; + int mask; + }; int type; std::vector<Binding> bindings; std::vector<Variable> trgts; @@ -233,8 +237,10 @@ struct Desc { std::unordered_map<std::string, Texture> textures; std::unordered_map<std::string, CBuffer> cbuffers; std::unordered_map<std::string, SBuffer> sbuffers; + std::unordered_map<std::string, Option> options; std::vector<Descriptor> descriptors; std::string entrypoints[shader_type_count]; + int copts[shader_type_count]; void read_var(Variable& d, cfg_Object* desc) { const char* sname = find_string_default(desc, "name", 0); if (!sname) { @@ -397,10 +403,40 @@ struct Desc { buf.stage |= 1 << stage_from_string(sstage); } + void read_opt(cfg_Object* desc) { + const char* sname = find_string_default(desc, "name", 0); + if (!sname) { + print_err("%s must have a name.\n", desc->name); + pbreak(1001); + } + const char* sstage = find_string_default(desc, "stage", 0); + if (!sstage) { + print_err("%s must define a stage.\n", sname); + pbreak(1002); + } + std::string n(sname); + if (n.size() > 23) { + print_err("Option name %s is too long (max 23 chars).\n", sname); + pbreak(1003); + } + if (!options.contains(n)) { + Option& o = options[n]; + o.mask = 0; + o.stage = 0; + } + int stage = stage_from_string(sstage); + Option& opt = options[n]; + opt.stage |= 1 << stage; + opt.mask = copts[stage]; + copts[stage] <<= 1; + } + void build(cfg_Object* desc) { int i; Binding* cur_binding = 0; type = get_program_type(desc); + for (i = 0; i < shader_type_count; i++) + copts[i] = 1; if (type != sprogram_type_graphics) { assert(0); /* todo */ return; @@ -450,6 +486,8 @@ struct Desc { } else if (!strcmp(desc->name, "struct")) { desc = read_struct(desc); continue; + } else if (!strcmp(desc->name, "option")) { + read_opt(desc); } desc = desc->next; } @@ -628,22 +666,35 @@ std::vector<uint32_t> compile_shader( const char* src, const char* define, int stage, + int opt, EShLanguage lang ) { std::string vars = d.build_vs(); - const char* srcs[] = { + std::vector<const char*> srcs = { glsl_version_s, "\n", "#define ", define, "\n", builtin_src, - presrc, - src + presrc }; - const char* src_names[] = { + std::vector<const char*> src_names = { sname, sname, sname, sname, sname, - sname, sname, sname + sname, sname }; - static_assert(sizeof srcs == sizeof src_names); + for (const auto& p : d.options) { + const auto& o = p.second; + std::string def = "#define OPT_" + p.first + " "; + if (opt & o.mask) + def += "1\n"; + else + def += "0\n"; + /* memory leak lol */ + srcs.push_back(strdup(def.c_str())); + src_names.push_back(sname); + } + srcs.push_back(src); + src_names.push_back(sname); + assert(src_names.size() == srcs.size()); glslang::TShader shader(lang); glslang::TProgram program; glslang::TIntermediate* ir; @@ -663,10 +714,10 @@ std::vector<uint32_t> compile_shader( #endif EShMessages msg = (EShMessages)(EShMsgSpvRules | EShMsgVulkanRules); shader.setStringsWithLengthsAndNames( - srcs, + &srcs[0], 0, - src_names, - sizeof srcs / sizeof *srcs + &src_names[0], + src_names.size() ); shader.setEnvClient(glslang::EShClientVulkan, client_version); shader.setEnvTarget(glslang::EShTargetSpv, target_version); @@ -736,10 +787,23 @@ void configure( } } +struct H_Variant { + int o = -1, s; + int mask; + int pad = 0; +}; +struct H_Option { + char name[24] = { 0 }; + int mask; + int stage; +}; +static_assert(sizeof(H_Variant) == 16); +using Variant_Map = std::unordered_map<int, std::vector<uint32_t>>; + void compile_shaders( const char* sname, const char* fname, - std::vector<uint32_t>* spv, + Variant_Map* spv, const char* src, Desc& d ) { @@ -747,19 +811,40 @@ void compile_shaders( EShLanguage lang; int i; for (i = 0; i < shader_type_count; i++) { + int sm = 1 << i; if (!d.entrypoints[i].empty()) { std::string ps; + int opt = 0, j; configure(d, i, define, lang, ps); - spv[i] = compile_shader( - d, - sname, - fname, - ps.c_str(), - src, - define, - i, - lang - ); + for (const auto& p : d.options) { + if (~p.second.stage & sm) continue; + opt |= p.second.mask; + } + for (j = 0; j <= opt; j++) { + spv[i][j] = compile_shader( + d, + sname, + fname, + ps.c_str(), + src, + define, + i, + j, + lang + ); + } + if (!opt) + spv[i][0] = compile_shader( + d, + sname, + fname, + ps.c_str(), + src, + define, + i, + 0, + lang + ); } } } @@ -767,10 +852,14 @@ void compile_shaders( void write_csh( const char* fname, const Desc& d, - const std::vector<uint32_t>* stages + Variant_Map* stages ) { - int hsize = 20, i, coff; + int hsize = 24, i, coff; FILE* f = fopen(fname, "wb"); + H_Variant* variants[shader_type_count]; + H_Option* hopts = 0; + int vc[shader_type_count]; + std::vector<const std::vector<uint32_t>*> order; if (!f) { print_err("Failed to open %s\n", fname); pbreak(500); @@ -781,6 +870,7 @@ void write_csh( c = d.bindings.size(); fwrite(&c, 4, 1, f); c = d.trgts.size(); fwrite(&c, 4, 1, f); c = d.descriptors.size(); fwrite(&c, 4, 1, f); + c = d.options.size(); fwrite(&c, 4, 1, f); for (const auto& b : d.bindings) { char buf[24]; int count = b.attrs.size(); @@ -816,36 +906,79 @@ void write_csh( fwrite(&d.stage, 4, 1, f); hsize += 32; } - hsize += shader_type_count * 32; - for (i = 0, coff = 0; i < shader_type_count; i++) { - int o = 0; - char buf[24]; - memset(buf, 0, sizeof buf); - if (d.entrypoints[i].empty()) { - fwrite(&o, 4, 1, f); - fwrite(&o, 4, 1, f); - } else { - int size = stages[i].size() * sizeof(uint32_t); - strcpy(buf, d.entrypoints[i].c_str()); - o = hsize + coff; - fwrite(&o, 4, 1, f); - fwrite(&size, 4, 1, f); - coff += size; + if (d.options.size()) + hopts = new H_Option[d.options.size()]; + for (const auto& p : d.options) { + const auto& name = p.first; + const auto& o = p.second; + int count = d.options.size(); + int j, bucket = (int)( + hash_string(name.c_str()) % + count + ); + for (j = 0; j < count; j++) { + auto& ho = hopts[j]; + if (!ho.name[0]) { + strcpy(ho.name, name.c_str()); + ho.mask = o.mask; + ho.stage = o.stage; + goto oklmao; + } + bucket = (bucket + 1) % count; } - fwrite(buf, 1, sizeof buf, f); + assert(0); + oklmao: + hsize += 32; } - for (i = 0; i < shader_type_count; i++) - if (!d.entrypoints[i].empty()) { - auto& stage = stages[i]; - fwrite(&stage[0], sizeof(uint32_t), stage.size(), f); + fwrite(hopts, sizeof *hopts, d.options.size(), f); + delete[] hopts; + for (i = 0; i < shader_type_count; i++) { + vc[i] = stages[i].size(); + hsize += vc[i] * 16 + 4; + variants[i] = new H_Variant[vc[i]]; + } + for (i = 0, coff = 0; i < shader_type_count; i++) { + fwrite(&vc[i], 4, 1, f); + for (const auto& p : stages[i]) { + int mask = p.first, j; + int bucket = (int)( + fnv1a64((uint8_t*)&mask, sizeof mask) % + vc[i] + ); + for (j = 0; j < vc[i]; j++) { + H_Variant& v = variants[i][bucket]; + if (v.o == -1) { + auto& arr = p.second; + v.o = coff + hsize; + v.s = arr.size() * sizeof(uint32_t); + v.mask = mask; + coff += v.s; + order.push_back(&arr); + goto done; + } + bucket = (bucket + 1) % vc[i]; + } + assert(0); + done:; } + fwrite(variants[i], sizeof(H_Variant), vc[i], f); + delete[] variants[i]; + } + for (auto bytecode : order) { + fwrite( + &(*bytecode)[0], + sizeof(uint32_t), + bytecode->size(), + f + ); + } } int main(int argc, const char** argv) { char* src; size_t src_size; std::string desc_src; - std::vector<uint32_t> spv[shader_type_count]; + Variant_Map spv[shader_type_count]; cfg_Object* cdesc; void* dp_mem; Desc desc; @@ -430,6 +430,12 @@ enum { context_state_init = 1 << 1 }; + +struct Shader_Module { + int mask; + VkShaderModule mod; +}; + struct Shader_Vk : public Shader, public Late_Terminated { struct Attribute { char name[28]; @@ -464,23 +470,31 @@ struct Shader_Vk : public Shader, public Late_Terminated { int stage; }; + struct Option { + char name[24]; + int mask; + int stage; + }; + SProgram_Type type; - VkShaderModule modules[shader_type_count]; - char entrypoints[shader_type_count][24]; Vertex_Format vfd; Desc* descs; - int desc_count; + Shader_Module* modules[shader_type_count]; + Option* options; + int desc_count, opt_count; + int module_count[shader_type_count]; - bool init(Device_Vk* dev, Pack_File* f); - bool init_module( + bool init(Device_Vk* dev, Arena* a, Pack_File* f); + VkShaderModule make_module( Device_Vk* dev, - int stage, char* buf, int size ); void destroy(Device_Vk* dev) override; int find_descriptor(const char* name); + int find_module(Shader_Type type, int mask); + int find_opt(Shader_Type, const char* name); static VkShaderStageFlagBits stage(Shader_Type type) { switch (type) { @@ -2056,6 +2070,8 @@ void Device::present() { VkSubmitInfo si{}; VkPipelineStageFlags stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + heap_defrag(dev->heap); + // ^ this makes it >4x the speed ctx->check_end_rp(); ctx->transition( dev->get_backbuffer(), @@ -2811,12 +2827,24 @@ void Pipeline_Vk::init_stages( zero(sis, sizeof *sis * count); for (i = 0, count = 0; i < shader_type_count; i++) { if (shader.modules[i]) { + int idx = shader.find_module( + (Shader_Type)i, + desc.shader_masks[i] + ); + VkShaderModule mod; + if (idx < 0) { + mod = shader.modules[i][0].mod; + print_war("Shader variant not found; using the default >~<\n"); + print(" ^ mask was 0x%x\n", desc.shader_masks[i]); + } else + mod = shader.modules[i][idx].mod; + assert(idx >= 0); auto& si = sis[i]; si.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; si.flags = 0; si.stage = Shader_Vk::stage((Shader_Type)i); - si.module = shader.modules[i]; - si.pName = shader.entrypoints[i]; + si.module = mod; + si.pName = "main"; count++; } } @@ -3396,9 +3424,8 @@ void Vertex_Format_Vk::optimise(const Vertex_Format_Vk* shadervf) { } } -bool Shader_Vk::init_module( +VkShaderModule Shader_Vk::make_module( Device_Vk* dev, - int stage, char* buf, int size ) { @@ -3409,8 +3436,9 @@ bool Shader_Vk::init_module( mi.codeSize = size; mi.pCode = (uint32_t*)buf; r = vkCreateShaderModule(dev->dev, &mi, &dev->ac, &m); - modules[stage] = m; - return r == VK_SUCCESS; + if (r == VK_SUCCESS) + return m; + return 0; } int Shader_Vk::Vertex_Format::find_binding(const char* name) { @@ -3522,8 +3550,11 @@ void Shader_Vk::Vertex_Format::destroy(Device_Vk* dev) { void Shader_Vk::destroy(Device_Vk* dev) { int i; for (i = 0; i < shader_type_count; i++) - if (modules[i]) - vkDestroyShaderModule(dev->dev, modules[i], &dev->ac); + if (modules[i]) { + int j, e = module_count[i]; + for (j = 0; j < e; j++) + vkDestroyShaderModule(dev->dev, modules[i][j].mod, &dev->ac); + } vfd.destroy(dev); heap_free(dev->heap, descs); dev->destroy_vertex_format(vf); @@ -3554,6 +3585,11 @@ int Shader::descriptor_binding(const char* name) { return sh->descs[idx].slot; } +int Shader::opt_mask(Shader_Type type, const char* name) { + Shader_Vk* sh = (Shader_Vk*)this; + return sh->find_opt(type, name); +} + int Shader::descriptor_stage(int slot) { Shader_Vk* sh = (Shader_Vk*)this; int i; @@ -3909,11 +3945,10 @@ Asset* Shader_Loader::load( Shader_Vk* shader; Shader_Id id; (void)s; - (void)a; (void)filename; id = dev->alloc_shader(); shader = (Shader_Vk*)&dev->get_shader(id); - if (!shader->init(dev, f)) { + if (!shader->init(dev, a, f)) { dev->shaders.remove(id); return 0; } @@ -3939,7 +3974,46 @@ int Shader_Vk::find_descriptor(const char* name) { return -1; } -bool Shader_Vk::init(Device_Vk* dev, Pack_File* f) { +int Shader_Vk::find_module( + Shader_Type type, + int mask +) { + int i; + int count = module_count[type]; + Shader_Module* arr = modules[type]; + int bucket = (int)( + fnv1a64((uint8_t*)&mask, sizeof mask) % + count + ); + for (i = 0; i < count; i++) { + Shader_Module& mod = arr[bucket]; + if (mod.mask == mask) + return bucket; + bucket = (bucket + 1) % count; + } + return -1; +} + +int Shader_Vk::find_opt( + Shader_Type type, + const char* name +) { + int count = opt_count, i; + int bucket = (int)( + hash_string(name) % + count + ); + int stage = 1 << type; + for (i = 0; i < count; i++) { + Option& o = options[bucket]; + if (string_equal(name, o.name) && (o.stage & stage)) + return o.mask; + bucket = (bucket + 1) % count; + } + return 0; +} + +bool Shader_Vk::init(Device_Vk* dev, Arena* a, Pack_File* f) { char magic[4]; int binding_count, target_count, i; pack_read(f, magic, 4); @@ -3953,6 +4027,7 @@ bool Shader_Vk::init(Device_Vk* dev, Pack_File* f) { pack_read(f, &binding_count, 4); pack_read(f, &target_count, 4); pack_read(f, &desc_count, 4); + pack_read(f, &opt_count, 4); assert(binding_count); vfd.binding_count = binding_count; if (!vfd.init(dev, f)) @@ -4000,24 +4075,60 @@ bool Shader_Vk::init(Device_Vk* dev, Pack_File* f) { desc_count * sizeof *descs ); pack_read(f, descs, desc_count * sizeof *descs); + options = (Option*)arena_alloc( + a, + opt_count * sizeof *options + ); + pack_read(f, options, opt_count * sizeof *options); for (i = 0; i < shader_type_count; i++) { - int o, s; - pack_read(f, &o, 4); - pack_read(f, &s, 4); - if (o) { - bool r; - int before = pack_tell(f); - char* buf = (char*)heap_alloc(dev->heap, s); - pack_seek(f, o, seek_rel_start); - pack_read(f, buf, s); - r = init_module(dev, i, buf, s); - heap_free(dev->heap, buf); - pack_seek(f, before, seek_rel_start); - if (!r) return false; + int c; + pack_read(f, &c, 4); + module_count[i] = c; + if (c) { + int o, s, mask; + int bucket, j; + Shader_Module* m = (Shader_Module*)arena_alloc( + a, + c * sizeof *m + ); + for (j = 0; j < c; j++) { + m[j].mask = -1; + } + for (j = 0; j < c; j++) { + int k; + pack_read(f, &o, 4); /* H_Variant */ + pack_read(f, &s, 4); + pack_read(f, &mask, 4); + pack_seek(f, 4, seek_rel_cur); + bucket = (int)( + fnv1a64((uint8_t*)&m, sizeof m) % + c + ); + for (k = 0; k < c; k++) { + Shader_Module& mod = m[bucket]; + if (mod.mask == -1) + goto found; + bucket = (bucket + 1) % c; + } + assert(0); + { + found: + char* buf = (char*)heap_alloc(dev->heap, s); + VkShaderModule r; + int before = pack_tell(f); + pack_seek(f, o, seek_rel_start); + pack_read(f, buf, s); + r = make_module(dev, buf, s); + heap_free(dev->heap, buf); + pack_seek(f, before, seek_rel_start); + if (!r) return false; + m[bucket] = Shader_Module { mask, r }; + } + } + modules[i] = m; } else { - modules[i] = VK_NULL_HANDLE; + modules[i] = 0; } - pack_read(f, entrypoints[i], 24); } return true; } @@ -134,12 +134,18 @@ struct Pipeline { Vertex_Format_Id vertex_format; Shader_Id shader; int samples; + int shader_masks[shader_type_count]; Descriptor descriptors[pipeline_max_descriptors]; int descriptor_count; void hash(); bool pipeline_eq(const Pipeline& other) const { + int i; + for (i = 0; i < shader_type_count; i++) { + if (other.shader_masks[i] != shader_masks[i]) + return false; + } return shader == other.shader && vertex_format == other.vertex_format && @@ -333,6 +339,7 @@ struct Pipeline_Builder { ); PB& cull(Cull_Mode mode); PB& shader(Shader_Id s); + PB& option(Shader_Type type, int mask); PB& vertex_format(Vertex_Format_Id vf); PB& texture(int binding, Texture_Id t, Sampler_Id s); PB& cbuffer( @@ -588,6 +595,7 @@ struct Shader : public Asset { int attribute_index(const char* name); int target_index(const char* name); int descriptor_binding(const char* name); + int opt_mask(Shader_Type type, const char* name); int descriptor_stage(int slot); }; @@ -8,12 +8,33 @@ extern "C" { static int component_sizes[max_components]; +void Slinky::init(Arena* a, int size) { + data = arena_alloc(a, size * slinky_size); + entities = (Entity_Id*)arena_alloc( + a, + sizeof *entities * slinky_size + ); + count = 0; +} + +void* Slinky::add(Entity_Id eid, int size, int& mapping) { + int idx = count++; + assert(count <= slinky_size); + entities[idx] = eid; + mapping = idx; + return &((char*)data)[idx * size]; +} + +void* Slinky::get(Entity_Id eid, int mapping, int size) { + char* e = &((char*)data)[mapping * size]; + assert(eid == entities[mapping]); + (void)eid; + return e; +} + void Pool::init(Arena* a, Component_Mask m) { int i, coff; - next = 0; - size = 0; mask = m; - count = 0; for (i = 0, coff = 0; i < max_components; i++) { if (m & ((Component_Mask)1 << i)) { offsets[i] = coff; @@ -21,50 +42,76 @@ void Pool::init(Arena* a, Component_Mask m) { } } size = coff; - data = arena_alloc(a, size * pool_cap); + slinkies[0].init(a, size); + slinky_count = 1; + count = 0; } -void* Pool::add(Arena* a, Entity_Id eid) { - int idx; - if (next) - return next->add(a, eid); - if (count >= pool_cap) { - next = (Pool*)arena_alloc(a, sizeof *next); - next->init(a, mask); - return next->add(a, eid); +Slinky& Pool::get_slinky(Arena* a) { + int i, e = slinky_count; + for (i = 0; i < e; i++) { + Slinky& s = slinkies[i]; + if (s.count < slinky_size) + return s; } - idx = count++; - entities[idx] = eid; - mapping[entity_index(eid)] = idx; - return &((char*)data)[idx * size]; + assert(slinky_count < max_slinkies); + Slinky& s = slinkies[slinky_count++]; + s.init(a, size); + return s; +} + +void* Pool::add(Arena* a, Entity_Id eid) { + Slinky& slinky = get_slinky(a); + Mapping& m = mapping[entity_index(eid)]; + m.slinky = &slinky - slinkies; + count++; + return slinky.add(eid, size, m.mapping); } void Pool::remove(Entity_Id eid) { - int eind = entity_index(eid); - int end = count - 1; - assert(eid == entities[mapping[eind]]); - memmove( - (char*)data + eind * size, - (char*)data + end * size, - size - ); - entities[eind] = entities[count]; - mapping[end] = eind; - count = end; + Mapping& m = mapping[entity_index(eid)]; + Slinky& s = slinkies[m.slinky]; + assert(m.slinky >= 0); + if (count > 1) { + int idx = slinky_count - 1; + Entity_Id le; + void* last; +#ifdef DEBUG + last = 0; +#endif + for (; idx >= 0; idx--) { + Slinky& s = slinkies[idx]; + int& c = s.count; + if (c) { + c--; + last = (char*)s.data + c * size; + le = s.entities[c]; + s.entities[c] = 0; + break; + } + } + assert(last != 0); + memcpy((char*)s.data + m.mapping * size, last, size); + s.entities[m.mapping] = le; + mapping[entity_index(le)] = m; + m = Mapping { -1, -1 }; + } else + s.count--; + count--; } void* Pool::get(Entity_Id eid, int cid) { - int eind = entity_index(eid); - char* e = &((char*)data)[mapping[eind] * size]; - assert(eid == entities[mapping[eind]]); - return &e[offsets[cid]]; + Mapping m = mapping[entity_index(eid)]; + Slinky& s = slinkies[m.slinky]; + char* ptr = (char*)s.get(eid, m.mapping, size); + return ptr + offsets[cid]; } void* Pool::get(Entity_Id eid) { - int eind = entity_index(eid); - void* e = &((char*)data)[mapping[eind] * size]; - assert(eid == entities[mapping[eind]]); - return e; + Mapping m = mapping[entity_index(eid)]; + Slinky& s = slinkies[m.slinky]; + char* ptr = (char*)s.get(eid, m.mapping, size); + return ptr; } int get_new_component_id(int size) { @@ -91,6 +138,7 @@ void World::init(Arena* a) { arena = a; count = 0; free_count = 0; + dq_count = 0; for (i = 0; i < max_entities; i++) versions[i] = 1; for (i = 0; i < max_pools; i++) @@ -166,7 +214,6 @@ void* World::get(Entity_Id eid, int cid) { int eind = entity_index(eid); Pool& p = get_pool(masks[eind]); assert(versions[eind] == entity_version(eid)); - assert(p.entities[p.mapping[eind]] == eid); return p.get(eid, cid); } @@ -202,3 +249,16 @@ void World::destroy(Entity_Id e) { versions[entity_index(e)]++; } +void World::qdestroy(Entity_Id e) { + assert(e); + assert(dq_count < max_entities); + dq[dq_count++] = e; +} + +void World::update() { + int i, e = dq_count; + Entity_Id* d = dq; + for (i = 0; i < e; i++, d++) + destroy(*d); + dq_count = 0; +} @@ -3,28 +3,59 @@ struct Arena; +#include "entity.hpp" + #include <stdint.h> #include <new> +#include <tuple> #include <utility> - +extern "C" { +#include "plat.h" +} using Component_Mask = uint64_t; -using Entity_Id = uint32_t; #define max_entities 1024 -#define pool_cap 128 +#define slinky_size 1 #define max_components 64 +#define max_slinkies (max_entities / slinky_size) + +static_assert((!(slinky_size & (slinky_size - 1)))); +/* slinky_size needs to be a power of 2 */ +static consteval int get_slinky_size_bit() { + int i; + int m = slinky_size; + for (i = 0; m; m >>= 1, i++); + if (i == 0) + throw "wtf"; + return i - 1; +} +static constexpr int slinky_size_bit = get_slinky_size_bit(); + +struct Slinky { + Entity_Id* entities; + void* data; + int count; + + void init(Arena* a, int size); + void* add(Entity_Id eid, int size, int& mapping); + void* get(Entity_Id eid, int mapping, int size); +}; struct Pool { + struct Mapping { + int slinky; + int mapping; + }; + Component_Mask mask; - Pool* next; - void* data; - Entity_Id entities[pool_cap]; - int mapping[max_entities]; + Slinky slinkies[max_slinkies]; + Mapping mapping[max_entities]; int offsets[max_components]; - int size, count; + int size, count, slinky_count; void init(Arena* a, Component_Mask m); + Slinky& get_slinky(Arena* a); void* add(Arena* a, Entity_Id eid); void* get(Entity_Id eid, int cid); void* get(Entity_Id eid); @@ -43,8 +74,9 @@ struct World { uint8_t versions[max_entities]; Entity_Id entities[max_entities]; Entity_Id freelist[max_entities]; + Entity_Id dq[max_entities]; Arena* arena; - int count, free_count; + int count, dq_count, free_count; void init(Arena* a); uint64_t hash_mask(Component_Mask m); @@ -94,6 +126,8 @@ struct World { void* get(Entity_Id eid, int cid); void remove(Entity_Id eid, int cid); void destroy(Entity_Id e); + void qdestroy(Entity_Id e); + void update(); struct View { World* w; @@ -126,8 +160,11 @@ struct World { template <typename C> C& get() { Pool* p = v->pools[ind]; + Slinky& s = p->slinkies[ptr >> slinky_size_bit]; + int si = &s - p->slinkies; + int off = ptr - (si << slinky_size_bit); return *(C*)( - (char*)p->data + ptr * p->size + + (char*)s.data + off * p->size + p->offsets[v->w->get_component_id<C>()] ); } @@ -138,7 +175,11 @@ struct World { Entity_Id entity() { Pool* p = v->pools[ind]; - return p->entities[ptr]; + Slinky& s = p->slinkies[ptr >> slinky_size_bit]; + int si = &s - p->slinkies; + int off = ptr - (si << slinky_size_bit); + assert(s.entities[off]); + return s.entities[off]; } }; |