#include "app.hpp"
#include "hashmap.hpp"
#include "video.hpp"

#define device_heap_size (1024 * 1024 * 8)
#define max_textures 1024
#define max_buffers 1024
#define max_vertex_formats 64
#define max_rpos 128
#define max_fbos 128
#define max_pipelines 128
#define max_descriptor_sets 1024
#define max_shaders 32
#define max_samplers 16

extern "C" {
#include "memory.h"
#include "pack.h"
#include "plat.h"
#include "sc/sh_enums.h"
#include "sc/sh_helpers.h"
#include "str.h"
}

#include <algorithm>
#include <new>
#include <tuple>
#include <unordered_map>

#include "glad_vk.h"

#include <math.h>
#include <string.h>

#ifdef min /* use std::min and max instead */
#undef min
#endif
#ifdef max
#undef max
#endif

#if !defined(plat_win)
#define __stdcall
#endif

const char* device_exts[] = {
	VK_KHR_SWAPCHAIN_EXTENSION_NAME,
	VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME,
#ifdef DEBUG
	VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME
#endif
};

extern "C" {
VkSurfaceKHR app_create_vk_surface(App* app, VkInstance inst);
void app_destroy_vk_surface(
	App* app,
	VkInstance inst,
	VkSurfaceKHR surf
);
}

struct Device_Vk;

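/* Pool allocator for device memory. VkDeviceMemory is committed in
 * Pages (one memory `type` per page, presumably sized in multiples of
 * `size_alignment`); each Page is subdivided into a singly-linked list
 * of Chunks. An Allocation is a (page, chunk) pair: `pad` holds the
 * adjustment applied to satisfy alignment, and pages with a non-null
 * `mapping` let Allocation::map() return a CPU pointer without a
 * per-allocation vkMapMemory call. */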
struct Vram_Allocator {
	static constexpr int size_alignment = (1024 * 1024 * 32);
	struct Page;
	struct Chunk;
	struct Allocation {
		VkDeviceMemory mem;
		Page* page;
		Chunk* chunk;
		bool valid() const { return chunk != 0; }
		VkDeviceSize offset() const { return chunk->get_offset(); }
		void* map(VkDeviceSize off) {
			assert(page->mapping != 0);
			return (char*)page->mapping + offset() + off;
		}
		static Allocation null() {
			Allocation r{};
			return r;
		}
	};
	struct Chunk {
		VkDeviceSize offset;
		VkDeviceSize pad;
		VkDeviceSize size;
		Chunk* next;
		bool free;

		VkDeviceSize get_offset() {
			return offset + pad;
		}
	};
	struct Page {
		VkDeviceMemory memory;
		VkDeviceSize size;
		int type;
		Chunk* chunks;
		Page* next;
		void* mapping;
		/* need something better for host-writable,
		 * non-coherent mappings */

		void init(Device_Vk* dev, VkDeviceSize size, int type);
		void defrag(Device_Vk* dev);
		Allocation imp_alloc(Device_Vk* dev, VkDeviceSize size);
		Allocation alloc(
			Device_Vk* dev,
			VkDeviceSize size,
			VkDeviceSize align
		);
	};
	Page* pages;
	Device_Vk* dev;

	void init(Device_Vk* d);
	void destroy();
	Allocation alloc(
		int type,
		VkDeviceSize size,
		VkDeviceSize align
	);
	void free(Allocation& alloc);
};
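
/* Rough usage sketch for the Vram_Allocator above (hypothetical; the
 * real Buffer_Vk::init is defined elsewhere and may differ):
 *
 *   VkMemoryRequirements req;
 *   vkGetBufferMemoryRequirements(dev->dev, buf, &req);
 *   int type = dev->find_memory_type(
 *       req.memoryTypeBits,
 *       Buffer_Vk::get_memory_flags(flags)
 *   );
 *   memory = dev->vrama.alloc(type, req.size, req.alignment);
 *   vkBindBufferMemory(dev->dev, buf, memory.mem, memory.offset());
 */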

static VkCullModeFlags get_vk_cull_mode(Cull_Mode mode) {
	switch (mode) {
		case Cull_Mode::none:  return VK_CULL_MODE_NONE;
		case Cull_Mode::back:  return VK_CULL_MODE_BACK_BIT;
		case Cull_Mode::front: return VK_CULL_MODE_FRONT_BIT;
	}
	assert(0);
	return VK_CULL_MODE_NONE;
}

static VkPrimitiveTopology get_topology(Geo_Type type) {
	switch (type) {
		case Geo_Type::triangles:
			return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
		case Geo_Type::lines:
			return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
		case Geo_Type::points:
			return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
	}
	assert(0);
	return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
}

static VkFormat get_vk_format(Texture_Format fmt) {
	switch (fmt) {
		case texture_format_r8i:     return VK_FORMAT_R8_UNORM;
		case texture_format_r16f:    return VK_FORMAT_R16_SFLOAT;
		case texture_format_r32f:    return VK_FORMAT_R32_SFLOAT;
		case texture_format_rg8i:    return VK_FORMAT_R8G8_UNORM;
		case texture_format_rg16f:   return VK_FORMAT_R16G16_SFLOAT;
		case texture_format_rg32f:   return VK_FORMAT_R32G32_SFLOAT;
		case texture_format_rgb8i:   return VK_FORMAT_R8G8B8_UNORM;
		case texture_format_rgb16f:  return VK_FORMAT_R16G16B16_SFLOAT;
		case texture_format_rgb32f:  return VK_FORMAT_R32G32B32_SFLOAT;
		case texture_format_rgba8i:  return VK_FORMAT_R8G8B8A8_UNORM;
		case texture_format_rgba8i_srgb: return VK_FORMAT_R8G8B8A8_SRGB;
		case texture_format_bgra8i_srgb: return VK_FORMAT_B8G8R8A8_SRGB;
		case texture_format_rgba16f: return VK_FORMAT_R16G16B16A16_SFLOAT;
		case texture_format_rgba32f: return VK_FORMAT_R32G32B32A32_SFLOAT;
		case texture_format_bc1:     return VK_FORMAT_BC1_RGB_UNORM_BLOCK;
		case texture_format_bc4:     return VK_FORMAT_BC4_UNORM_BLOCK;
		case texture_format_bc5:     return VK_FORMAT_BC5_UNORM_BLOCK;
		case texture_format_d16:     return VK_FORMAT_D16_UNORM;
		case texture_format_d24s8:   return VK_FORMAT_D24_UNORM_S8_UINT;
		case texture_format_d32:     return VK_FORMAT_D32_SFLOAT;
		case texture_format_count:   break;
	}
	assert(0);
	return VK_FORMAT_UNDEFINED;
}

static VkBlendFactor get_vk_blend_factor(Blend_Factor mode) {
	switch (mode) {
		case Blend_Factor::zero:           return VK_BLEND_FACTOR_ZERO;
		case Blend_Factor::one:            return VK_BLEND_FACTOR_ONE;
		case Blend_Factor::src_colour:     return VK_BLEND_FACTOR_SRC_COLOR;
		case Blend_Factor::inv_src_colour: return VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR;
		case Blend_Factor::dst_colour:     return VK_BLEND_FACTOR_DST_COLOR;
		case Blend_Factor::inv_dst_colour: return VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR;
		case Blend_Factor::src_alpha:      return VK_BLEND_FACTOR_SRC_ALPHA;
		case Blend_Factor::inv_src_alpha:  return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA;
		case Blend_Factor::dst_alpha:      return VK_BLEND_FACTOR_DST_ALPHA;
		case Blend_Factor::inv_dst_alpha:  return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA;
	}
	assert(0);
	return VK_BLEND_FACTOR_ONE;
}

static VkBlendOp get_vk_blend_op(Blend_Mode mode) {
	switch (mode) {
		case Blend_Mode::add:              return VK_BLEND_OP_ADD;
		case Blend_Mode::subtract:         return VK_BLEND_OP_SUBTRACT;
		case Blend_Mode::reverse_subtract: return VK_BLEND_OP_REVERSE_SUBTRACT;
		case Blend_Mode::min:              return VK_BLEND_OP_MIN;
		case Blend_Mode::max:              return VK_BLEND_OP_MAX;
	}
	assert(0);
	return VK_BLEND_OP_ADD;
}

static VkImageUsageFlags get_texture_usage(int flags) {
	VkImageUsageFlags f = 0;
	if (flags & Texture_Flags::sampleable)
		f |= VK_IMAGE_USAGE_SAMPLED_BIT;
	if (flags & Texture_Flags::colour_target)
		f |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
	if (flags & Texture_Flags::depth_stencil_target)
		f |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
	if (flags & Texture_Flags::copy_src)
		f |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
	if (flags & Texture_Flags::copy_dst)
		f |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
	return f;
}

static VkImageAspectFlags get_image_aspect(
	Texture_Format fmt,
	int flags
) {
	VkImageAspectFlags f = 0;
	if (flags & Texture_Flags::depth_stencil_target) {
		if (fmt == texture_format_d24s8)
			f |=
				VK_IMAGE_ASPECT_DEPTH_BIT |
				VK_IMAGE_ASPECT_STENCIL_BIT;
		else
			f |= VK_IMAGE_ASPECT_DEPTH_BIT;
	} else
		f |= VK_IMAGE_ASPECT_COLOR_BIT;
	return f;
}

static VkImageViewType get_view_type(int a, int d, int flags) {
	VkImageViewType t = VK_IMAGE_VIEW_TYPE_2D;
	if (flags & Texture_Flags::cubemap) {
		if (a > 1)
			t = VK_IMAGE_VIEW_TYPE_CUBE;
		else
			t = VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
	} else if (d > 1) {
		t = VK_IMAGE_VIEW_TYPE_3D;
		assert(a == 1);
	} else if (a > 1) {
		t = VK_IMAGE_VIEW_TYPE_2D_ARRAY;
	}
	return t;
}

template <bool Depth>
VkImageLayout state_to_image_layout(Resource_State s) {
	switch (s) {
		case undefined:     return VK_IMAGE_LAYOUT_UNDEFINED;
		case copy_dst:      return VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
		case copy_src:      return VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
		case shader_read:   return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
		case render_target:
			return Depth?
				VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
				VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
		case presentable:   return VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
	}
	assert(0);
	return VK_IMAGE_LAYOUT_UNDEFINED;
}

VkImageLayout state_to_image_layout(Resource_State s) {
	return state_to_image_layout<false>(s);
}

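/* VkAllocationCallbacks backed by the device's heap; installed in
 * Device_Vk::init_ac() so host-side Vulkan allocations go through
 * heap_alloc_aligned/heap_free instead of the default allocator. */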
static void* __stdcall vk_alloc(
	void* uptr,
	size_t size,
	size_t alignment,
	VkSystemAllocationScope scope
) {
	Device* d = (Device*)uptr;
	void* r;
	(void)scope;
	if (!size) return 0;
	r = heap_alloc_aligned(
		d->heap,
		size,
		alignment
	);
	if (!r) {
		print_err("Out of memory.");
		pbreak(4096);
	}
	return r;
}

static void __stdcall vk_free(
	void* uptr,
	void* ptr
) {
	Device* d = (Device*)uptr;
	if (!ptr) return;
	heap_free(d->heap, ptr);
}

static void* __stdcall vk_realloc(
	void* uptr,
	void* old,
	size_t size,
	size_t alignment,
	VkSystemAllocationScope scope
) {
	int os;
	void* na;
	(void)scope;
	if (!old)
		return vk_alloc(uptr, size, alignment, scope);
	if (!size) {
		vk_free(uptr, old);
		return 0;
	}
	os = heap_block_size(old);
	na = vk_alloc(uptr, size, alignment, scope);
	memcpy(na, old, std::min(os, (int)size));
	vk_free(uptr, old);
	return na;
}

typedef struct {
	VkSurfaceCapabilitiesKHR cap;
	unsigned fmt_count, pm_count;
	VkSurfaceFormatKHR* fmts;
	VkPresentModeKHR* pms;
} Swap_Cap;

static void get_swap_cap(
	Device* d,
	VkPhysicalDevice dev,
	VkSurfaceKHR surf,
	Swap_Cap* cap
) {
	cap->fmts = 0;
	cap->pms = 0;
	vkGetPhysicalDeviceSurfaceCapabilitiesKHR(
		dev,
		surf,
		&cap->cap
	);
	vkGetPhysicalDeviceSurfaceFormatsKHR(
		dev,
		surf,
		&cap->fmt_count,
		0
	);
	if (cap->fmt_count) {
		cap->fmts = (VkSurfaceFormatKHR*)heap_alloc(
			d->heap,
			sizeof *cap->fmts * cap->fmt_count
		);
		vkGetPhysicalDeviceSurfaceFormatsKHR(
			dev,
			surf,
			&cap->fmt_count,
			cap->fmts
		);
	}
	vkGetPhysicalDeviceSurfacePresentModesKHR(
		dev,
		surf,
		&cap->pm_count,
		0
	);
	if (cap->pm_count) {
		cap->pms = (VkPresentModeKHR*)heap_alloc(
			d->heap,
			sizeof *cap->pms * cap->pm_count
		);
		vkGetPhysicalDeviceSurfacePresentModesKHR(
			dev,
			surf,
			&cap->pm_count,
			cap->pms
		);
	}
}

static void deinit_swap_cap(
	Device* d,
	Swap_Cap* cap
) {
	if (cap->fmts) heap_free(d->heap, cap->fmts);
	if (cap->pms)  heap_free(d->heap, cap->pms);
}

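/* Intrusive list node for GPU resources whose destruction must be
 * deferred: Device_Vk::queue_destroy() links the object onto the
 * current frame's Terminator, which calls destroy() once that frame
 * slot comes around again (see Terminator and Device::begin_frame). */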
struct Late_Terminated {
	Late_Terminated* next;

	virtual void destroy(Device_Vk* dev) = 0;
};

struct Swapchain {
	VkSwapchainKHR swapchain;
	Texture_Id* textures;
	VkSurfaceFormatKHR format;
	VkExtent2D size;
	VkPresentModeKHR mode;
	VkSemaphore image_avail;
	int image_count;

	void init(const App& app, Device_Vk* dev);
	void initr(const App& app, Device_Vk* dev);
	void recreate(const App& app, Device_Vk* dev);
	void get_images(Device_Vk* dev);
	void destroy(Device_Vk* dev);

	Texture_Id create_image(
		Device_Vk* dev,
		VkImage image,
		VkImageView view,
		int w,
		int h
	);
};

#define max_contexts 16

enum {
	context_state_avail = 1 << 0,
	context_state_init  = 1 << 1
};


struct Shader_Module {
	int mask;
	VkShaderModule mod;
};

struct Shader_Vk : public Shader, public Late_Terminated {
	struct Attribute {
		char name[28];
		SVariable_Type type;
		int index;
	};

	struct Binding {
		char name[24];
		SBinding_Rate rate;
		int attr_count;
		int index;
		int* attributes;
	};

	struct Vertex_Format {
		Binding* bindings;
		Attribute* attributes;
		int attr_count;
		int binding_count;

		bool init(Device_Vk* dev, Pack_File* f);
		void destroy(Device_Vk* dev);

		int find_binding(const char* name);
		int find_attribute(const char* name);
	};

	struct Desc {
		char name[24];
		int slot;
		int stage;
	};

	struct Option {
		char name[24];
		int mask;
		int stage;
	};

	SProgram_Type type;
	Vertex_Format vfd;
	Desc* descs;
	Shader_Module* modules[shader_type_count];
	Option* options;
	int desc_count, opt_count;
	int module_count[shader_type_count];

	bool init(Device_Vk* dev, Arena* a, Pack_File* f);
	VkShaderModule make_module(
		Device_Vk* dev,
		char* buf,
		int size
	);
	void destroy(Device_Vk* dev) override;

	int find_descriptor(const char* name);
	int find_module(Shader_Type type, int mask);
	int find_opt(Shader_Type, const char* name);

	static VkShaderStageFlagBits stage(Shader_Type type) {
		switch (type) {
			case shader_type_vertex:
				return VK_SHADER_STAGE_VERTEX_BIT;
			case shader_type_fragment:
				return VK_SHADER_STAGE_FRAGMENT_BIT;
			default:
				assert(0);
				return (VkShaderStageFlagBits)0;
		}
	}
};

struct Renderpass_Vk;
struct Framebuffer_Vk;
struct Pipeline_Vk;
struct Rpo_Key;
struct Context_Vk : public Context {
	int state;
	Device_Vk* dev;
	VkCommandBuffer cb;
	VkCommandPool pool;
	VkFence fence;
	VkSemaphore semaphore;

	VkPipeline last_pso;
	VkDescriptorSet last_dso;
	VkRenderPass last_rpo;
	VkFramebuffer last_fbo;

	void init_pool();
	void init_cb();
	void init_sync();
	void init(Device_Vk* device);
	void begin_record();
	Context_Vk& acquire(Device_Vk* device);
	void release();
	void destroy();

	std::pair<Renderpass_Vk&, Framebuffer_Vk&> begin_rp(
		const Rpo_Key& rp
	);
	void end_rp(
		const Render_Pass& rp,
		Renderpass_Vk& rpo,
		Framebuffer_Vk& fbo
	);
	void check_end_rp();

	void submit_descriptors(
		const Pipeline_Vk& pso,
		const Pipeline& p
	);
};

struct Texture_Vk : public Texture, public Late_Terminated {
	VkImage image;
	VkImageView view;
	Vram_Allocator::Allocation memory;
	Resource_State state;
	Texture_Id parent;

	static void init(
		Texture_Vk* t,
		Texture_Id id,
		Texture_Id parent,
		VkImage img,
		VkImageView v,
		Vram_Allocator::Allocation mem,
		Resource_State state,
		Texture_Format fmt,
		int flags,
		int w,
		int h,
		int d,
		int mip_count,
		int array_size,
		int start_mip,
		int start_array,
		bool alias,
		int samples
	);
	void destroy(Device_Vk*) override;
	void set_name(Device_Vk* dev, const char* name);
};

struct Buffer_Vk : public Buffer, public Late_Terminated {
	VkBuffer buf;
	VkDeviceSize size;
	Vram_Allocator::Allocation memory;
	int flags;

	void init(Device_Vk* dev, int flags, VkDeviceSize size);
	void destroy(Device_Vk* dev) override;
	void set_name(Device_Vk* dev, const char* name);

	static VkBufferUsageFlags get_usage(int flags) {
		VkBufferUsageFlags r = 0;
		if (flags & Buffer_Flags::index_buffer)
			r |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
		if (flags & Buffer_Flags::vertex_buffer)
			r |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
		if (flags & Buffer_Flags::constant_buffer)
			r |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
		if (flags & Buffer_Flags::storage_buffer)
			r |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
		if (flags & Buffer_Flags::copy_src)
			r |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
		if (flags & Buffer_Flags::copy_dst)
			r |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
		return r;
	}

	static VkMemoryPropertyFlags get_memory_flags(int flags) {
		VkMemoryPropertyFlags r = 0;
		r |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
		if (flags & Buffer_Flags::cpu_read)
			r |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
		if (flags & Buffer_Flags::cpu_readwrite)
			r |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
		return r;
	}
};

struct Render_Pass_States {
	Resource_State colours[max_colour_attachments];
	Resource_State depth;
	int colour_count;

	bool operator==(const Render_Pass_States& other) const {
		int i;
		if (colour_count != other.colour_count) return false;
		if (depth != other.depth) return false;
		for (i = 0; i < colour_count; i++)
			if (colours[i] != other.colours[i])
				return false;
		return true;
	}
};

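/* Cache keys for transient Vulkan objects. Render passes,
 * framebuffers, pipelines and descriptor sets are created on demand,
 * stored in the Device_Vk hash maps keyed by these structs, and aged
 * out by collect_garbage() when unused for a few frames. */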
struct Rpo_Key {
	Render_Pass rpo;
	Render_Pass_States states;

	bool operator==(const Rpo_Key& other) const {
		return
			rpo.layout_eq(other.rpo) &&
			states == other.states;
	}
};

struct Fbo_Key {
	Render_Pass rpo;
	bool operator==(const Fbo_Key& other) const {
		return rpo.resources_eq(other.rpo);
	}
};

struct Pso_Key {
	Pipeline pso;
	Rpo_Key rpo;

	bool operator==(const Pso_Key& other) const {
		return
			rpo == other.rpo &&
			pso.pipeline_eq(other.pso) &&
			pso.desc_layout_eq(other.pso);
	}
};

struct Dso_Key {
	Pipeline pip;

	bool operator==(const Dso_Key& other) const {
		return pip.desc_resources_eq(other.pip);
	}
};

struct Renderpass_Vk {
	VkRenderPass rpo;
	int age;

	void on_submit() {
		age = 0;
	}

	VkAttachmentLoadOp load_op_from_mode(Clear_Mode m);

	void init(Device_Vk* dev, const Rpo_Key& rp);
	void destroy(Device_Vk* dev);
};

struct Framebuffer_Vk {
	VkFramebuffer fbo;
	int w, h;
	int age;

	void on_submit() {
		age = 0;
	}

	void init(
		Device_Vk* dev,
		const Renderpass_Vk& rpo,
		const Render_Pass& rp
	);
	void destroy(Device_Vk* dev);
};

struct Pipeline_Vk {
	VkPipeline pip;
	VkPipelineLayout lay;
	VkDescriptorSetLayout dlay;
	int age;

	void init(Device_Vk* dev, const Pso_Key& desc);
	void destroy(Device_Vk* dev);

	void init_stages(
		Arena& scope,
		Device_Vk* dev,
		VkGraphicsPipelineCreateInfo& info,
		const Pipeline& desc
	);
	void init_vertex_input(
		Arena& scope,
		Device_Vk* dev,
		VkGraphicsPipelineCreateInfo& info,
		const Pipeline& desc
	);
	void init_input_assembly(
		Arena& scope,
		Device_Vk* dev,
		VkGraphicsPipelineCreateInfo& info,
		const Pipeline& desc
	);
	void init_viewport(
		Arena& scope,
		VkGraphicsPipelineCreateInfo& info,
		const Pipeline& desc
	);
	void init_rasterisation(
		Arena& scope,
		Device_Vk* dev,
		VkGraphicsPipelineCreateInfo& info,
		const Pipeline& desc
	);
	void init_msaa(
		Arena& scope,
		Device_Vk* dev,
		VkGraphicsPipelineCreateInfo& info,
		const Pipeline& desc,
		const Render_Pass& rpo
	);
	void init_depthstencil(
		Arena& scope,
		Device_Vk* dev,
		VkGraphicsPipelineCreateInfo& info,
		const Pipeline& desc
	);
	void init_blending(
		Arena& scope,
		Device_Vk* dev,
		VkGraphicsPipelineCreateInfo& info,
		const Render_Pass& rp,
		const Pipeline& desc
	);
	void init_layout(
		Device_Vk* dev,
		const Pipeline& desc
	);
	void init_descriptors(
		Device_Vk* dev,
		const Pipeline& desc
	);

	static VkCompareOp get_compare_op(Depth_Mode m);

	void on_submit() {
		age = 0;
	}
};

struct Descriptor_Set_Vk {
	VkDescriptorPool dp;
	VkDescriptorSet dset;
	int age;

	void init(
		Device_Vk* dev,
		const Pipeline_Vk& pip,
		const Pipeline& desc
	);
	void destroy(Device_Vk* dev);

	void on_submit() {
		age = 0;
	}
};

struct Vertex_Format_Vk {
	VkVertexInputBindingDescription* bindings;
	int binding_count;
	VkVertexInputAttributeDescription* attrs;
	int attr_count;

	void init(Device_Vk* dev, const Vertex_Format_Desc& desc);
	void destroy(Device_Vk* dev);
	static VkFormat format_from_svar_type(SVariable_Type type);

	void clone(Arena* arena);
	void optimise(const Vertex_Format_Vk* shadervf);
};

struct Sampler_Vk : public Late_Terminated {
	VkSampler sampler;
	Sampler_Id id;

	void init(Device_Vk* dev, const Sampler_State& s);
	void destroy(Device_Vk* dev) override;
	void set_name(Device_Vk* dev, const char* name);

	static VkFilter get_filter(Filter_Mode mode);
	static VkSamplerMipmapMode get_mipmap_mode(Filter_Mode mode);
	static VkSamplerAddressMode get_mode(Address_Mode mode);
};

template<>
struct Hash_Function<Rpo_Key> {
	size_t operator()(const Rpo_Key& k) const {
		return (size_t)fnv1a64_2(
			k.rpo.layout_hash,
			(uint8_t*)&k.states,
			sizeof k.states
		);
	}
};

template<>
struct Hash_Function<Fbo_Key> {
	size_t operator()(const Fbo_Key& k) const {
		return k.rpo.resource_hash;
	}
};

template<>
struct Hash_Function<Pso_Key>
{
	size_t operator()(const Pso_Key& k) const {
		uint64_t rpoh = Hash_Function<Rpo_Key>{}(k.rpo);
		return fnv1a64_2(
			k.pso.pipeline_hash,
			(uint8_t*)&rpoh,
			sizeof rpoh
		);
	}
};

template<>
struct Hash_Function<Dso_Key>
{
	size_t operator()(const Dso_Key& k) const {
		return k.pip.descriptor_resource_hash;
	}
};

template<>
struct Hash_Function<Texture_Id> {
	size_t operator()(Texture_Id id) const {
		return id.index;
	}
};

template<>
struct Hash_Function<Buffer_Id> {
	size_t operator()(Buffer_Id id) const {
		return id.index;
	}
};

template<>
struct Hash_Function<Shader_Id> {
	size_t operator()(Shader_Id id) const {
		return id.index;
	}
};

template<>
struct Hash_Function<Vertex_Format_Id> {
	size_t operator()(Vertex_Format_Id id) const {
		return id.index;
	}
};

template<>
struct Hash_Function<Sampler_Id> {
	size_t operator()(Sampler_Id id) const {
		return id.index;
	}
};

struct Shader_Loader : public Asset_Loader {
	Device_Vk* dev;
	void init(Device_Vk* d);
	Asset* load(
		Arena* a,
		Arena* s,
		const char* filename,
		Pack_File* f
	) override;
	void unload(Asset* a) override;
};

struct Texture_Loader : public Asset_Loader {
	Device_Vk* dev;
	static size_t calc_size(Texture_Format fmt, int w, int h);
	void init(Device_Vk* d);
	Asset* load(
		Arena* a,
		Arena* s,
		const char* filename,
		Pack_File* f
	) override;
	void unload(Asset* a) override;
};

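/* Per-frame destruction queue: one Terminator exists per swapchain
 * image, and begin_frame() executes the queue belonging to the slot
 * being reused, so the GPU should be done with those resources before
 * they are destroyed. */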
struct Terminator {
	Late_Terminated* queue;

	void execute(Device_Vk* dev) {
		Late_Terminated* obj = queue;
		for (; obj; obj = obj->next)
			obj->destroy(dev);
		queue = 0;
	}

	void add(Late_Terminated* obj) {
		if (queue) {
			obj->next = queue;
			queue = obj;
		} else {
			obj->next = 0;
			queue = obj;
		}
	}
};

struct Device_Vk : public Device {
	VkAllocationCallbacks ac;
	VkInstance inst;
	VkDevice dev;
	VkPhysicalDevice phys_dev;
	VkSurfaceKHR surf;
	uint32_t backbuffer_index;
	VkSampleCountFlagBits max_samples;
	Texture_Id backbuffer_id;
	Swap_Cap swap_cap;
	VkPhysicalDeviceMemoryProperties mem_props;
	int queue_index;
	VkQueue queue;
	Swapchain swapchain;
	Context_Vk contexts[max_contexts];
	Context_Vk* current_ctx;
	Shader_Loader shader_loader;
	Texture_Loader texture_loader;
	Vram_Allocator vrama;
#ifdef DEBUG
	VkDebugUtilsMessengerEXT msg;
#endif

	Hash_Map<Texture_Id, Texture_Vk, max_textures> textures;
	Hash_Map<Buffer_Id, Buffer_Vk, max_buffers> buffers;
	Hash_Map<
		Vertex_Format_Id,
		Vertex_Format_Vk,
		max_vertex_formats
	> vertex_formats;
	Hash_Map<Shader_Id, Shader_Vk, max_shaders> shaders;
	Hash_Map<Sampler_Id, Sampler_Vk, max_samplers> samplers;
	uint32_t texture_count;
	uint32_t buffer_count;
	uint32_t vertex_format_count;
	uint32_t shader_count;
	uint32_t sampler_count;

	Hash_Map<Rpo_Key, Renderpass_Vk, max_rpos> rpo_cache;
	Hash_Map<Fbo_Key, Framebuffer_Vk, max_fbos> fbo_cache;
	Hash_Map<Pso_Key, Pipeline_Vk, max_pipelines> pso_cache;
	Hash_Map<Dso_Key, Descriptor_Set_Vk, max_descriptor_sets> dso_cache;

	Terminator* terminators;
	uint32_t terminator_index;

	Texture_Id depth;

	Texture_Id alloc_texture();
	Buffer_Id alloc_buffer();
	Vertex_Format_Id alloc_vf();
	Shader_Id alloc_shader();
	Sampler_Id alloc_sampler();

	void init_internal();
	void deinit_internal();
	void init_ac();
	void create_inst(const char** exts, int count);
	void create_dev(Swap_Cap* swap_cap);
	void find_exts(const char** exts, int& count);

	bool has_validation();
	void init_validation();

	void create_surf();
	void on_resize_internal(int w, int h);

	Renderpass_Vk& create_rpo(const Rpo_Key& rp);
	Renderpass_Vk& get_rpo(const Rpo_Key& rp);
	Framebuffer_Vk& create_fbo(
		const Renderpass_Vk& rpo,
		const Fbo_Key& fb
	);
	Framebuffer_Vk& get_fbo(
		const Renderpass_Vk& rpo,
		const Fbo_Key& fb
	);
	Pipeline_Vk& create_pso(const Pso_Key& pip);
	Pipeline_Vk& get_pso(const Pso_Key& pip);
	Descriptor_Set_Vk& create_dso(
		const Pipeline_Vk& pip,
		const Dso_Key& k
	);
	Descriptor_Set_Vk& get_dso(
		const Pipeline_Vk& pip,
		const Dso_Key& k
	);
	
	template<typename List, typename F>
	void collect_objects(List& list, int max_age, F f);
	void collect_garbage();
	void queue_destroy(Late_Terminated* obj);
	void create_terminators();
	void create_depth(int w, int h);

	VkSampleCountFlagBits get_max_samples();
	VkSampleCountFlagBits get_samples(int);

	int find_memory_type(
		uint32_t filter,
		VkMemoryPropertyFlags flags
	);
	Render_Pass_States get_rp_states(const Render_Pass& p);
};

#ifdef DEBUG
static VkBool32 debug_callback(
	VkDebugUtilsMessageSeverityFlagBitsEXT sev,
	VkDebugUtilsMessageTypeFlagsEXT type,
	const VkDebugUtilsMessengerCallbackDataEXT* data,
	void* uptr
) {
	(void)sev;
	(void)uptr;
	if (sev <= VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT)
		return 0;
	switch (sev) {
		case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT:
			print("%s\n", data->pMessage);
			break;
		case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT:
			print_war("%s\n", data->pMessage);
			break;
		case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT:
			print_err("%s\n", data->pMessage);
			pbreak((int)type);
			break;
		default: break;
	}
	return 0;
}

static VkResult create_dmesg(
	Device_Vk* d,
	const VkDebugUtilsMessengerCreateInfoEXT* information,
	const VkAllocationCallbacks* allocator,
	VkDebugUtilsMessengerEXT* messenger
) {
	PFN_vkCreateDebugUtilsMessengerEXT f;
	f = (PFN_vkCreateDebugUtilsMessengerEXT)
		vkGetInstanceProcAddr(
			d->inst,
			"vkCreateDebugUtilsMessengerEXT"
		);
	return f?
		f(d->inst, information, allocator, messenger):
		VK_ERROR_EXTENSION_NOT_PRESENT;
}

static void destroy_dmesg(
	VkInstance instance,
	VkDebugUtilsMessengerEXT messenger,
	const VkAllocationCallbacks* allocator
) {
	PFN_vkDestroyDebugUtilsMessengerEXT f;
	f = (PFN_vkDestroyDebugUtilsMessengerEXT)
		vkGetInstanceProcAddr(
			instance,
			"vkDestroyDebugUtilsMessengerEXT"
		);
	if (f)
		f(instance, messenger, allocator);
}

void Device_Vk::init_validation() {
	VkDebugUtilsMessengerCreateInfoEXT mi{};
	VkResult r;
	mi.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT;
	mi.messageSeverity =
		VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
		VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
		VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT;
	mi.messageType = 
		VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT    |
		VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
		VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT;
	mi.pfnUserCallback = debug_callback;
	r = create_dmesg(
		this,
		&mi,
		&ac,
		&msg
	);
	if (r != VK_SUCCESS) {
		print_err("Failed to create debug messenger.\n");
		pbreak(r);
	}
}

#endif

VkSampleCountFlagBits Device_Vk::get_max_samples() {
	VkPhysicalDeviceProperties p;
	VkSampleCountFlagBits
		i = VK_SAMPLE_COUNT_64_BIT,
		e = VK_SAMPLE_COUNT_1_BIT;
	VkSampleCountFlags c;
	vkGetPhysicalDeviceProperties(phys_dev, &p);
	c =
		p.limits.framebufferColorSampleCounts &
		p.limits.framebufferDepthSampleCounts;
	for (; i >= e; i = (VkSampleCountFlagBits)(i >> 1))
		if (c & i) return i;
	return VK_SAMPLE_COUNT_1_BIT;
}

VkSampleCountFlagBits Device_Vk::get_samples(
	int c
) {
	VkSampleCountFlagBits b = VK_SAMPLE_COUNT_1_BIT;
	switch (c) {
		case 1:  b = VK_SAMPLE_COUNT_1_BIT;  break;
		case 2:  b = VK_SAMPLE_COUNT_2_BIT;  break;
		case 4:  b = VK_SAMPLE_COUNT_4_BIT;  break;
		case 8:  b = VK_SAMPLE_COUNT_8_BIT;  break;
		case 16: b = VK_SAMPLE_COUNT_16_BIT; break;
		case 32: b = VK_SAMPLE_COUNT_32_BIT; break;
		case 64: b = VK_SAMPLE_COUNT_64_BIT; break;
		default: break;
	}
	return std::min(max_samples, b);
}

bool Device_Vk::has_validation() {
	unsigned count, i;
	int f;
	VkLayerProperties* props;
	VkResult r;
	r = vkEnumerateInstanceLayerProperties(&count, 0);
	if (!count || r != VK_SUCCESS) return 0;
	props = (VkLayerProperties*)heap_alloc(heap, count * sizeof *props);
	vkEnumerateInstanceLayerProperties(&count, props);
	for (f = 0, i = 0; i < count; i++) {
		if (!strcmp(
			props[i].layerName,
			"VK_LAYER_KHRONOS_validation"
		)) {
			f = 1;
			break;
		}
	}
	heap_free(heap, props);
	return f;
}

void Device_Vk::find_exts(const char** exts, int& count) {
	app->get_vk_exts(exts, count);
	exts[count++] = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME;
#ifdef DEBUG
	exts[count++] = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
#endif
}

void Device_Vk::init_ac() {
	ac.pUserData = this;
	ac.pfnAllocation = vk_alloc;
	ac.pfnReallocation = vk_realloc;
	ac.pfnFree = vk_free;
	ac.pfnInternalAllocation = 0;
	ac.pfnInternalFree = 0;
}

void Device_Vk::create_inst(const char** exts, int ext_count) {
	VkInstanceCreateInfo ci{};
	VkApplicationInfo ai{};
	VkResult r;
#ifdef DEBUG
	const char* vln = "VK_LAYER_KHRONOS_validation";
#endif
	ai.apiVersion = VK_API_VERSION_1_0;
	ai.pApplicationName = "C2";
	ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
	ci.pApplicationInfo = &ai;
	ci.enabledExtensionCount = (unsigned)ext_count;
	ci.ppEnabledExtensionNames = exts;
#ifdef DEBUG
	ci.enabledLayerCount = has_validation();
	ci.ppEnabledLayerNames = &vln;
	if (!ci.enabledLayerCount)
		print_war("No validation layers.");
#endif
	r = vkCreateInstance(&ci, &ac, &inst);
	if (r != VK_SUCCESS) {
		print_err("Failed to create a Vulkan instance\n");
		pbreak(r);
	}
}

static int proc_swap(
	Device_Vk* d,
	VkPhysicalDevice dev,
	Swap_Cap* sc
) {
	get_swap_cap(d, dev, d->surf, sc);
	return sc->fmt_count > 0 && sc->pm_count > 0;
}

int proc_qf(Device_Vk* d, VkPhysicalDevice dev) {
	unsigned fc, i;
	int r = 0;
	VkBool32 press;
	VkQueueFamilyProperties* fs, * p;
	vkGetPhysicalDeviceQueueFamilyProperties(
		dev,
		&fc,
		0
	);
	fs = (VkQueueFamilyProperties*)heap_alloc(d->heap, (int)fc * sizeof *fs);
	vkGetPhysicalDeviceQueueFamilyProperties(
		dev,
		&fc,
		fs
	);
	for (i = 0; i < fc; i++) {
		p = &fs[i];
		vkGetPhysicalDeviceSurfaceSupportKHR(
			dev,
			i,
			d->surf,
			&press
		);
		if (
			p->queueFlags & VK_QUEUE_GRAPHICS_BIT &&
			press
		) {
			d->queue_index = (int)i;
			r = 1;
			goto fin;
		}
	}
fin:
	heap_free(d->heap, fs);
	return r;
}

static int sup_exts(Device_Vk* d, VkPhysicalDevice dev) {
	int r = 0, i, f;
	unsigned c, j;
	int extc = sizeof device_exts / sizeof *device_exts;
	VkExtensionProperties* avail;
	vkEnumerateDeviceExtensionProperties(dev, 0, &c, 0);
	avail = (VkExtensionProperties*)heap_alloc(d->heap, c * sizeof *avail);
	vkEnumerateDeviceExtensionProperties(
		dev,
		0,
		&c,
		avail
	);
	for (i = 0; i < extc; i++) {
		f = 0;
		for (j = 0; j < c; j++) {
			if (!strcmp(device_exts[i], avail[j].extensionName)) {
				f = 1;
				break;
			}
		}
		if (!f) goto fin;
	}
	r = 1;
fin:
	heap_free(d->heap, avail);
	return r;
}

VkPhysicalDevice get_phys_dev(Device_Vk* d, Swap_Cap* sc) {
	unsigned dc, i;
	VkPhysicalDevice* devs, dev;
	vkEnumeratePhysicalDevices(d->inst, &dc, 0);
	if (!dc) {
		print_err(
			"Couldn't find any vulkan-capable graphics hardware.\n"
		);
		pbreak(400);
	}
	devs = (VkPhysicalDevice*)heap_alloc(d->heap, (int)dc * sizeof *devs);
	vkEnumeratePhysicalDevices(d->inst, &dc, devs);
	for (i = 0; i < dc; i++) {
		dev = devs[i];
		if (
			proc_swap(d, dev, sc) &&
			proc_qf(d, dev) &&
			sup_exts(d, dev)
		) {
			heap_free(d->heap, devs);
			return dev;
		}
		deinit_swap_cap(d, sc);
	}
	print_err("Couldn't find a suitable GPU.\n");
	pbreak(401);
	heap_free(d->heap, devs);
	return 0;
}

void Device_Vk::create_dev(Swap_Cap* swap_cap) {
	const float priority = 0.0f;
	VkDeviceQueueCreateInfo qi{};
	VkPhysicalDeviceCustomBorderColorFeaturesEXT border{};
	VkDeviceCreateInfo di{};
	VkPhysicalDeviceFeatures pdf{};
	VkResult r;
	phys_dev = get_phys_dev(this, swap_cap);
	vkGetPhysicalDeviceMemoryProperties(phys_dev, &mem_props);
	border.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT;
	border.customBorderColors = true;
	qi.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
	qi.queueFamilyIndex = queue_index;
	qi.queueCount = 1;
	qi.pQueuePriorities = &priority;
	di.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
	di.pQueueCreateInfos = &qi;
	di.queueCreateInfoCount = 1;
	di.pEnabledFeatures = &pdf;
	di.enabledExtensionCount =
		sizeof device_exts / sizeof *device_exts;
	di.ppEnabledExtensionNames = device_exts;
	di.pNext = &border;
	r = vkCreateDevice(
		phys_dev,
		&di,
		&ac,
		&dev
	);
	if (r != VK_SUCCESS) {
		print_err("Failed to create a Vulkan device.\n");
		pbreak(r);
	}
}

void Device_Vk::init_internal() {
	const char* exts[16];
	int ext_count = 0, i;
	gladLoaderLoadVulkan(0, 0, 0);
	textures.init();
	texture_count = 1;
	buffers.init();
	buffer_count = 1;
	vertex_formats.init();
	vertex_format_count = 1;
	shaders.init();
	shader_count = 1;
	samplers.init();
	sampler_count = 1;
	rpo_cache.init();
	fbo_cache.init();
	pso_cache.init();
	dso_cache.init();
	shader_loader.init(this);
	texture_loader.init(this);
	register_asset_loader("CSH2", &shader_loader);
	register_asset_loader("TXTR", &texture_loader);
	find_exts(exts, ext_count);
	init_ac();
	create_inst(exts, ext_count);
#ifdef DEBUG
	if (has_validation())
		init_validation();
#endif
	surf = app_create_vk_surface(app, inst);
	create_dev(&swap_cap);
	max_samples = get_max_samples();
	vrama.init(this);
	gladLoaderLoadVulkan(inst, phys_dev, dev);
	vkGetDeviceQueue(dev, (uint32_t)queue_index, 0, &queue);
	terminators = 0;
	terminator_index = 0;
	for (i = 0; i < max_contexts; i++)
		contexts[i].state = context_state_avail;
	swapchain.init(*app, this);
	create_terminators();
	depth = 0;
	create_depth(swapchain.size.width, swapchain.size.height);
}

void Device_Vk::create_terminators() {
	int i, count = swapchain.image_count;
	if (terminators) {
		for (i = 0; i < count; i++)
			terminators[i].execute(this);
		heap_free(heap, terminators);
	}
	terminators = (Terminator*)heap_alloc(
		heap,
		count * sizeof *terminators
	);
	for (i = 0; i < count; i++) {
		terminators[i].queue = 0;
	}
}

void Device_Vk::create_depth(int w, int h) {
	if (depth)
		destroy_texture(depth);
	depth = create_texture(
		"default depth",
		texture_format_d24s8,
		Texture_Flags::sampleable | Texture_Flags::depth_stencil_target,
		w,
		h,
		1,
		1,
		1,
		0
	);
}

void Device_Vk::deinit_internal() {
	int i, image_count = swapchain.image_count;
	vkDeviceWaitIdle(dev);
	destroy_texture(depth);
	swapchain.destroy(this);
	deinit_swap_cap(this, &swap_cap);
	app_destroy_vk_surface(app, inst, surf);
	for (auto i : rpo_cache)
		i.second.destroy(this);
	for (auto i : fbo_cache)
		i.second.destroy(this);
	for (auto i : pso_cache)
		i.second.destroy(this);
	for (auto i : dso_cache)
		i.second.destroy(this);
	for (i = 0; i < max_contexts; i++) {
		auto& context = contexts[i];
		if (context.state & context_state_init)
			context.destroy();
	}
	for (i = 0; i < image_count; i++) {
		terminators[i].execute(this);
	}
	vrama.destroy();
	vkDestroyDevice(dev, &ac);
#ifdef DEBUG
	destroy_dmesg(
		inst,
		msg,
		&ac
	);
#endif
	vkDestroyInstance(inst, &ac);
}

void Device_Vk::on_resize_internal(int w, int h) {
	(void)w;
	(void)h;
	vkDeviceWaitIdle(dev);
	deinit_swap_cap(this, &swap_cap);
	get_swap_cap(this, phys_dev, surf, &swap_cap);
	swapchain.recreate(*app, this);
	create_terminators();
	create_depth(swapchain.size.width, swapchain.size.height);
}

Renderpass_Vk& Device_Vk::create_rpo(const Rpo_Key& k) {
	Renderpass_Vk rpo;
	rpo.init(this, k);
	rpo.age = 0;
	auto& r = rpo_cache.set(k, rpo);
#ifdef DEBUG
	if (hooks)
		hooks->on_rpo_create(rpo_cache.kaddr(k)->rpo);
#endif
	return r;
}

Renderpass_Vk& Device_Vk::get_rpo(const Rpo_Key& rp) {
	Renderpass_Vk* rpo = rpo_cache.get(rp);
	if (!rpo)
		return create_rpo(rp);
	return *rpo;
}

Framebuffer_Vk& Device_Vk::create_fbo(
	const Renderpass_Vk& rpo,
	const Fbo_Key& k
) {
	auto& fb = k.rpo;
	Framebuffer_Vk fbo;
	fbo.init(this, rpo, fb);
	fbo.age = 0;
	auto& r = fbo_cache.set(k, fbo);
#ifdef DEBUG
	if (hooks)
		hooks->on_fbo_create(fbo_cache.kaddr(k)->rpo);
#endif
	return r;
}

Framebuffer_Vk& Device_Vk::get_fbo(
	const Renderpass_Vk& rpo,
	const Fbo_Key& fb
) {
	Framebuffer_Vk* fbo = fbo_cache.get(fb);
	if (!fbo)
		return create_fbo(rpo, fb);
	return *fbo;
}

Pipeline_Vk& Device_Vk::create_pso(const Pso_Key& pip) {
	Pipeline_Vk pso;
	pso.age = 0;
	pso.init(this, pip);
	auto& r = pso_cache.set(pip, pso);
#ifdef DEBUG
	if (hooks)
		hooks->on_pso_create(pso_cache.kaddr(pip)->pso);
#endif
	return r;
}

Pipeline_Vk& Device_Vk::get_pso(const Pso_Key& pip) {
	Pipeline_Vk* pso = pso_cache.get(pip);
	if (!pso)
		return create_pso(pip);
	return *pso;
}

Descriptor_Set_Vk& Device_Vk::create_dso(
	const Pipeline_Vk& pip,
	const Dso_Key& k
) {
	Descriptor_Set_Vk dso;
	dso.age = 0;
	dso.init(this, pip, k.pip);
	auto& r = dso_cache.set(k, dso);
#ifdef DEBUG
	if (hooks)
		hooks->on_dso_create(dso_cache.kaddr(k)->pip);
#endif
	return r;
}

Descriptor_Set_Vk& Device_Vk::get_dso(
	const Pipeline_Vk& pip,
	const Dso_Key& k
) {
	Descriptor_Set_Vk* dso = dso_cache.get(k);
	if (!dso)
		return create_dso(pip, k);
	return *dso;
}

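/* Cache eviction: every frame each cached object's age is bumped;
 * objects reset their age to 0 via on_submit() when used, and anything
 * older than max_age (swapchain image count + 3 frames) is destroyed
 * and removed from its cache. */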
template<typename List, typename F>
void Device_Vk::collect_objects(List& list, int max_age, F f) {
	for (auto i : list) {
		auto& obj = i.second;
		obj.age++;
		if (obj.age > max_age) {
			f(i);
			obj.destroy(this);
			list.remove(i.first);
		}
	}
}

void Device_Vk::collect_garbage() {
	int max_age = swapchain.image_count + 3;
	collect_objects(rpo_cache, max_age, [this](auto i){
#ifdef DEBUG
		if (hooks)
			hooks->on_rpo_destroy(rpo_cache.kaddr(i.first)->rpo);
#endif
	});
	collect_objects(fbo_cache, max_age, [this](auto i){
#ifdef DEBUG
		if (hooks)
			hooks->on_fbo_destroy(fbo_cache.kaddr(i.first)->rpo);
#endif
	});
	collect_objects(pso_cache, max_age, [this](auto i){
#ifdef DEBUG
		if (hooks)
			hooks->on_pso_destroy(pso_cache.kaddr(i.first)->pso);
#endif
	});
	collect_objects(dso_cache, max_age, [this](auto i){
#ifdef DEBUG
		if (hooks)
			hooks->on_dso_destroy(dso_cache.kaddr(i.first)->pip);
#endif
	});
}

void Device_Vk::queue_destroy(Late_Terminated* obj) {
	terminators[terminator_index].add(obj);
}

int Device_Vk::find_memory_type(
	uint32_t filter,
	VkMemoryPropertyFlags flags
) {
	int i, e = mem_props.memoryTypeCount;
	auto* types = mem_props.memoryTypes;
	for (i = 0; i < e; i++) {
		if (
			(filter & (1 << i)) &&
			(types[i].propertyFlags & flags) == flags
		) return i;
	}
	return -1;
}

Render_Pass_States Device_Vk::get_rp_states(
	const Render_Pass& p
) {
	int i;
	Render_Pass_States s{};
	s.colour_count = p.colour_count;
	for (i = 0; i < p.colour_count; i++) {
		Texture_Vk& t =
			*(Texture_Vk*)&get_texture(p.colours[i].id);
		s.colours[i] = t.state;
	}
	if (p.depth.id) {
		Texture_Vk& t =
			*(Texture_Vk*)&get_texture(p.depth.id);
		s.depth = t.state;
	}
	return s;
}

VkAttachmentLoadOp Renderpass_Vk::load_op_from_mode(
	Clear_Mode m
) {
	switch (m) {
		case Clear_Mode::discard: return VK_ATTACHMENT_LOAD_OP_DONT_CARE;
		case Clear_Mode::clear:   return VK_ATTACHMENT_LOAD_OP_CLEAR;
		case Clear_Mode::restore: return VK_ATTACHMENT_LOAD_OP_LOAD;
	}
	assert(0);
	return VK_ATTACHMENT_LOAD_OP_DONT_CARE;
}

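/* Builds a single-subpass render pass from the Rpo_Key. Only one
 * colour attachment reference is kept (ads[2], a single
 * VkAttachmentReference), so this currently supports at most one
 * colour target plus an optional depth/stencil target. Initial
 * layouts come from the tracked Resource_State when an attachment is
 * restored rather than cleared or discarded. */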
void Renderpass_Vk::init(
	Device_Vk* dev,
	const Rpo_Key& rpk
) {
	VkRenderPassCreateInfo ri{};
	VkAttachmentDescription ads[2];
	VkAttachmentReference car, dar;
	VkSubpassDescription sd{};
	VkResult r;
	auto& rp = rpk.rpo;
	auto& states = rpk.states;
	bool has_depth = rp.depth.id;
	int count = 0, i, c = rp.colour_count;
	zero(ads, sizeof ads);
	for (i = 0; i < c; i++) {
		int index = count++;
		auto& colour = rp.colours[i];
		auto& ad = ads[index];
		ad.format = get_vk_format(colour.fmt);
		ad.samples = dev->get_samples(colour.samples);
		ad.loadOp = load_op_from_mode(colour.mode);
		ad.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
		ad.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
		ad.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
		if (colour.mode == Clear_Mode::restore) {
			Resource_State state = states.colours[i];
			ad.initialLayout = state_to_image_layout(state);
		} else
			ad.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
		ad.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
		car.attachment = index;
		car.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
	}
	if (has_depth) {
		int i = count++;
		auto& ad = ads[i];
		auto& depth = rp.depth;
		ad.format = get_vk_format(dev->get_texture(depth.id).fmt);
		ad.samples = dev->get_samples(depth.samples);
		ad.loadOp = load_op_from_mode(depth.mode);
		ad.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
		ad.stencilLoadOp = ad.loadOp;
		ad.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
		if (rp.depth.mode == Clear_Mode::restore) {
			Resource_State state = states.depth;
			ad.initialLayout = state_to_image_layout<true>(state);
		} else
			ad.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
		ad.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
		dar.attachment = i;
		dar.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
	}

	sd.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
	sd.colorAttachmentCount = rp.colour_count;
	sd.pColorAttachments = &car;
	sd.pDepthStencilAttachment = has_depth? &dar: 0;

	ri.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
	ri.attachmentCount = count;
	ri.pAttachments = ads;
	ri.subpassCount = 1;
	ri.pSubpasses = &sd;

	r = vkCreateRenderPass(dev->dev, &ri, &dev->ac, &rpo);
	if (r != VK_SUCCESS) {
		print_err("Failed to create a render pass\n");
		pbreak(r);
	}
}

void Renderpass_Vk::destroy(Device_Vk* dev) {
	vkDestroyRenderPass(dev->dev, rpo, &dev->ac);
}

void Framebuffer_Vk::init(
	Device_Vk* dev,
	const Renderpass_Vk& rpo,
	const Render_Pass& rp
) {
	bool has_depth = rp.depth.id;
	int i, count = 0;
	VkImageView atts[2];
	VkResult r;
	VkFramebufferCreateInfo fbi{};
	for (i = 0; i < rp.colour_count; i++) {
		const auto& tar = rp.colours[i];
		const Texture_Vk& texture =
			*(const Texture_Vk*)&dev->get_texture(tar.id);
		atts[count++] = texture.view;
		w = texture.w;
		h = texture.h;
	}
	if (has_depth) {
		const Texture_Vk& texture =
			*(const Texture_Vk*)&dev->get_texture(rp.depth.id);
		atts[count++] = texture.view;
		w = texture.w;
		h = texture.h;
	}
	fbi.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
	fbi.renderPass = rpo.rpo;
	fbi.width = w;
	fbi.height = h;
	fbi.layers = 1;
	fbi.attachmentCount = count;
	fbi.pAttachments = atts;
	r = vkCreateFramebuffer(dev->dev, &fbi, &dev->ac, &fbo);
	if (r != VK_SUCCESS) {
		print_err("Failed to create a framebuffer.\n");
		pbreak(r);
	}
}

void Framebuffer_Vk::destroy(Device_Vk* dev) {
	vkDestroyFramebuffer(dev->dev, fbo, &dev->ac);
}

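/* Swapchain configuration helpers: request one image more than the
 * minimum when maxImageCount permits, prefer a B8G8R8A8_SRGB /
 * SRGB_NONLINEAR surface format, and fall back to FIFO presentation
 * unless vsync is off, in which case IMMEDIATE and then MAILBOX are
 * tried first. */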
static int get_image_count(const Swap_Cap& s) {
	const VkSurfaceCapabilitiesKHR& cap = s.cap;
	return cap.minImageCount + (cap.minImageCount < cap.maxImageCount);
}

static VkExtent2D choose_swap_extent(const App& app, const VkSurfaceCapabilitiesKHR& cap) {
	VkExtent2D r = { (uint32_t)app.w, (uint32_t)app.h };
	r.width  = std::min(r.width,  cap.maxImageExtent.width);
	r.height = std::min(r.height, cap.maxImageExtent.height);
	r.width  = std::max(r.width,  cap.minImageExtent.width);
	r.height = std::max(r.height, cap.minImageExtent.height);
	return r;
}

static VkSurfaceFormatKHR choose_swap_format(const Swap_Cap& cap) {
	unsigned i;
	for (i = 0; i < cap.fmt_count; i++) {
		const auto& fmt = cap.fmts[i];
		if (
			fmt.format == VK_FORMAT_B8G8R8A8_SRGB &&
			fmt.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR
		) return fmt;
	}
	print_err("Failed to find a surface that supports VK_FORMAT_B8G8R8A8_SRGB.\n");
	return cap.fmts[0];
}

static VkPresentModeKHR choose_swap_mode(const Swap_Cap& cap, bool vsync) {
	if (!vsync) {
		int i, c = cap.pm_count;
		for (i = 0; i < c; i++)
			if (cap.pms[i] == VK_PRESENT_MODE_IMMEDIATE_KHR)
				return cap.pms[i];
		for (i = 0; i < c; i++)
			if (cap.pms[i] == VK_PRESENT_MODE_MAILBOX_KHR)
				return cap.pms[i];
	}
	return VK_PRESENT_MODE_FIFO_KHR;
}

static VkImageView make_view(
	Device_Vk* dev,
	VkImage image,
	VkFormat fmt,
	VkImageAspectFlags flags,
	VkImageViewType type
) {
	VkImageViewCreateInfo vi{};
	VkResult r;
	VkImageView view;
	vi.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
	vi.image = image;
	vi.viewType = type;
	vi.format = fmt;
	vi.subresourceRange.aspectMask = flags;
	vi.subresourceRange.baseMipLevel = 0;
	vi.subresourceRange.levelCount = 1;
	vi.subresourceRange.baseArrayLayer = 0;
	vi.subresourceRange.layerCount = 1;
	r = vkCreateImageView(dev->dev, &vi, &dev->ac, &view);
	if (r != VK_SUCCESS) {
		print_err("Failed to make image view.\n");
		pbreak((int)r);
	}
	return view;
}

void Swapchain::init(const App& app, Device_Vk* dev) {
	swapchain = (VkSwapchainKHR)0;
	textures = 0;
	initr(app, dev);
}

void Swapchain::initr(const App& app, Device_Vk* dev) {
	image_count = get_image_count(dev->swap_cap);
	size = choose_swap_extent(app, dev->swap_cap.cap);
	format = choose_swap_format(dev->swap_cap);
	mode = choose_swap_mode(dev->swap_cap, true);
	{
		VkResult r;
		VkSwapchainCreateInfoKHR si{};
		si.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR;
		si.surface = dev->surf;
		si.minImageCount = image_count;
		si.imageFormat = format.format;
		si.imageColorSpace = format.colorSpace;
		si.imageExtent = size;
		si.imageArrayLayers = 1;
		si.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
		si.preTransform = dev->swap_cap.cap.currentTransform;
		si.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
		si.presentMode = mode;
		si.clipped = VK_TRUE;
		si.oldSwapchain = swapchain;
		si.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
		r = vkCreateSwapchainKHR(dev->dev, &si, &dev->ac, &swapchain);
		if (r != VK_SUCCESS) {
			print_err("Failed to create swapchain.\n");
			pbreak(r);
		}
	}
	{
		VkResult r;
		VkSemaphoreCreateInfo si{};
		si.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
		r = vkCreateSemaphore(dev->dev, &si, &dev->ac, &image_avail);
		if (r != VK_SUCCESS) {
			print_err("Failed to create a semaphore.\n");
			pbreak(r);
		}
	}
	get_images(dev);
}

void Swapchain::recreate(const App& app, Device_Vk* dev) {
	Swapchain old = *this;
	vkDeviceWaitIdle(dev->dev);
	initr(app, dev);
	old.destroy(dev);
}

Texture_Id Swapchain::create_image(
	Device_Vk* dev,
	VkImage image,
	VkImageView view,
	int w,
	int h
) {
	Texture_Id id = dev->alloc_texture();
	Texture_Vk& tex = *(Texture_Vk*)&dev->get_texture(id);
	Texture_Vk::init(
		&tex,
		id,
		0,
		image,
		view,
		Vram_Allocator::Allocation::null(),
		Resource_State::undefined,
		texture_format_bgra8i_srgb,
		Texture_Flags::swapchain,
		w,
		h,
		1,
		1,
		1,
		0,
		0,
		true,
		1
	);
	return id;
}

void Swapchain::get_images(Device_Vk* dev) {
	unsigned count;
	int i;
	VkImage* images;
	vkGetSwapchainImagesKHR(dev->dev, swapchain, &count, 0);
	Context& ctx = dev->acquire();
	image_count = count;
	images = (VkImage*)heap_alloc(
		dev->heap,
		sizeof *images * image_count
	);
	textures = (Texture_Id*)heap_alloc(
		dev->heap,
		sizeof *textures * image_count
	);
	vkGetSwapchainImagesKHR(dev->dev, swapchain, &count, images);
	for (i = 0; i < image_count; i++) {
		VkImageView view = make_view(dev,
			images[i],
			format.format,
			VK_IMAGE_ASPECT_COLOR_BIT,
			VK_IMAGE_VIEW_TYPE_2D
		);
		textures[i] = create_image(
			dev,
			images[i],
			view,
			size.width,
			size.height
		);
	}
	dev->submit(ctx);
	heap_free(dev->heap, images);
}

void Swapchain::destroy(Device_Vk* dev) {
	int i;
	for (i = 0; i < image_count; i++)
		dev->destroy_texture(textures[i]);
	vkDestroySemaphore(dev->dev, image_avail, &dev->ac);
	vkDestroySwapchainKHR(dev->dev, swapchain, &dev->ac);
	heap_free(dev->heap, textures);
	textures = 0;
}

Device* Device::create(Arena* a, App* ap) {
	Device_Vk* d = (Device_Vk*)arena_alloc(a, sizeof *d);
	new(d) Device_Vk();
	d->init(a, ap);
	return d;
}

void Device::init(Arena* a, App* ap) {
	void* hm;
	arena = a;
	app = ap;
	hm = arena_alloc(a, device_heap_size);
	heap = (Heap*)arena_alloc(a, sizeof *heap);
	init_heap(heap, hm, device_heap_size);
	hooks = 0;
	((Device_Vk*)this)->init_internal();
}

void Device::destroy() {
	((Device_Vk*)this)->deinit_internal();
}

void Device::register_hooks(Device_Debug_Hooks* h) {
	h->dev = this;
	hooks = h;
}

void Device::on_resize() {
	((Device_Vk*)this)->on_resize_internal(app->w, app->h);
}

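/* Frame start: evict stale cached objects, grab a free context,
 * advance the per-frame terminator ring and flush the destruction
 * queue for the slot being reused, then acquire the next swapchain
 * image (signalling image_avail for the present submit to wait on). */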
void Device::begin_frame() {
	Device_Vk* dev = (Device_Vk*)this;
	dev->collect_garbage();
	dev->current_ctx = (Context_Vk*)&acquire();
	dev->terminator_index++;
	dev->terminator_index %= dev->swapchain.image_count;
	dev->terminators[dev->terminator_index].execute(dev);
	vkAcquireNextImageKHR(
		dev->dev,
		dev->swapchain.swapchain,
		UINT64_MAX,
		dev->swapchain.image_avail,
		VK_NULL_HANDLE,
		&dev->backbuffer_index
	);
	dev->backbuffer_id = dev->swapchain.textures[dev->backbuffer_index];
}

void Device::submit(Context& ctx_) {
	Context_Vk* ctx = (Context_Vk*)&ctx_;
	Device_Vk* dev = (Device_Vk*)this;
	VkSubmitInfo si{};
	si.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
/*	si.waitSemaphoreCount = 1;
	si.pWaitSemaphores = &ctx->semaphore;
	si.pWaitDstStageMask = &stage;
	si.signalSemaphoreCount = 1;
	si.pSignalSemaphores = &ctx->semaphore;*/
	si.commandBufferCount = 1;
	si.pCommandBuffers = &ctx->cb;
	ctx->check_end_rp();
	vkEndCommandBuffer(ctx->cb);
	vkQueueSubmit(dev->queue, 1, &si, ctx->fence);
	ctx->wait();
#ifdef DEBUG
	if (dev->hooks)
		dev->hooks->on_submit(*ctx);
#endif
	ctx->release();
}

void Device::present() {
	Device_Vk* dev = (Device_Vk*)this;
	Context_Vk* ctx = dev->current_ctx;
	VkPresentInfoKHR pi{};
	VkSubmitInfo si{};
	VkPipelineStageFlags stage =
		VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
	heap_defrag(dev->heap);
	// ^ this makes it >4x the speed
	ctx->check_end_rp();
	ctx->transition(
		dev->get_backbuffer(),
		Resource_State::presentable
	);
	si.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
	si.waitSemaphoreCount = 1;
	si.pWaitSemaphores = &dev->swapchain.image_avail;
	si.pWaitDstStageMask = &stage;
	si.signalSemaphoreCount = 1;
	si.pSignalSemaphores = &ctx->semaphore;
	si.commandBufferCount = 1;
	si.pCommandBuffers = &ctx->cb;
	vkEndCommandBuffer(ctx->cb);
	vkQueueSubmit(dev->queue, 1, &si, ctx->fence);
	pi.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
	pi.waitSemaphoreCount = 1;
	pi.pWaitSemaphores = &ctx->semaphore;
	pi.swapchainCount = 1;
	pi.pSwapchains = &dev->swapchain.swapchain;
	pi.pImageIndices = &dev->backbuffer_index;
	vkQueuePresentKHR(dev->queue, &pi);
#ifdef DEBUG
	if (hooks) {
		hooks->on_submit(*ctx);
		hooks->on_present(*ctx);
	}
#endif
	ctx->release();
}

Texture_Id Device::get_backbuffer() {
	return ((Device_Vk*)this)->backbuffer_id;
}

Texture_Id Device::get_depth_target() {
	return ((Device_Vk*)this)->depth;
}

Texture& Device::get_texture(Texture_Id id) {
	assert(id.index);
	return ((Device_Vk*)this)->textures[id];
}

Texture_Id Device_Vk::alloc_texture() {
	Texture_Vk tex{};
	Texture_Id id(texture_count++);
	tex.id = id;
	textures.set(id, tex);
	return id;
}

Buffer_Id Device_Vk::alloc_buffer() {
	Buffer_Vk buf{};
	Buffer_Id id(buffer_count++);
	buf.id = id;
	buffers.set(id, buf);
	return id;
}

Vertex_Format_Id Device_Vk::alloc_vf() {
	Vertex_Format_Vk vf{};
	Vertex_Format_Id id(vertex_format_count++);
	vertex_formats.set(id, vf);
	return id;
}
Vertex_Format_Id Device::create_vertex_format(
	const Vertex_Format_Desc& desc
) {
	Device_Vk* dev = (Device_Vk*)this;
	Vertex_Format_Id id = dev->alloc_vf();
	dev->vertex_formats[id].init(dev, desc);
	return id;
}
void Device::destroy_vertex_format(Vertex_Format_Id id) {
	Device_Vk* dev = (Device_Vk*)this;
	Vertex_Format_Vk& vf = dev->vertex_formats[id];
	vf.destroy(dev);
}

Shader_Id Device_Vk::alloc_shader() {
	Shader_Vk s{};
	Shader_Id id(shader_count++);
	s.id = id;
	shaders.set(id, s);
	return id;
}

Sampler_Id Device_Vk::alloc_sampler() {
	Sampler_Vk s{};
	Sampler_Id id(sampler_count++);
	samplers.set(id, s);
	return id;
}

void Device::destroy_texture(Texture_Id id) {
	Device_Vk* dev = (Device_Vk*)this;
	dev->queue_destroy((Texture_Vk*)&dev->get_texture(id));
}

void Device::destroy_texturei(Texture_Id id) {
	Device_Vk* dev = (Device_Vk*)this;
	((Texture_Vk*)&dev->get_texture(id))->destroy(dev);
}

void Context::wait() {
	Context_Vk* ctx = (Context_Vk*)this;
	Device_Vk* dev = ctx->dev;
	vkWaitForFences(
		dev->dev,
		1,
		&ctx->fence,
		VK_TRUE,
		UINT64_MAX
	);
}

void Context::submit(
	const Draw& draw,
	const Pipeline& p,
	const Render_Pass& rp
) {
	Context_Vk* ctx = (Context_Vk*)this;
	Device_Vk* dev = ctx->dev;
	Vertex_Buffer_Binding* binding;
	Rpo_Key rpo_key = { rp, dev->get_rp_states(rp) };
	Pso_Key pso_key = { p, rpo_key };
	Pipeline_Vk& pso = dev->get_pso(pso_key);
	Texture_Vk& target = *(Texture_Vk*)&dev->get_texture(
		dev->get_backbuffer()
	);
	target.state = Resource_State::render_target;
	if (pso.pip != ctx->last_pso)
		vkCmdBindPipeline(
			ctx->cb,
			VK_PIPELINE_BIND_POINT_GRAPHICS,
			pso.pip
		);
	ctx->last_pso = pso.pip;
	ctx->submit_descriptors(pso, p);
	auto [rpo, fbo] = ctx->begin_rp(rpo_key);
	for (binding = draw.verts; binding->id; binding++) {
		VkBuffer buf = ((Buffer_Vk*)&dev->get_buffer(binding->id))->buf;
		VkDeviceSize offset = (VkDeviceSize)binding->offset;
		vkCmdBindVertexBuffers(ctx->cb, 0, 1, &buf, &offset);
	}
	if (draw.inds.id) {
		const Index_Buffer_Binding& inds = draw.inds;
		VkBuffer buf = ((Buffer_Vk*)&dev->get_buffer(inds.id))->buf;
		VkDeviceSize offset = (VkDeviceSize)inds.offset;
		vkCmdBindIndexBuffer(
			ctx->cb,
			buf,
			offset,
			VK_INDEX_TYPE_UINT16
		);
		vkCmdDrawIndexed(
			ctx->cb,
			draw.vertex_count,
			draw.instance_count,
			draw.first_vertex,
			draw.vertex_offset,
			draw.first_instance
		);
	} else {
		vkCmdDraw(
			ctx->cb,
			draw.vertex_count,
			draw.instance_count,
			draw.first_vertex,
			draw.first_instance
		);
	}
	ctx->end_rp(rp, rpo, fbo);
	pso.on_submit();
}

void Context::submit(
	const Draw* draws,
	int count,
	const Pipeline& p,
	const Render_Pass& rp
) {
	Context_Vk* ctx = (Context_Vk*)this;
	Device_Vk* dev = ctx->dev;
	(void)draws;
	(void)count;
	(void)p;
	(void)rp;
	(void)dev;
	assert(0);
	/* todo */
}

void Context::copy(Buffer_Id dst, Buffer_Id src) {
	Context_Vk* ctx = (Context_Vk*)this;
	Device_Vk* dev = ctx->dev;
	Buffer_Vk& a = *(Buffer_Vk*)&dev->get_buffer(dst);
	Buffer_Vk& b = *(Buffer_Vk*)&dev->get_buffer(src);
	VkBufferCopy region{};
	region.srcOffset = 0;
	region.dstOffset = 0;
	region.size = b.size;
	ctx->check_end_rp();
	vkCmdCopyBuffer(
		ctx->cb,
		b.buf,
		a.buf,
		1,
		&region
	);
}

void Context::copy(Texture_Id dst, Buffer_Id src) {
	Context_Vk* ctx = (Context_Vk*)this;
	Device_Vk* dev = ctx->dev;
	Texture_Vk& a = *(Texture_Vk*)&dev->get_texture(dst);
	Buffer_Vk& b = *(Buffer_Vk*)&dev->get_buffer(src);
	VkBufferImageCopy c{};
	transition(dst, Resource_State::copy_dst);
	c.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	c.imageSubresource.layerCount = 1;
	c.imageExtent.width = a.w;
	c.imageExtent.height = a.h;
	c.imageExtent.depth = 1;
	ctx->check_end_rp();
	vkCmdCopyBufferToImage(
		ctx->cb,
		b.buf,
		a.image,
		VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
		1,
		&c
	);
}

void Context::copy(
	Texture_Id dst,
	Buffer_Id src,
	int mip,
	int x,
	int y,
	int w,
	int h
) {
	Context_Vk* ctx = (Context_Vk*)this;
	Device_Vk* dev = ctx->dev;
	Texture_Vk& a = *(Texture_Vk*)&dev->get_texture(dst);
	Buffer_Vk& b = *(Buffer_Vk*)&dev->get_buffer(src);
	VkBufferImageCopy c{};
	transition(dst, Resource_State::copy_dst);
	c.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	c.imageSubresource.layerCount = 1;
	c.imageSubresource.mipLevel = mip;
	c.imageExtent.width = w;
	c.imageExtent.height = h;
	c.imageExtent.depth = 1;
	c.imageOffset.x = x;
	c.imageOffset.y = y;
	ctx->check_end_rp();
	vkCmdCopyBufferToImage(
		ctx->cb,
		b.buf,
		a.image,
		VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
		1,
		&c
	);
}

void Context::resolve(Texture_Id dst, Texture_Id src) {
	Context_Vk* ctx = (Context_Vk*)this;
	Device_Vk* dev = ctx->dev;
	Texture_Vk& d = *(Texture_Vk*)&dev->get_texture(dst);
	Texture_Vk& s = *(Texture_Vk*)&dev->get_texture(src);
	VkImageResolve r{};
	r.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	r.srcSubresource.layerCount = 1;
	r.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	r.dstSubresource.layerCount = 1;
	r.extent.width = d.w;
	r.extent.height = d.h;
	r.extent.depth = 1;
	assert(d.w == s.w);
	assert(d.h == s.h);
	assert(d.d == 1 && s.d == 1);
	assert(d.samples == 1 && s.samples > 1);
	ctx->check_end_rp();
	transition(src, Resource_State::copy_src);
	transition(dst, Resource_State::copy_dst);
	vkCmdResolveImage(
		ctx->cb,
		s.image,
		state_to_image_layout(s.state),
		d.image,
		state_to_image_layout(d.state),
		1,
		&r
	);
}

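/* Records a VkImageMemoryBarrier to move a texture between layouts.
 * Aliased textures forward the transition to their parent, depth
 * formats switch to the depth/stencil aspect and attachment layouts,
 * and the (src, dst) layout pair selects access masks and pipeline
 * stages from the explicit table of transitions below. */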
void Context::transition(Texture_Id id, Resource_State state) {
	Context_Vk* ctx = (Context_Vk*)this;
	Device_Vk* dev = ctx->dev;
	Texture_Vk& tex = *(Texture_Vk*)&dev->get_texture(id);
	VkImageMemoryBarrier b{};
	VkImageLayout src_layout = state_to_image_layout(tex.state);
	VkImageLayout dst_layout = state_to_image_layout(state);
	VkPipelineStageFlags src_stage, dst_stage;
	if (tex.parent) {
		transition(tex.parent, state);
		tex.state = state;
		return;
	}
	if (tex.state == state) return;
	ctx->check_end_rp();
	tex.state = state;
	b.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
	b.oldLayout = src_layout;
	b.newLayout = dst_layout;
	b.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	b.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	b.image = tex.image;
	b.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	b.subresourceRange.baseMipLevel = tex.start_mip;
	b.subresourceRange.levelCount = tex.mip_count;
	b.subresourceRange.baseArrayLayer = tex.start_array;
	b.subresourceRange.layerCount = tex.array_size;
	if (
		tex.fmt == texture_format_d16 ||
		tex.fmt == texture_format_d24s8 ||
		tex.fmt == texture_format_d32
	) {
		if (src_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
			src_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
		if (dst_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
			dst_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
		if (tex.fmt == texture_format_d24s8) {
			b.subresourceRange.aspectMask =
				VK_IMAGE_ASPECT_DEPTH_BIT |
				VK_IMAGE_ASPECT_STENCIL_BIT;
		} else
			b.subresourceRange.aspectMask =
				VK_IMAGE_ASPECT_DEPTH_BIT;
		b.oldLayout = src_layout;
		b.newLayout = dst_layout;
	}
	if (
		src_layout == VK_IMAGE_LAYOUT_UNDEFINED &&
		dst_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
	) {
		b.srcAccessMask = 0;
		b.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
		src_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
		dst_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
	} else if (
		src_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
		dst_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
	) {
		b.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
		b.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
		src_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
		dst_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
	} else if (
		src_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL &&
		dst_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
	) {
		b.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
		b.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
		src_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
		dst_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
	} else if (
		src_layout == VK_IMAGE_LAYOUT_UNDEFINED &&
		dst_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
	) {
		b.srcAccessMask = 0;
		b.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
		src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
		dst_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
	} else if (
		src_layout == VK_IMAGE_LAYOUT_UNDEFINED &&
		dst_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
	) {
		b.srcAccessMask = 0;
		b.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
		src_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
		dst_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
	} else if (
		src_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
		dst_layout == VK_IMAGE_LAYOUT_PRESENT_SRC_KHR
	) {
		b.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
		b.dstAccessMask = 0;
		src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
		dst_stage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
	} else if (
		src_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL &&
		dst_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
	) {
		b.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
		b.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
		src_stage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
		dst_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
	} else if (
		src_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL &&
		dst_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
	) {
		b.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
		b.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
		src_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
		dst_stage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
	} else if (
		src_layout == VK_IMAGE_LAYOUT_UNDEFINED &&
		dst_layout == VK_IMAGE_LAYOUT_PRESENT_SRC_KHR
	) {
		b.srcAccessMask = 0;
		b.dstAccessMask = 0;
		src_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
		dst_stage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
	} else if (
		src_layout == VK_IMAGE_LAYOUT_UNDEFINED &&
		dst_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
	) {
		b.srcAccessMask = 0;
		b.dstAccessMask =
			VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
			VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
		src_stage =
			VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
			VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
		dst_stage =
			VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
			VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
	} else if (
		src_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
		dst_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
	) {
		b.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
		b.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
		src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
		dst_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
	} else if (
		src_layout ==  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL &&
		dst_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
	) {
		b.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
		b.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
		src_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
		dst_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
	} else if (
		src_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
		dst_layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
	) {
		b.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
		b.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
		src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
		dst_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
	} else {
		print_err("Bad resource transition.\n");
		assert(0);
		b.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
		b.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
		src_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
		dst_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
	}
	vkCmdPipelineBarrier(
		ctx->cb,
		src_stage,
		dst_stage,
		0,
		0,
		0,
		0,
		0,
		1,
		&b
	);
}

void Context::debug_push(const char* name) {
#ifdef DEBUG
	VkDebugUtilsLabelEXT l{};
	Context_Vk* ctx = (Context_Vk*)this;
	l.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT;
	l.pLabelName = name;
	vkCmdBeginDebugUtilsLabelEXT(ctx->cb, &l);
#else
	(void)name;
#endif
}

void Context::debug_pop() {
#ifdef DEBUG
	Context_Vk* ctx = (Context_Vk*)this;
	vkCmdEndDebugUtilsLabelEXT(ctx->cb);
#endif
}

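/* looks up (or builds) the cached render pass and framebuffer for
 * this pass description and begins recording into it; if the same
 * pair is already active the open render pass is reused */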
std::pair<Renderpass_Vk&, Framebuffer_Vk&> Context_Vk::begin_rp(
	const Rpo_Key& rpk
) {
	const Render_Pass& rp = rpk.rpo;
	Renderpass_Vk& rpo = dev->get_rpo(rpk);
	Framebuffer_Vk& fbo = dev->get_fbo(rpo, { rp });
	VkRenderPassBeginInfo rpbi{};
	VkClearValue clears[max_colour_attachments + 1];
	VkExtent2D extent{ (uint32_t)fbo.w, (uint32_t)fbo.h };
	int i, c = rp.colour_count, clear_count = 0;
	bool has_depth = rp.depth.id;
	if (last_rpo == rpo.rpo && last_fbo == fbo.fbo)
		return { rpo, fbo };
	check_end_rp();
	last_rpo = 0;
	last_fbo = 0;
	for (i = 0; i < c; i++) {
		VkClearValue clear{};
		const auto& tar = rp.colours[i];
		auto& tex = *(Texture_Vk*)&dev->get_texture(tar.id);
		if (tex.parent)
			transition(tex.id, Resource_State::render_target);
		const auto col = tar.clear.colour;
		clear.color.float32[0] = (float)col.r / 255.0f;
		clear.color.float32[1] = (float)col.g / 255.0f;
		clear.color.float32[2] = (float)col.b / 255.0f;
		clear.color.float32[3] = (float)col.a / 255.0f;
		clears[clear_count++] = clear;
	}
	if (has_depth) {
		VkClearValue dc{};
		auto& tex = *(Texture_Vk*)&dev->get_texture(rp.depth.id);
		if (tex.parent)
			transition(tex.id, Resource_State::render_target);
		dc.depthStencil.depth = rp.depth.clear.depth;
		dc.depthStencil.stencil = 0; /* todo */
		clears[clear_count++] = dc;
	}
	rpbi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
	rpbi.renderPass = rpo.rpo;
	rpbi.framebuffer = fbo.fbo;
	rpbi.renderArea.extent = extent;
	rpbi.clearValueCount = clear_count;
	rpbi.pClearValues = clears;
	vkCmdBeginRenderPass(
		cb,
		&rpbi,
		VK_SUBPASS_CONTENTS_INLINE
	);
	last_rpo = rpo.rpo;
	last_fbo = fbo.fbo;
	return { rpo, fbo };
}

void Context_Vk::end_rp(
	const Render_Pass& rp,
	Renderpass_Vk& rpo,
	Framebuffer_Vk& fbo
) {
	bool has_depth = rp.depth.id;
	int i, c = rp.colour_count;
	for (i = 0; i < c; i++){
		Texture_Vk& tex =
			*(Texture_Vk*)&dev->get_texture(rp.colours[i].id);
		tex.state = Resource_State::render_target;
	}
	if (has_depth) {
		Texture_Vk& tex =
			*(Texture_Vk*)&dev->get_texture(rp.depth.id);
		tex.state = Resource_State::render_target;
	}
	rpo.on_submit();
	fbo.on_submit();
}

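/* ends the currently open render pass, if any; called before
 * commands that are illegal inside one (copies, barriers, resolves) */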
void Context_Vk::check_end_rp() {
	if (last_rpo) {
		vkCmdEndRenderPass(cb);
		last_rpo = 0;
	}
}

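/* binds the cached descriptor set for this pipeline, transitioning
 * sampled textures to shader_read first; skipped when the same set
 * is already bound */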
void Context_Vk::submit_descriptors(
	const Pipeline_Vk& pso,
	const Pipeline& p
) {
	Descriptor_Set_Vk* dso = 0;
	if (p.descriptor_count) {
		dso = &dev->get_dso(pso, *(Dso_Key*)&p);
		if (dso->dset == last_dso) return;

		int i, c = p.descriptor_count;
		for (i = 0; i < c; i++) {
			const auto& desc = p.descriptors[i];
			switch (desc.type) {
				case Descriptor::Type::texture: {
					const auto& td = *(const Texture_Descriptor*)desc.payload;
					transition(td.texture, Resource_State::shader_read);
				} break;
				case Descriptor::Type::constant_buffer:
					break; /* todo */
				case Descriptor::Type::structured_buffer:
					break; /* todo */
			}
		}
		vkCmdBindDescriptorSets(
			cb,
			VK_PIPELINE_BIND_POINT_GRAPHICS,
			pso.lay,
			0,
			1,
			&dso->dset,
			0,
			0
		);
		last_dso = dso->dset;
		dso->on_submit();
	}
}

void Context::submit(const Render_Pass& rp) {
	Context_Vk* ctx = (Context_Vk*)this;
	auto [rpo, fbo] = ctx->begin_rp({
		rp,
		ctx->dev->get_rp_states(rp)
	});
	ctx->end_rp(rp, rpo, fbo);
}

void Context_Vk::init_pool() {
	VkCommandPoolCreateInfo pi{};
	VkResult r;
	pi.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
	pi.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
	pi.queueFamilyIndex = (uint32_t)dev->queue_index;
	r = vkCreateCommandPool(dev->dev, &pi, &dev->ac, &pool);
	if (r != VK_SUCCESS) {
		print_err("Failed to create a command pool.\n");
		pbreak(r);
	}
}

void Context_Vk::init_cb() {
	VkCommandBufferAllocateInfo ci{};
	VkResult r;
	ci.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
	ci.commandPool = pool;
	ci.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
	ci.commandBufferCount = 1;
	r = vkAllocateCommandBuffers(dev->dev, &ci, &cb);
	if (r != VK_SUCCESS) {
		print_err("Failed to allocate a command buffer.\n");
		pbreak(r);
	}
}

void Context_Vk::init_sync() {
	VkFenceCreateInfo fi{};
	VkSemaphoreCreateInfo si{};
	VkResult r;
	fi.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
	fi.flags = VK_FENCE_CREATE_SIGNALED_BIT;
	r = vkCreateFence(dev->dev, &fi, &dev->ac, &fence);
	if (r != VK_SUCCESS) {
		print_err("Failed to create a fence.\n");
		pbreak(r);
	}
	si.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
	r = vkCreateSemaphore(dev->dev, &si, &dev->ac, &semaphore);
	if (r != VK_SUCCESS) {
		print_err("Failed to create a semaphore.\n");
		pbreak(r);
	}
}

void Context_Vk::init(Device_Vk* device) {
	dev = device;
	init_pool();
	init_cb();
	init_sync();
	state |= context_state_init;
}

void Context_Vk::begin_record() {
	VkCommandBufferBeginInfo bi{};
	bi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
	wait();
	vkResetFences(dev->dev, 1, &fence);
	vkResetCommandBuffer(cb, 0);
	vkBeginCommandBuffer(cb, &bi);
	last_pso = 0;
	last_dso = 0;
	last_rpo = 0;
	last_fbo = 0;
}

Context_Vk& Context_Vk::acquire(Device_Vk* device) {
	if (~state & context_state_init)
		init(device);
	state &= ~context_state_avail;
	begin_record();
#ifdef DEBUG
	if (device->hooks)
		device->hooks->on_acquire(*this);
#endif
	return *this;
}

void Context_Vk::release() {
	state |= context_state_avail;
}

void Context_Vk::destroy() {
	state &= ~context_state_init;
	vkDestroyCommandPool(dev->dev, pool, &dev->ac);
	vkDestroySemaphore(dev->dev, semaphore, &dev->ac);
	vkDestroyFence(dev->dev, fence, &dev->ac);
}

Context& Device::acquire() {
	Device_Vk* vk = (Device_Vk*)this;
	int i;
	for (i = 0; i < max_contexts; i++) {
		if (vk->contexts[i].state & context_state_avail)
			return vk->contexts[i].acquire(vk);
	}
	print_err("Too many active contexts!\n");
	print("Probably a submit was missed.\n");
	pbreak(10000);
	return vk->contexts[0];
}

Context& Device::get_ctx() {
	Device_Vk* vk = (Device_Vk*)this;
	return *vk->current_ctx;
}

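/* collects one shader stage per populated module type, picking the
 * variant that matches the pipeline's option mask and falling back
 * to variant 0 when no match exists */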
void Pipeline_Vk::init_stages(
	Arena& scope,
	Device_Vk* dev,
	VkGraphicsPipelineCreateInfo& info,
	const Pipeline& desc
) {
	int count = 0, i;
	Shader_Vk& shader = *(Shader_Vk*)&dev->get_shader(desc.shader);
	for (i = 0; i < shader_type_count; i++) {
		if (shader.modules[i])
			count++;
	}
	VkPipelineShaderStageCreateInfo* sis =
		(VkPipelineShaderStageCreateInfo*)arena_alloc(
			&scope,
			sizeof *sis * count
		);
	zero(sis, sizeof *sis * count);
	for (i = 0, count = 0; i < shader_type_count; i++) {
		if (shader.modules[i]) {
			int idx = shader.find_module(
				(Shader_Type)i,
				desc.shader_masks[i]
			);
			VkShaderModule mod;
			if (idx < 0) {
				mod = shader.modules[i][0].mod;
				print_war("Shader variant not found; using the default >~<\n");
				print("  ^ mask was 0x%x\n", desc.shader_masks[i]);
			} else
				mod = shader.modules[i][idx].mod;
			assert(idx >= 0);
			auto& si = sis[count];
			si.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
			si.flags = 0;
			si.stage = Shader_Vk::stage((Shader_Type)i);
			si.module = mod;
			si.pName = "main";
			count++;
		}
	}
	info.stageCount = count;
	info.pStages = sis;
}

void Pipeline_Vk::init_vertex_input(
	Arena& scope,
	Device_Vk* dev,
	VkGraphicsPipelineCreateInfo& info,
	const Pipeline& desc
) {
	Vertex_Format_Vk vf = dev->vertex_formats[desc.vertex_format];
	VkPipelineVertexInputStateCreateInfo& vi = 
		*(VkPipelineVertexInputStateCreateInfo*)arena_alloc(
			&scope,
			sizeof vi
		);
	zero(&vi, sizeof vi);
	auto& shader = dev->get_shader(desc.shader);
	if (shader.vf != desc.vertex_format) {
		auto shader_vf = (Vertex_Format_Vk*)&dev->vertex_formats[shader.vf];
		vf.clone(&scope);
		vf.optimise(shader_vf);
	}
	vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
	vi.vertexBindingDescriptionCount = vf.binding_count;
	vi.pVertexBindingDescriptions = vf.bindings;
	vi.vertexAttributeDescriptionCount = vf.attr_count;
	vi.pVertexAttributeDescriptions = vf.attrs;
	info.pVertexInputState = &vi;
}

void Pipeline_Vk::init_input_assembly(
	Arena& scope,
	Device_Vk* dev,
	VkGraphicsPipelineCreateInfo& info,
	const Pipeline& desc
) {
	VkPipelineInputAssemblyStateCreateInfo& ia = 
		*(VkPipelineInputAssemblyStateCreateInfo*)arena_alloc(
			&scope,
			sizeof ia
		);
	(void)dev;
	zero(&ia, sizeof ia);
	ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
	ia.topology = get_topology(desc.geo);
	info.pInputAssemblyState = &ia;
}

void Pipeline_Vk::init_viewport(
	Arena& scope,
	VkGraphicsPipelineCreateInfo& info,
	const Pipeline& desc
) {
	VkPipelineViewportStateCreateInfo& vi = 
		*(VkPipelineViewportStateCreateInfo*)arena_alloc(
			&scope,
			sizeof vi
		);
	VkRect2D& scissor = *(VkRect2D*)arena_alloc(
		&scope,
		sizeof scissor
	);
	VkViewport& viewport = *(VkViewport*)arena_alloc(
		&scope,
		sizeof viewport
	);
	zero(&vi, sizeof vi);
	zero(&scissor, sizeof scissor);
	zero(&viewport, sizeof viewport);
	scissor.offset.x = desc.scissor[0];
	scissor.offset.y = desc.scissor[1];
	scissor.extent.width  = desc.scissor[2];
	scissor.extent.height = desc.scissor[3];
	viewport.x = desc.viewport[0];
	viewport.y = desc.viewport[1];
	viewport.width = desc.viewport[2];
	viewport.height = desc.viewport[3];
	viewport.minDepth = 0.0f;
	viewport.maxDepth = 1.0f;
	vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
	vi.viewportCount = 1;
	vi.pViewports = &viewport;
	vi.scissorCount = 1;
	vi.pScissors = &scissor;
	info.pViewportState = &vi;
}

void Pipeline_Vk::init_rasterisation(
	Arena& scope,
	Device_Vk* dev,
	VkGraphicsPipelineCreateInfo& info,
	const Pipeline& desc
) {
	VkPipelineRasterizationStateCreateInfo& ri = 
		*(VkPipelineRasterizationStateCreateInfo*)arena_alloc(
			&scope,
			sizeof ri
		);
	(void)dev;
	zero(&ri, sizeof ri);
	ri.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
	ri.depthClampEnable = VK_FALSE;
	ri.rasterizerDiscardEnable = VK_FALSE;
	ri.polygonMode = VK_POLYGON_MODE_FILL;
	ri.lineWidth = 1.0f;
	ri.cullMode = get_vk_cull_mode(desc.cull_mode);
	ri.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
	ri.depthBiasEnable = VK_FALSE;
	info.pRasterizationState = &ri;
}

void Pipeline_Vk::init_msaa(
	Arena& scope,
	Device_Vk* dev,
	VkGraphicsPipelineCreateInfo& info,
	const Pipeline& desc,
	const Render_Pass& rpo
) {
	VkPipelineMultisampleStateCreateInfo& mi = 
		*(VkPipelineMultisampleStateCreateInfo*)arena_alloc(
			&scope,
			sizeof mi
		);
	(void)desc;
	zero(&mi, sizeof mi);
	mi.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
	mi.sampleShadingEnable = VK_FALSE;
	mi.rasterizationSamples = dev->get_samples(rpo.get_samples());
	info.pMultisampleState = &mi;
}

void Pipeline_Vk::init_depthstencil(
	Arena& scope,
	Device_Vk* dev,
	VkGraphicsPipelineCreateInfo& info,
	const Pipeline& desc
) {
	VkPipelineDepthStencilStateCreateInfo& ds = 
		*(VkPipelineDepthStencilStateCreateInfo*)arena_alloc(
			&scope,
			sizeof ds
		);
	(void)dev;
	zero(&ds, sizeof ds);
	ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
	ds.depthTestEnable = desc.depth_test;
	ds.depthWriteEnable = desc.depth_write;
	ds.depthCompareOp = get_compare_op(desc.depth_mode);
	ds.depthBoundsTestEnable = VK_FALSE;
	ds.stencilTestEnable = VK_FALSE;
	info.pDepthStencilState = &ds;
}

void Pipeline_Vk::init_blending(
	Arena& scope,
	Device_Vk* dev,
	VkGraphicsPipelineCreateInfo& info,
	const Render_Pass& rp,
	const Pipeline& desc
) {
	VkPipelineColorBlendStateCreateInfo& bi = 
		*(VkPipelineColorBlendStateCreateInfo*)arena_alloc(
			&scope,
			sizeof bi
		);
	VkPipelineColorBlendAttachmentState* abs = 0;
	(void)dev;
	zero(&bi, sizeof bi);
	if (rp.colour_count) {
		int i, c = rp.colour_count;
		abs = (VkPipelineColorBlendAttachmentState*)arena_alloc(
			&scope,
			sizeof *abs * c
		);
		zero(abs, sizeof *abs * c);
		for (i = 0; i < c; i++) {
			auto& ab = abs[i];
			ab.colorWriteMask =
				VK_COLOR_COMPONENT_R_BIT |
				VK_COLOR_COMPONENT_G_BIT |
				VK_COLOR_COMPONENT_B_BIT |
				VK_COLOR_COMPONENT_A_BIT;
			ab.blendEnable = desc.blend_enable;
			if (desc.blend_enable) {
				ab.srcColorBlendFactor =
					get_vk_blend_factor(desc.blend_src);
				ab.dstColorBlendFactor =
					get_vk_blend_factor(desc.blend_dst);
				ab.srcAlphaBlendFactor =
					get_vk_blend_factor(desc.blend_src_alpha);
				ab.dstAlphaBlendFactor =
					get_vk_blend_factor(desc.blend_dst_alpha);
				ab.colorBlendOp = get_vk_blend_op(desc.blend_mode);
				ab.alphaBlendOp = get_vk_blend_op(desc.blend_mode_alpha);
			}
		}
	}
	bi.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
	bi.flags = 0;
	bi.logicOpEnable = VK_FALSE;
	bi.attachmentCount = rp.colour_count;
	bi.pAttachments = abs;
	info.pColorBlendState = &bi;
}

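/* builds the descriptor set layout from the pipeline's descriptor
 * list, mapping each slot to its vulkan descriptor type and the
 * shader stages that reference it */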
void Pipeline_Vk::init_descriptors(
	Device_Vk* dev,
	const Pipeline& desc
) {
	const Descriptor* sdescs = desc.descriptors;
	Shader_Vk& shader = *(Shader_Vk*)&dev->get_shader(desc.shader);
	VkResult r;
	int count = desc.descriptor_count;
	int i;
	{
		VkDescriptorSetLayoutBinding* descs =
			(VkDescriptorSetLayoutBinding*)heap_alloc(
				dev->heap,
				count * sizeof *descs
			);
		VkDescriptorSetLayoutCreateInfo di{};
		zero(descs, count * sizeof *descs);
		for (i = 0; i < count; i++) {
			int j, stage;
			auto& dst = descs[i];
			auto& src = sdescs[i];
			switch (src.type) {
				case Descriptor::Type::texture:
					dst.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
					break;
				case Descriptor::Type::constant_buffer:
					dst.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
					break;
				case Descriptor::Type::structured_buffer:
					dst.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
					break;
			}
			dst.binding = src.slot;
			dst.descriptorCount = 1;
			dst.stageFlags = 0;
			stage = shader.descriptor_stage(src.slot);
			for (j = 0; j < shader_type_count; j++) {
				if (stage & (1 << j)) {
					dst.stageFlags |= Shader_Vk::stage((Shader_Type)j);
				}
			}
		}
		di.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
		di.bindingCount = (uint32_t)count;
		di.pBindings = descs;
		r = vkCreateDescriptorSetLayout(
			dev->dev,
			&di,
			&dev->ac,
			&dlay
		);
		if (r != VK_SUCCESS) {
			print_err("Failed to create descriptor set layout.\n");
			pbreak(r);
		}
		heap_free(dev->heap, descs);
	}
}

VkCompareOp Pipeline_Vk::get_compare_op(Depth_Mode m) {
	switch (m) {
		case Depth_Mode::less:          return VK_COMPARE_OP_LESS;
		case Depth_Mode::less_equal:    return VK_COMPARE_OP_LESS_OR_EQUAL;
		case Depth_Mode::equal:         return VK_COMPARE_OP_EQUAL;
		case Depth_Mode::greater:       return VK_COMPARE_OP_GREATER;
		case Depth_Mode::greater_equal: return VK_COMPARE_OP_GREATER_OR_EQUAL;
		case Depth_Mode::always:        return VK_COMPARE_OP_ALWAYS;
		case Depth_Mode::never:         return VK_COMPARE_OP_NEVER;
	}
	assert(0);
	return VK_COMPARE_OP_LESS;
}

void Pipeline_Vk::init_layout(
	Device_Vk* dev,
	const Pipeline& desc
) {
	VkResult r;
	VkPipelineLayoutCreateInfo li{};
	int set_count = desc.descriptor_count? 1: 0;
	if (set_count)
		init_descriptors(dev, desc);
	else
		dlay = VK_NULL_HANDLE;
	li.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
	li.setLayoutCount = set_count;
	li.pSetLayouts = &dlay;
	li.pushConstantRangeCount = 0;
	r = vkCreatePipelineLayout(
		dev->dev,
		&li,
		&dev->ac,
		&lay
	);
	if (r != VK_SUCCESS) {
		print_err("Failed to create a pipeline layout.\n");
		pbreak(r);
	}
}

void Pipeline_Vk::init(Device_Vk* dev, const Pso_Key& key) {
	char buffer[1024];
	Arena scope;
	VkResult r;
	const auto& desc = key.pso;
	VkGraphicsPipelineCreateInfo info{};
	init_arena(&scope, buffer, sizeof buffer);
	init_layout(dev, desc);
	init_stages(scope, dev, info, desc);
	init_vertex_input(scope, dev, info, desc);
	init_input_assembly(scope, dev, info, desc);
	init_viewport(scope, info, desc);
	init_rasterisation(scope, dev, info, desc);
	init_msaa(scope, dev, info, desc, key.rpo.rpo);
	init_depthstencil(scope, dev, info, desc);
	init_blending(scope, dev, info, key.rpo.rpo, desc);
	info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
	info.flags = 0;
	info.renderPass = dev->get_rpo(key.rpo).rpo;
	info.subpass = 0;
	info.layout = lay;
	r = vkCreateGraphicsPipelines(
		dev->dev,
		VK_NULL_HANDLE,
		1,
		&info,
		&dev->ac,
		&pip
	);
	if (r != VK_SUCCESS) {
		print_err("Failed to create a pipeline.\n");
		pbreak(r);
	}
}

void Pipeline_Vk::destroy(Device_Vk* dev) {
	if (dlay)
		vkDestroyDescriptorSetLayout(dev->dev, dlay, &dev->ac);
	vkDestroyPipelineLayout(dev->dev, lay, &dev->ac);
	vkDestroyPipeline(dev->dev, pip, &dev->ac);
}

void Descriptor_Set_Vk::init(
	Device_Vk* dev,
	const Pipeline_Vk& pip,
	const Pipeline& desc
) {
	int count = desc.descriptor_count, i;
	int sampler_count = 0, cbuffer_count = 0, sbuffer_count = 0;
	int size_count = 0;
	VkDescriptorSetAllocateInfo da{};
	VkDescriptorPoolSize sizes[4];
	VkResult r;
	for (i = 0; i < count; i++) {
		auto& src = desc.descriptors[i];
		switch (src.type) {
			case Descriptor::Type::texture:
				sampler_count++;
				break;
			case Descriptor::Type::constant_buffer:
				cbuffer_count++;
				break;
			case Descriptor::Type::structured_buffer:
				sbuffer_count++;
				break;
		}
	}
	if (sampler_count) {
		int idx = size_count++;
		sizes[idx] = {
			.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
			.descriptorCount = (uint32_t)sampler_count
		};
	}
	if (cbuffer_count) {
		int idx = size_count++;
		sizes[idx] = {
			.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
			.descriptorCount = (uint32_t)cbuffer_count
		};
	}
	if (sbuffer_count) {
		int idx = size_count++;
		sizes[idx] = {
			.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
			.descriptorCount = (uint32_t)sbuffer_count
		};
	}
	{
		VkDescriptorPoolCreateInfo di{};
		di.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
		di.poolSizeCount = (uint32_t)size_count;
		di.pPoolSizes = sizes;
		di.maxSets = (uint32_t)count;
		r = vkCreateDescriptorPool(dev->dev, &di, &dev->ac, &dp);
		if (r != VK_SUCCESS) {
			print_err("Failed to create a descriptor pool.\n");
			pbreak(r);
		}
	}
	da.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
	da.descriptorPool = dp;
	da.descriptorSetCount = 1;
	da.pSetLayouts = &pip.dlay;
	r = vkAllocateDescriptorSets(
		dev->dev,
		&da,
		&dset
	);
	if (r != VK_SUCCESS) {
		print_err("Failed to allocate descriptor set.\n");
		pbreak(r);
	}
	for (i = 0; i < count; i++) {
		VkDescriptorImageInfo img{};
		VkDescriptorBufferInfo buf{};
		VkWriteDescriptorSet wd{};
		auto& src = desc.descriptors[i];
		wd.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
		wd.dstSet = dset;
		wd.dstBinding = src.slot;
		wd.dstArrayElement = 0;
		wd.descriptorCount = 1;

		switch (src.type) {
			case Descriptor::Type::texture: {
				Texture_Descriptor* td = (Texture_Descriptor*)src.payload;
				Texture_Vk& t = *(Texture_Vk*)&dev->get_texture(td->texture);
				Sampler_Vk& s = *(Sampler_Vk*)&dev->samplers[td->sampler];
				assert(td->texture);
				assert(td->sampler);
				img.imageView = t.view;
				img.sampler = s.sampler;
				img.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
				wd.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
				wd.pImageInfo = &img;
			} break;
			case Descriptor::Type::constant_buffer: {
				Buffer_Descriptor* cd =
					(Buffer_Descriptor*)src.payload;
				Buffer_Vk& b = *(Buffer_Vk*)&dev->get_buffer(cd->buffer);
				assert(cd->buffer);
				buf.buffer = b.buf;
				buf.offset = cd->offset;
				buf.range = cd->size? cd->size: b.size;
				wd.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
				wd.pBufferInfo = &buf;
			} break;
			case Descriptor::Type::structured_buffer: {
				Buffer_Descriptor* cd =
					(Buffer_Descriptor*)src.payload;
				Buffer_Vk& b = *(Buffer_Vk*)&dev->get_buffer(cd->buffer);
				assert(cd->buffer);
				buf.buffer = b.buf;
				buf.offset = cd->offset;
				buf.range = cd->size? cd->size: b.size;
				wd.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
				wd.pBufferInfo = &buf;
			} break;
		}
		vkUpdateDescriptorSets(dev->dev, 1, &wd, 0, 0);
	}
}

void Descriptor_Set_Vk::destroy(Device_Vk* dev) {
	vkDestroyDescriptorPool(dev->dev, dp, &dev->ac);
}

VkFormat Vertex_Format_Vk::format_from_svar_type(
	SVariable_Type type
) {
	switch (type) {
		case svariable_type_float:
			return VK_FORMAT_R32_SFLOAT;
		case svariable_type_vec2:
			return VK_FORMAT_R32G32_SFLOAT;
		case svariable_type_vec3:
			return VK_FORMAT_R32G32B32_SFLOAT;
		case svariable_type_vec4:
			return VK_FORMAT_R32G32B32A32_SFLOAT;
		default: assert(0); /* todo */
	}
	return (VkFormat)0;
}

void Vertex_Format_Vk::init(
	Device_Vk* dev,
	const Vertex_Format_Desc& desc
) {
	int i;
	binding_count = desc.binding_count;
	attr_count = desc.attribute_count;
	bindings = (VkVertexInputBindingDescription*)heap_alloc(
		dev->heap,
		binding_count * sizeof *bindings
	);
	attrs = (VkVertexInputAttributeDescription*)heap_alloc(
		dev->heap,
		attr_count * sizeof *attrs
	);
	zero(bindings, binding_count * sizeof *bindings);
	zero(attrs, attr_count * sizeof *attrs);
	for (i = 0; i < binding_count; i++) {
		auto& dst = bindings[i];
		const auto& src = desc.bindings[i];
		dst.binding = src.binding;
		dst.stride = src.stride;
		dst.inputRate =
			src.rate == sbinding_rate_instance?
			VK_VERTEX_INPUT_RATE_INSTANCE:
			VK_VERTEX_INPUT_RATE_VERTEX;
	}
	for (i = 0; i < attr_count; i++) {
		auto& dst = attrs[i];
		auto& src = desc.attributes[i];
		dst.binding = src.binding;
		dst.location = src.index;
		dst.format = format_from_svar_type(src.type);
		dst.offset = src.offset;
	}
}

void Vertex_Format_Vk::destroy(Device_Vk* dev) {
	heap_free(dev->heap, attrs);
	heap_free(dev->heap, bindings);
}

void Vertex_Format_Vk::clone(Arena* arena) {
	int bc = binding_count * sizeof *bindings;
	int ac = attr_count * sizeof *attrs;
	auto nb = (VkVertexInputBindingDescription*)arena_alloc(
		arena,
		bc
	);
	auto na = (VkVertexInputAttributeDescription*)arena_alloc(
		arena,
		ac
	);
	memcpy(nb, bindings, bc);
	memcpy(na, attrs, ac);
	bindings = nb;
	attrs = na;
}

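/* strips attributes the shader's own vertex format doesn't consume
 * so the pipeline only declares inputs the shader actually reads */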
void Vertex_Format_Vk::optimise(const Vertex_Format_Vk* shadervf) {
	int i;
	if (shadervf->attr_count >= attr_count) return;
	for (i = attr_count - 1; i >= 0; i--) {
		auto& a = attrs[i];
		int j, idx = -1;
		for (j = 0; j < shadervf->attr_count; j++) {
			auto& b = shadervf->attrs[j];
			if (b.binding == a.binding && b.location == a.location) {
				idx = j;
				break;
			}
		}
		if (idx == -1) {
			int last = attr_count - 1;
			attrs[i] = attrs[last];
			attr_count = last;
		}
	}
}

VkShaderModule Shader_Vk::make_module(
	Device_Vk* dev,
	char* buf,
	int size
) {
	VkResult r;
	VkShaderModule m;
	VkShaderModuleCreateInfo mi{};
	mi.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
	mi.codeSize = size;
	mi.pCode = (uint32_t*)buf;
	r = vkCreateShaderModule(dev->dev, &mi, &dev->ac, &m);
	if (r == VK_SUCCESS)
		return m;
	return VK_NULL_HANDLE;
}

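/* open-addressed lookup by name; returns the bucket holding the
 * binding or the first empty bucket, -1 if the table is full */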
int Shader_Vk::Vertex_Format::find_binding(const char* name) {
	int i;
	int bucket = (int)(hash_string(name) % binding_count);
	for (i = 0; i < binding_count; i++) {
		Binding& binding = bindings[bucket];
		if (
			!binding.name[0] ||
			!strcmp(binding.name, name)
		) return bucket;
		bucket = (bucket + 1) % binding_count;
	}
	return -1;
}

int Shader_Vk::Vertex_Format::find_attribute(const char* name) {
	int i;
	int bucket = (int)(hash_string(name) % attr_count);
	for (i = 0; i < attr_count; i++) {
		Attribute& attr = attributes[bucket];
		if (
			!attr.name[0] ||
			!strcmp(attr.name, name)
		) return bucket;
		bucket = (bucket + 1) % attr_count;
	}
	return -1;
}

bool Shader_Vk::Vertex_Format::init(
	Device_Vk* dev,
	Pack_File* f
) {
	int i, attr_index = 0;
	int start = pack_tell(f);
	attr_count = 0;
	for (i = 0; i < binding_count; i++) {
		char name[24];
		int count, j;
		SBinding_Rate rate;
		pack_read(f, name, sizeof name);
		pack_read(f, &rate, 4);
		pack_read(f, &count, 4);
		for (j = 0; j < count; j++) {
			char aname[28];
			SVariable_Type type;
			pack_read(f, aname, sizeof aname);
			pack_read(f, &type, 4);
			attr_count++;
		}
	}
	pack_seek(f, start, seek_rel_start);
	bindings = (Binding*)heap_alloc(
		dev->heap,
		binding_count * sizeof *bindings
	);
	attributes = (Attribute*)heap_alloc(
		dev->heap,
		attr_count * sizeof *attributes
	);
	for (i = 0; i < binding_count; i++)
		bindings[i].name[0] = 0;
	for (i = 0; i < attr_count; i++)
		attributes[i].name[0] = 0;
	for (i = 0; i < binding_count; i++) {
		Binding* binding;
		char name[24];
		int count, j;
		SBinding_Rate rate;
		pack_read(f, name, sizeof name);
		pack_read(f, &rate, 4);
		pack_read(f, &count, 4);
		binding = &bindings[find_binding(name)];
		strcpy(binding->name, name);
		binding->rate = rate;
		binding->attr_count = count;
		binding->attributes = (int*)heap_alloc(
			dev->heap,
			count * sizeof *binding->attributes
		);
		binding->index = i;
		for (j = 0; j < count; j++, attr_index++) {
			int bucket;
			Attribute* attr;
			char aname[28];
			SVariable_Type type;
			pack_read(f, aname, sizeof aname);
			pack_read(f, &type, 4);
			bucket = find_attribute(aname);
			binding->attributes[j] = bucket;
			attr = &attributes[bucket];
			strcpy(attr->name, aname);
			attr->index = j;
			attr->type = type;
		}
	}
	return true;
}

void Shader_Vk::Vertex_Format::destroy(Device_Vk* dev) {
	int i;
	for (i = 0; i < binding_count; i++)
		heap_free(dev->heap, bindings[i].attributes);
	heap_free(dev->heap, bindings);
	heap_free(dev->heap, attributes);
}

void Shader_Vk::destroy(Device_Vk* dev) {
	int i;
	for (i = 0; i < shader_type_count; i++)
		if (modules[i]) {
			int j, e = module_count[i];
			for (j = 0; j < e; j++)
				vkDestroyShaderModule(dev->dev, modules[i][j].mod, &dev->ac);
		}
	vfd.destroy(dev);
	heap_free(dev->heap, descs);
	dev->destroy_vertex_format(vf);
	dev->shaders.remove(id);
}

int Shader::binding_index(const char* name) {
	int idx;
	Shader_Vk* sh = (Shader_Vk*)this;
	idx = sh->vfd.find_binding(name);
	if (idx < 0 || !sh->vfd.bindings[idx].name[0]) return -1;
	return sh->vfd.bindings[idx].index;
}

int Shader::attribute_index(const char* name) {
	int idx;
	Shader_Vk* sh = (Shader_Vk*)this;
	idx = sh->vfd.find_attribute(name);
	if (idx < 0 || !sh->vfd.attributes[idx].name[0]) return -1;
	return sh->vfd.attributes[idx].index;
}

int Shader::descriptor_binding(const char* name) {
	int idx;
	Shader_Vk* sh = (Shader_Vk*)this;
	idx = sh->find_descriptor(name);
	if (idx < 0 || !sh->descs[idx].name[0]) return -1;
	return sh->descs[idx].slot;
}

int Shader::opt_mask(Shader_Type type, const char* name) {
	Shader_Vk* sh = (Shader_Vk*)this;
	return sh->find_opt(type, name);
}

int Shader::descriptor_stage(int slot) {
	Shader_Vk* sh = (Shader_Vk*)this;
	int i;
	for (i = 0; i < sh->desc_count; i++) {
		if (sh->descs[i].slot == slot) {
			return sh->descs[i].stage;
		}
	}
	return 0;
}

void Buffer_Vk::init(
	Device_Vk* dev,
	int flags,
	VkDeviceSize s
) {
	VkBufferCreateInfo bi{};
	VkMemoryRequirements req;
	VkResult r;
	size = s;
	bi.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
	bi.size = size;
	bi.usage = get_usage(flags);
	bi.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
	r = vkCreateBuffer(dev->dev, &bi, &dev->ac, &buf);
	if (r != VK_SUCCESS) {
		print_err("Failed to create a buffer.\n");
		pbreak(r);
	}
	vkGetBufferMemoryRequirements(dev->dev, buf, &req);
	{
		VkMemoryPropertyFlags props = get_memory_flags(flags);
		int mt = dev->find_memory_type(req.memoryTypeBits, props);
		memory = dev->vrama.alloc(mt, req.size, req.alignment);
		if (!memory.valid()) {
			print_err("Failed to allocate memory for buffer.\n");
			pbreak(900);
		}
	}
	vkBindBufferMemory(dev->dev, buf, memory.mem, memory.offset());
}

void Buffer_Vk::destroy(Device_Vk* dev) {
	vkDestroyBuffer(dev->dev, buf, &dev->ac);
	dev->vrama.free(memory);
	dev->buffers.remove(id);
}

void Buffer_Vk::set_name(Device_Vk* dev, const char* name) {
#ifdef DEBUG
	VkDebugUtilsObjectNameInfoEXT i{};
	i.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT;
	i.pObjectName = name;
	i.objectType = VK_OBJECT_TYPE_BUFFER;
	i.objectHandle = (uint64_t)buf;
	vkSetDebugUtilsObjectNameEXT(dev->dev, &i);
#else
	(void)dev;
	(void)name;
#endif
}

Buffer_Id Device::create_buffer(
	const char* name,
	size_t size,
	int flags
) {
	Device_Vk* dev = (Device_Vk*)this;
	Buffer_Id id = dev->alloc_buffer();
	Buffer_Vk& buf = *(Buffer_Vk*)&get_buffer(id);
	buf.init(dev, flags, (VkDeviceSize)size);
	buf.set_name(dev, name);
	return id;
}

void Device::destroy_buffer(Buffer_Id id) {
	Device_Vk* dev = (Device_Vk*)this;
	Buffer_Vk* buf = (Buffer_Vk*)&get_buffer(id);
	dev->queue_destroy(buf);
}

void Device::destroy_bufferi(Buffer_Id id) {
	Device_Vk* dev = (Device_Vk*)this;
	Buffer_Vk* buf = (Buffer_Vk*)&get_buffer(id);
	buf->destroy(dev);
}

void* Device::map_buffer(
	Buffer_Id id,
	size_t offset,
	size_t size
) {
	Buffer_Vk& buf = *(Buffer_Vk*)&get_buffer(id);
	(void)size;
	return buf.memory.map(offset);
}

void Device::unmap_buffer(Buffer_Id id) {
	(void)id;
/*	Device_Vk* dev = (Device_Vk*)this;
	Buffer_Vk& buf = *(Buffer_Vk*)&get_buffer(id);
	vkUnmapMemory(dev->dev, buf.memory.mem);*/
}

Buffer& Device::get_buffer(Buffer_Id id) {
	Device_Vk* dev = (Device_Vk*)this;
	assert(id.index);
	assert(dev->buffers.has(id));
	return dev->buffers[id];
}

Texture_Id Device::create_texture(
	const char* name,
	Texture_Format fmt,
	int flags,
	int w,
	int h,
	int d,
	int mip_count,
	int array_size,
	Buffer_Id init,
	int samples
) {
	VkImageCreateInfo ii{};
	VkResult r;
	Device_Vk* dev = (Device_Vk*)this;
	Texture_Id id = dev->alloc_texture();
	Texture_Vk& tex = *(Texture_Vk*)&dev->get_texture(id);
	VkImage image;
	VkImageView view;
	Vram_Allocator::Allocation mem;
	VkMemoryRequirements req;
	VkImageAspectFlags aspect = get_image_aspect(fmt, flags);
	VkImageViewCreateInfo vi{};
	VkImageCreateFlags image_flags = 0;
	if (flags & Texture_Flags::cubemap) {
		array_size *= 6;
		image_flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
	}
	if (mip_count == 0)
		mip_count = (int)(std::floor(std::log2(std::max(w, h)))) + 1;
	ii.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
	ii.imageType = d == 1? VK_IMAGE_TYPE_2D: VK_IMAGE_TYPE_3D;
	ii.extent.width = w;
	ii.extent.height = h;
	ii.extent.depth = d;
	ii.mipLevels = mip_count;
	ii.arrayLayers = array_size;
	ii.format = get_vk_format(fmt);
	ii.tiling = VK_IMAGE_TILING_OPTIMAL;
	ii.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
	ii.usage = get_texture_usage(flags);
	ii.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
	ii.samples = dev->get_samples(samples);
	ii.flags = image_flags;
	r = vkCreateImage(dev->dev, &ii, &dev->ac, &image);
	if (r != VK_SUCCESS) {
		print_err("Failed to create an image.\n");
		pbreak(r);
	}
	vkGetImageMemoryRequirements(dev->dev, image, &req);
	{
		VkMemoryPropertyFlags props =
			VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
		int mt = dev->find_memory_type(req.memoryTypeBits, props);
		if (mt < 0) {
			print("Failed to find a satisfying memory type index.\n");
			pbreak(mt);
		}
		mem = dev->vrama.alloc(mt, req.size, req.alignment);
		if (!mem.valid()) {
			print_err("Failed to allocate memory for texture.\n");
			pbreak(900);
		}
	}
	vkBindImageMemory(
		dev->dev,
		image,
		mem.mem,
		mem.offset()
	);
	vi.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
	vi.image = image;
	vi.viewType = get_view_type(array_size, d, flags);
	vi.format = ii.format;
	vi.subresourceRange.aspectMask = aspect;
	vi.subresourceRange.baseMipLevel = 0;
	vi.subresourceRange.levelCount = mip_count;
	vi.subresourceRange.baseArrayLayer = 0;
	vi.subresourceRange.layerCount = array_size;
	r = vkCreateImageView(dev->dev, &vi, &dev->ac, &view);
	if (r != VK_SUCCESS) {
		print_err("Failed to make image view.\n");
		pbreak((int)r);
	}
	Texture_Vk::init(
		&tex,
		id,
		0,
		image,
		view,
		mem,
		Resource_State::undefined,
		fmt,
		flags,
		w,
		h,
		d,
		mip_count,
		array_size,
		0,
		0,
		false,
		samples
	);
	if (init) {
		Context& ctx = dev->acquire();
		ctx.copy(id, init);
		dev->submit(ctx);
	}
	tex.set_name(dev, name);
	return id;
}

Texture_Id Device::alias_texture(
	Texture_Id o,
	const char* name,
	Texture_Format fmt,
	int flags,
	int w,
	int h,
	int d,
	int mip_count,
	int array_size,
	int start_mip,
	int start_array
) {
	Device_Vk* dev = (Device_Vk*)this;
	Texture_Vk& texture = *(Texture_Vk*)&dev->get_texture(o);
	Texture_Id ntid = dev->alloc_texture();
	Texture_Vk& nt = *(Texture_Vk*)&dev->get_texture(ntid);
	VkImageViewCreateInfo vi{};
	VkImageView view;
	VkResult r;
	assert(!texture.alias);
	vi.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
	vi.image = texture.image;
	vi.viewType = get_view_type(array_size, d, flags);
	vi.format = get_vk_format(fmt);
	vi.subresourceRange.aspectMask = get_image_aspect(fmt, flags);
	vi.subresourceRange.baseMipLevel = start_mip;
	vi.subresourceRange.levelCount = mip_count;
	vi.subresourceRange.baseArrayLayer = start_array;
	vi.subresourceRange.layerCount = array_size;
	r = vkCreateImageView(dev->dev, &vi, &dev->ac, &view);
	if (r != VK_SUCCESS) {
		print_err("Failed to alias texture.\n");
		pbreak(r);
	}
	Texture_Vk::init(
		&nt,
		ntid,
		o,
		texture.image,
		view,
		Vram_Allocator::Allocation::null(),
		texture.state,
		fmt,
		flags,
		w,
		h,
		d,
		mip_count,
		array_size,
		start_mip,
		start_array,
		true,
		texture.samples
	);
	nt.set_name(dev, name);
	return ntid;
}

void Texture_Vk::set_name(Device_Vk* dev, const char* name) {
#ifdef DEBUG
	VkDebugUtilsObjectNameInfoEXT i{};
	i.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT;
	i.pObjectName = name;
	i.objectType = VK_OBJECT_TYPE_IMAGE_VIEW;
	i.objectHandle = (uint64_t)view;
	vkSetDebugUtilsObjectNameEXT(dev->dev, &i);
	if (!alias) {
		i.objectType = VK_OBJECT_TYPE_IMAGE;
		i.objectHandle = (uint64_t)image;
		vkSetDebugUtilsObjectNameEXT(dev->dev, &i);
	}
#else
	(void)dev;
	(void)name;
#endif
}

Shader& Device::get_shader(Shader_Id id) {
	Device_Vk* dev = (Device_Vk*)this;
	assert(id.index);
	assert(dev->shaders.has(id));
	return dev->shaders[id];
}

Sampler_Id Device::create_sampler(
	const char* name,
	const Sampler_State& state
) {
	Device_Vk* dev = (Device_Vk*)this;
	Sampler_Id id = dev->alloc_sampler();
	Sampler_Vk& s = dev->samplers[id];
	s.init(dev, state);
	s.set_name(dev, name);
	return id;
}

void Device::destroy_sampler(Sampler_Id id) {
	Device_Vk* dev = (Device_Vk*)this;
	Sampler_Vk& s = dev->samplers[id];
	dev->queue_destroy(&s);
}

void Device::destroy_sampleri(Sampler_Id id) {
	Device_Vk* dev = (Device_Vk*)this;
	Sampler_Vk& s = dev->samplers[id];
	s.destroy(dev);
}

int Device::swap_w() {
	Device_Vk* dev = (Device_Vk*)this;
	return (int)dev->swapchain.size.width;
}

int Device::swap_h() {
	Device_Vk* dev = (Device_Vk*)this;
	return (int)dev->swapchain.size.height;
}

void Shader_Loader::init(Device_Vk* d) {
	dev = d;
}

Asset* Shader_Loader::load(
	Arena* a,
	Arena* s,
	const char* filename,
	Pack_File* f
) {
	Shader_Vk* shader;
	Shader_Id id;
	(void)s;
	(void)filename;
	id = dev->alloc_shader();
	shader = (Shader_Vk*)&dev->get_shader(id);
	if (!shader->init(dev, a, f)) {
		dev->shaders.remove(id);
		return 0;
	}
	return shader;
}

void Shader_Loader::unload(Asset* a) {
	Shader_Vk* sh = (Shader_Vk*)a;
	dev->queue_destroy(sh);
}

int Shader_Vk::find_descriptor(const char* name) {
	int i;
	int bucket = (int)(hash_string(name) % desc_count);
	for (i = 0; i < desc_count; i++) {
		Desc& desc = descs[bucket];
		if (
			!desc.name[0] ||
			!strcmp(desc.name, name)
		) return bucket;
		bucket = (bucket + 1) % desc_count;
	}
	return -1;
}

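/* open-addressed lookup of a compiled module variant by option
 * mask; returns -1 if no variant with that mask exists */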
int Shader_Vk::find_module(
	Shader_Type type,
	int mask
) {
	int i;
	int count = module_count[type];
	Shader_Module* arr = modules[type];
	int bucket = (int)(
		fnv1a64((uint8_t*)&mask, sizeof mask) %
		count
	);
	for (i = 0; i < count; i++) {
		Shader_Module& mod = arr[bucket];
		if (mod.mask == mask)
			return bucket;
		bucket = (bucket + 1) % count;
	}
	return -1;
}

int Shader_Vk::find_opt(
	Shader_Type type,
	const char* name
) {
	int count = opt_count, i;
	int bucket = (int)(
		hash_string(name) %
		count
	);
	int stage = 1 << type;
	for (i = 0; i < count; i++) {
		Option& o = options[bucket];
		if (string_equal(name, o.name) && (o.stage & stage))
			return o.mask;
		bucket = (bucket + 1) % count;
	}
	return 0;
}

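/* parses a CSH2 blob: header, vertex format, descriptor and option
 * tables, then a hash table of compiled module variants per stage */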
bool Shader_Vk::init(Device_Vk* dev, Arena* a, Pack_File* f) {
	char magic[4];
	int binding_count, target_count, i;
	pack_read(f, magic, 4);
	if (
		magic[0] != 'C' ||
		magic[1] != 'S' ||
		magic[2] != 'H' ||
		magic[3] != '2'
	) return false;
	pack_read(f, &type, 4);
	pack_read(f, &binding_count, 4);
	pack_read(f, &target_count, 4);
	pack_read(f, &desc_count, 4);
	pack_read(f, &opt_count, 4);
	assert(binding_count);
	vfd.binding_count = binding_count;
	if (!vfd.init(dev, f))
		return false;
	{
		Vertex_Format_Desc desc{};
		desc.binding_count = vfd.binding_count;
		desc.attribute_count = vfd.attr_count;
		desc.bindings = (Vertex_Format_Desc::Binding*)heap_alloc(
			dev->heap,
			sizeof *desc.bindings * desc.binding_count
		);
		desc.attributes = (Vertex_Format_Desc::Attribute*)heap_alloc(
			dev->heap,
			sizeof *desc.attributes * desc.attribute_count
		);
		for (i = 0; i < vfd.binding_count; i++) {
			int j, stride = 0;
			auto& src = vfd.bindings[i];
			auto& dst = desc.bindings[src.index];
			for (j = 0; j < src.attr_count; j++) {
				auto& src_attr = vfd.attributes[src.attributes[j]];
				auto& dst_attr = desc.attributes[src.attributes[j]];
				dst_attr.binding = src.index;
				dst_attr.index = j;
				dst_attr.type = src_attr.type;
				dst_attr.offset = stride;
				stride += svariable_type_size(src_attr.type);
			}
			dst.binding = src.index;
			dst.stride = stride;
			dst.rate = src.rate;
		}
		vf = dev->create_vertex_format(desc);
		heap_free(dev->heap, desc.attributes);
		heap_free(dev->heap, desc.bindings);
	}
	pack_seek(
		f,
		32 * target_count,
		seek_rel_cur
	);
	descs = (Desc*)heap_alloc(
		dev->heap,
		desc_count * sizeof *descs
	);
	pack_read(f, descs, desc_count * sizeof *descs);
	options = (Option*)arena_alloc(
		a,
		opt_count * sizeof *options
	);
	pack_read(f, options, opt_count * sizeof *options);
	for (i = 0; i < shader_type_count; i++) {
		int c;
		pack_read(f, &c, 4);
		module_count[i] = c;
		if (c) {
			int o, s, mask;
			int bucket, j;
			Shader_Module* m = (Shader_Module*)arena_alloc(
				a,
				c * sizeof *m
			);
			for (j = 0; j < c; j++) {
				m[j].mask = -1;
			}
			for (j = 0; j < c; j++) {
				int k;
				pack_read(f, &o,    4); /* H_Variant */
				pack_read(f, &s,    4);
				pack_read(f, &mask, 4);
				pack_seek(f, 4, seek_rel_cur);
				bucket = (int)(
					fnv1a64((uint8_t*)&mask, sizeof mask) %
					c
				);
				for (k = 0; k < c; k++) {
					Shader_Module& mod = m[bucket];
					if (mod.mask == -1)
						goto found;
					bucket = (bucket + 1) % c;
				}
				assert(0);
				{
					found:
					char* buf = (char*)heap_alloc(dev->heap, s);
					VkShaderModule r;
					int before = pack_tell(f);
					pack_seek(f, o, seek_rel_start);
					pack_read(f, buf, s);
					r = make_module(dev, buf, s);
					heap_free(dev->heap, buf);
					pack_seek(f, before, seek_rel_start);
					if (!r) return false;
					m[bucket] = Shader_Module { mask, r };
				}
			}
			modules[i] = m;
		} else {
			modules[i] = 0;
		}
	}
	return true;
}

void Texture_Loader::init(Device_Vk* d) {
	dev = d;
}

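/* bytes needed for a single mip level of the given format;
 * block-compressed formats are sized per 4x4 block */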
size_t Texture_Loader::calc_size(
	Texture_Format fmt,
	int w,
	int h
) {
	switch (fmt) {
		case texture_format_bc1:
		case texture_format_bc4:
			return (w / 4) * (h / 4) * 8;
		case texture_format_bc5:
			return (w / 4) * (h / 4) * 16;
		case texture_format_r8i:
			return w * h;
		case texture_format_rgb16f:
			return w * h * 6;
		case texture_format_rgb32f:
			return w * h * 12;
		case texture_format_rgba16f:
			return w * h * 8;
		case texture_format_rgba32f:
			return w * h * 16;
		default:
			print_err("Can't load this texture format.\n"); 
			pbreak(45498);
			return 0;
	}
}

Asset* Texture_Loader::load(
	Arena* a,
	Arena* s,
	const char* filename,
	Pack_File* f
) {
	char magic[4];
	int w, h;
	size_t size;
	Texture_Format fmt = texture_format_r8i;
	int mips = 0;
	(void)a;
	(void)s;
	pack_read(f, magic, 4);
	pack_read(f, &w, 4);
	pack_read(f, &h, 4);
	pack_read(f, &fmt, 2);
	pack_read(f, &mips, 2);
	size = calc_size(fmt, w, h);
	{
		Buffer_Id buf = dev->create_buffer(
			"texture stage",
			size,
			Buffer_Flags::copy_src |
			Buffer_Flags::cpu_readwrite
		);
		Texture_Id tex = dev->create_texture(
			filename,
			fmt,
			Texture_Flags::sampleable | Texture_Flags::copy_dst,
			w,
			h,
			1,
			mips,
			1,
			0
		);
		{
			int i;
			for (i = 0; i < mips; i++) {
				size = calc_size(fmt, w, h);
				void* mem = dev->map_buffer(buf, 0, size);
				pack_read(f, mem, size);
				dev->unmap_buffer(buf);
				auto& ctx = dev->acquire();
				ctx.copy(tex, buf, i, 0, 0, w, h);
				dev->submit(ctx);
				w >>= 1;
				h >>= 1;
			}
		}
		dev->destroy_bufferi(buf);
		return &dev->get_texture(tex);
	}
}

void Texture_Loader::unload(Asset* a) {
	Texture_Vk* tex = (Texture_Vk*)a;
	dev->destroy_texture(tex->id);
}

void Texture_Vk::init(
	Texture_Vk* t,
	Texture_Id id,
	Texture_Id parent,
	VkImage img,
	VkImageView v,
	Vram_Allocator::Allocation mem,
	Resource_State st,
	Texture_Format fmt,
	int flags,
	int w,
	int h,
	int d,
	int mip_count,
	int array_size,
	int start_mip,
	int start_array,
	bool alias,
	int samples
) {
	t->id = id;
	t->parent = parent;
	t->image = img;
	t->view = v;
	t->memory = mem;
	t->state = st;
	t->w = w;
	t->h = h;
	t->d = d;
	t->mip_count = mip_count;
	t->array_size = array_size;
	t->fmt = fmt;
	t->flags = flags;
	t->mip_count = mip_count;
	t->array_size = array_size;
	t->start_mip = start_mip;
	t->start_array = start_array;
	t->alias = alias;
	t->samples = samples;
}

void Texture_Vk::destroy(Device_Vk* dev) {
	if (!alias) {
		vkDestroyImage(dev->dev, image, &dev->ac);
		dev->vrama.free(memory);
	}
	vkDestroyImageView(dev->dev, view, &dev->ac);
	dev->textures.remove(id);
}

VkFilter Sampler_Vk::get_filter(Filter_Mode mode) {
	switch (mode) {
		case Filter_Mode::point:  return VK_FILTER_NEAREST;
		case Filter_Mode::linear: return VK_FILTER_LINEAR;
	}
	assert(0);
	return (VkFilter)0;
}

VkSamplerMipmapMode Sampler_Vk::get_mipmap_mode(
	Filter_Mode mode
) {
	switch (mode) {
		case Filter_Mode::point:
			return VK_SAMPLER_MIPMAP_MODE_NEAREST;
		case Filter_Mode::linear:
			return VK_SAMPLER_MIPMAP_MODE_LINEAR;
	}
	assert(0);
	return (VkSamplerMipmapMode)0;
}

VkSamplerAddressMode Sampler_Vk::get_mode(
	Address_Mode mode
) {
	switch (mode) {
		case Address_Mode::repeat:
			return VK_SAMPLER_ADDRESS_MODE_REPEAT;
		case Address_Mode::mirror:
			return VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT;
		case Address_Mode::clamp:
			return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
		case Address_Mode::border:
			return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
	}
	assert(0);
	return (VkSamplerAddressMode)0;
}

void Sampler_Vk::init(Device_Vk* dev, const Sampler_State& s) {
	VkSamplerCreateInfo si{};
	VkSamplerCustomBorderColorCreateInfoEXT bi{};
	VkClearColorValue col{};
	VkResult r;
	col.float32[0] = s.border[0];
	col.float32[1] = s.border[1];
	col.float32[2] = s.border[2];
	col.float32[3] = s.border[3];
	si.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
	si.magFilter = get_filter(s.mag);
	si.minFilter = get_filter(s.min);
	si.mipmapMode = get_mipmap_mode(s.mip);
	si.addressModeU = get_mode(s.address_u);
	si.addressModeV = get_mode(s.address_v);
	si.addressModeW = get_mode(s.address_w);
	si.borderColor = VK_BORDER_COLOR_FLOAT_CUSTOM_EXT;
	si.maxLod = VK_LOD_CLAMP_NONE;
	si.pNext = &bi;
	if ((int)s.compare) {
		si.compareEnable = VK_TRUE;
		si.compareOp = Pipeline_Vk::get_compare_op(s.compare);
	}
	bi.sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT;
	bi.customBorderColor = col;
	bi.format = VK_FORMAT_R32G32B32A32_SFLOAT;
	r = vkCreateSampler(dev->dev, &si, &dev->ac, &sampler);
	if (r != VK_SUCCESS) {
		print_err("Failed to create a sampler.\n");
		pbreak(r);
	}
}

void Sampler_Vk::destroy(Device_Vk* dev) {
	vkDestroySampler(dev->dev, sampler, &dev->ac);
	dev->samplers.remove(id);
}

void Sampler_Vk::set_name(Device_Vk* dev, const char* name) {
#ifdef DEBUG
	VkDebugUtilsObjectNameInfoEXT i{};
	i.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT;
	i.pObjectName = name;
	i.objectType = VK_OBJECT_TYPE_SAMPLER;
	i.objectHandle = (uint64_t)sampler;
	vkSetDebugUtilsObjectNameEXT(dev->dev, &i);
#else
	(void)dev;
	(void)name;
#endif
}

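/* allocates one device-memory page, persistently maps it when the
 * memory type is host-visible, and seeds the chunk list with a
 * single free chunk spanning the whole page */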
void Vram_Allocator::Page::init(
	Device_Vk* dev,
	VkDeviceSize s,
	int t
) {
	VkMemoryAllocateInfo ai{};
	Chunk* chunk;
	VkResult r;
	const auto& props = dev->mem_props.memoryTypes[t];
	ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
	ai.allocationSize = s;
	ai.memoryTypeIndex = t;
	size = s;
	type = t;
	next = 0;
	r = vkAllocateMemory(dev->dev, &ai, &dev->ac, &memory);
	if (r == VK_ERROR_OUT_OF_DEVICE_MEMORY) {
		print_err("Out of VRAM.\n");
		pbreak(r);
	}
	if (r != VK_SUCCESS) {
		print_err("vkAllocateMemory failed.\n");
		pbreak(r);
	}
	if (
		props.propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ||
		props.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
	) {
		vkMapMemory(
			dev->dev,
			memory,
			0,
			size,
			0,
			&mapping
		);
	} else
		mapping = 0;
	chunk = (Chunk*)heap_alloc(dev->heap, sizeof *chunk);
	chunk->offset = 0;
	chunk->pad = 0;
	chunk->size = s;
	chunk->next = 0;
	chunk->free = true;
	chunks = chunk;
#ifdef DEBUG
	if (dev->hooks)
		dev->hooks->on_page_alloc(s);
#endif
}

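/* first-fit scan over the chunk list; a larger free chunk is split
 * so the remainder stays available; returns a null allocation when
 * nothing fits */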
Vram_Allocator::Allocation Vram_Allocator::Page::imp_alloc(
	Device_Vk* dev,
	VkDeviceSize asize
) {
	Chunk* chunk;
	for (chunk = chunks; chunk; chunk = chunk->next) {
		if (chunk->free) {
			if (chunk->size == asize) {
				chunk->free = false;
				return { memory, 0, chunk };
			} else if (chunk->size > asize) {
				Chunk* nc = (Chunk*)heap_alloc(dev->heap, sizeof *nc);
				nc->offset = chunk->offset + asize;
				nc->pad = 0;
				nc->size = chunk->size - asize;
				nc->next = chunk->next;
				nc->free = true;
				chunk->next = nc;
				chunk->size = asize;
				chunk->pad = 0;
				chunk->free = false;
				return { memory, 0, chunk };
			}
		}
	}
	return { 0, 0, 0 };
}

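/* coalesces runs of adjacent free chunks so a later first-fit pass
 * can satisfy larger requests */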
void Vram_Allocator::Page::defrag(Device_Vk* dev) {
	Chunk* chunk;
	for (chunk = chunks; chunk;) {
		if (chunk->free) {
			Chunk* end = chunk->next;
			VkDeviceSize csize = chunk->size;
			for (; end && end->free;) {
				Chunk* next = end->next;
				csize += end->size;
				heap_free(dev->heap, end);
				end = next;
			}
			chunk->next = end;
			chunk->size = csize;
			if (end) {
				chunk = end->next;
			} else chunk = 0;
		} else
			chunk = chunk->next;
	}
}

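/* over-allocates by the requested alignment and records the padding
 * needed to align the chunk's offset; retries once after a defrag */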
Vram_Allocator::Allocation Vram_Allocator::Page::alloc(
	Device_Vk* dev,
	VkDeviceSize asize,
	VkDeviceSize align
) {
	VkDeviceSize as = asize + align;
	VkDeviceSize al;
	Allocation a = imp_alloc(dev, as);
	if (!a.chunk) {
		defrag(dev);
		a = imp_alloc(dev, as);
	}
	if (!a.chunk) return a;
	al = align_address((uintptr_t)a.chunk->offset, (size_t)align);
	a.chunk->pad = al - a.chunk->offset;
#ifdef DEBUG
	if (dev->hooks)
		dev->hooks->on_vram_alloc(as, align);
#endif
	return a;
}

void Vram_Allocator::init(Device_Vk* d) {
	pages = 0;
	dev = d;
}

void Vram_Allocator::destroy() {
	Page* page = pages;
	while (page) {
		/* grab the next pointers before freeing the nodes */
		Page* next_page = page->next;
		Chunk* chunk = page->chunks;
		if (page->mapping)
			vkUnmapMemory(dev->dev, page->memory);
		vkFreeMemory(dev->dev, page->memory, &dev->ac);
		while (chunk) {
			Chunk* next_chunk = chunk->next;
			heap_free(dev->heap, chunk);
			chunk = next_chunk;
		}
		heap_free(dev->heap, page);
		page = next_page;
	}
}

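/* tries every existing page of the right memory type before mapping
 * a new page whose size is rounded up to the page granularity */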
Vram_Allocator::Allocation Vram_Allocator::alloc(
	int type,
	VkDeviceSize size,
	VkDeviceSize align
) {
	Page* page = pages;
	for (; page; page = page->next) {
		if (page->type == type) {
			auto a = page->alloc(dev, size, align);
			if (a.chunk) {
				a.page = page;
				return a;
			}
		}
	}
	page = (Page*)heap_alloc(dev->heap, sizeof *page);
	page->init(
		dev,
		(VkDeviceSize)align_address(
			(uintptr_t)size + 1,
			(size_t)size_alignment
		),
		type
	);
	page->next = pages;
	pages = page;
	auto a = page->alloc(dev, size, align);
	if (a.chunk)
		a.page = page;
	return a;
}

void Vram_Allocator::free(Allocation& alloc) {
	alloc.chunk->free = true;
}

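/* pairs a cpu-visible staging buffer with a gpu-only buffer; map()
 * writes into the stage and update() records a copy to the gpu copy */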
void Staged_Buffer::init(
	Device* dev,
	const char* name,
	int s,
	int flags
) {
	size = s;
	stage = dev->create_buffer(
		name,
		size,
		Buffer_Flags::copy_src |
		Buffer_Flags::cpu_readwrite
	);
	gpuonly = dev->create_buffer(
		name,
		size,
		Buffer_Flags::copy_dst |
		flags
	);
}

void Staged_Buffer::destroy(Device* dev) {
	dev->destroy_buffer(stage);
	dev->destroy_buffer(gpuonly);
}

void* Staged_Buffer::map(Device* dev) {
	return dev->map_buffer(stage, 0, size);
}

void Staged_Buffer::unmap(Device* dev) {
	dev->unmap_buffer(stage);
}

void Staged_Buffer::update(Context& ctx) {
	ctx.copy(gpuonly, stage);
}

void Device_Debug_Hooks::on_rpo_create(const Render_Pass& rpo) {
	(void)rpo;
}

void Device_Debug_Hooks::on_rpo_destroy(const Render_Pass& rpo) {
	(void)rpo;
}

void Device_Debug_Hooks::on_fbo_create(const Render_Pass& pass) {
	(void)pass;
}

void Device_Debug_Hooks::on_fbo_destroy(const Render_Pass& pass) {
	(void)pass;
}

void Device_Debug_Hooks::on_pso_create(const Pipeline& pso) {
	(void)pso;
}

void Device_Debug_Hooks::on_pso_destroy(const Pipeline& pso) {
	(void)pso;
}

void Device_Debug_Hooks::on_dso_create(const Pipeline& pso) {
	(void)pso;
}

void Device_Debug_Hooks::on_dso_destroy(const Pipeline& pso) {
	(void)pso;
}

void Device_Debug_Hooks::on_acquire(Context& ctx) {
	(void)ctx;
}

void Device_Debug_Hooks::on_submit(Context& ctx) {
	(void)ctx;
}

void Device_Debug_Hooks::on_present(Context& ctx) {
	(void)ctx;
}

void Device_Debug_Hooks::on_page_alloc(size_t size) {
	(void)size;
}

void Device_Debug_Hooks::on_vram_alloc(size_t size, size_t align) {
	(void)size;
	(void)align;
}

int Device_Debug_Hooks::query_psos(Pipeline* psos) {
	Device_Vk* dv = (Device_Vk*)dev;
	int count = 0;
	if (psos) {
		for (auto i : dv->pso_cache) {
			auto& psok = i.first;
			psos[count++] = psok.pso;
		}
	} else {
		for (auto i : dv->pso_cache) {
			(void)i;
			count++;
		}
	}
	return count;
}