glUniform4fv() to send a matrix to the shader. Vulkan has no equivalent — instead you create a descriptor set that binds a VkBuffer to a shader binding point. This demo builds the complete descriptor system: layout, pool, set, update, and bind. The UBO holds an MVP matrix updated each frame, rotating the orange triangle on the Z axis. One UBO per swapchain image prevents the CPU from overwriting data the GPU is still reading.
In OpenGL, glUniform4fv(location, 1, ptr) sent data to the shader immediately. The driver tracked global uniform state and reconciled it at draw time. In Vulkan, you declare upfront what resources each shader binding needs (the descriptor set layout), allocate memory for binding records (the pool), fill in which VkBuffer is at each slot (update), then bind before drawing. More setup, but zero driver state tracking overhead — the same pattern works for textures, storage buffers, and images.
VkDescriptorSetLayout ← "binding 0 = uniform buffer, used in vertex stage"
↓ (describes)
VkDescriptorPool ← Memory pool: can allocate N sets with M bindings each
↓ (allocates from)
VkDescriptorSet ← Actual binding record for one draw
↓ (filled by)
vkUpdateDescriptorSets() ← "binding 0 in this set = THIS VkBuffer at offset 0"
↓ (used in)
vkCmdBindDescriptorSets() ← Attached to command buffer before vkCmdDraw
glslc shader.vert -o vert.spv
glslc shader.frag -o frag.spv
REM Copy vert.spv and frag.spv next to the .exe
#version 450
layout(binding = 0) uniform UniformBufferObject {
mat4 model;
mat4 view;
mat4 proj;
} ubo;
layout(location = 0) in vec2 inPos;
layout(location = 1) in vec3 inColor;
layout(location = 0) out vec3 fragColor;
void main() {
gl_Position = ubo.proj * ubo.view * ubo.model * vec4(inPos, 0.0, 1.0);
fragColor = inColor;
}
#version 450
layout(location = 0) in vec3 fragColor;
layout(location = 0) out vec4 outColor;
void main() { outColor = vec4(fragColor, 1.0); }
cmake_minimum_required(VERSION 3.20)
project(D19_VulkanUBO)
set(CMAKE_CXX_STANDARD 17)
find_package(Vulkan REQUIRED)
set(GLFW_DIR $ENV{GLFW_DIR})
set(GLM_DIR $ENV{GLM_DIR})
include_directories(${GLFW_DIR}/include ${GLM_DIR} ${Vulkan_INCLUDE_DIRS})
add_executable(D19_VulkanUBO src/main.cpp)
target_link_libraries(D19_VulkanUBO Vulkan::Vulkan ${GLFW_DIR}/lib-vc2022/glfw3.lib)
// =======================================================================
// RR GRAPHICS LAB — DAY 5 · DEMO 19
// Vulkan Uniform Buffer Object (UBO) — Descriptor Sets, MVP via Descriptor
// By Raushan Ranjan (MCT) | Koenig Original AI-Courseware
//
// PROJECT NAME: D19_VulkanUBO
// FOLDER: C:\Labs\D19_VulkanUBO\
//
// ── BEFORE BUILDING ──────────────────────────────────────────────────────
// glslc shader.vert -o vert.spv
// glslc shader.frag -o frag.spv
// Copy vert.spv + frag.spv next to .exe
// ─────────────────────────────────────────────────────────────────────────
//
// WHAT THIS DEMO TEACHES:
// - Why glUniform*() doesn't exist in Vulkan — descriptors replace it
// - VkDescriptorSetLayout: declares "binding 0 = uniform buffer"
// - VkDescriptorPool: memory pool for allocating descriptor sets
// - VkDescriptorSet: the actual binding record
// - vkUpdateDescriptorSets: fills in which VkBuffer is at binding 0
// - VkBuffer (host-visible, HOST_COHERENT) as a UBO per swapchain image
// - Updating UBO data each frame: map → write struct → no unmap needed
// - vkCmdBindDescriptorSets before vkCmdDraw
// - Animated rotating triangle driven by time value in UBO
//
// WHAT YOU WILL SEE:
// - Orange triangle rotating continuously, driven by MVP matrix in UBO
// - Time printed to terminal every 60 frames
// - ESC to quit. Validation: 0 errors.
//
// BUILDS ON: Demo 18 (full pipeline + triangle)
// =======================================================================
#define GLFW_INCLUDE_VULKAN
#include <GLFW/glfw3.h>
#include <glm/glm.hpp>
#include <glm/gtc/matrix_transform.hpp>
#include <iostream>
#include <vector>
#include <optional>
#include <set>
#include <algorithm>
#include <fstream>
#include <cstring>
#include <stdexcept>
const int WIN_W = 900, WIN_H = 600;
const std::vector<const char*> VAL_LAYERS = {"VK_LAYER_KHRONOS_validation"};
const std::vector<const char*> DEV_EXTS = {VK_KHR_SWAPCHAIN_EXTENSION_NAME};
// ── UBO struct — matches binding 0 layout(std140) in vertex shader ─────────
// Must be 16-byte aligned. glm matrices are already 64 bytes each.
struct UniformBufferObject {
glm::mat4 model;
glm::mat4 view;
glm::mat4 proj;
};
// ── Vertex: position + colour ───────────────────────────────────────────────
// One vertex as stored in the vertex buffer. The pipeline's vertex input
// state reads `pos` as attribute location 0 (two floats) and `color` as
// attribute location 1 (three floats), using offsetof() on this struct.
struct Vertex {
glm::vec2 pos;
glm::vec3 color;
};
// The three vertices of the triangle, all sharing the same orange colour.
// Copied once into the vertex buffer and drawn with a 3-vertex draw call.
// NOTE(review): the top/BR/BL labels look like they describe raw Y-down clip
// space; main() flips the projection's Y axis, so confirm the on-screen
// orientation before relying on them.
const std::vector<Vertex> VERTICES = {
{{ 0.0f,-0.55f},{0.98f,0.57f,0.24f}}, // top — orange
{{ 0.5f, 0.45f},{0.98f,0.57f,0.24f}}, // BR
{{-0.5f, 0.45f},{0.98f,0.57f,0.24f}}, // BL
};
// Reads the whole file at path `p` into a byte vector (used for compiled
// SPIR-V). Throws std::runtime_error("Cannot open: <path>") if the file
// cannot be opened.
static std::vector<char> readSpv(const std::string& p){
    // Opening at the end makes the initial read position equal the file size.
    std::ifstream file(p, std::ios::ate | std::ios::binary);
    if (!file.is_open()) {
        throw std::runtime_error("Cannot open: " + p);
    }

    const size_t byteCount = (size_t)file.tellg();
    std::vector<char> bytes(byteCount);

    file.seekg(0);
    file.read(bytes.data(), byteCount);
    return bytes;
}
// Wraps SPIR-V bytecode `c` in a VkShaderModule created on device `d`.
// Throws std::runtime_error if the driver rejects the module; previously the
// result was ignored and an uninitialised handle could be returned.
// The caller owns the module and must destroy it with vkDestroyShaderModule.
VkShaderModule makeMod(VkDevice d,const std::vector<char>& c){
    VkShaderModuleCreateInfo ci{};
    ci.sType=VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
    ci.codeSize=c.size();  // size in bytes, not in 32-bit words
    ci.pCode=reinterpret_cast<const uint32_t*>(c.data());

    VkShaderModule m=VK_NULL_HANDLE;
    if(vkCreateShaderModule(d,&ci,nullptr,&m)!=VK_SUCCESS)
        throw std::runtime_error("vkCreateShaderModule failed");
    return m;
}
// Queue family indices found for one physical device / surface pair.
struct QFI{
    std::optional<uint32_t> g;  // family with VK_QUEUE_GRAPHICS_BIT
    std::optional<uint32_t> p;  // family that can present to the surface
    // True once both a graphics and a present family have been found.
    // Const-qualified so it can also be called on const objects.
    bool ok() const {return g.has_value()&&p.has_value();}
};
// Scans the queue families of device `d` and records a graphics-capable
// family and a family that can present to surface `s`. Scanning stops as soon
// as both are known; either field may be empty if no such family exists.
QFI findQF(VkPhysicalDevice d,VkSurfaceKHR s){
    uint32_t familyCount;
    vkGetPhysicalDeviceQueueFamilyProperties(d,&familyCount,nullptr);
    std::vector<VkQueueFamilyProperties> families(familyCount);
    vkGetPhysicalDeviceQueueFamilyProperties(d,&familyCount,families.data());

    QFI found;
    uint32_t family=0;
    while(family<familyCount){
        if(families[family].queueFlags&VK_QUEUE_GRAPHICS_BIT){
            found.g=family;
        }

        VkBool32 canPresent=false;
        vkGetPhysicalDeviceSurfaceSupportKHR(d,family,s,&canPresent);
        if(canPresent){
            found.p=family;
        }

        if(found.ok()){
            break;
        }
        ++family;
    }
    return found;
}
// Returns the index of the first memory type on `phys` that is permitted by
// the `bits` mask (VkMemoryRequirements::memoryTypeBits) and has every
// property flag in `props`. Throws std::runtime_error("No memory type") when
// none qualifies.
uint32_t findMem(VkPhysicalDevice phys,uint32_t bits,VkMemoryPropertyFlags props){
    VkPhysicalDeviceMemoryProperties mp;
    vkGetPhysicalDeviceMemoryProperties(phys,&mp);
    for(uint32_t i=0;i<mp.memoryTypeCount;i++){
        // Unsigned mask: a signed `1<<i` would shift into the sign bit at i==31.
        const bool allowed=(bits&(uint32_t{1}<<i))!=0;
        const bool hasProps=(mp.memoryTypes[i].propertyFlags&props)==props;
        if(allowed&&hasProps)return i;
    }
    throw std::runtime_error("No memory type");
}
// Creates a buffer of `sz` bytes with the given `usage`, allocates memory
// with the requested `props`, and binds the two at offset 0.
// On success `buf` and `mem` hold the new handles, which the caller must
// destroy and free. On failure a std::runtime_error is thrown, anything
// created so far is released, and both outputs are reset to VK_NULL_HANDLE.
void makeBuffer(VkDevice dev,VkPhysicalDevice phys,VkDeviceSize sz,
                VkBufferUsageFlags usage,VkMemoryPropertyFlags props,
                VkBuffer& buf,VkDeviceMemory& mem){
    buf=VK_NULL_HANDLE;
    mem=VK_NULL_HANDLE;

    VkBufferCreateInfo bi{};
    bi.sType=VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    bi.size=sz;
    bi.usage=usage;
    bi.sharingMode=VK_SHARING_MODE_EXCLUSIVE;
    if(vkCreateBuffer(dev,&bi,nullptr,&buf)!=VK_SUCCESS)
        throw std::runtime_error("vkCreateBuffer failed");

    try{
        // The driver reports the real allocation size and the usable memory types.
        VkMemoryRequirements mr;
        vkGetBufferMemoryRequirements(dev,buf,&mr);

        VkMemoryAllocateInfo ai{};
        ai.sType=VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
        ai.allocationSize=mr.size;
        ai.memoryTypeIndex=findMem(phys,mr.memoryTypeBits,props);  // may throw
        if(vkAllocateMemory(dev,&ai,nullptr,&mem)!=VK_SUCCESS){
            mem=VK_NULL_HANDLE;
            throw std::runtime_error("vkAllocateMemory failed");
        }
        if(vkBindBufferMemory(dev,buf,mem,0)!=VK_SUCCESS)
            throw std::runtime_error("vkBindBufferMemory failed");
    }catch(...){
        // Do not leak the half-built buffer when a later step fails.
        if(mem!=VK_NULL_HANDLE)vkFreeMemory(dev,mem,nullptr);
        vkDestroyBuffer(dev,buf,nullptr);
        buf=VK_NULL_HANDLE;
        mem=VK_NULL_HANDLE;
        throw;
    }
}
int main(){
std::cout<<"\n=== Demo 19 — Vulkan UBO + Descriptor Sets ===\n\n";
glfwInit();glfwWindowHint(GLFW_CLIENT_API,GLFW_NO_API);glfwWindowHint(GLFW_RESIZABLE,GLFW_FALSE);
GLFWwindow* win=glfwCreateWindow(WIN_W,WIN_H,"Demo 19 — Vulkan UBO (ESC=quit)",nullptr,nullptr);
VkApplicationInfo ai{};ai.sType=VK_STRUCTURE_TYPE_APPLICATION_INFO;ai.apiVersion=VK_API_VERSION_1_0;
uint32_t ec=0;const char** eg=glfwGetRequiredInstanceExtensions(&ec);
std::vector<const char*> exts(eg,eg+ec);
VkInstanceCreateInfo ici{};ici.sType=VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;ici.pApplicationInfo=&ai;
ici.enabledExtensionCount=(uint32_t)exts.size();ici.ppEnabledExtensionNames=exts.data();
ici.enabledLayerCount=(uint32_t)VAL_LAYERS.size();ici.ppEnabledLayerNames=VAL_LAYERS.data();
VkInstance inst;vkCreateInstance(&ici,nullptr,&inst);
VkSurfaceKHR surface;glfwCreateWindowSurface(inst,win,nullptr,&surface);
uint32_t dc=0;vkEnumeratePhysicalDevices(inst,&dc,nullptr);
std::vector<VkPhysicalDevice> pds(dc);vkEnumeratePhysicalDevices(inst,&dc,pds.data());
VkPhysicalDevice phys=VK_NULL_HANDLE;
for(auto& d:pds){VkPhysicalDeviceProperties p{};vkGetPhysicalDeviceProperties(d,&p);
if(findQF(d,surface).ok()){phys=d;if(p.deviceType==VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU)break;}}
VkPhysicalDeviceProperties gp{};vkGetPhysicalDeviceProperties(phys,&gp);
std::cout<<"[1] GPU: "<<gp.deviceName<<"\n";
QFI qfi=findQF(phys,surface);
std::set<uint32_t> uq={qfi.g.value(),qfi.p.value()};float pr=1.0f;
std::vector<VkDeviceQueueCreateInfo> qcis;
for(uint32_t f:uq){VkDeviceQueueCreateInfo q{};q.sType=VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
q.queueFamilyIndex=f;q.queueCount=1;q.pQueuePriorities=≺qcis.push_back(q);}
VkPhysicalDeviceFeatures feat{};
VkDeviceCreateInfo dci{};dci.sType=VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
dci.queueCreateInfoCount=(uint32_t)qcis.size();dci.pQueueCreateInfos=qcis.data();
dci.pEnabledFeatures=&feat;dci.enabledExtensionCount=(uint32_t)DEV_EXTS.size();
dci.ppEnabledExtensionNames=DEV_EXTS.data();
dci.enabledLayerCount=(uint32_t)VAL_LAYERS.size();dci.ppEnabledLayerNames=VAL_LAYERS.data();
VkDevice dev;vkCreateDevice(phys,&dci,nullptr,&dev);
VkQueue gfxQ,presQ;
vkGetDeviceQueue(dev,qfi.g.value(),0,&gfxQ);vkGetDeviceQueue(dev,qfi.p.value(),0,&presQ);
VkSurfaceCapabilitiesKHR caps{};vkGetPhysicalDeviceSurfaceCapabilitiesKHR(phys,surface,&caps);
uint32_t fN=0;vkGetPhysicalDeviceSurfaceFormatsKHR(phys,surface,&fN,nullptr);
std::vector<VkSurfaceFormatKHR> fmts(fN);vkGetPhysicalDeviceSurfaceFormatsKHR(phys,surface,&fN,fmts.data());
uint32_t pmN=0;vkGetPhysicalDeviceSurfacePresentModesKHR(phys,surface,&pmN,nullptr);
std::vector<VkPresentModeKHR> pms(pmN);vkGetPhysicalDeviceSurfacePresentModesKHR(phys,surface,&pmN,pms.data());
VkSurfaceFormatKHR sf=fmts[0];
for(auto& x:fmts)if(x.format==VK_FORMAT_B8G8R8A8_SRGB&&x.colorSpace==VK_COLOR_SPACE_SRGB_NONLINEAR_KHR){sf=x;break;}
VkPresentModeKHR pm=VK_PRESENT_MODE_FIFO_KHR;
for(auto x:pms)if(x==VK_PRESENT_MODE_MAILBOX_KHR){pm=x;break;}
VkExtent2D ext=caps.currentExtent;
uint32_t imgN=std::min(caps.minImageCount+1,caps.maxImageCount>0?caps.maxImageCount:UINT32_MAX);
VkSwapchainCreateInfoKHR sci{};sci.sType=VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR;
sci.surface=surface;sci.minImageCount=imgN;sci.imageFormat=sf.format;sci.imageColorSpace=sf.colorSpace;
sci.imageExtent=ext;sci.imageArrayLayers=1;sci.imageUsage=VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
uint32_t qfa[]={qfi.g.value(),qfi.p.value()};
if(qfi.g!=qfi.p){sci.imageSharingMode=VK_SHARING_MODE_CONCURRENT;sci.queueFamilyIndexCount=2;sci.pQueueFamilyIndices=qfa;}
else sci.imageSharingMode=VK_SHARING_MODE_EXCLUSIVE;
sci.preTransform=caps.currentTransform;sci.compositeAlpha=VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
sci.presentMode=pm;sci.clipped=VK_TRUE;
VkSwapchainKHR sc;vkCreateSwapchainKHR(dev,&sci,nullptr,&sc);
uint32_t sic=0;vkGetSwapchainImagesKHR(dev,sc,&sic,nullptr);
std::vector<VkImage> sis(sic);vkGetSwapchainImagesKHR(dev,sc,&sic,sis.data());
std::vector<VkImageView> sivs(sic);
for(uint32_t i=0;i<sic;i++){VkImageViewCreateInfo iv{};iv.sType=VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
iv.image=sis[i];iv.viewType=VK_IMAGE_VIEW_TYPE_2D;iv.format=sf.format;
iv.components={VK_COMPONENT_SWIZZLE_IDENTITY,VK_COMPONENT_SWIZZLE_IDENTITY,VK_COMPONENT_SWIZZLE_IDENTITY,VK_COMPONENT_SWIZZLE_IDENTITY};
iv.subresourceRange={VK_IMAGE_ASPECT_COLOR_BIT,0,1,0,1};vkCreateImageView(dev,&iv,nullptr,&sivs[i]);}
VkAttachmentDescription att{};att.format=sf.format;att.samples=VK_SAMPLE_COUNT_1_BIT;
att.loadOp=VK_ATTACHMENT_LOAD_OP_CLEAR;att.storeOp=VK_ATTACHMENT_STORE_OP_STORE;
att.stencilLoadOp=VK_ATTACHMENT_LOAD_OP_DONT_CARE;att.stencilStoreOp=VK_ATTACHMENT_STORE_OP_DONT_CARE;
att.initialLayout=VK_IMAGE_LAYOUT_UNDEFINED;att.finalLayout=VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
VkAttachmentReference ref{0,VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL};
VkSubpassDescription sub{};sub.pipelineBindPoint=VK_PIPELINE_BIND_POINT_GRAPHICS;sub.colorAttachmentCount=1;sub.pColorAttachments=&ref;
VkSubpassDependency dep{};dep.srcSubpass=VK_SUBPASS_EXTERNAL;dep.dstSubpass=0;
dep.srcStageMask=VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;dep.srcAccessMask=0;
dep.dstStageMask=VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;dep.dstAccessMask=VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
VkRenderPassCreateInfo rpi{};rpi.sType=VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
rpi.attachmentCount=1;rpi.pAttachments=&att;rpi.subpassCount=1;rpi.pSubpasses=⊂rpi.dependencyCount=1;rpi.pDependencies=&dep;
VkRenderPass renderPass;vkCreateRenderPass(dev,&rpi,nullptr,&renderPass);
std::vector<VkFramebuffer> fbs(sic);
for(uint32_t i=0;i<sic;i++){VkFramebufferCreateInfo f{};f.sType=VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
f.renderPass=renderPass;f.attachmentCount=1;f.pAttachments=&sivs[i];f.width=ext.width;f.height=ext.height;f.layers=1;vkCreateFramebuffer(dev,&f,nullptr,&fbs[i]);}
std::cout<<"[2] Swapchain + render pass + framebuffers\n";
// ── DESCRIPTOR SET LAYOUT ─────────────────────────────────────────────
// Declares: binding 0 is a uniform buffer used in the vertex shader stage.
// This is the Vulkan equivalent of saying "I need a uniform mat4 at slot 0".
VkDescriptorSetLayoutBinding dslb{};
dslb.binding = 0;
dslb.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
dslb.descriptorCount = 1;
dslb.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
VkDescriptorSetLayoutCreateInfo dslci{};
dslci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
dslci.bindingCount = 1;
dslci.pBindings = &dslb;
VkDescriptorSetLayout dsLayout;
vkCreateDescriptorSetLayout(dev,&dslci,nullptr,&dsLayout);
std::cout<<"[3] Descriptor set layout: binding 0 = uniform buffer (vertex stage)\n";
// ── ONE UBO PER SWAPCHAIN IMAGE ───────────────────────────────────────
// One buffer per image so CPU can update next frame's UBO while GPU
// is still reading this frame's UBO. Prevents CPU/GPU race condition.
std::vector<VkBuffer> uboBufs(sic);
std::vector<VkDeviceMemory> uboMems(sic);
std::vector<void*> uboMaps(sic);
VkDeviceSize uboSize = sizeof(UniformBufferObject);
for(uint32_t i=0;i<sic;i++){
makeBuffer(dev,phys,uboSize,
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT|VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
uboBufs[i],uboMems[i]);
// Persistent map — stays mapped the whole time.
// HOST_COHERENT means no explicit flush needed.
vkMapMemory(dev,uboMems[i],0,uboSize,0,&uboMaps[i]);
}
std::cout<<"[4] "<<sic<<" UBO buffers (one per swapchain image), persistently mapped\n";
// ── DESCRIPTOR POOL ───────────────────────────────────────────────────
VkDescriptorPoolSize dps{};
dps.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
dps.descriptorCount = sic;
VkDescriptorPoolCreateInfo dpci{};
dpci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
dpci.poolSizeCount = 1;
dpci.pPoolSizes = &dps;
dpci.maxSets = sic;
VkDescriptorPool descPool;
vkCreateDescriptorPool(dev,&dpci,nullptr,&descPool);
// ── DESCRIPTOR SETS ───────────────────────────────────────────────────
// Allocate one descriptor set per swapchain image.
std::vector<VkDescriptorSetLayout> layouts(sic,dsLayout);
VkDescriptorSetAllocateInfo dsai{};
dsai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
dsai.descriptorPool = descPool;
dsai.descriptorSetCount = sic;
dsai.pSetLayouts = layouts.data();
std::vector<VkDescriptorSet> descSets(sic);
vkAllocateDescriptorSets(dev,&dsai,descSets.data());
// Point each descriptor set at its corresponding UBO buffer
for(uint32_t i=0;i<sic;i++){
VkDescriptorBufferInfo dbi{};
dbi.buffer = uboBufs[i];
dbi.offset = 0;
dbi.range = uboSize;
VkWriteDescriptorSet wds{};
wds.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
wds.dstSet = descSets[i];
wds.dstBinding = 0;
wds.dstArrayElement = 0;
wds.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
wds.descriptorCount = 1;
wds.pBufferInfo = &dbi;
vkUpdateDescriptorSets(dev,1,&wds,0,nullptr);
}
std::cout<<"[5] Descriptor sets allocated and updated\n";
// ── VERTEX BUFFER ────────────────────────────────────────────────────
VkBuffer vertBuf;VkDeviceMemory vertMem;
VkDeviceSize vbSz=sizeof(VERTICES[0])*VERTICES.size();
makeBuffer(dev,phys,vbSz,VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT|VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
vertBuf,vertMem);
void* vd;vkMapMemory(dev,vertMem,0,vbSz,0,&vd);
memcpy(vd,VERTICES.data(),(size_t)vbSz);vkUnmapMemory(dev,vertMem);
// ── GRAPHICS PIPELINE — includes descriptor set layout ────────────────
auto vc=readSpv("vert.spv");auto fc=readSpv("frag.spv");
VkShaderModule vm=makeMod(dev,vc),fm=makeMod(dev,fc);
VkPipelineShaderStageCreateInfo stg[2]{};
stg[0].sType=VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;stg[0].stage=VK_SHADER_STAGE_VERTEX_BIT;stg[0].module=vm;stg[0].pName="main";
stg[1].sType=VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;stg[1].stage=VK_SHADER_STAGE_FRAGMENT_BIT;stg[1].module=fm;stg[1].pName="main";
VkVertexInputBindingDescription vbd{};vbd.binding=0;vbd.stride=sizeof(Vertex);vbd.inputRate=VK_VERTEX_INPUT_RATE_VERTEX;
VkVertexInputAttributeDescription vad[2]{};
vad[0]={0,0,VK_FORMAT_R32G32_SFLOAT, (uint32_t)offsetof(Vertex,pos)};
vad[1]={1,0,VK_FORMAT_R32G32B32_SFLOAT,(uint32_t)offsetof(Vertex,color)};
VkPipelineVertexInputStateCreateInfo vi{};vi.sType=VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
vi.vertexBindingDescriptionCount=1;vi.pVertexBindingDescriptions=&vbd;
vi.vertexAttributeDescriptionCount=2;vi.pVertexAttributeDescriptions=vad;
VkPipelineInputAssemblyStateCreateInfo ia{};ia.sType=VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;ia.topology=VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
VkViewport vp{0,0,(float)ext.width,(float)ext.height,0,1};VkRect2D sc2={{0,0},ext};
VkPipelineViewportStateCreateInfo vps{};vps.sType=VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;vps.viewportCount=1;vps.pViewports=&vp;vps.scissorCount=1;vps.pScissors=&sc2;
VkPipelineRasterizationStateCreateInfo rs{};rs.sType=VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;rs.polygonMode=VK_POLYGON_MODE_FILL;rs.cullMode=VK_CULL_MODE_NONE;rs.frontFace=VK_FRONT_FACE_CLOCKWISE;rs.lineWidth=1.0f;
VkPipelineMultisampleStateCreateInfo ms{};ms.sType=VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;ms.rasterizationSamples=VK_SAMPLE_COUNT_1_BIT;
VkPipelineColorBlendAttachmentState cba{};cba.colorWriteMask=VK_COLOR_COMPONENT_R_BIT|VK_COLOR_COMPONENT_G_BIT|VK_COLOR_COMPONENT_B_BIT|VK_COLOR_COMPONENT_A_BIT;cba.blendEnable=VK_FALSE;
VkPipelineColorBlendStateCreateInfo cb{};cb.sType=VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;cb.attachmentCount=1;cb.pAttachments=&cba;
// Pipeline layout includes the descriptor set layout
VkPipelineLayoutCreateInfo pli{};pli.sType=VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
pli.setLayoutCount=1;pli.pSetLayouts=&dsLayout;
VkPipelineLayout pipelineLayout;vkCreatePipelineLayout(dev,&pli,nullptr,&pipelineLayout);
VkGraphicsPipelineCreateInfo gpci{};gpci.sType=VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
gpci.stageCount=2;gpci.pStages=stg;gpci.pVertexInputState=&vi;gpci.pInputAssemblyState=&ia;
gpci.pViewportState=&vps;gpci.pRasterizationState=&rs;gpci.pMultisampleState=&ms;
gpci.pColorBlendState=&cb;gpci.layout=pipelineLayout;gpci.renderPass=renderPass;gpci.subpass=0;
VkPipeline pipeline;vkCreateGraphicsPipelines(dev,VK_NULL_HANDLE,1,&gpci,nullptr,&pipeline);
vkDestroyShaderModule(dev,fm,nullptr);vkDestroyShaderModule(dev,vm,nullptr);
std::cout<<"[6] Graphics pipeline created\n";
VkCommandPoolCreateInfo cpci{};cpci.sType=VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
cpci.flags=VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;cpci.queueFamilyIndex=qfi.g.value();
VkCommandPool cmdPool;vkCreateCommandPool(dev,&cpci,nullptr,&cmdPool);
std::vector<VkCommandBuffer> cmds(sic);
VkCommandBufferAllocateInfo cbai{};cbai.sType=VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
cbai.commandPool=cmdPool;cbai.level=VK_COMMAND_BUFFER_LEVEL_PRIMARY;cbai.commandBufferCount=sic;
vkAllocateCommandBuffers(dev,&cbai,cmds.data());
VkSemaphoreCreateInfo semci{};semci.sType=VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
VkFenceCreateInfo fenci{};fenci.sType=VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;fenci.flags=VK_FENCE_CREATE_SIGNALED_BIT;
VkSemaphore imgAvail,renDone;VkFence inFlight;
vkCreateSemaphore(dev,&semci,nullptr,&imgAvail);vkCreateSemaphore(dev,&semci,nullptr,&renDone);
vkCreateFence(dev,&fenci,nullptr,&inFlight);
std::cout<<"[7] Render loop starting. ESC=quit.\n\n";
uint64_t frame=0;
while(!glfwWindowShouldClose(win)){
glfwPollEvents();
if(glfwGetKey(win,GLFW_KEY_ESCAPE)==GLFW_PRESS)glfwSetWindowShouldClose(win,GLFW_TRUE);
vkWaitForFences(dev,1,&inFlight,VK_TRUE,UINT64_MAX);vkResetFences(dev,1,&inFlight);
uint32_t idx;
if(vkAcquireNextImageKHR(dev,sc,UINT64_MAX,imgAvail,VK_NULL_HANDLE,&idx)==VK_ERROR_OUT_OF_DATE_KHR)continue;
// ── UPDATE UBO for this frame's swapchain image ───────────────────
float t=(float)glfwGetTime();
UniformBufferObject ubo{};
ubo.model = glm::rotate(glm::mat4(1.0f),glm::radians(t*45.0f),glm::vec3(0,0,1));
ubo.view = glm::lookAt(glm::vec3(0,0,2),glm::vec3(0),glm::vec3(0,1,0));
ubo.proj = glm::perspective(glm::radians(45.0f),(float)ext.width/ext.height,0.1f,10.0f);
ubo.proj[1][1] *= -1; // Vulkan NDC Y is flipped vs OpenGL
memcpy(uboMaps[idx],&ubo,sizeof(ubo));
auto& cmd=cmds[idx];vkResetCommandBuffer(cmd,0);
VkCommandBufferBeginInfo cbbi{};cbbi.sType=VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
vkBeginCommandBuffer(cmd,&cbbi);
VkClearValue clr{};clr.color.float32[0]=0.04f;clr.color.float32[1]=0.06f;clr.color.float32[2]=0.14f;clr.color.float32[3]=1.0f;
VkRenderPassBeginInfo rpbi{};rpbi.sType=VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
rpbi.renderPass=renderPass;rpbi.framebuffer=fbs[idx];rpbi.renderArea={{0,0},ext};
rpbi.clearValueCount=1;rpbi.pClearValues=&clr;
vkCmdBeginRenderPass(cmd,&rpbi,VK_SUBPASS_CONTENTS_INLINE);
vkCmdBindPipeline(cmd,VK_PIPELINE_BIND_POINT_GRAPHICS,pipeline);
VkDeviceSize offset=0;vkCmdBindVertexBuffers(cmd,0,1,&vertBuf,&offset);
// ── BIND DESCRIPTOR SET — this is how the UBO reaches the shader ─
vkCmdBindDescriptorSets(cmd,VK_PIPELINE_BIND_POINT_GRAPHICS,
pipelineLayout,
0, // first set index
1, // number of sets
&descSets[idx], // the set for this swapchain image
0,nullptr); // no dynamic offsets
vkCmdDraw(cmd,3,1,0,0);
vkCmdEndRenderPass(cmd);vkEndCommandBuffer(cmd);
VkPipelineStageFlags ws=VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
VkSubmitInfo si{};si.sType=VK_STRUCTURE_TYPE_SUBMIT_INFO;
si.waitSemaphoreCount=1;si.pWaitSemaphores=&imgAvail;si.pWaitDstStageMask=&ws;
si.commandBufferCount=1;si.pCommandBuffers=&cmd;
si.signalSemaphoreCount=1;si.pSignalSemaphores=&renDone;
vkQueueSubmit(gfxQ,1,&si,inFlight);
VkPresentInfoKHR pi{};pi.sType=VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
pi.waitSemaphoreCount=1;pi.pWaitSemaphores=&renDone;
pi.swapchainCount=1;pi.pSwapchains=≻pi.pImageIndices=&idx;
vkQueuePresentKHR(presQ,&pi);
if(++frame%60==0) std::cout<<"t="<<(int)t<<"s frame="<<frame<<"\n";
}
vkDeviceWaitIdle(dev);
vkDestroyFence(dev,inFlight,nullptr);
vkDestroySemaphore(dev,renDone,nullptr);vkDestroySemaphore(dev,imgAvail,nullptr);
vkDestroyCommandPool(dev,cmdPool,nullptr);
vkDestroyPipeline(dev,pipeline,nullptr);vkDestroyPipelineLayout(dev,pipelineLayout,nullptr);
for(uint32_t i=0;i<sic;i++){
vkUnmapMemory(dev,uboMems[i]);
vkDestroyBuffer(dev,uboBufs[i],nullptr);
vkFreeMemory(dev,uboMems[i],nullptr);
}
vkDestroyDescriptorPool(dev,descPool,nullptr);
vkDestroyDescriptorSetLayout(dev,dsLayout,nullptr);
vkDestroyBuffer(dev,vertBuf,nullptr);vkFreeMemory(dev,vertMem,nullptr);
for(auto fb:fbs)vkDestroyFramebuffer(dev,fb,nullptr);
vkDestroyRenderPass(dev,renderPass,nullptr);
for(auto iv:sivs)vkDestroyImageView(dev,iv,nullptr);
vkDestroySwapchainKHR(dev,sc,nullptr);vkDestroySurfaceKHR(inst,surface,nullptr);
vkDestroyDevice(dev,nullptr);vkDestroyInstance(inst,nullptr);
glfwDestroyWindow(win);glfwTerminate();
std::cout<<"Clean shutdown. Validation: 0 errors.\n";return 0;
}cd C:\Labs\D19_VulkanUBO cmake -B build -G "Visual Studio 17 2022" -A x64 cmake --build build --config Release copy vert.spv build\Release\ copy frag.spv build\Release\ build\Release\D19_VulkanUBO.exe
- Change rotation speed: In the render loop change t*45.0f to t*180.0f. Triangle spins 4× faster. No pipeline rebuild needed — that is the UBO advantage over baked constants.
- Add scale to model matrix: Wrap the rotate call with glm::scale(..., glm::vec3(0.5f)). Triangle shrinks to half size while still rotating. Both transforms combine in the single model matrix.
- Wrong: one UBO for all images: Point all descriptor sets at uboBufs[0] instead of uboBufs[i]. Run. Observe tearing or validation errors — CPU overwrites frame N+1 while GPU reads frame N from the same buffer.
Fix 1 — VkPushConstantRange in pipeline layout: The compute shader uses layout(push_constant) for width/height. Without declaring the range in VkPipelineLayoutCreateInfo, validation fires VUID-07987 and all output pixels become 0.
Fix 2 — vkCmdPushConstants before vkCmdDispatch: Without pushing the constants first, VUID-08602 fires and the shader reads uninitialised memory. Output is all zeros. Push constants must be set before the dispatch that reads them.
The graphics pipeline has a fixed sequence: vertex shading → rasterisation → fragment shading. The compute pipeline has none of that — just a GLSL compute shader dispatched in workgroups. No render pass, no framebuffer, no swapchain. Data flows in via storage buffers. gl_GlobalInvocationID tells each thread which pixel it owns. vkCmdDispatch(x,y,z) launches the workgroups.
vkCmdDispatch(16, 16, 1) ← 256 workgroups total
Each workgroup: 16×16 threads ← local_size_x=16, local_size_y=16
Total threads: 256 × 256 = 65,536 (one per pixel)
Thread (x=37, y=112):
idx = 112 * 256 + 37 = 28,709
invert pixel at [37,112], write to output[28,709]
Barrier after dispatch:
srcStage=COMPUTE dstStage=HOST
Ensures GPU finishes writing before CPU maps output buffer
glslc compute.comp -o comp.spv
REM copy comp.spv build\Release\
#version 450
// Workgroup: 16x16 threads = 256 threads per group, each handles one pixel
layout(local_size_x = 16, local_size_y = 16) in;
layout(std430, binding = 0) readonly buffer In { uint pixels_in[]; };
layout(std430, binding = 1) writeonly buffer Out { uint pixels_out[]; };
// Push constants: image dimensions (declared in pipeline layout, pushed before dispatch)
layout(push_constant) uniform PC { uint width; uint height; } pc;
void main() {
uint x = gl_GlobalInvocationID.x;
uint y = gl_GlobalInvocationID.y;
// Guard: threads outside image do nothing
if (x >= pc.width || y >= pc.height) return;
uint idx = y * pc.width + x;
uint packed = pixels_in[idx];
// Unpack RGBA from uint32
uint r = (packed >> 0) & 0xFFu;
uint g = (packed >> 8) & 0xFFu;
uint b = (packed >> 16) & 0xFFu;
uint a = (packed >> 24) & 0xFFu;
// Invert RGB, preserve alpha
pixels_out[idx] = ((255u - r) << 0)
| ((255u - g) << 8)
| ((255u - b) << 16)
| (a << 24);
}
cmake_minimum_required(VERSION 3.20)
project(D20_VulkanCompute)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
find_package(Vulkan REQUIRED)
set(GLFW_DIR $ENV{GLFW_DIR})
set(GLM_DIR $ENV{GLM_DIR})
include_directories(${GLFW_DIR}/include ${GLM_DIR} ${Vulkan_INCLUDE_DIRS})
add_executable(D20_VulkanCompute src/main.cpp)
target_link_libraries(D20_VulkanCompute Vulkan::Vulkan ${GLFW_DIR}/lib-vc2022/glfw3.lib)
// =======================================================================
// RR GRAPHICS LAB — DAY 5 · DEMO 20 (FIXED + CPU COMPARISON)
// Vulkan Compute Shader — GPU Parallel Image Processing (Invert Filter)
// By Raushan Ranjan (MCT) | Koenig Original AI-Courseware
// =======================================================================
#define GLFW_INCLUDE_VULKAN
#include <GLFW/glfw3.h>
#include <iostream>
#include <vector>
#include <fstream>
#include <cstring>
#include <stdexcept>
#include <chrono>
const uint32_t IMG_W = 256, IMG_H = 256;
const uint32_t PIXEL_COUNT = IMG_W * IMG_H;
// CPU-side mirror of the compute shader's push-constant block `PC`:
// the image dimensions, in pixels.
struct PushConstants {
    uint32_t width;
    uint32_t height;
};
// Reads the whole file at path `p` into a byte vector (used for compiled
// SPIR-V). If the file cannot be opened, throws std::runtime_error with a
// message that also tells the user how to build and copy comp.spv.
static std::vector<char> readSpv(const std::string& p) {
    // Opening at the end makes the initial read position equal the file size.
    std::ifstream file(p, std::ios::ate | std::ios::binary);
    if (!file.is_open()) {
        throw std::runtime_error(
            "Cannot open: " + p +
            "\n Run: glslc compute.comp -o comp.spv"
            "\n Then: copy comp.spv build\\Release\\");
    }

    const size_t byteCount = (size_t)file.tellg();
    std::vector<char> bytes(byteCount);

    file.seekg(0);
    file.read(bytes.data(), byteCount);
    return bytes;
}
// Wraps SPIR-V bytecode `c` in a VkShaderModule created on device `d`.
// Throws std::runtime_error if the driver rejects the module; previously the
// result was ignored and an uninitialised handle could be returned.
// The caller owns the module and must destroy it with vkDestroyShaderModule.
VkShaderModule makeMod(VkDevice d, const std::vector<char>& c) {
    VkShaderModuleCreateInfo ci{};
    ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
    ci.codeSize = c.size();  // size in bytes, not in 32-bit words
    ci.pCode = reinterpret_cast<const uint32_t*>(c.data());

    VkShaderModule m = VK_NULL_HANDLE;
    if (vkCreateShaderModule(d, &ci, nullptr, &m) != VK_SUCCESS)
        throw std::runtime_error("vkCreateShaderModule failed");
    return m;
}
// Returns the index of the first queue family on `dev` that advertises
// VK_QUEUE_COMPUTE_BIT, or UINT32_MAX when no family does.
uint32_t findComputeFamily(VkPhysicalDevice dev) {
    uint32_t familyCount;
    vkGetPhysicalDeviceQueueFamilyProperties(dev, &familyCount, nullptr);
    std::vector<VkQueueFamilyProperties> families(familyCount);
    vkGetPhysicalDeviceQueueFamilyProperties(dev, &familyCount, families.data());

    uint32_t family = 0;
    while (family < familyCount) {
        const bool canCompute = (families[family].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0;
        if (canCompute) {
            return family;
        }
        ++family;
    }
    return UINT32_MAX;
}
// Returns the index of the first memory type on `phys` that is permitted by
// the `bits` mask (VkMemoryRequirements::memoryTypeBits) and has every
// property flag in `props`. Throws std::runtime_error("No memory type") when
// none qualifies.
uint32_t findMem(VkPhysicalDevice phys, uint32_t bits, VkMemoryPropertyFlags props) {
    VkPhysicalDeviceMemoryProperties mp;
    vkGetPhysicalDeviceMemoryProperties(phys, &mp);
    for (uint32_t i = 0; i < mp.memoryTypeCount; i++) {
        // Unsigned mask: a signed `1 << i` would shift into the sign bit at i == 31.
        const bool allowed = (bits & (uint32_t{1} << i)) != 0;
        const bool hasProps = (mp.memoryTypes[i].propertyFlags & props) == props;
        if (allowed && hasProps) return i;
    }
    throw std::runtime_error("No memory type");
}
// Creates a buffer of `sz` bytes with usage `use`, allocates memory with the
// requested `props`, and binds the two at offset 0.
// On success `buf` and `mem` hold the new handles, which the caller must
// destroy and free. On failure a std::runtime_error is thrown, anything
// created so far is released, and both outputs are reset to VK_NULL_HANDLE.
void mkBuf(VkDevice dev, VkPhysicalDevice phys, VkDeviceSize sz,
           VkBufferUsageFlags use, VkMemoryPropertyFlags props,
           VkBuffer& buf, VkDeviceMemory& mem) {
    buf = VK_NULL_HANDLE;
    mem = VK_NULL_HANDLE;

    VkBufferCreateInfo bi{};
    bi.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    bi.size = sz;
    bi.usage = use;
    bi.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    if (vkCreateBuffer(dev, &bi, nullptr, &buf) != VK_SUCCESS)
        throw std::runtime_error("vkCreateBuffer failed");

    try {
        // The driver reports the real allocation size and the usable memory types.
        VkMemoryRequirements mr;
        vkGetBufferMemoryRequirements(dev, buf, &mr);

        VkMemoryAllocateInfo ai{};
        ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
        ai.allocationSize = mr.size;
        ai.memoryTypeIndex = findMem(phys, mr.memoryTypeBits, props);  // may throw
        if (vkAllocateMemory(dev, &ai, nullptr, &mem) != VK_SUCCESS) {
            mem = VK_NULL_HANDLE;
            throw std::runtime_error("vkAllocateMemory failed");
        }
        if (vkBindBufferMemory(dev, buf, mem, 0) != VK_SUCCESS)
            throw std::runtime_error("vkBindBufferMemory failed");
    } catch (...) {
        // Do not leak the half-built buffer when a later step fails.
        if (mem != VK_NULL_HANDLE) vkFreeMemory(dev, mem, nullptr);
        vkDestroyBuffer(dev, buf, nullptr);
        buf = VK_NULL_HANDLE;
        mem = VK_NULL_HANDLE;
        throw;
    }
}
int main() {
std::cout << "\n=== Demo 20 — Vulkan Compute: Image Invert Filter ===\n\n";
std::cout << "Image: " << IMG_W << "x" << IMG_H << " = " << PIXEL_COUNT << " pixels\n\n";
// ── CPU BASELINE — run the same invert on CPU first ───────────────────
std::cout << "--- CPU Baseline (single-threaded) ---\n";
std::vector<uint32_t> cpuIn(PIXEL_COUNT), cpuOut(PIXEL_COUNT);
for (uint32_t y = 0; y < IMG_H; y++)
for (uint32_t x = 0; x < IMG_W; x++) {
uint32_t i = y * IMG_W + x;
cpuIn[i] = (uint8_t)x | ((uint8_t)y << 8) | (128u << 16) | (255u << 24);
}
auto cpuT0 = std::chrono::high_resolution_clock::now();
for (uint32_t i = 0; i < PIXEL_COUNT; i++) {
uint32_t p = cpuIn[i];
uint32_t r = 255u - ((p >> 0) & 0xFFu);
uint32_t g = 255u - ((p >> 8) & 0xFFu);
uint32_t b = 255u - ((p >> 16) & 0xFFu);
uint32_t a = (p >> 24) & 0xFFu;
cpuOut[i] = r | (g << 8) | (b << 16) | (a << 24);
}
auto cpuUs = std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::high_resolution_clock::now() - cpuT0).count();
uint32_t cp = cpuOut[0];
std::cout << "CPU pixel [0,0]: R=" << (cp & 0xFF)
<< " G=" << ((cp >> 8) & 0xFF)
<< " B=" << ((cp >> 16) & 0xFF) << "\n";
std::cout << "CPU time: " << cpuUs << " us ("
<< (cpuUs / 1000.f) << " ms) — single thread, no SIMD\n\n";
// ── VULKAN SETUP ──────────────────────────────────────────────────────
glfwInit();
VkApplicationInfo ai{};
ai.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; ai.apiVersion = VK_API_VERSION_1_0;
const std::vector<const char*> val = {"VK_LAYER_KHRONOS_validation"};
VkInstanceCreateInfo ici{};
ici.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; ici.pApplicationInfo = &ai;
ici.enabledLayerCount = (uint32_t)val.size(); ici.ppEnabledLayerNames = val.data();
VkInstance inst; vkCreateInstance(&ici, nullptr, &inst);
uint32_t dc = 0; vkEnumeratePhysicalDevices(inst, &dc, nullptr);
std::vector<VkPhysicalDevice> pds(dc); vkEnumeratePhysicalDevices(inst, &dc, pds.data());
VkPhysicalDevice phys = pds[0];
for (auto& d : pds) {
VkPhysicalDeviceProperties p{}; vkGetPhysicalDeviceProperties(d, &p);
if (p.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) { phys = d; break; }
}
VkPhysicalDeviceProperties gp{}; vkGetPhysicalDeviceProperties(phys, &gp);
std::cout << "--- GPU: " << gp.deviceName << " ---\n";
uint32_t cFam = findComputeFamily(phys);
float pr = 1.f;
VkDeviceQueueCreateInfo dqci{};
dqci.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
dqci.queueFamilyIndex = cFam; dqci.queueCount = 1; dqci.pQueuePriorities = &pr;
VkPhysicalDeviceFeatures feat{};
VkDeviceCreateInfo dci{};
dci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
dci.queueCreateInfoCount = 1; dci.pQueueCreateInfos = &dqci; dci.pEnabledFeatures = &feat;
dci.enabledLayerCount = (uint32_t)val.size(); dci.ppEnabledLayerNames = val.data();
VkDevice dev; vkCreateDevice(phys, &dci, nullptr, &dev);
VkQueue computeQ; vkGetDeviceQueue(dev, cFam, 0, &computeQ);
VkDeviceSize bufSz = PIXEL_COUNT * 4;
VkBuffer inBuf, outBuf; VkDeviceMemory inMem, outMem;
mkBuf(dev, phys, bufSz, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
inBuf, inMem);
mkBuf(dev, phys, bufSz, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
outBuf, outMem);
uint8_t* inData;
vkMapMemory(dev, inMem, 0, bufSz, 0, (void**)&inData);
for (uint32_t y = 0; y < IMG_H; y++)
for (uint32_t x = 0; x < IMG_W; x++) {
uint32_t i = (y * IMG_W + x) * 4;
inData[i+0]=(uint8_t)x; inData[i+1]=(uint8_t)y;
inData[i+2]=128; inData[i+3]=255;
}
vkUnmapMemory(dev, inMem);
std::cout << "[4] Input buffer filled\n";
VkDescriptorSetLayoutBinding bindings[2]{};
bindings[0]={0,VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,1,VK_SHADER_STAGE_COMPUTE_BIT,nullptr};
bindings[1]={1,VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,1,VK_SHADER_STAGE_COMPUTE_BIT,nullptr};
VkDescriptorSetLayoutCreateInfo dslci{};
dslci.sType=VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
dslci.bindingCount=2; dslci.pBindings=bindings;
VkDescriptorSetLayout dsLayout; vkCreateDescriptorSetLayout(dev,&dslci,nullptr,&dsLayout);
VkDescriptorPoolSize dps{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,2};
VkDescriptorPoolCreateInfo dpci{};
dpci.sType=VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
dpci.poolSizeCount=1; dpci.pPoolSizes=&dps; dpci.maxSets=1;
VkDescriptorPool pool; vkCreateDescriptorPool(dev,&dpci,nullptr,&pool);
VkDescriptorSetAllocateInfo dsai{};
dsai.sType=VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
dsai.descriptorPool=pool; dsai.descriptorSetCount=1; dsai.pSetLayouts=&dsLayout;
VkDescriptorSet ds; vkAllocateDescriptorSets(dev,&dsai,&ds);
VkDescriptorBufferInfo dbi0{inBuf,0,bufSz}, dbi1{outBuf,0,bufSz};
VkWriteDescriptorSet wds[2]{};
wds[0]={VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,nullptr,ds,0,0,1,VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,nullptr,&dbi0};
wds[1]={VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,nullptr,ds,1,0,1,VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,nullptr,&dbi1};
vkUpdateDescriptorSets(dev,2,wds,0,nullptr);
VkPushConstantRange pcRange{};
pcRange.stageFlags=VK_SHADER_STAGE_COMPUTE_BIT; pcRange.offset=0; pcRange.size=sizeof(PushConstants);
VkPipelineLayoutCreateInfo pli{};
pli.sType=VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
pli.setLayoutCount=1; pli.pSetLayouts=&dsLayout;
pli.pushConstantRangeCount=1; pli.pPushConstantRanges=&pcRange;
VkPipelineLayout pipeLayout; vkCreatePipelineLayout(dev,&pli,nullptr,&pipeLayout);
auto compCode=readSpv("comp.spv");
VkShaderModule compMod=makeMod(dev,compCode);
VkComputePipelineCreateInfo cpci{};
cpci.sType=VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
cpci.stage.sType=VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
cpci.stage.stage=VK_SHADER_STAGE_COMPUTE_BIT; cpci.stage.module=compMod; cpci.stage.pName="main";
cpci.layout=pipeLayout;
VkPipeline compPipeline; vkCreateComputePipelines(dev,VK_NULL_HANDLE,1,&cpci,nullptr,&compPipeline);
vkDestroyShaderModule(dev,compMod,nullptr);
std::cout << "[6] Compute pipeline ready\n";
VkCommandPoolCreateInfo cpciPool{};
cpciPool.sType=VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
cpciPool.queueFamilyIndex=cFam; cpciPool.flags=VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
VkCommandPool cmdPool; vkCreateCommandPool(dev,&cpciPool,nullptr,&cmdPool);
VkCommandBufferAllocateInfo cbai{};
cbai.sType=VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
cbai.commandPool=cmdPool; cbai.level=VK_COMMAND_BUFFER_LEVEL_PRIMARY; cbai.commandBufferCount=1;
VkCommandBuffer cmd; vkAllocateCommandBuffers(dev,&cbai,&cmd);
VkCommandBufferBeginInfo cbbi{};
cbbi.sType=VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
cbbi.flags=VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
vkBeginCommandBuffer(cmd,&cbbi);
vkCmdBindPipeline(cmd,VK_PIPELINE_BIND_POINT_COMPUTE,compPipeline);
vkCmdBindDescriptorSets(cmd,VK_PIPELINE_BIND_POINT_COMPUTE,pipeLayout,0,1,&ds,0,nullptr);
PushConstants pc{IMG_W,IMG_H};
vkCmdPushConstants(cmd,pipeLayout,VK_SHADER_STAGE_COMPUTE_BIT,0,sizeof(PushConstants),&pc);
uint32_t gx=(IMG_W+15)/16, gy=(IMG_H+15)/16;
vkCmdDispatch(cmd,gx,gy,1);
VkMemoryBarrier barrier{};
barrier.sType=VK_STRUCTURE_TYPE_MEMORY_BARRIER;
barrier.srcAccessMask=VK_ACCESS_SHADER_WRITE_BIT; barrier.dstAccessMask=VK_ACCESS_HOST_READ_BIT;
vkCmdPipelineBarrier(cmd,VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,VK_PIPELINE_STAGE_HOST_BIT,
0,1,&barrier,0,nullptr,0,nullptr);
vkEndCommandBuffer(cmd);
std::cout << "[7] Dispatch: " << gx << "x" << gy << " workgroups = " << PIXEL_COUNT << " threads\n";
VkFenceCreateInfo fci{}; fci.sType=VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
VkFence fence; vkCreateFence(dev,&fci,nullptr,&fence);
// ── GPU DISPATCH + TIMING ─────────────────────────────────────────────
auto gpuT0=std::chrono::high_resolution_clock::now();
VkSubmitInfo si{}; si.sType=VK_STRUCTURE_TYPE_SUBMIT_INFO;
si.commandBufferCount=1; si.pCommandBuffers=&cmd;
vkQueueSubmit(computeQ,1,&si,fence);
vkWaitForFences(dev,1,&fence,VK_TRUE,UINT64_MAX);
auto gpuUs=std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::high_resolution_clock::now()-gpuT0).count();
// ── READ BACK AND VERIFY ──────────────────────────────────────────────
uint8_t* outData;
vkMapMemory(dev,outMem,0,bufSz,0,(void**)&outData);
std::cout << "\n=== Result Verification ===\n";
std::cout << "Input [0,0]: R=0 G=0 B=128\n";
std::cout << "Output [0,0]: R=" << (int)outData[0]
<< " G=" << (int)outData[1]
<< " B=" << (int)outData[2] << "\n";
std::cout << "Expected: R=255 G=255 B=127\n\n";
bool pass=(outData[0]==255&&outData[1]==255&&(outData[2]==127||outData[2]==128));
std::cout << "Test: " << (pass?"PASS \xE2\x9C\x93":"FAIL \xE2\x9C\x97") << "\n\n";
// ── SPEEDUP COMPARISON ────────────────────────────────────────────────
std::cout << "=== CPU vs GPU Comparison ===\n";
std::cout << "Pixels processed: " << PIXEL_COUNT << "\n";
std::cout << "CPU time (1 core): " << cpuUs << " us ("
<< (cpuUs/1000.f) << " ms)\n";
std::cout << "GPU time (Vk): " << gpuUs << " us ("
<< (gpuUs/1000.f) << " ms)\n";
if (gpuUs > 0) {
float speedup = (float)cpuUs / (float)gpuUs;
std::cout << "Speedup: " << speedup << "x faster on GPU\n";
std::cout << "\nNote: GPU time includes command buffer submission + fence wait.\n";
std::cout << " For larger images (4096x4096 = 16M pixels) the gap grows\n";
std::cout << " dramatically because GPU parallelism scales, CPU does not.\n";
}
vkUnmapMemory(dev,outMem);
vkDestroyFence(dev,fence,nullptr);
vkDestroyCommandPool(dev,cmdPool,nullptr);
vkDestroyPipeline(dev,compPipeline,nullptr); vkDestroyPipelineLayout(dev,pipeLayout,nullptr);
vkDestroyDescriptorPool(dev,pool,nullptr); vkDestroyDescriptorSetLayout(dev,dsLayout,nullptr);
vkDestroyBuffer(dev,inBuf,nullptr); vkFreeMemory(dev,inMem,nullptr);
vkDestroyBuffer(dev,outBuf,nullptr); vkFreeMemory(dev,outMem,nullptr);
vkDestroyDevice(dev,nullptr); vkDestroyInstance(inst,nullptr); glfwTerminate();
std::cout << "\nClean shutdown. Validation: 0 errors.\n";
return 0;
}
cd C:\Labs\D20_VulkanCompute
glslc compute.comp -o comp.spv
cmake -B build -G "Visual Studio 17 2022" -A x64
cmake --build build --config Release
copy comp.spv build\Release\
build\Release\D20_VulkanCompute.exe
- Change kernel to greyscale: In compute.comp replace invert lines with
uint lum = (r+g+b)/3u; r_out=lum; g_out=lum; b_out=lum;
Recompile with glslc. Output becomes greyscale. Same dispatch infrastructure, different kernel.
- Process a larger image: Change IMG_W and IMG_H to 1024. Dispatch scales automatically (64 groups per axis). Measure the new timing and compare its ratio against the pixel-count ratio — the pixel count grows with the square of the side length (e.g. going from 256 to 1024 per side is 16× the pixels, not 4×).
- Remove the pipeline barrier: Comment out vkCmdPipelineBarrier. Validation reports missing memory dependency. Output may be zeros or garbage — CPU reads before GPU finishes writing.
Fix 1 — VUID-01796 (push constant stage mismatch): The compute pipeline layout had stageFlags = COMPUTE | FRAGMENT but vkCmdPushConstants was called with COMPUTE only. The spec requires the flags you pass to match all stages in the overlapping range. Fix: give compPL a COMPUTE_BIT-only range and gfxPL a FRAGMENT_BIT-only range. Each layout is independent — no need to combine them.
Fix 2 — VUID-00067 (semaphore reuse): A single imgAvail semaphore was shared across all 3 swapchain images. After frame 0 presented, the swapchain still held the semaphore when frame 1 tried to signal it again. Fix: one imgAvail semaphore per swapchain image (std::vector<VkSemaphore> imgAvail(sic)), indexed by a frameIndex counter cycling 0→1→2→0.
The compute output buffer is a VkBuffer with VK_BUFFER_USAGE_STORAGE_BUFFER_BIT. After the dispatch writes to it, a pipeline barrier ensures all writes complete before the fragment shader reads. The fragment shader accesses it via a descriptor at binding 0 — same mechanism as the UBO in Demo 19, but for a storage buffer. No VkImage, no texture upload, no mipmap generation.
The fullscreen triangle trick: gl_VertexIndex 0→(-1,-1), 1→(3,-1), 2→(-1,3). Three vertices covering the entire viewport, no VkBuffer needed. The rasteriser clips automatically.
[COMPUTE PASS — runs once before render loop]
vkCmdBindPipeline(COMPUTE)
vkCmdBindDescriptorSets(compDS: binding0=inBuf, binding1=outBuf)
vkCmdPushConstants(width=256, height=256)
vkCmdDispatch(16, 16, 1) ← inverts 65,536 pixels into outBuf
vkCmdPipelineBarrier(
src=COMPUTE_SHADER ← wait for all compute writes
dst=FRAGMENT_SHADER) ← before fragment shader reads
[GRAPHICS PASS — every frame]
vkCmdBeginRenderPass
vkCmdBindPipeline(GRAPHICS)
vkCmdBindDescriptorSets(gfxDS: binding0=outBuf) ← same buffer!
vkCmdPushConstants(width=256, height=256)
vkCmdDraw(3, 1, 0, 0) ← fullscreen triangle, no VkBuffer
vkCmdEndRenderPass
glslc compute.comp -o comp.spv
glslc shader.vert -o vert.spv
glslc shader.frag -o frag.spv
copy *.spv build\Release\
#version 450
// Image-invert kernel: one invocation per pixel, 16x16 invocations per workgroup.
layout(local_size_x = 16, local_size_y = 16) in;
// One uint per pixel: R in bits 0-7, G in bits 8-15, B in bits 16-23, A in bits 24-31.
layout(std430, binding = 0) readonly buffer In { uint pixels_in[]; };
layout(std430, binding = 1) writeonly buffer Out { uint pixels_out[]; };
layout(push_constant) uniform PC { uint width; uint height; } pc;
void main() {
    uvec2 coord = gl_GlobalInvocationID.xy;
    // Invocations that fall outside the width x height image do nothing.
    if (coord.x >= pc.width || coord.y >= pc.height) return;

    uint idx = coord.y * pc.width + coord.x;
    uint src = pixels_in[idx];

    // Unpack the four 8-bit channels.
    uint r = src & 0xFFu;
    uint g = (src >> 8) & 0xFFu;
    uint b = (src >> 16) & 0xFFu;
    uint a = (src >> 24) & 0xFFu;

    // Invert the colour channels; alpha is copied through unchanged.
    uint inv_r = 255u - r;
    uint inv_g = 255u - g;
    uint inv_b = 255u - b;
    pixels_out[idx] = inv_r | (inv_g << 8) | (inv_b << 16) | (a << 24);
}
#version 450
// Fullscreen-triangle vertex shader: the position is derived from gl_VertexIndex
// alone, so the shader declares no vertex inputs.
// gl_VertexIndex 0→(-1,-1) 1→(3,-1) 2→(-1,3)
// The triangle covers the entire viewport; rasteriser clips to screen edges.
void main() {
    int ux = (gl_VertexIndex << 1) & 2;  // 0, 2, 0 for indices 0, 1, 2
    int uy = gl_VertexIndex & 2;         // 0, 0, 2 for indices 0, 1, 2
    gl_Position = vec4(vec2(ux, uy) * 2.0 - 1.0, 0.0, 1.0);
}
#version 450
layout(location = 0) out vec4 outColor;
// Packed pixels, one uint each: R in bits 0-7, G 8-15, B 16-23, A 24-31.
layout(std430, binding = 0) readonly buffer PixelBuffer {
    uint pixels[];
};
layout(push_constant) uniform PC { uint width; uint height; } pc;
void main() {
    uvec2 px = uvec2(gl_FragCoord.xy);
    // Fragments outside the width x height image region are written as vec4(0).
    if (px.x >= pc.width || px.y >= pc.height) { outColor = vec4(0); return; }

    // Vertical flip: the fragment row is mirrored, so buffer row 0 is drawn on the
    // last row of the image region and the last buffer row on the first.
    // NOTE(review): gl_FragCoord's origin is upper-left in Vulkan, so if the buffer
    // is meant to be shown top-down this flip draws it upside down — confirm intent.
    uint row = pc.height - 1u - px.y;
    uint texel = pixels[row * pc.width + px.x];

    // Unpack each 8-bit channel and normalise it to [0, 1].
    float r = float(texel & 0xFFu) / 255.0;
    float g = float((texel >> 8) & 0xFFu) / 255.0;
    float b = float((texel >> 16) & 0xFFu) / 255.0;
    float a = float((texel >> 24) & 0xFFu) / 255.0;
    outColor = vec4(r, g, b, a);
}
cmake_minimum_required(VERSION 3.20)
project(D21_ComputeDisplay)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
find_package(Vulkan REQUIRED)
# GLFW and GLM are located through environment variables, not find_package.
set(GLFW_DIR $ENV{GLFW_DIR})
set(GLM_DIR $ENV{GLM_DIR})
# Without GLFW_DIR the library path below would collapse to /lib-vc2022/glfw3.lib
# and only fail at link time, so stop at configure time with a clear message.
if(NOT GLFW_DIR)
  message(FATAL_ERROR "GLFW_DIR environment variable is not set")
endif()
add_executable(D21_ComputeDisplay src/main.cpp)
# Include paths are scoped to the one target instead of the whole directory.
target_include_directories(D21_ComputeDisplay PRIVATE
  ${GLFW_DIR}/include ${GLM_DIR} ${Vulkan_INCLUDE_DIRS})
target_link_libraries(D21_ComputeDisplay Vulkan::Vulkan ${GLFW_DIR}/lib-vc2022/glfw3.lib)
// =======================================================================
// RR GRAPHICS LAB — DAY 5 · DEMO 21 (FIXED)
// Compute → Display: Show GPU-processed image in a window
// By Raushan Ranjan (MCT) | Koenig Original AI-Courseware
//
// FIXES APPLIED vs previous version:
//
// FIX 1 — VUID-vkCmdPushConstants-offset-01796
// The compute pipeline layout's pcRange had stageFlags =
// COMPUTE_BIT | FRAGMENT_BIT (wrong — fragment stage is irrelevant here).
// vkCmdPushConstants must pass ALL stages declared in the overlapping
// pcRange, so the mismatch caused a validation error.
// Fix: give compPL its own pcRange with COMPUTE_BIT only.
// give gfxPL its own pcRange with FRAGMENT_BIT only.
// Each pipeline layout is independent — no need to combine them.
//
// FIX 2 — VUID-vkQueueSubmit-pSignalSemaphores-00067
// A single imgAvail semaphore was shared across all frames.
// After presentation, the semaphore may still be in use by the swapchain,
// but the next frame tried to signal it again immediately.
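// Fix: one imgAvail semaphore PER swapchain image, selected by a frameIndex
// counter that cycles 0..sic-1 (the acquired image index is only known after
// vkAcquireNextImageKHR returns, so it cannot pick the semaphore to signal).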
// This is the standard Vulkan "separate semaphore per image" pattern.
// =======================================================================
#define GLFW_INCLUDE_VULKAN
#include <GLFW/glfw3.h>
#include <iostream>
#include <vector>
#include <optional>
#include <set>
#include <algorithm>
#include <fstream>
#include <cstring>
#include <stdexcept>
const uint32_t IMG_W = 256, IMG_H = 256;
const uint32_t WIN_W = 900, WIN_H = 600;
struct PushConstants { uint32_t width, height; };
/// Reads an entire file into a byte vector (used here for compiled SPIR-V).
/// @param p  Path of the file to read.
/// @return   The file's bytes, in file order.
/// @throws std::runtime_error if the file cannot be opened, sized, or fully read.
static std::vector<char> readSpv(const std::string& p) {
    // Open positioned at the end so tellg() reports the file size.
    std::ifstream f(p, std::ios::ate | std::ios::binary);
    if (!f.is_open()) throw std::runtime_error("Cannot open: " + p);

    // tellg() yields -1 on failure; casting that straight to size_t would
    // request an enormous allocation instead of reporting the error.
    const std::streamoff end = f.tellg();
    if (end < 0) throw std::runtime_error("Cannot determine size of: " + p);

    std::vector<char> b(static_cast<size_t>(end));
    f.seekg(0);
    f.read(b.data(), static_cast<std::streamsize>(b.size()));
    // A short read would otherwise hand truncated, zero-padded data to the caller.
    if (!f) throw std::runtime_error("Cannot read: " + p);
    return b;
}
/// Creates a VkShaderModule from SPIR-V byte code.
/// @param d  Device on which the module is created.
/// @param c  SPIR-V byte code; its size in bytes is passed as codeSize.
/// @return   The created shader module.
/// @throws std::runtime_error if vkCreateShaderModule does not return VK_SUCCESS.
VkShaderModule makeMod(VkDevice d, const std::vector<char>& c) {
    VkShaderModuleCreateInfo ci{VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO};
    ci.codeSize = c.size();
    ci.pCode = reinterpret_cast<const uint32_t*>(c.data());
    // Check the result: on failure the handle is not written, and returning it
    // uninitialised would hand the caller a garbage module.
    VkShaderModule m = VK_NULL_HANDLE;
    if (vkCreateShaderModule(d, &ci, nullptr, &m) != VK_SUCCESS)
        throw std::runtime_error("vkCreateShaderModule failed");
    return m;
}
/// Pair of optional queue-family indices; each stays empty until assigned.
struct QFI {
    std::optional<uint32_t> g;
    std::optional<uint32_t> p;
    /// True once both indices hold a value. Const so it can be called on const objects.
    bool ok() const { return g.has_value() && p.has_value(); }
};
/// Looks up queue families on physical device d: one with VK_QUEUE_GRAPHICS_BIT
/// (stored in g) and one that reports presentation support for surface s (stored in p).
/// Families are scanned in index order and the scan stops as soon as both fields are
/// set; a field is left empty when no family qualifies.
QFI findQF(VkPhysicalDevice d,VkSurfaceKHR s){
    // First call fetches the count, second call fills the array.
    uint32_t familyCount;
    vkGetPhysicalDeviceQueueFamilyProperties(d, &familyCount, nullptr);
    std::vector<VkQueueFamilyProperties> families(familyCount);
    vkGetPhysicalDeviceQueueFamilyProperties(d, &familyCount, families.data());

    QFI found;
    for (uint32_t idx = 0; idx < familyCount; idx++) {
        if (families[idx].queueFlags & VK_QUEUE_GRAPHICS_BIT) {
            found.g = idx;
        }
        VkBool32 canPresent = 0;
        vkGetPhysicalDeviceSurfaceSupportKHR(d, idx, s, &canPresent);
        if (canPresent) {
            found.p = idx;
        }
        if (found.ok()) {
            break;
        }
    }
    return found;
}
/// Picks a memory type index for an allocation.
/// @param phys   Physical device whose memory types are searched.
/// @param bits   Bitmask of acceptable memory type indices (bit i set = type i allowed).
/// @param props  Property flags that the chosen type must all have.
/// @return       Index of the first memory type that satisfies both conditions.
/// @throws std::runtime_error if no memory type matches.
uint32_t findMem(VkPhysicalDevice phys,uint32_t bits,VkMemoryPropertyFlags props){
    VkPhysicalDeviceMemoryProperties mp;
    vkGetPhysicalDeviceMemoryProperties(phys, &mp);
    for (uint32_t i = 0; i < mp.memoryTypeCount; i++) {
        // Unsigned literal: i can reach 31 (VK_MAX_MEMORY_TYPES is 32), and
        // 1 << 31 does not fit in a signed int.
        const bool allowed = (bits & (1u << i)) != 0;
        const bool hasProps = (mp.memoryTypes[i].propertyFlags & props) == props;
        if (allowed && hasProps) return i;
    }
    throw std::runtime_error("No memory type");
}
/// Creates a buffer, allocates memory for it, and binds the memory at offset 0.
/// @param dev    Logical device used for every call.
/// @param phys   Physical device, passed to findMem() to choose the memory type.
/// @param sz     Buffer size passed to VkBufferCreateInfo::size.
/// @param use    Buffer usage flags.
/// @param props  Memory property flags the backing memory type must have.
/// @param buf    Receives the created buffer (VK_NULL_HANDLE if this function throws).
/// @param mem    Receives the bound allocation (VK_NULL_HANDLE if this function throws).
/// @throws std::runtime_error if a Vulkan call fails or no memory type matches.
void mkBuf(VkDevice dev,VkPhysicalDevice phys,VkDeviceSize sz,
           VkBufferUsageFlags use,VkMemoryPropertyFlags props,
           VkBuffer&buf,VkDeviceMemory&mem){
    buf = VK_NULL_HANDLE;
    mem = VK_NULL_HANDLE;

    // Undoes partial work so a failure does not leak the buffer or its memory.
    const auto release = [&] {
        if (buf != VK_NULL_HANDLE) { vkDestroyBuffer(dev, buf, nullptr); buf = VK_NULL_HANDLE; }
        if (mem != VK_NULL_HANDLE) { vkFreeMemory(dev, mem, nullptr); mem = VK_NULL_HANDLE; }
    };

    VkBufferCreateInfo bi{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
    bi.size = sz;
    bi.usage = use;
    bi.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    if (vkCreateBuffer(dev, &bi, nullptr, &buf) != VK_SUCCESS) {
        buf = VK_NULL_HANDLE;
        throw std::runtime_error("vkCreateBuffer failed");
    }

    // The driver reports the real allocation size and the permitted memory types.
    VkMemoryRequirements mr;
    vkGetBufferMemoryRequirements(dev, buf, &mr);

    VkMemoryAllocateInfo ai{VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
    ai.allocationSize = mr.size;
    try {
        ai.memoryTypeIndex = findMem(phys, mr.memoryTypeBits, props);
    } catch (...) {
        release();
        throw;
    }

    if (vkAllocateMemory(dev, &ai, nullptr, &mem) != VK_SUCCESS) {
        mem = VK_NULL_HANDLE;
        release();
        throw std::runtime_error("vkAllocateMemory failed");
    }
    if (vkBindBufferMemory(dev, buf, mem, 0) != VK_SUCCESS) {
        release();
        throw std::runtime_error("vkBindBufferMemory failed");
    }
}
int main(){
std::cout<<"\n=== Demo 21 — Compute to Display ===\n\n";
glfwInit();
glfwWindowHint(GLFW_CLIENT_API,GLFW_NO_API);
glfwWindowHint(GLFW_RESIZABLE,GLFW_FALSE);
GLFWwindow* win=glfwCreateWindow(WIN_W,WIN_H,"Demo 21 — Compute to Display (ESC=quit)",nullptr,nullptr);
uint32_t ec=0;const char**eg=glfwGetRequiredInstanceExtensions(&ec);
std::vector<const char*>exts(eg,eg+ec);
const std::vector<const char*>val={"VK_LAYER_KHRONOS_validation"};
VkApplicationInfo appAI{VK_STRUCTURE_TYPE_APPLICATION_INFO};appAI.apiVersion=VK_API_VERSION_1_0;
VkInstanceCreateInfo ici{VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO};
ici.pApplicationInfo=&appAI;
ici.enabledExtensionCount=(uint32_t)exts.size();ici.ppEnabledExtensionNames=exts.data();
ici.enabledLayerCount=(uint32_t)val.size();ici.ppEnabledLayerNames=val.data();
VkInstance inst;vkCreateInstance(&ici,nullptr,&inst);
VkSurfaceKHR surf;glfwCreateWindowSurface(inst,win,nullptr,&surf);
uint32_t dc=0;vkEnumeratePhysicalDevices(inst,&dc,nullptr);
std::vector<VkPhysicalDevice>pds(dc);vkEnumeratePhysicalDevices(inst,&dc,pds.data());
VkPhysicalDevice phys=pds[0];
for(auto&d:pds){VkPhysicalDeviceProperties p{};vkGetPhysicalDeviceProperties(d,&p);
if(findQF(d,surf).ok()&&p.deviceType==VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU){phys=d;break;}}
VkPhysicalDeviceProperties gp{};vkGetPhysicalDeviceProperties(phys,&gp);
std::cout<<"[1] GPU: "<<gp.deviceName<<"\n";
QFI qfi=findQF(phys,surf);
std::set<uint32_t>uq={qfi.g.value(),qfi.p.value()};float pr=1.f;
std::vector<VkDeviceQueueCreateInfo>qcis;
for(uint32_t f:uq){VkDeviceQueueCreateInfo q{VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO};
q.queueFamilyIndex=f;q.queueCount=1;q.pQueuePriorities=&pr;qcis.push_back(q);}
const std::vector<const char*>devExts={VK_KHR_SWAPCHAIN_EXTENSION_NAME};
VkPhysicalDeviceFeatures feat{};
VkDeviceCreateInfo dci{VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO};
dci.queueCreateInfoCount=(uint32_t)qcis.size();dci.pQueueCreateInfos=qcis.data();
dci.pEnabledFeatures=&feat;
dci.enabledExtensionCount=(uint32_t)devExts.size();dci.ppEnabledExtensionNames=devExts.data();
dci.enabledLayerCount=(uint32_t)val.size();dci.ppEnabledLayerNames=val.data();
VkDevice dev;vkCreateDevice(phys,&dci,nullptr,&dev);
VkQueue gfxQ,prsQ;
vkGetDeviceQueue(dev,qfi.g.value(),0,&gfxQ);
vkGetDeviceQueue(dev,qfi.p.value(),0,&prsQ);
VkSurfaceCapabilitiesKHR caps{};vkGetPhysicalDeviceSurfaceCapabilitiesKHR(phys,surf,&caps);
uint32_t fN=0;vkGetPhysicalDeviceSurfaceFormatsKHR(phys,surf,&fN,nullptr);
std::vector<VkSurfaceFormatKHR>fmts(fN);vkGetPhysicalDeviceSurfaceFormatsKHR(phys,surf,&fN,fmts.data());
VkSurfaceFormatKHR sf=fmts[0];
for(auto&x:fmts)if(x.format==VK_FORMAT_B8G8R8A8_SRGB&&x.colorSpace==VK_COLOR_SPACE_SRGB_NONLINEAR_KHR){sf=x;break;}
uint32_t pmN=0;vkGetPhysicalDeviceSurfacePresentModesKHR(phys,surf,&pmN,nullptr);
std::vector<VkPresentModeKHR>pms(pmN);vkGetPhysicalDeviceSurfacePresentModesKHR(phys,surf,&pmN,pms.data());
VkPresentModeKHR pm=VK_PRESENT_MODE_FIFO_KHR;
for(auto x:pms)if(x==VK_PRESENT_MODE_MAILBOX_KHR){pm=x;break;}
VkExtent2D ext=caps.currentExtent;
uint32_t imgN=std::min(caps.minImageCount+1,caps.maxImageCount>0?caps.maxImageCount:UINT32_MAX);
VkSwapchainCreateInfoKHR sci{VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR};
sci.surface=surf;sci.minImageCount=imgN;sci.imageFormat=sf.format;sci.imageColorSpace=sf.colorSpace;
sci.imageExtent=ext;sci.imageArrayLayers=1;sci.imageUsage=VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
uint32_t qfa[]={qfi.g.value(),qfi.p.value()};
if(qfi.g!=qfi.p){sci.imageSharingMode=VK_SHARING_MODE_CONCURRENT;sci.queueFamilyIndexCount=2;sci.pQueueFamilyIndices=qfa;}
else sci.imageSharingMode=VK_SHARING_MODE_EXCLUSIVE;
sci.preTransform=caps.currentTransform;sci.compositeAlpha=VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
sci.presentMode=pm;sci.clipped=VK_TRUE;
VkSwapchainKHR sc;vkCreateSwapchainKHR(dev,&sci,nullptr,&sc);
uint32_t sic=0;vkGetSwapchainImagesKHR(dev,sc,&sic,nullptr);
std::vector<VkImage>sis(sic);vkGetSwapchainImagesKHR(dev,sc,&sic,sis.data());
std::vector<VkImageView>sivs(sic);
for(uint32_t i=0;i<sic;i++){
VkImageViewCreateInfo iv{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
iv.image=sis[i];iv.viewType=VK_IMAGE_VIEW_TYPE_2D;iv.format=sf.format;
iv.components={VK_COMPONENT_SWIZZLE_IDENTITY,VK_COMPONENT_SWIZZLE_IDENTITY,
VK_COMPONENT_SWIZZLE_IDENTITY,VK_COMPONENT_SWIZZLE_IDENTITY};
iv.subresourceRange={VK_IMAGE_ASPECT_COLOR_BIT,0,1,0,1};
vkCreateImageView(dev,&iv,nullptr,&sivs[i]);
}
VkAttachmentDescription att{};att.format=sf.format;att.samples=VK_SAMPLE_COUNT_1_BIT;
att.loadOp=VK_ATTACHMENT_LOAD_OP_CLEAR;att.storeOp=VK_ATTACHMENT_STORE_OP_STORE;
att.stencilLoadOp=VK_ATTACHMENT_LOAD_OP_DONT_CARE;att.stencilStoreOp=VK_ATTACHMENT_STORE_OP_DONT_CARE;
att.initialLayout=VK_IMAGE_LAYOUT_UNDEFINED;att.finalLayout=VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
VkAttachmentReference ref{0,VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL};
VkSubpassDescription sub{};sub.pipelineBindPoint=VK_PIPELINE_BIND_POINT_GRAPHICS;
sub.colorAttachmentCount=1;sub.pColorAttachments=&ref;
VkSubpassDependency dep{};dep.srcSubpass=VK_SUBPASS_EXTERNAL;dep.dstSubpass=0;
dep.srcStageMask=VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;dep.srcAccessMask=0;
dep.dstStageMask=VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;dep.dstAccessMask=VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
VkRenderPassCreateInfo rpi{VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO};
rpi.attachmentCount=1;rpi.pAttachments=&att;rpi.subpassCount=1;rpi.pSubpasses=&sub;
rpi.dependencyCount=1;rpi.pDependencies=&dep;
VkRenderPass renderPass;vkCreateRenderPass(dev,&rpi,nullptr,&renderPass);
std::vector<VkFramebuffer>fbs(sic);
for(uint32_t i=0;i<sic;i++){
VkFramebufferCreateInfo f{VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO};
f.renderPass=renderPass;f.attachmentCount=1;f.pAttachments=&sivs[i];
f.width=ext.width;f.height=ext.height;f.layers=1;
vkCreateFramebuffer(dev,&f,nullptr,&fbs[i]);
}
std::cout<<"[2] Swapchain + render pass + "<<sic<<" framebuffers\n";
VkDeviceSize bufSz=IMG_W*IMG_H*4;
VkBuffer inBuf,outBuf;VkDeviceMemory inMem,outMem;
mkBuf(dev,phys,bufSz,VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT|VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,inBuf,inMem);
mkBuf(dev,phys,bufSz,VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT|VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,outBuf,outMem);
uint8_t*inData;vkMapMemory(dev,inMem,0,bufSz,0,(void**)&inData);
for(uint32_t y=0;y<IMG_H;y++)for(uint32_t x=0;x<IMG_W;x++){
uint32_t i=(y*IMG_W+x)*4;
inData[i+0]=(uint8_t)x;inData[i+1]=(uint8_t)y;inData[i+2]=128;inData[i+3]=255;
}
vkUnmapMemory(dev,inMem);
std::cout<<"[3] Gradient image in input buffer ("<<(bufSz/1024)<<" KB)\n";
// ── FIX 1: Separate push constant ranges per pipeline layout ─────────
// compPL only needs COMPUTE_BIT — fragment stage is irrelevant here.
// gfxPL only needs FRAGMENT_BIT — vertex shader uses no push constants.
VkPushConstantRange compPC{VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(PushConstants)};
VkPushConstantRange gfxPC {VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(PushConstants)};
// Descriptor layouts
VkDescriptorSetLayoutBinding cBindings[2]{};
cBindings[0]={0,VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,1,VK_SHADER_STAGE_COMPUTE_BIT,nullptr};
cBindings[1]={1,VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,1,VK_SHADER_STAGE_COMPUTE_BIT,nullptr};
VkDescriptorSetLayoutCreateInfo cdslci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
cdslci.bindingCount=2;cdslci.pBindings=cBindings;
VkDescriptorSetLayout compDSL;vkCreateDescriptorSetLayout(dev,&cdslci,nullptr,&compDSL);
VkDescriptorSetLayoutBinding gBinding={0,VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,1,VK_SHADER_STAGE_FRAGMENT_BIT,nullptr};
VkDescriptorSetLayoutCreateInfo gdslci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
gdslci.bindingCount=1;gdslci.pBindings=&gBinding;
VkDescriptorSetLayout gfxDSL;vkCreateDescriptorSetLayout(dev,&gdslci,nullptr,&gfxDSL);
VkDescriptorPoolSize dps{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,3};
VkDescriptorPoolCreateInfo dpci{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
dpci.poolSizeCount=1;dpci.pPoolSizes=&dps;dpci.maxSets=2;
VkDescriptorPool dPool;vkCreateDescriptorPool(dev,&dpci,nullptr,&dPool);
VkDescriptorSetLayout dsLayouts[2]={compDSL,gfxDSL};
VkDescriptorSetAllocateInfo dsai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
dsai.descriptorPool=dPool;dsai.descriptorSetCount=2;dsai.pSetLayouts=dsLayouts;
VkDescriptorSet dSets[2];vkAllocateDescriptorSets(dev,&dsai,dSets);
VkDescriptorSet compDS=dSets[0],gfxDS=dSets[1];
VkDescriptorBufferInfo dbi0{inBuf,0,bufSz},dbi1{outBuf,0,bufSz};
VkWriteDescriptorSet wds[3]{};
wds[0]={VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,nullptr,compDS,0,0,1,VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,nullptr,&dbi0};
wds[1]={VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,nullptr,compDS,1,0,1,VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,nullptr,&dbi1};
wds[2]={VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,nullptr,gfxDS, 0,0,1,VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,nullptr,&dbi1};
vkUpdateDescriptorSets(dev,3,wds,0,nullptr);
std::cout<<"[4] Descriptors: compDS(in+out) + gfxDS(out\xe2\x86\x92display)\n";
// Pipeline layouts — each with its own independent push constant range
VkPipelineLayoutCreateInfo cpli{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
cpli.setLayoutCount=1;cpli.pSetLayouts=&compDSL;
cpli.pushConstantRangeCount=1;cpli.pPushConstantRanges=&compPC;
VkPipelineLayout compPL;vkCreatePipelineLayout(dev,&cpli,nullptr,&compPL);
VkPipelineLayoutCreateInfo gpli{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
gpli.setLayoutCount=1;gpli.pSetLayouts=&gfxDSL;
gpli.pushConstantRangeCount=1;gpli.pPushConstantRanges=&gfxPC;
VkPipelineLayout gfxPL;vkCreatePipelineLayout(dev,&gpli,nullptr,&gfxPL);
// Compute pipeline
auto compCode=readSpv("comp.spv");VkShaderModule compMod=makeMod(dev,compCode);
VkComputePipelineCreateInfo cpc{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
cpc.stage={VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,nullptr,0,VK_SHADER_STAGE_COMPUTE_BIT,compMod,"main",nullptr};
cpc.layout=compPL;
VkPipeline compPipeline;vkCreateComputePipelines(dev,VK_NULL_HANDLE,1,&cpc,nullptr,&compPipeline);
vkDestroyShaderModule(dev,compMod,nullptr);
// Graphics pipeline
auto vc=readSpv("vert.spv"),fc=readSpv("frag.spv");
VkShaderModule vm=makeMod(dev,vc),fm=makeMod(dev,fc);
VkPipelineShaderStageCreateInfo stg[2]{};
stg[0]={VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,nullptr,0,VK_SHADER_STAGE_VERTEX_BIT, vm,"main",nullptr};
stg[1]={VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,nullptr,0,VK_SHADER_STAGE_FRAGMENT_BIT,fm,"main",nullptr};
VkPipelineVertexInputStateCreateInfo vi{VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO};
VkPipelineInputAssemblyStateCreateInfo ia{VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO};
ia.topology=VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
VkViewport vp{0,0,(float)ext.width,(float)ext.height,0,1};VkRect2D sc2{{0,0},ext};
VkPipelineViewportStateCreateInfo vps{VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO};
vps.viewportCount=1;vps.pViewports=&vp;vps.scissorCount=1;vps.pScissors=&sc2;
VkPipelineRasterizationStateCreateInfo rs{VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO};
rs.polygonMode=VK_POLYGON_MODE_FILL;rs.cullMode=VK_CULL_MODE_NONE;rs.frontFace=VK_FRONT_FACE_CLOCKWISE;rs.lineWidth=1.f;
VkPipelineMultisampleStateCreateInfo ms{VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO};
ms.rasterizationSamples=VK_SAMPLE_COUNT_1_BIT;
VkPipelineColorBlendAttachmentState cba{};cba.colorWriteMask=0xF;
VkPipelineColorBlendStateCreateInfo cb{VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO};
cb.attachmentCount=1;cb.pAttachments=&cba;
VkGraphicsPipelineCreateInfo gpci{VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO};
gpci.stageCount=2;gpci.pStages=stg;
gpci.pVertexInputState=&vi;gpci.pInputAssemblyState=&ia;gpci.pViewportState=&vps;
gpci.pRasterizationState=&rs;gpci.pMultisampleState=&ms;gpci.pColorBlendState=&cb;
gpci.layout=gfxPL;gpci.renderPass=renderPass;gpci.subpass=0;
VkPipeline gfxPipeline;vkCreateGraphicsPipelines(dev,VK_NULL_HANDLE,1,&gpci,nullptr,&gfxPipeline);
vkDestroyShaderModule(dev,vm,nullptr);vkDestroyShaderModule(dev,fm,nullptr);
std::cout<<"[5] Compute + graphics pipelines created\n";
// ── Command pool + sync ───────────────────────────────────────────────
VkCommandPoolCreateInfo cpci2{VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO};
cpci2.flags=VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
cpci2.queueFamilyIndex=qfi.g.value();
VkCommandPool cmdPool;vkCreateCommandPool(dev,&cpci2,nullptr,&cmdPool);
std::vector<VkCommandBuffer>cmds(sic);
VkCommandBufferAllocateInfo cbai{VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO};
cbai.commandPool=cmdPool;cbai.level=VK_COMMAND_BUFFER_LEVEL_PRIMARY;cbai.commandBufferCount=sic;
vkAllocateCommandBuffers(dev,&cbai,cmds.data());
// FIX 2: One imgAvail semaphore PER swapchain image
std::vector<VkSemaphore> imgAvail(sic);
VkSemaphore renderDone;VkFence inFlight;
VkSemaphoreCreateInfo semci{VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO};
VkFenceCreateInfo fenci{VK_STRUCTURE_TYPE_FENCE_CREATE_INFO};
fenci.flags=VK_FENCE_CREATE_SIGNALED_BIT;
for(uint32_t i=0;i<sic;i++)
vkCreateSemaphore(dev,&semci,nullptr,&imgAvail[i]);
vkCreateSemaphore(dev,&semci,nullptr,&renderDone);
vkCreateFence(dev,&fenci,nullptr,&inFlight);
// ── Run compute once before render loop ───────────────────────────────
{
VkCommandBufferAllocateInfo oai{VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO};
oai.commandPool=cmdPool;oai.level=VK_COMMAND_BUFFER_LEVEL_PRIMARY;oai.commandBufferCount=1;
VkCommandBuffer oc;vkAllocateCommandBuffers(dev,&oai,&oc);
VkCommandBufferBeginInfo bi{VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO};
bi.flags=VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
vkBeginCommandBuffer(oc,&bi);
vkCmdBindPipeline(oc,VK_PIPELINE_BIND_POINT_COMPUTE,compPipeline);
vkCmdBindDescriptorSets(oc,VK_PIPELINE_BIND_POINT_COMPUTE,compPL,0,1,&compDS,0,nullptr);
PushConstants pc{IMG_W,IMG_H};
vkCmdPushConstants(oc,compPL,VK_SHADER_STAGE_COMPUTE_BIT,0,sizeof(pc),&pc);
vkCmdDispatch(oc,(IMG_W+15)/16,(IMG_H+15)/16,1);
VkMemoryBarrier bar{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
bar.srcAccessMask=VK_ACCESS_SHADER_WRITE_BIT;bar.dstAccessMask=VK_ACCESS_SHADER_READ_BIT;
vkCmdPipelineBarrier(oc,VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,0,1,&bar,0,nullptr,0,nullptr);
vkEndCommandBuffer(oc);
VkFenceCreateInfo of{VK_STRUCTURE_TYPE_FENCE_CREATE_INFO};
VkFence oFence;vkCreateFence(dev,&of,nullptr,&oFence);
VkSubmitInfo osi{VK_STRUCTURE_TYPE_SUBMIT_INFO};
osi.commandBufferCount=1;osi.pCommandBuffers=&oc;
vkQueueSubmit(gfxQ,1,&osi,oFence);
vkWaitForFences(dev,1,&oFence,VK_TRUE,UINT64_MAX);
vkDestroyFence(dev,oFence,nullptr);
}
std::cout<<"[6] Compute dispatch complete — output buffer filled\n";
std::cout<<"[7] Render loop starting. ESC=quit.\n\n";
// ── Render loop ───────────────────────────────────────────────────────
uint32_t frameIndex = 0;
while(!glfwWindowShouldClose(win)){
glfwPollEvents();
if(glfwGetKey(win,GLFW_KEY_ESCAPE)==GLFW_PRESS)glfwSetWindowShouldClose(win,GLFW_TRUE);
vkWaitForFences(dev,1,&inFlight,VK_TRUE,UINT64_MAX);
vkResetFences(dev,1,&inFlight);
uint32_t idx;
vkAcquireNextImageKHR(dev,sc,UINT64_MAX,imgAvail[frameIndex],VK_NULL_HANDLE,&idx);
auto&cmd=cmds[idx];vkResetCommandBuffer(cmd,0);
VkCommandBufferBeginInfo cbbi{VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO};
vkBeginCommandBuffer(cmd,&cbbi);
VkClearValue clr{};clr.color={{0.06f,0.06f,0.08f,1.f}};
VkRenderPassBeginInfo rpbi{VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO};
rpbi.renderPass=renderPass;rpbi.framebuffer=fbs[idx];
rpbi.renderArea={{0,0},ext};rpbi.clearValueCount=1;rpbi.pClearValues=&clr;
vkCmdBeginRenderPass(cmd,&rpbi,VK_SUBPASS_CONTENTS_INLINE);
vkCmdBindPipeline(cmd,VK_PIPELINE_BIND_POINT_GRAPHICS,gfxPipeline);
vkCmdBindDescriptorSets(cmd,VK_PIPELINE_BIND_POINT_GRAPHICS,gfxPL,0,1,&gfxDS,0,nullptr);
PushConstants pc{IMG_W,IMG_H};
vkCmdPushConstants(cmd,gfxPL,VK_SHADER_STAGE_FRAGMENT_BIT,0,sizeof(pc),&pc);
vkCmdDraw(cmd,3,1,0,0);
vkCmdEndRenderPass(cmd);vkEndCommandBuffer(cmd);
VkPipelineStageFlags ws=VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
VkSubmitInfo si{VK_STRUCTURE_TYPE_SUBMIT_INFO};
si.waitSemaphoreCount=1;si.pWaitSemaphores=&imgAvail[frameIndex];si.pWaitDstStageMask=&ws;
si.commandBufferCount=1;si.pCommandBuffers=&cmd;
si.signalSemaphoreCount=1;si.pSignalSemaphores=&renderDone;
vkQueueSubmit(gfxQ,1,&si,inFlight);
VkPresentInfoKHR pi{VK_STRUCTURE_TYPE_PRESENT_INFO_KHR};
pi.waitSemaphoreCount=1;pi.pWaitSemaphores=&renderDone;
pi.swapchainCount=1;pi.pSwapchains=&sc;pi.pImageIndices=&idx;
vkQueuePresentKHR(prsQ,&pi);
frameIndex = (frameIndex + 1) % sic;
}
vkDeviceWaitIdle(dev);
vkDestroyFence(dev,inFlight,nullptr);
vkDestroySemaphore(dev,renderDone,nullptr);
for(auto&s:imgAvail)vkDestroySemaphore(dev,s,nullptr);
vkDestroyCommandPool(dev,cmdPool,nullptr);
vkDestroyPipeline(dev,gfxPipeline,nullptr);vkDestroyPipelineLayout(dev,gfxPL,nullptr);
vkDestroyPipeline(dev,compPipeline,nullptr);vkDestroyPipelineLayout(dev,compPL,nullptr);
vkDestroyDescriptorPool(dev,dPool,nullptr);
vkDestroyDescriptorSetLayout(dev,compDSL,nullptr);vkDestroyDescriptorSetLayout(dev,gfxDSL,nullptr);
vkDestroyBuffer(dev,inBuf,nullptr);vkFreeMemory(dev,inMem,nullptr);
vkDestroyBuffer(dev,outBuf,nullptr);vkFreeMemory(dev,outMem,nullptr);
for(auto fb:fbs)vkDestroyFramebuffer(dev,fb,nullptr);
vkDestroyRenderPass(dev,renderPass,nullptr);
for(auto iv:sivs)vkDestroyImageView(dev,iv,nullptr);
vkDestroySwapchainKHR(dev,sc,nullptr);vkDestroySurfaceKHR(inst,surf,nullptr);
vkDestroyDevice(dev,nullptr);vkDestroyInstance(inst,nullptr);
glfwDestroyWindow(win);glfwTerminate();
std::cout<<"Clean shutdown. Validation: 0 errors.\n";
return 0;
}
cd C:\Labs\D21_ComputeDisplay
glslc compute.comp -o comp.spv
glslc shader.vert -o vert.spv
glslc shader.frag -o frag.spv
cmake -B build -G "Visual Studio 17 2022" -A x64
cmake --build build --config Release
copy *.spv build\Release\
build\Release\D21_ComputeDisplay.exe
- Display input instead of output: In the graphics descriptor write, point gfxDS binding 0 at dbi0 (inBuf) instead of dbi1 (outBuf). The window shows the original gradient (cyan/blue). Same display pipeline, different buffer.
- Change kernel to greyscale: Edit compute.comp: replace the invert lines with `uint lum=(r+g+b)/3u; pixels_out[idx]=lum|(lum<<8)|(lum<<16)|(a<<24);` Recompile with glslc. The window shows greyscale. Display pipeline unchanged.
- Centre the image in the window: In shader.frag, offset the coordinates: `uint x = uint(gl_FragCoord.x) - (900u - 256u)/2u;` and the same for y. Recompile with glslc. The image shifts to the centre.
- Re-dispatch each frame: Move the compute block inside the render loop, before vkCmdBeginRenderPass. The image updates live each frame. Animate the kernel by passing a time value via push constants — the foundation of real-time GPU image processing.
Koenig Original AI-Courseware · Day 5 Complete