December 6, 2023
Radiance Cascades 2D GI (global illumination) implementation
// Runs the radiance-cascades GI shader over `in_bitmap` (w x h): uploads the bitmap into the
// input texture, builds the cascades from index cn-1 down to 0 while ping-ponging between two
// textures, then issues a final render pass with the render uniforms.
// GPU resources live in function-local statics and are created on the first call only.
// NOTE(review): several brace/statement lines are not visible in this listing, so the draw-call
// invocations and the exact scope boundaries cannot be read from here.
static void gi_on_gpu(u8* in_bitmap, int w, int h) {
#define num_cascades 7
static bool initialized;
static gpu_bindgroup_t texture_bindgroup[2];
static gpu_bindgroup_t cascade_uniform_bindgroup[num_cascades];
static gpu_bindgroup_t render_uniform_bindgroup;
// NOTE(review): vertex_buffer (and texture_lifetime below) are not referenced in any visible line.
static gpu_buffer_t vertex_buffer;
static gpu_buffer_t uniform_buffer;
static gpu_pipeline_t pipeline;
static gpu_bindgroup_layout_t uniform_bindgroup_layout;
static gpu_bindgroup_layout_t texture_bindgroup_layout;
static lifetime_t texture_lifetime;
static gpu_texture_t textures[2];
static gpu_texture_t input_texture;
lifetime_t* lifetime = g_platform->lifetime;
f32 d0 = 1.f; // distance between probes in cascade 0
int r0 = 4; // number of rays in cascade 0
int n0 = (int)floorf(2*w/d0); // number of probes in cascade 0 per dimension
int cn = num_cascades;
// CPU-side mirror of the std140 `Uniform` block declared in the fragment shader; the fields
// line up one-to-one (do_render <-> should_do_render, resolution <-> u_resolution).
typedef struct {
f32 d0;
int r0;
int n0;
int ci;
int cn;
int do_render;
int add_sky_light;
int padding;
v2 resolution;
v2 padding2;
} uniform_t;
// One-time setup, guarded by the static flag.
if (!initialized) {
// Only used for the shader source read below.
// NOTE(review): no release of temp_lifetime is visible in this listing — confirm it is freed.
lifetime_t temp_lifetime = {0};
initialized = true;
// create bindgroup layouts
uniform_bindgroup_layout = gpu_bindgroup_layout_make(lifetime, &(gpu_bindgroup_layout_desc_t){
.name = "gi uniform bgl",
.entries = {
.visibility = gpu_visibility_fragment,
.type = gpu_binding_type_buffer,
.buffer.type = gpu_buffer_binding_type_uniform,
// Two fragment-visible sampler entries: the input bitmap and the previous cascade.
texture_bindgroup_layout = gpu_bindgroup_layout_make(lifetime, &(gpu_bindgroup_layout_desc_t){
.name = "gi texture bgl",
.entries = {
.visibility = gpu_visibility_fragment,
.type = gpu_binding_type_sampler,
.visibility = gpu_visibility_fragment,
.type = gpu_binding_type_sampler,
// create pipeline
pipeline = gpu_pipeline_make(lifetime, &(gpu_pipeline_desc_t){
.name = "gi render shader",
.code = file_read("shaders/gi.glsl", &temp_lifetime).bytes,
.bgls = {
// create uniform buffer (we pack all our different uniforms in one buffer), one per cascade and one for rendering
gpu_uniform_packer_t p = gpu_uniform_packer_begin(sizeof(uniform_t), num_cascades+1, lifetime);
uniform_buffer = p.handle;
// NOTE(review): d0/r0/n0/resolution are written into the uniform buffer only in this setup
// path as far as the visible lines show, so they reflect the w/h of the first call — confirm
// that later calls always pass the same size.
// set cascade uniforms
for (int i = 0; i < num_cascades; ++i) {
*(uniform_t*) = (uniform_t){
.d0 = d0,
.r0 = r0,
.n0 = n0,
.ci = i,
.cn = num_cascades,
.add_sky_light = 1,
.resolution = {(f32)w,(f32)h},
cascade_uniform_bindgroup[i] = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
.name = "gi",
.layout = uniform_bindgroup_layout,
.entries = {gpu_uniform_packer_bindgroup_entry(&p)},
// set render uniform
// Same parameters as cascade 0, but with do_render = 1 so the shader takes its render branch.
*(uniform_t*) = (uniform_t){
.d0 = d0,
.r0 = r0,
.n0 = n0,
.ci = 0,
.cn = num_cascades,
.do_render = 1,
.resolution = {(f32)w,(f32)h},
render_uniform_bindgroup = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
.name = "gi",
.layout = uniform_bindgroup_layout,
.entries = {gpu_uniform_packer_bindgroup_entry(&p)},
// create textures
input_texture = gpu_texture_make(w, h, gpu_texture_format_rgb8, filter_type_nearest, false, lifetime);
// White border: the shader treats a (1,1,1) texel as "no hit", so samples outside the bitmap count as empty.
gpu_texture_set_border(input_texture, (color_t){1,1,1,1});
// Ping-pong targets, r0*n0 texels wide and n0 texels high.
textures[0] = gpu_texture_make(r0*n0, n0, gpu_texture_format_rgba8, filter_type_nearest, false, lifetime);
textures[1] = gpu_texture_make(r0*n0, n0, gpu_texture_format_rgba8, filter_type_nearest, false, lifetime);
// texture_bindgroup[k] pairs the input bitmap with textures[k] as the "previous cascade" sampler.
texture_bindgroup[0] = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
.name = "gi",
.layout = texture_bindgroup_layout,
.entries = {
{.sampler = {input_texture}},
{.sampler = {textures[0]}},
texture_bindgroup[1] = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
.name = "gi",
.layout = texture_bindgroup_layout,
.entries = {
{.sampler = {input_texture}},
{.sampler = {textures[1]}},
// update input texture
gpu_texture_set_data(input_texture, in_bitmap);
// clear texture for pingponging
// textures[(cn-1)%2] is the texture the first pass (i = cn-1) reads through texture_bindgroup[i%2].
gpu_texture_clear(textures[(cn-1)%2], (color_t){0});
// build cascades
// Pass i reads textures[i%2] and writes textures[(i+1)%2], going from the top cascade down to 0.
for (int i = cn-1; i >= 0; --i) {
.pipeline = pipeline,
.last_vertex = 6,
.bindgroups = {cascade_uniform_bindgroup[i], texture_bindgroup[i%2]},
.outputs = {textures[(i+1)%2]},
// render
// NOTE(review): pass i = 0 always writes textures[1], so this pass needs texture_bindgroup[1];
// cn%2 equals 1 only while num_cascades is odd (it is 7 here).
.pipeline = pipeline,
.last_vertex = 6,
.bindgroups = {render_uniform_bindgroup, texture_bindgroup[cn%2]},
#undef num_cascades
// UV handed to the fragment stage: x and y in [0,1], with y = 0 at clip-space y = +1.
out vec2 fuv;
// a vertex shader that spits out a screen-size quad
// call with vertex count = 6
void main(void) {
// NOTE(review): the elements of this array initializer are not visible in this listing.
vec2[] positions = vec2[](
// Indices wrap every 6 vertices.
vec2 vpos = positions[gl_VertexID%6];
gl_Position = vec4(vpos, 0, 1);
// Remap clip space [-1,1] to [0,1], then flip vertically.
fuv = vpos*0.5+0.5;
fuv.y = 1 - fuv.y;
#endif /* VERTEX_SHADER */
// Per-pass parameters. Field order matches the host-side uniform_t struct; the two padding
// members exist on both sides.
layout (std140, binding = 0) uniform Uniform
float d0; // distance between probes in cascade 0
int r0; // number of rays in cascade 0
int n0; // number of probes in cascade 0 (per dimension)
int ci; // cascade number
int cn; // total number of cascades
int should_do_render; // we switch on this to render instead of building the cascades
int add_sky_light; // set to 1 to add sky lighting to uppermost cascade
int padding;
vec2 u_resolution; // resolution of the input texture
vec2 padding4;
layout(binding = 1) uniform sampler2D u_input; // world data that we raytrace through
layout(binding = 2) uniform sampler2D u_prev; // previous cascade (ping-pong this and the output texture)
// Interpolated UV from the vertex stage.
in vec2 fuv;
// Single colour output: one texel of the cascade being built, or the final pixel when rendering.
layout(location = 0) out vec4 ocolor;
const float PI = 3.1415927;
// raymarch2d: Implementation of Amanatides & Woo voxel marching algo
// Traversal state: created by raymarch2d_make, advanced one cell at a time by raymarch2d_next.
struct raymarch2d_t {
// Current cell; initialised to floor() of the ray start.
int x;
int y;
// NOTE(review): the assignments to sx/sy are not legible in this listing — presumably the
// per-axis step direction (-1, 0 or +1); confirm against the original file.
int sx;
int sy;
// Sentinel cell coordinates: stepping stops once x reaches ex (or y reaches ey).
int ex;
int ey;
// Per-axis ray parameters compared to decide which axis to step next.
float tmx;
float tmy;
// Per-axis increments added to tmx/tmy after each step on that axis.
float tdx;
float tdy;
// Builds the traversal state for marching grid cells along the segment (x0,y0) -> (x1,y1).
//   x0, y0: segment start, in cell (texel) units.
//   x1, y1: segment end, in cell units. Must differ from the start: the direction is
//           normalised with 1/sqrt(dx*dx + dy*dy), which is undefined for a zero-length segment.
// Returns a raymarch2d_t positioned on the start cell; call raymarch2d_next to advance it.
raymarch2d_t raymarch2d_make(float x0, float y0, float x1, float y1) {
    raymarch2d_t res;

    // Start cell.
    res.x = int(floor(x0));
    res.y = int(floor(y0));

    // Step direction per axis: -1, 0 or +1.
    res.sx = x0 < x1 ? 1 : x1 < x0 ? -1 : 0;
    res.sy = y0 < y1 ? 1 : y1 < y0 ? -1 : 0;

    // Sentinel cell two steps past the end cell; raymarch2d_next stops when it is reached.
    res.ex = int(floor(x1)) + 2*res.sx;
    res.ey = int(floor(y1)) + 2*res.sy;

    // Unit direction of the segment.
    float dx = x1 - x0;
    float dy = y1 - y0;
    float l = 1.f/sqrt(dx*dx + dy*dy);
    dx *= l;
    dy *= l;

    // Axis-aligned rays never cross a boundary on the other axis: give that axis a huge
    // tm so it is never selected, and a zero increment.
    // NOTE(review): textbook Amanatides & Woo measures the initial tm to the *next* cell
    // boundary in the step direction; the expression below is kept exactly as written in
    // the original — confirm it is intended.
    res.tmx = dx == 0 ? 10000000 : (x0 - res.x)/dx;
    res.tmy = dy == 0 ? 10000000 : (y0 - res.y)/dy;

    // Ray-parameter distance between two consecutive boundaries on each axis (always >= 0).
    res.tdx = dx == 0 ? 0 : res.sx/dx;
    res.tdy = dy == 0 ? 0 : res.sy/dy;
    return res;
}
// Advances the traversal by one cell along whichever axis has the smaller pending ray parameter.
//   r: traversal state from raymarch2d_make; updated in place (r.x / r.y is the new current cell).
// Returns false once the stepped axis reaches its sentinel coordinate (ex / ey), i.e. the
// march is finished; true while there are more cells to visit.
bool raymarch2d_next(inout raymarch2d_t r) {
    if (r.tmx < r.tmy) {
        // The next boundary crossed is a vertical one: step in x.
        r.tmx += r.tdx;
        r.x += r.sx;
        return r.x != r.ex;
    }
    else {
        // Otherwise (including ties) step in y.
        r.tmy += r.tdy;
        r.y += r.sy;
        return r.y != r.ey;
    }
}
// ACES-style filmic tone mapping with luminance-driven desaturation.
//   color: linear HDR colour (components may exceed 1).
// Returns the tone-mapped colour with every component clamped to [0,1]. Bright inputs are
// blended towards their tone-mapped luminance (grey/white) so highlights desaturate
// instead of clipping per channel.
vec3 tonemap_aces(vec3 color) {
    const float slope = 12.0;

    // rgb plus Rec.601 luma in the fourth component, so one vector expression maps all four.
    vec4 x = vec4(
        color.r, color.g, color.b,
        (color.r * 0.299) + (color.g * 0.587) + (color.b * 0.114)
    );

    // Coefficients of the rational ACES curve fit.
    const float a = 2.51f;
    const float b = 0.03f;
    const float c = 2.43f;
    const float d = 0.59f;
    const float e = 0.14f;
    vec4 tonemap = clamp((x * (a * x + b)) / (x * (c * x + d) + e), 0.0, 1.0);

    // Blend weight grows with the input luminance: ~0 for dark pixels, large for very bright ones.
    float t = x.a;
    t = t * t / (slope + t);

    // Mix the tone-mapped colour with its tone-mapped luminance.
    return mix(tonemap.rgb, tonemap.aaa, t);
}
// Closed-form sky + sun radiance integrated over the direction interval
// [angle[0], angle[1]] (radians, because they are fed to cos/atan). The result is the
// integral, not an average — the caller divides by the interval width.
vec3 sky_(vec2 angle) {
float a1 = angle[1];
float a0 = angle[0];
// Sky integral formula taken from
// Analytic Direct Illumination - Mathis
const vec3 SkyColor = vec3(0.2,0.5,1.);
const vec3 SunColor = vec3(1.,0.7,0.1)*10.;
// The atan term below is the integral of a peak centred on the angle SunA; a larger SunS narrows it.
const float SunA = 2.0;
const float SunS = 64.0;
const float SSunS = sqrt(SunS);
const float ISSunS = 1./SSunS;
// Sky term: integral of (1 + 0.5*sin(a)) over [a0, a1], tinted by SkyColor.
vec3 SI = SkyColor*(a1-a0-0.5*(cos(a1)-cos(a0)));
// Sun term: integral of 1/(1 + SunS*(SunA - a)^2) over [a0, a1], tinted by SunColor.
SI += SunColor*(atan(SSunS*(SunA-a0))-atan(SSunS*(SunA-a1)))*ISSunS;
return SI / 6.0;
// Integrates the sky radiance over the direction interval [angle[0], angle[1]] (radians),
// handling intervals whose end runs past a full turn.
//   angle: (start, end) of the interval, with start <= end.
// Returns the integrated radiance; the caller divides by the interval width to get an average.
vec3 sky(vec2 angle) {
    // Interval lies within a single turn: integrate it directly.
    if (angle[1] < 2.0 * PI) {
        return sky_(angle);
    }

    // Interval wraps past 2*PI: split it into [start, 2*PI] and [0, end - 2*PI] and sum both parts.
    return sky_(vec2(angle[0], 2.0 * PI)) +
           sky_(vec2(0.0, angle[1] - 2.0 * PI));
}
// Fragment entry point with two modes, selected by should_do_render:
//   - render: average the rays of the nearest cascade-0 probe from u_prev and tone-map the result;
//   - build:  compute one texel (one ray of one probe) of cascade `ci`, raymarching u_input and
//             falling back to the upper cascade stored in u_prev (or the sky) when nothing is hit.
// Cascade texture layout used below: row = probe y index, and each probe occupies `rn`
// consecutive texels in x, one per ray.
// NOTE(review): brace-only and some statement lines are not visible in this listing, so scope
// ends and any `return` / `break` / `else` statements cannot be confirmed from here.
void main(void) {
if (should_do_render == 1) {
// sample probe in cascade 0
float x = fuv.x * u_resolution.x;
float y = fuv.y * u_resolution.y;
// Index of the probe nearest to this pixel.
float xi = round(x/d0);
float yi = round(y/d0);
vec3 c = vec3(0,0,0);
// Sum the r0 ray texels of that probe, sampling at texel centres.
for (int r = 0; r < r0; ++r) {
vec2 pixelcoord = floor(vec2(xi*r0 + r, yi)) + 0.5;
c += texture(u_prev, pixelcoord / textureSize(u_prev, 0)).rgb;
// Average over the rays, then tone-map.
ocolor = vec4(tonemap_aces(c/r0),1);
else {
// build cascade
// Output texel being written.
int u = int(gl_FragCoord.x);
int v = int(gl_FragCoord.y);
int lm = 2;// ray distance branching factor. ray distance = 2^(lm*ci)
int rm = 1;// ray count branching factor. Num rays for cascade ci = r0*2^(rm*ci) = r0*(1 << rm*ci). NOTE: increasing this removes the property that total size of all cascades converges to 2x size of cascade 0, and instead leads to linear size increase
int n = n0 >> ci; // number of probes in one dimension
float d = d0*(1 << ci); // distance between probes
int rn = r0 << (rm*ci); // number of pixels/rays per probe
int yi = v; // probe index
int xi = u/rn; // probe index
int r = u - xi*rn; // ray index
// Half a probe spacing: probes sit at the centre of their grid cell.
float dx = d0*0.5f*(1 << ci);
float x = xi * d + dx; // probe pos
float y = yi * d + dx; // probe pos
float l = 0.5 * d0; // length of ray
float intensity = 1.0;
// Texels outside this cascade's probe grid are written as transparent black.
// NOTE(review): no early return is visible after this assignment — confirm against the original.
if (xi >= n || xi < 0 || yi >= n || yi < 0) {
ocolor = vec4(0,0,0,0);
float ra = ci == 0 ? 0 : l*(1 << ((ci-1)*lm)); // start of ray length interval
float rb = l*(1 << (ci*lm)); // end of ray length interval
// Direction through the centre of angular slot r (rn slots per full turn).
float alpha = 2*PI*(float(r)+0.5)/rn;
vec2 rot = vec2(cos(alpha), sin(alpha));
vec2 a = vec2(x,y) + rot*ra; // start of ray
vec2 b = vec2(x,y) + rot*rb; // end of ray
raymarch2d_t raym = raymarch2d_make(a.x, a.y, b.x, b.y);
vec4 col = vec4(0,0,0,0);
// Walk the cells along the ray; a texel that is not pure white counts as a hit and its
// colour becomes the ray's radiance (alpha = 1 marks "hit").
while (raymarch2d_next(raym)) {
vec3 v = texture(u_input, vec2((raym.x+0.5)/u_resolution.x, (raym.y+0.5)/u_resolution.y)).rgb;
if (v != vec3(1,1,1)) {
col = vec4(v*intensity,1);
// if no hit, get from upper cascade
// TODO: do proper alpha blending to support transparent materials. Since we're only dealing with opaque materials for now it's fine
if (col.a == 0) {
// Topmost cascade has nothing above it: use the sky, averaged over this ray's angular interval.
if (ci == cn-1) {
if (add_sky_light != 0)
col = vec4(sky(vec2(alpha, alpha + 2*PI/rn)) / (2*PI/rn), 1);
col = vec4(0,0,0,0);
else {
int xi2 = (xi+1)/2; // probe index in upper
int yi2 = (yi+1)/2; // probe index in upper
int r2 = r << rm; // ray index in upper
int rn2 = rn << rm; // num rays in upper
int n2 = n >> 1; // num probes in upper
float tx = 0.75 - 0.5*float(xi%2); // weighting of upper cascade. we can do this magic because we know how the probes are laid out in the grid
float ty = 0.75 - 0.5*float(yi%2); // weighting of upper cascade. we can do this magic because we know how the probes are laid out in the grid
// loop through all the nearby rays in the upper cascade
// TODO: in the case where there are >2 rays in the upper cascade for each ray in this cascade (i.e. rm > 1),
// we should choose a better weighting than just treating them all equally
vec4 upper = vec4(0,0,0,0);
float frac = 1.0 / (1 << rm);
for (int ri = 0; ri < (1 << rm); ++ri) {
// The four surrounding upper probes, with indices clamped to the upper grid.
vec2 pc1 = floor(vec2(clamp(xi2-1, 0, n2-1)*rn2 + r2 + ri, clamp(yi2-1, 0, n2-1))) + 0.5; // pixel coordinate of upper probe for ray r2+ri
vec2 pc2 = floor(vec2(clamp(xi2, 0, n2-1)*rn2 + r2 + ri, clamp(yi2-1, 0, n2-1))) + 0.5; // pixel coordinate of upper probe for ray r2+ri
vec2 pc3 = floor(vec2(clamp(xi2-1, 0, n2-1)*rn2 + r2 + ri, clamp(yi2, 0, n2-1))) + 0.5; // pixel coordinate of upper probe for ray r2+ri
vec2 pc4 = floor(vec2(clamp(xi2, 0, n2-1)*rn2 + r2 + ri, clamp(yi2, 0, n2-1))) + 0.5; // pixel coordinate of upper probe for ray r2+ri
// Manual bilinear blend of the four probes.
// NOTE(review): the third argument of the outer mix is not visible in this listing —
// presumably `ty`; confirm against the original.
vec4 c = mix(
mix(texture(u_prev, pc1 / textureSize(u_prev, 0)), texture(u_prev, pc2 / textureSize(u_prev, 0)), tx),
mix(texture(u_prev, pc3 / textureSize(u_prev, 0)), texture(u_prev, pc4 / textureSize(u_prev, 0)), tx),
// Each of the (1 << rm) upper rays contributes equally.
upper += c*frac;
col = upper;
// The stored texel is always written with alpha 1.
ocolor = vec4(col.rgb, 1);
#endif /* FRAGMENT_SHADER */
