Compute nodes
Supported bindings: ossia
Compute is simpler than draw, as the pipeline only has one shader (the compute shader).
Instead of draw, the method in which compute dispatch calls are issued is called dispatch.
Here is an example:
#pragma once
#include <avnd/common/member_reflection.hpp>
#include <fmt/format.h>
#include <fmt/printf.h>
#include <gpp/commands.hpp>
#include <gpp/meta.hpp>
#include <gpp/ports.hpp>
#include <halp/controls.hpp>
#include <halp/static_string.hpp>
#include <vector>
namespace examples
{
struct GpuComputeExample
{
// halp_meta is a shorthand for defining a static function, roughly:
// #define halp_meta(name, val) static constexpr auto name() { return val; }
// Metadata describing the node: display name, unique identifier, category,
// C identifier, author, link to the manual and a short description.
halp_meta(name, "Average color");
halp_meta(uuid, "03bce361-a2ca-4959-95b4-6aac3b6c07b5");
halp_meta(category, "Visuals/Analysis")
halp_meta(c_name, "average_color")
halp_meta(author, "Jean-Michaël Celerier")
halp_meta(
manual_url,
"https://ossia.io/score-docs/processes/"
"computer-vision-utilities.html#average-color")
halp_meta(description, "Extract the average color of an input video feed")
// Factor by which the width / height controls are divided to size the result
// buffer (see update()); it has the same value as the 16x16 local size
// declared in the layout below.
static constexpr int downscale = 16;
// Define the layout of our pipeline in C++ simply through the structure of a struct
struct layout
{
// Work group dimensions; the shader reads them back through gl_WorkGroupSize.
// NOTE(review): presumably emitted as the GLSL local_size_x/y/z layout
// qualifiers - confirm against the gpp shader generator.
halp_meta(local_size_x, 16)
halp_meta(local_size_y, 16)
halp_meta(local_size_z, 1)
// Flags this layout as describing a compute pipeline
halp_flags(compute);
struct bindings
{
// Each binding is a struct member
// Binding 0: the buffer the shader writes into as `result[...]`.
// update() allocates it and dispatch() reads it back on the CPU.
// NOTE(review): the `color*` member type presumably denotes an unsized
// array of 4-float colors - confirm.
struct
{
halp_meta(name, "my_buf");
halp_meta(binding, 0);
halp_flags(std140, buffer, load, store);
using color = float[4];
gpp::uniform<"result", color*> values;
} my_buf;
// Define the members of our ubos
// Binding 1: `width` and `height`, which the shader uses to bound its
// image reads and its writes to `result`.
struct custom_ubo
{
halp_meta(name, "custom");
halp_meta(binding, 1);
halp_flags(std140, ubo);
gpp::uniform<"width", int> width;
gpp::uniform<"height", int> height;
} ubo;
// Binding 2: the read-only rgba32f image the shader loads from as `img`.
struct
{
halp_meta(name, "img")
halp_meta(format, "rgba32f")
halp_meta(binding, 2);
halp_flags(image2D, readonly);
} image;
} bindings;
};
// Short names for the binding set and the UBO type; the port declarations
// use them to form pointers to members (&bindings::image, &uniforms::width).
using bindings = decltype(layout::bindings);
using uniforms = decltype(bindings::ubo);
// Definition of our ports which will get parsed by the
// software that instantiates this class
struct
{
// Here we use some helper types in the usual fashion
// Input image, tied to the `image` binding of the layout (the shader
// loads from it under the name `img`).
gpp::image_input_port<"Image", &bindings::image> tex;
// Integer slider tied to the `width` member of the UBO; update() also
// reads it to size the result buffer.
// NOTE(review): halp::range{1, 1000, 100} is presumably {min, max, init} - confirm.
gpp::uniform_control_port<
halp::hslider_i32<"Width", halp::range{1, 1000, 100}>, &uniforms::width>
width;
// Same as the width control, for the `height` member of the UBO.
gpp::uniform_control_port<
halp::hslider_i32<"Height", halp::range{1, 1000, 100}>, &uniforms::height>
height;
} inputs;
// The output port on which we write the average color
struct
{
// Single output named "color": four floats that dispatch() overwrites with
// the averaged values computed from the buffer read back from the GPU.
struct
{
halp_meta(name, "color")
float value[4];
} color_out;
} outputs;
// Returns the GLSL source of the compute shader's main() function.
//
// The shader refers to `result`, `width`, `height` and `img`, which are the
// names declared in `layout::bindings`.
// NOTE(review): their GLSL declarations are presumably generated from the
// layout by the host - confirm.
//
// Each invocation accumulates one work-group-sized tile of the image and
// stores the sum in `result`. The store is guarded so that only
// (width / local_size_x) * (height / local_size_y) cells are written, which is
// the number of colors update() allocates (local size and `downscale` are
// both 16). The previous guard, `(width * height) / x * y`, evaluated to
// roughly width * height because of operator precedence and therefore never
// prevented writes past the end of the buffer.
std::string_view compute()
{
  return R"_(
void main()
{
// Note: the algorithm is most likely wrong as I know FUCK ALL
// about compute shaders ; fixes welcome ;p
ivec2 call = ivec2(gl_GlobalInvocationID.xy);
vec4 color = vec4(0.0, 0.0,0,0);
for(int i = 0; i < gl_WorkGroupSize.x; i++)
{
for(int j = 0; j < gl_WorkGroupSize.y; j++)
{
uint x = call.x * gl_WorkGroupSize.x + i;
uint y = call.y * gl_WorkGroupSize.y + j;
if (x < width && y < height)
{
color += imageLoad(img, ivec2(x,y));
}
}
}
if(gl_LocalInvocationIndex < ((width / gl_WorkGroupSize.x) * (height / gl_WorkGroupSize.y)))
{
result[gl_GlobalInvocationID.y * gl_WorkGroupSize.x + gl_GlobalInvocationID.x] = color;
}
}
)_";
}
// Allocate and update buffers
gpp::co_update update()
{
  // Size of the downscaled result grid, derived from the current controls
  const int grid_w = inputs.width / downscale;
  const int grid_h = inputs.height / downscale;

  // A buffer allocated for a different grid size is stale: hand it back
  // and remember the new size
  const bool grid_changed = (grid_w != last_w) || (grid_h != last_h);
  if(grid_changed)
  {
    if(buf)
    {
      co_yield gpp::buffer_release{.handle = buf};
      buf = nullptr;
    }
    last_w = grid_w;
    last_h = grid_h;
  }

  // Allocate only for a non-empty grid that currently has no buffer:
  // one float[4] color per grid cell
  const bool grid_is_empty = (grid_w <= 0) || (grid_h <= 0);
  if(!grid_is_empty && !buf)
  {
    const int byte_count = grid_w * grid_h * sizeof(float) * 4;
    buf = co_yield gpp::static_allocation{
        .binding = lay.bindings.my_buf.binding(), .size = byte_count};
  }
}
// Release allocated data
gpp::co_release release()
{
  // Nothing is yielded when no buffer is currently held
  if(this->buf)
  {
    // Ask the host to free the buffer, then clear the handle so that a
    // second call to release() does not request the same release again
    co_yield gpp::buffer_release{.handle = this->buf};
    this->buf = nullptr;
  }
}
// Do the GPU dispatch call
// Runs the compute pass, reads the result buffer back and finishes the
// average on the CPU, writing it to `outputs.color_out.value`.
//
// Yields, in order: begin_compute_pass, compute_dispatch (a single work
// group), readback_buffer, end_compute_pass, then the readback awaiter.
// Does nothing when no buffer is allocated.
gpp::co_dispatch dispatch()
{
  if(!buf)
    co_return;

  // Use the dimensions the buffer was allocated for (recorded by update())
  // rather than re-reading the controls: if a control changed since the last
  // update(), a size recomputed here would no longer match the allocation,
  // leading to an oversized readback, an out-of-bounds read in the loop
  // below, or a division by zero.
  const int w = last_w;
  const int h = last_h;
  const int downscaled_pixels_count = w * h;
  if(downscaled_pixels_count <= 0)
    co_return;
  const int bytes = downscaled_pixels_count * sizeof(float) * 4;

  // Run a pass
  co_yield gpp::begin_compute_pass{};
  co_yield gpp::compute_dispatch{.x = 1, .y = 1, .z = 1};

  // Request an asynchronous readback
  gpp::buffer_awaiter readback
      = co_yield gpp::readback_buffer{.handle = buf, .offset = 0, .size = bytes};
  co_yield gpp::end_compute_pass{};

  // The readback can be fetched once the compute pass is done
  // (this needs to be improved in terms of asyncness)
  // NOTE(review): `size` is presumably the number of bytes read back; it is
  // not checked here - confirm whether a short or failed readback can occur.
  auto [data, size] = co_yield readback;
  using color = float[4];
  auto flt = reinterpret_cast<const color*>(data);

  // Finish summing on the CPU: accumulate every cell per channel, then
  // divide by the number of cells
  auto& avg = outputs.color_out.value;
  for(float& channel : avg)
    channel = 0.f;
  for(int i = 0; i < downscaled_pixels_count; i++)
  {
    for(int j = 0; j < 4; j++)
    {
      avg[j] += flt[i][j];
    }
  }
  for(float& channel : avg)
    channel /= downscaled_pixels_count;
}
private:
// Instance of the layout; update() uses it to query the binding index of my_buf
static constexpr auto lay = layout{};
// Downscaled width / height recorded by update(); when they change, the
// current buffer is released
int last_w{}, last_h{};
// Handle of the GPU buffer allocated in update(); reset to nullptr on release
gpp::buffer_handle buf{};
// Not referenced by any code visible in this example
std::vector<float> zeros{};
};
}