Compute Shader Support

This commit is contained in:
MaxNiftyNine 2026-02-22 16:41:15 -05:00
parent 2016e429c1
commit 81ffe74c69
7 changed files with 232 additions and 2 deletions

18
include/gx2/draw.h Normal file → Executable file
View File

@ -12,6 +12,21 @@
extern "C" {
#endif
/**
 * Parameter block handed to GX2DispatchCompute().
 *
 * Four 32-bit words, 0x10 bytes in total, as asserted by the layout checks
 * below.
 */
typedef struct GX2DispatchParams GX2DispatchParams;
struct GX2DispatchParams
{
//! Presumably the number of work groups along X - TODO confirm.
uint32_t numGroupsX;
//! Presumably the number of work groups along Y - TODO confirm.
uint32_t numGroupsY;
//! Presumably the number of work groups along Z - TODO confirm.
uint32_t numGroupsZ;
//! Fills the structure up to 0x10 bytes.
uint32_t _padding;
};
WUT_CHECK_OFFSET(GX2DispatchParams, 0x00, numGroupsX);
WUT_CHECK_OFFSET(GX2DispatchParams, 0x04, numGroupsY);
WUT_CHECK_OFFSET(GX2DispatchParams, 0x08, numGroupsZ);
WUT_CHECK_OFFSET(GX2DispatchParams, 0x0C, _padding);
WUT_CHECK_SIZE(GX2DispatchParams, 0x10);
void
GX2SetAttribBuffer(uint32_t index,
uint32_t size,
@ -59,6 +74,9 @@ GX2DrawIndexedImmediateEx(GX2PrimitiveMode mode,
void
GX2SetPrimitiveRestartIndex(uint32_t index);
/**
 * Compute dispatch entry point.
 *
 * NOTE(review): the implementation is not part of this header; presumably
 * this launches the compute shader set with GX2SetComputeShader() over the
 * group counts in \p dispatchParams - confirm. The parameter is not
 * const-qualified, so the callee may be allowed to write to it.
 *
 * \param dispatchParams Group counts for the dispatch.
 */
void
GX2DispatchCompute(GX2DispatchParams *dispatchParams);
#ifdef __cplusplus
}
#endif

90
include/gx2/shaders.h Normal file → Executable file
View File

@ -18,6 +18,7 @@ extern "C" {
typedef struct GX2AttribVar GX2AttribVar;
typedef struct GX2AttribStream GX2AttribStream;
typedef struct GX2FetchShader GX2FetchShader;
typedef struct GX2ComputeShader GX2ComputeShader;
typedef struct GX2GeometryShader GX2GeometryShader;
typedef struct GX2LoopVar GX2LoopVar;
typedef struct GX2PixelShader GX2PixelShader;
@ -336,6 +337,57 @@ WUT_CHECK_OFFSET(GX2GeometryShader, 0x90, streamOutStride);
WUT_CHECK_OFFSET(GX2GeometryShader, 0xA0, gx2rBuffer);
WUT_CHECK_SIZE(GX2GeometryShader, 0xB0);
/**
 * Compute shader description.
 *
 * The layout is fixed by the offset checks below (0x84 bytes in total).
 * Each xxxCount field is immediately followed by the pointer to the array
 * it counts.
 */
struct GX2ComputeShader
{
//! 12 register words (0x30 bytes); their meaning is not described here.
uint32_t regs[12];
//! Size of the data at \c program (presumably in bytes - TODO confirm).
uint32_t size;
//! Shader program data.
void *program;
//! Number of entries in \c uniformBlocks.
uint32_t uniformBlockCount;
//! Uniform blocks, searched by name in GX2GetComputeUniformBlock().
GX2UniformBlock *uniformBlocks;
//! Number of entries in \c uniformVars.
uint32_t uniformVarCount;
//! Uniform variables, searched by name in GX2GetComputeUniformVar().
GX2UniformVar *uniformVars;
//! Number of entries in \c initialValues.
uint32_t initialValueCount;
GX2UniformInitialValue *initialValues;
//! Number of entries in \c loopVars.
uint32_t loopVarCount;
GX2LoopVar *loopVars;
//! Number of entries in \c samplerVars.
uint32_t samplerVarCount;
GX2SamplerVar *samplerVars;
//! Presumably the work-group dimensions of the shader - TODO confirm.
uint32_t workgroupSizeX;
uint32_t workgroupSizeY;
uint32_t workgroupSizeZ;
//! NOTE(review): semantics not visible from this header.
BOOL over64Mode;
//! NOTE(review): semantics not visible from this header.
uint32_t numWavesPerSimd;
//! GX2R buffer descriptor associated with the shader program.
GX2RBuffer gx2rBuffer;
};
WUT_CHECK_OFFSET(GX2ComputeShader, 0x00, regs);
WUT_CHECK_OFFSET(GX2ComputeShader, 0x30, size);
WUT_CHECK_OFFSET(GX2ComputeShader, 0x34, program);
WUT_CHECK_OFFSET(GX2ComputeShader, 0x38, uniformBlockCount);
WUT_CHECK_OFFSET(GX2ComputeShader, 0x3C, uniformBlocks);
WUT_CHECK_OFFSET(GX2ComputeShader, 0x40, uniformVarCount);
WUT_CHECK_OFFSET(GX2ComputeShader, 0x44, uniformVars);
WUT_CHECK_OFFSET(GX2ComputeShader, 0x48, initialValueCount);
WUT_CHECK_OFFSET(GX2ComputeShader, 0x4C, initialValues);
WUT_CHECK_OFFSET(GX2ComputeShader, 0x50, loopVarCount);
WUT_CHECK_OFFSET(GX2ComputeShader, 0x54, loopVars);
WUT_CHECK_OFFSET(GX2ComputeShader, 0x58, samplerVarCount);
WUT_CHECK_OFFSET(GX2ComputeShader, 0x5C, samplerVars);
WUT_CHECK_OFFSET(GX2ComputeShader, 0x60, workgroupSizeX);
WUT_CHECK_OFFSET(GX2ComputeShader, 0x64, workgroupSizeY);
WUT_CHECK_OFFSET(GX2ComputeShader, 0x68, workgroupSizeZ);
WUT_CHECK_OFFSET(GX2ComputeShader, 0x6C, over64Mode);
WUT_CHECK_OFFSET(GX2ComputeShader, 0x70, numWavesPerSimd);
WUT_CHECK_OFFSET(GX2ComputeShader, 0x74, gx2rBuffer);
WUT_CHECK_SIZE(GX2ComputeShader, 0x84);
struct GX2AttribStream
{
uint32_t location;
@ -388,6 +440,9 @@ GX2SetPixelShader(const GX2PixelShader *shader);
void
GX2SetGeometryShader(const GX2GeometryShader *shader);
/**
 * Compute counterpart of GX2SetPixelShader() / GX2SetGeometryShader().
 *
 * NOTE(review): presumably makes \p shader the active compute shader; the
 * implementation is not visible from this header - confirm.
 */
void
GX2SetComputeShader(const GX2ComputeShader *shader);
void
GX2SetVertexSampler(const GX2Sampler *sampler,
uint32_t id);
@ -400,6 +455,10 @@ void
GX2SetGeometrySampler(const GX2Sampler *sampler,
uint32_t id);
/**
 * Compute counterpart of GX2SetGeometrySampler().
 *
 * NOTE(review): presumably binds \p sampler to sampler slot \p id of the
 * compute stage - confirm against the implementation.
 */
void
GX2SetComputeSampler(const GX2Sampler *sampler,
uint32_t id);
void
GX2SetVertexUniformReg(uint32_t offset,
uint32_t count,
@ -425,6 +484,11 @@ GX2SetGeometryUniformBlock(uint32_t location,
uint32_t size,
const void *data);
/**
 * Compute counterpart of GX2SetGeometryUniformBlock().
 *
 * NOTE(review): presumably binds \p size bytes at \p data to uniform block
 * slot \p location of the compute stage - confirm against the
 * implementation.
 */
void
GX2SetComputeUniformBlock(uint32_t location,
uint32_t size,
const void *data);
void
GX2SetShaderModeEx(GX2ShaderMode mode,
uint32_t numVsGpr,
@ -502,6 +566,19 @@ GX2GetVertexUniformBlock(const GX2VertexShader *shader,
return NULL;
}
/**
 * Find a uniform block of a compute shader by name.
 *
 * The first uniformBlockCount entries of shader->uniformBlocks are compared
 * against \p name in order, and the first exact match wins.
 *
 * \param shader Compute shader whose uniform blocks are searched.
 * \param name   NUL-terminated name to look for.
 * \return Pointer to the matching entry, or NULL when no entry matches.
 */
static inline GX2UniformBlock *
GX2GetComputeUniformBlock(const GX2ComputeShader *shader,
                          const char *name)
{
   uint32_t idx = 0;

   while (idx < shader->uniformBlockCount) {
      if (!strcmp(shader->uniformBlocks[idx].name, name)) {
         return shader->uniformBlocks + idx;
      }

      idx++;
   }

   return NULL;
}
static inline GX2UniformVar *
GX2GetGeometryUniformVar(const GX2GeometryShader *shader,
const char *name)
@ -541,6 +618,19 @@ GX2GetVertexUniformVar(const GX2VertexShader *shader,
return NULL;
}
/**
 * Find a uniform variable of a compute shader by name.
 *
 * The first uniformVarCount entries of shader->uniformVars are compared
 * against \p name in order, and the first exact match wins.
 *
 * \param shader Compute shader whose uniform variables are searched.
 * \param name   NUL-terminated name to look for.
 * \return Pointer to the matching entry, or NULL when no entry matches.
 */
static inline GX2UniformVar *
GX2GetComputeUniformVar(const GX2ComputeShader *shader,
                        const char *name)
{
   uint32_t idx = 0;

   while (idx < shader->uniformVarCount) {
      if (!strcmp(shader->uniformVars[idx].name, name)) {
         return shader->uniformVars + idx;
      }

      idx++;
   }

   return NULL;
}
static inline void
GX2SetShaderMode(GX2ShaderMode mode)
{

4
include/gx2/texture.h Normal file → Executable file
View File

@ -49,6 +49,10 @@ void
GX2SetGeometryTexture(const GX2Texture *texture,
uint32_t unit);
/**
 * Compute counterpart of GX2SetGeometryTexture().
 *
 * NOTE(review): presumably binds \p texture to texture unit \p unit of the
 * compute stage - confirm against the implementation.
 */
void
GX2SetComputeTexture(const GX2Texture *texture,
uint32_t unit);
#ifdef __cplusplus
}
#endif

17
libraries/libgfd/include/gfd.h Normal file → Executable file
View File

@ -111,6 +111,23 @@ WUT_CHECK_SIZE(GFDRelocationHeader, 0x28);
char *
GFDGetLastErrorString();
/**
 * Number of compute shaders in the GFD image at \p file.
 *
 * WHBGfxLoadGFDComputeShader() rejects any index that is not below this
 * value.
 */
uint32_t
GFDGetComputeShaderCount(const void *file);
/**
 * Size of the header of compute shader \p index in the GFD image at
 * \p file.
 *
 * WHBGfxLoadGFDComputeShader() uses the result as the allocation size for
 * the GX2ComputeShader it fills in, and treats 0 as failure.
 */
uint32_t
GFDGetComputeShaderHeaderSize(uint32_t index,
const void *file);
/**
 * Size of the program of compute shader \p index in the GFD image at
 * \p file.
 *
 * WHBGfxLoadGFDComputeShader() uses the result as the size of the program
 * buffer it creates, and treats 0 as failure.
 */
uint32_t
GFDGetComputeShaderProgramSize(uint32_t index,
const void *file);
/**
 * Extract compute shader \p index from the GFD image at \p file.
 *
 * \param shader  Destination for the shader header.
 * \param program Destination for the shader program. The implementation
 *                returns FALSE when this pointer fails its shader
 *                alignment check.
 * \param index   Which compute shader of the file to read.
 * \param file    Start of the GFD image.
 * \return TRUE on success, FALSE otherwise.
 */
BOOL
GFDGetComputeShader(GX2ComputeShader *shader,
void *program,
uint32_t index,
const void *file);
uint32_t
GFDGetGeometryShaderCount(const void *file);

6
libraries/libgfd/src/gfd.c Normal file → Executable file
View File

@ -559,13 +559,16 @@ GFDGetComputeShaderProgramSize(uint32_t index,
file);
}
/*
BOOL
GFDGetComputeShader(GX2ComputeShader *shader,
void *program,
uint32_t index,
const void *file)
{
if (!_GFDCheckShaderAlign(program)) {
return FALSE;
}
return _GFDGetGenericBlock(GFD_BLOCK_COMPUTE_SHADER_HEADER,
shader,
GFD_BLOCK_COMPUTE_SHADER_PROGRAM,
@ -577,7 +580,6 @@ GFDGetComputeShader(GX2ComputeShader *shader,
index,
file);
}
*/
uint32_t
GFDGetGeometryShaderCount(const void *file)

7
libraries/libwhb/include/whb/gfx.h Normal file → Executable file
View File

@ -67,6 +67,13 @@ WHBGfxLoadGFDVertexShader(uint32_t index,
BOOL
WHBGfxFreeVertexShader(GX2VertexShader *shader);
/**
 * Load compute shader \p index from the GFD image at \p file.
 *
 * \return A newly allocated shader, to be released with
 *         WHBGfxFreeComputeShader(), or NULL on failure.
 */
GX2ComputeShader *
WHBGfxLoadGFDComputeShader(uint32_t index,
const void *file);
/**
 * Release a compute shader obtained from WHBGfxLoadGFDComputeShader():
 * destroys its program buffer if one is attached and frees \p shader.
 */
BOOL
WHBGfxFreeComputeShader(GX2ComputeShader *shader);
BOOL
WHBGfxLoadGFDShaderGroup(WHBGfxShaderGroup *group,
uint32_t index,

92
libraries/libwhb/src/gfx_shader.c Normal file → Executable file
View File

@ -192,6 +192,98 @@ WHBGfxFreeVertexShader(GX2VertexShader *shader)
return TRUE;
}
/**
 * Load compute shader \p index from the GFD image at \p file.
 *
 * The shader header is placed in a 64-byte aligned GfxHeapAllocMEM2()
 * allocation and the program in a GX2R buffer described by
 * shader->gx2rBuffer. Every failure is logged through WHBLogPrintf() and
 * releases whatever had been acquired so far.
 *
 * \param index Which compute shader of the file to load.
 * \param file  Start of the GFD image.
 * \return The loaded shader (release with WHBGfxFreeComputeShader()), or
 *         NULL on failure.
 */
GX2ComputeShader *
WHBGfxLoadGFDComputeShader(uint32_t index,
                           const void *file)
{
   uint32_t shaderCount, headerSize, programSize, allocSize;
   GX2ComputeShader *shader = NULL;
   void *program = NULL;

   shaderCount = GFDGetComputeShaderCount(file);
   if (index >= shaderCount) {
      WHBLogPrintf("%s: index %u >= %u GFDGetComputeShaderCount(file)",
                   __FUNCTION__,
                   index,
                   shaderCount);
      goto error;
   }

   headerSize = GFDGetComputeShaderHeaderSize(index, file);
   if (!headerSize) {
      WHBLogPrintf("%s: headerSize == 0", __FUNCTION__);
      goto error;
   }

   programSize = GFDGetComputeShaderProgramSize(index, file);
   if (!programSize) {
      WHBLogPrintf("%s: programSize == 0", __FUNCTION__);
      goto error;
   }

   // headerSize comes from the file, but gx2rBuffer (the last member of the
   // struct) is written below regardless, so never allocate less than the
   // struct itself.
   allocSize = headerSize;
   if (allocSize < sizeof(*shader)) {
      allocSize = (uint32_t)sizeof(*shader);
   }

   shader = (GX2ComputeShader *)GfxHeapAllocMEM2(allocSize, 64);
   if (!shader) {
      WHBLogPrintf("%s: GfxHeapAllocMEM2(%u, 64) failed", __FUNCTION__,
                   allocSize);
      goto error;
   }

   shader->gx2rBuffer.flags = GX2R_RESOURCE_BIND_SHADER_PROGRAM |
                              GX2R_RESOURCE_USAGE_CPU_READ |
                              GX2R_RESOURCE_USAGE_CPU_WRITE |
                              GX2R_RESOURCE_USAGE_GPU_READ;
   shader->gx2rBuffer.elemSize = programSize;
   shader->gx2rBuffer.elemCount = 1;
   // Lets the error path tell whether a buffer was actually created.
   shader->gx2rBuffer.buffer = NULL;

   if (!GX2RCreateBuffer(&shader->gx2rBuffer)) {
      WHBLogPrintf("%s: GX2RCreateBuffer failed with programSize = %u",
                   __FUNCTION__, programSize);
      goto error;
   }

   program = GX2RLockBufferEx(&shader->gx2rBuffer, 0);
   if (!program) {
      WHBLogPrintf("%s: GX2RLockBufferEx failed", __FUNCTION__);
      goto error;
   }

   // NOTE(review): GFDGetComputeShader fills *shader from the file header.
   // If that copy also covers gx2rBuffer it would replace the descriptor
   // set up above - confirm against _GFDGetGenericBlock.
   if (!GFDGetComputeShader(shader, program, index, file)) {
      WHBLogPrintf("%s: GFDGetComputeShader failed", __FUNCTION__);
      // The buffer is about to be destroyed, so skip the invalidations.
      GX2RUnlockBufferEx(&shader->gx2rBuffer,
                         GX2R_RESOURCE_DISABLE_CPU_INVALIDATE |
                         GX2R_RESOURCE_DISABLE_GPU_INVALIDATE);
      goto error;
   }

   GX2RUnlockBufferEx(&shader->gx2rBuffer, 0);

   // For some reason we still need to manually invalidate the buffers,
   // even though GX2RUnlockBuffer SHOULD be doing that for us
   GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->program, shader->size);
   return shader;

error:
   if (shader) {
      if (shader->gx2rBuffer.buffer) {
         GX2RDestroyBufferEx(&shader->gx2rBuffer, 0);
      }

      GfxHeapFreeMEM2(shader);
   }

   return NULL;
}
/**
 * Release a compute shader obtained from WHBGfxLoadGFDComputeShader().
 *
 * Destroys the program buffer if one is attached, then frees the shader
 * allocation itself.
 *
 * \param shader Shader to release; NULL is tolerated.
 * \return TRUE when the shader was released, FALSE when \p shader is NULL.
 */
BOOL
WHBGfxFreeComputeShader(GX2ComputeShader *shader)
{
   // WHBGfxLoadGFDComputeShader() returns NULL on failure; do not
   // dereference it if a caller passes that result straight through.
   if (!shader) {
      return FALSE;
   }

   if (shader->gx2rBuffer.buffer) {
      GX2RDestroyBufferEx(&shader->gx2rBuffer, 0);
   }

   GfxHeapFreeMEM2(shader);
   return TRUE;
}
BOOL
WHBGfxLoadGFDShaderGroup(WHBGfxShaderGroup *group,
uint32_t index,