gl_shader_cache: Specialize local memory size for compute shaders
Local memory size in compute shaders was stubbed with an arbitrary size. This commit specializes local memory size from guest GPU parameters.
merge-requests/60/head
parent
dbeb523879
commit
287ae2b9e8
|
|
@ -178,7 +178,12 @@ public:
|
|||
BitField<24, 5, u32> gpr_alloc;
|
||||
};
|
||||
|
||||
INSERT_PADDING_WORDS(0x11);
|
||||
union {
|
||||
BitField<0, 20, u32> local_crs_alloc;
|
||||
BitField<24, 5, u32> sass_version;
|
||||
};
|
||||
|
||||
INSERT_PADDING_WORDS(0x10);
|
||||
} launch_description{};
|
||||
|
||||
struct {
|
||||
|
|
|
|||
|
|
@ -731,7 +731,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
|||
|
||||
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
||||
const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
|
||||
launch_desc.block_dim_z, launch_desc.shared_alloc);
|
||||
launch_desc.block_dim_z, launch_desc.shared_alloc,
|
||||
launch_desc.local_pos_alloc);
|
||||
std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant);
|
||||
state.draw.program_pipeline = 0;
|
||||
|
||||
|
|
|
|||
|
|
@ -329,6 +329,11 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy
|
|||
source += fmt::format("shared uint smem[{}];",
|
||||
Common::AlignUp(variant.shared_memory_size, 4) / 4);
|
||||
}
|
||||
|
||||
if (variant.local_memory_size > 0) {
|
||||
source += fmt::format("#define LOCAL_MEMORY_SIZE {}",
|
||||
Common::AlignUp(variant.local_memory_size, 4) / 4);
|
||||
}
|
||||
}
|
||||
|
||||
source += '\n';
|
||||
|
|
|
|||
|
|
@ -510,10 +510,14 @@ private:
|
|||
}
|
||||
|
||||
void DeclareLocalMemory() {
|
||||
// TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at
|
||||
// specialization time.
|
||||
const u64 local_memory_size =
|
||||
stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize();
|
||||
if (stage == ProgramType::Compute) {
|
||||
code.AddLine("#ifdef LOCAL_MEMORY_SIZE");
|
||||
code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory());
|
||||
code.AddLine("#endif");
|
||||
return;
|
||||
}
|
||||
|
||||
const u64 local_memory_size = header.GetLocalMemorySize();
|
||||
if (local_memory_size == 0) {
|
||||
return;
|
||||
}
|
||||
|
|
@ -851,9 +855,6 @@ private:
|
|||
}
|
||||
|
||||
if (const auto lmem = std::get_if<LmemNode>(&*node)) {
|
||||
if (stage == ProgramType::Compute) {
|
||||
LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
|
||||
}
|
||||
return {
|
||||
fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
|
||||
Type::Uint};
|
||||
|
|
@ -1228,9 +1229,6 @@ private:
|
|||
}
|
||||
target = std::move(*output);
|
||||
} else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
|
||||
if (stage == ProgramType::Compute) {
|
||||
LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
|
||||
}
|
||||
target = {
|
||||
fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
|
||||
Type::Uint};
|
||||
|
|
|
|||
|
|
@ -52,11 +52,11 @@ struct BindlessSamplerKey {
|
|||
Tegra::Engines::SamplerDescriptor sampler{};
|
||||
};
|
||||
|
||||
constexpr u32 NativeVersion = 8;
|
||||
constexpr u32 NativeVersion = 9;
|
||||
|
||||
// Making sure sizes doesn't change by accident
|
||||
static_assert(sizeof(BaseBindings) == 16);
|
||||
static_assert(sizeof(ProgramVariant) == 32);
|
||||
static_assert(sizeof(ProgramVariant) == 36);
|
||||
|
||||
ShaderCacheVersionHash GetShaderCacheVersionHash() {
|
||||
ShaderCacheVersionHash hash{};
|
||||
|
|
|
|||
|
|
@ -64,10 +64,10 @@ struct ProgramVariant final {
|
|||
: base_bindings{base_bindings}, primitive_mode{primitive_mode} {}
|
||||
|
||||
/// Compute constructor.
|
||||
explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z,
|
||||
u32 shared_memory_size) noexcept
|
||||
explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size,
|
||||
u32 local_memory_size) noexcept
|
||||
: block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)},
|
||||
shared_memory_size{shared_memory_size} {}
|
||||
shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {}
|
||||
|
||||
// Graphics specific parameters.
|
||||
BaseBindings base_bindings{};
|
||||
|
|
@ -78,12 +78,13 @@ struct ProgramVariant final {
|
|||
u16 block_y{};
|
||||
u16 block_z{};
|
||||
u32 shared_memory_size{};
|
||||
u32 local_memory_size{};
|
||||
|
||||
bool operator==(const ProgramVariant& rhs) const noexcept {
|
||||
return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z,
|
||||
shared_memory_size) == std::tie(rhs.base_bindings, rhs.primitive_mode,
|
||||
rhs.block_x, rhs.block_y, rhs.block_z,
|
||||
rhs.shared_memory_size);
|
||||
shared_memory_size, local_memory_size) ==
|
||||
std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y,
|
||||
rhs.block_z, rhs.shared_memory_size, rhs.local_memory_size);
|
||||
}
|
||||
|
||||
bool operator!=(const ProgramVariant& rhs) const noexcept {
|
||||
|
|
@ -133,7 +134,8 @@ struct hash<OpenGL::ProgramVariant> {
|
|||
static_cast<std::size_t>(variant.block_x) ^
|
||||
(static_cast<std::size_t>(variant.block_y) << 32) ^
|
||||
(static_cast<std::size_t>(variant.block_z) << 48) ^
|
||||
(static_cast<std::size_t>(variant.shared_memory_size) << 16);
|
||||
(static_cast<std::size_t>(variant.shared_memory_size) << 16) ^
|
||||
(static_cast<std::size_t>(variant.local_memory_size) << 36);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue