query_cache: Abstract OpenGL implementation
Abstract the current OpenGL implementation into the VideoCommon namespace and reimplement it on top of that. Doing this avoids repeating code and logic in the Vulkan implementation.pull/3409/head
parent
73d2d3342d
commit
c31382ced5
|
|
@ -37,6 +37,7 @@ add_library(video_core STATIC
|
|||
memory_manager.h
|
||||
morton.cpp
|
||||
morton.h
|
||||
query_cache.h
|
||||
rasterizer_accelerated.cpp
|
||||
rasterizer_accelerated.h
|
||||
rasterizer_cache.cpp
|
||||
|
|
|
|||
|
|
@ -0,0 +1,323 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <iterator>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
template <class QueryCache, class HostCounter>
|
||||
class CounterStreamBase {
|
||||
public:
|
||||
explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type)
|
||||
: cache{cache}, type{type} {}
|
||||
|
||||
/// Updates the state of the stream, enabling or disabling as needed.
|
||||
void Update(bool enabled) {
|
||||
if (enabled) {
|
||||
Enable();
|
||||
} else {
|
||||
Disable();
|
||||
}
|
||||
}
|
||||
|
||||
/// Resets the stream to zero. It doesn't disable the query after resetting.
|
||||
void Reset() {
|
||||
if (current) {
|
||||
current->EndQuery();
|
||||
|
||||
// Immediately start a new query to avoid disabling its state.
|
||||
current = cache.Counter(nullptr, type);
|
||||
}
|
||||
last = nullptr;
|
||||
}
|
||||
|
||||
/// Returns the current counter slicing as needed.
|
||||
std::shared_ptr<HostCounter> Current() {
|
||||
if (!current) {
|
||||
return nullptr;
|
||||
}
|
||||
current->EndQuery();
|
||||
last = std::move(current);
|
||||
current = cache.Counter(last, type);
|
||||
return last;
|
||||
}
|
||||
|
||||
/// Returns true when the counter stream is enabled.
|
||||
bool IsEnabled() const {
|
||||
return static_cast<bool>(current);
|
||||
}
|
||||
|
||||
private:
|
||||
/// Enables the stream.
|
||||
void Enable() {
|
||||
if (current) {
|
||||
return;
|
||||
}
|
||||
current = cache.Counter(last, type);
|
||||
}
|
||||
|
||||
// Disables the stream.
|
||||
void Disable() {
|
||||
if (current) {
|
||||
current->EndQuery();
|
||||
}
|
||||
last = std::exchange(current, nullptr);
|
||||
}
|
||||
|
||||
QueryCache& cache;
|
||||
const VideoCore::QueryType type;
|
||||
|
||||
std::shared_ptr<HostCounter> current;
|
||||
std::shared_ptr<HostCounter> last;
|
||||
};
|
||||
|
||||
template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
|
||||
class QueryCacheBase {
|
||||
public:
|
||||
explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
|
||||
: system{system}, rasterizer{rasterizer}, streams{{CounterStream{
|
||||
static_cast<QueryCache&>(*this),
|
||||
VideoCore::QueryType::SamplesPassed}}} {}
|
||||
|
||||
void InvalidateRegion(CacheAddr addr, std::size_t size) {
|
||||
FlushAndRemoveRegion(addr, size);
|
||||
}
|
||||
|
||||
void FlushRegion(CacheAddr addr, std::size_t size) {
|
||||
FlushAndRemoveRegion(addr, size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Records a query in GPU mapped memory, potentially marked with a timestamp.
|
||||
* @param gpu_addr GPU address to flush to when the mapped memory is read.
|
||||
* @param type Query type, e.g. SamplesPassed.
|
||||
* @param timestamp Timestamp, when empty the flushed query is assumed to be short.
|
||||
*/
|
||||
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
|
||||
auto& memory_manager = system.GPU().MemoryManager();
|
||||
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
|
||||
|
||||
CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
|
||||
if (!query) {
|
||||
const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
|
||||
ASSERT_OR_EXECUTE(cpu_addr, return;);
|
||||
|
||||
query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
|
||||
}
|
||||
|
||||
query->BindCounter(Stream(type).Current(), timestamp);
|
||||
}
|
||||
|
||||
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
|
||||
void UpdateCounters() {
|
||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||
Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
|
||||
}
|
||||
|
||||
/// Resets a counter to zero. It doesn't disable the query after resetting.
|
||||
void ResetCounter(VideoCore::QueryType type) {
|
||||
Stream(type).Reset();
|
||||
}
|
||||
|
||||
/// Returns a new host counter.
|
||||
std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency,
|
||||
VideoCore::QueryType type) {
|
||||
return std::make_shared<HostCounter>(static_cast<QueryCache&>(*this), std::move(dependency),
|
||||
type);
|
||||
}
|
||||
|
||||
/// Returns the counter stream of the specified type.
|
||||
CounterStream& Stream(VideoCore::QueryType type) {
|
||||
return streams[static_cast<std::size_t>(type)];
|
||||
}
|
||||
|
||||
private:
|
||||
/// Flushes a memory range to guest memory and removes it from the cache.
|
||||
void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) {
|
||||
const u64 addr_begin = static_cast<u64>(addr);
|
||||
const u64 addr_end = addr_begin + static_cast<u64>(size);
|
||||
const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
|
||||
const u64 cache_begin = query.CacheAddr();
|
||||
const u64 cache_end = cache_begin + query.SizeInBytes();
|
||||
return cache_begin < addr_end && addr_begin < cache_end;
|
||||
};
|
||||
|
||||
const u64 page_end = addr_end >> PAGE_SHIFT;
|
||||
for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
|
||||
const auto& it = cached_queries.find(page);
|
||||
if (it == std::end(cached_queries)) {
|
||||
continue;
|
||||
}
|
||||
auto& contents = it->second;
|
||||
for (auto& query : contents) {
|
||||
if (!in_range(query)) {
|
||||
continue;
|
||||
}
|
||||
rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1);
|
||||
query.Flush();
|
||||
}
|
||||
contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
|
||||
std::end(contents));
|
||||
}
|
||||
}
|
||||
|
||||
/// Registers the passed parameters as cached and returns a pointer to the stored cached query.
|
||||
CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
|
||||
rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
|
||||
const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT;
|
||||
return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
|
||||
host_ptr);
|
||||
}
|
||||
|
||||
/// Tries to a get a cached query. Returns nullptr on failure.
|
||||
CachedQuery* TryGet(CacheAddr addr) {
|
||||
const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
|
||||
const auto it = cached_queries.find(page);
|
||||
if (it == std::end(cached_queries)) {
|
||||
return nullptr;
|
||||
}
|
||||
auto& contents = it->second;
|
||||
const auto found = std::find_if(std::begin(contents), std::end(contents),
|
||||
[addr](auto& query) { return query.CacheAddr() == addr; });
|
||||
return found != std::end(contents) ? &*found : nullptr;
|
||||
}
|
||||
|
||||
static constexpr std::uintptr_t PAGE_SIZE = 4096;
|
||||
static constexpr int PAGE_SHIFT = 12;
|
||||
|
||||
Core::System& system;
|
||||
VideoCore::RasterizerInterface& rasterizer;
|
||||
|
||||
std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
|
||||
|
||||
std::array<CounterStream, VideoCore::NumQueryTypes> streams;
|
||||
};
|
||||
|
||||
template <class QueryCache, class HostCounter>
|
||||
class HostCounterBase {
|
||||
public:
|
||||
explicit HostCounterBase(std::shared_ptr<HostCounter> dependency)
|
||||
: dependency{std::move(dependency)} {}
|
||||
|
||||
/// Returns the current value of the query.
|
||||
u64 Query() {
|
||||
if (result) {
|
||||
return *result;
|
||||
}
|
||||
|
||||
u64 value = BlockingQuery();
|
||||
if (dependency) {
|
||||
value += dependency->Query();
|
||||
}
|
||||
|
||||
return *(result = value);
|
||||
}
|
||||
|
||||
/// Returns true when flushing this query will potentially wait.
|
||||
bool WaitPending() const noexcept {
|
||||
return result.has_value();
|
||||
}
|
||||
|
||||
protected:
|
||||
/// Returns the value of query from the backend API blocking as needed.
|
||||
virtual u64 BlockingQuery() const = 0;
|
||||
|
||||
private:
|
||||
std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
|
||||
std::optional<u64> result; ///< Filled with the already returned value.
|
||||
};
|
||||
|
||||
template <class HostCounter>
|
||||
class CachedQueryBase {
|
||||
public:
|
||||
explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr)
|
||||
: cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
|
||||
|
||||
CachedQueryBase(CachedQueryBase&& rhs) noexcept
|
||||
: cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr}, counter{std::move(rhs.counter)},
|
||||
timestamp{rhs.timestamp} {}
|
||||
|
||||
CachedQueryBase(const CachedQueryBase&) = delete;
|
||||
|
||||
CachedQueryBase& operator=(CachedQueryBase&& rhs) noexcept {
|
||||
cpu_addr = rhs.cpu_addr;
|
||||
host_ptr = rhs.host_ptr;
|
||||
counter = std::move(rhs.counter);
|
||||
timestamp = rhs.timestamp;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Flushes the query to guest memory.
|
||||
virtual void Flush() {
|
||||
// When counter is nullptr it means that it's just been reseted. We are supposed to write a
|
||||
// zero in these cases.
|
||||
const u64 value = counter ? counter->Query() : 0;
|
||||
std::memcpy(host_ptr, &value, sizeof(u64));
|
||||
|
||||
if (timestamp) {
|
||||
std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
|
||||
}
|
||||
}
|
||||
|
||||
/// Binds a counter to this query.
|
||||
void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
|
||||
if (counter) {
|
||||
// If there's an old counter set it means the query is being rewritten by the game.
|
||||
// To avoid losing the data forever, flush here.
|
||||
Flush();
|
||||
}
|
||||
counter = std::move(counter_);
|
||||
timestamp = timestamp_;
|
||||
}
|
||||
|
||||
VAddr CpuAddr() const noexcept {
|
||||
return cpu_addr;
|
||||
}
|
||||
|
||||
CacheAddr CacheAddr() const noexcept {
|
||||
return ToCacheAddr(host_ptr);
|
||||
}
|
||||
|
||||
u64 SizeInBytes() const noexcept {
|
||||
return SizeInBytes(timestamp.has_value());
|
||||
}
|
||||
|
||||
static u64 SizeInBytes(bool with_timestamp) {
|
||||
return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
|
||||
}
|
||||
|
||||
protected:
|
||||
/// Returns true when querying the counter may potentially block.
|
||||
bool WaitPending() const noexcept {
|
||||
return counter && counter->WaitPending();
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp.
|
||||
static constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp.
|
||||
static constexpr std::intptr_t TIMESTAMP_OFFSET = 8; // Timestamp offset in a large query.
|
||||
|
||||
VAddr cpu_addr; ///< Guest CPU address.
|
||||
u8* host_ptr; ///< Writable host pointer.
|
||||
std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
|
||||
std::optional<u64> timestamp; ///< Timestamp to flush to guest memory.
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
|
@ -20,211 +20,49 @@
|
|||
|
||||
namespace OpenGL {
|
||||
|
||||
using VideoCore::QueryType;
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr std::uintptr_t PAGE_SIZE = 4096;
|
||||
constexpr int PAGE_SHIFT = 12;
|
||||
|
||||
constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp
|
||||
constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp
|
||||
constexpr std::ptrdiff_t TIMESTAMP_OFFSET = 8;
|
||||
|
||||
constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
|
||||
|
||||
constexpr GLenum GetTarget(QueryType type) {
|
||||
constexpr GLenum GetTarget(VideoCore::QueryType type) {
|
||||
return QueryTargets[static_cast<std::size_t>(type)];
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
CounterStream::CounterStream(QueryCache& cache, QueryType type)
|
||||
: cache{cache}, type{type}, target{GetTarget(type)} {}
|
||||
|
||||
CounterStream::~CounterStream() = default;
|
||||
|
||||
void CounterStream::Update(bool enabled, bool any_command_queued) {
|
||||
if (enabled) {
|
||||
Enable();
|
||||
} else {
|
||||
Disable(any_command_queued);
|
||||
}
|
||||
}
|
||||
|
||||
void CounterStream::Reset(bool any_command_queued) {
|
||||
if (current) {
|
||||
EndQuery(any_command_queued);
|
||||
|
||||
// Immediately start a new query to avoid disabling its state.
|
||||
current = cache.GetHostCounter(nullptr, type);
|
||||
}
|
||||
last = nullptr;
|
||||
}
|
||||
|
||||
std::shared_ptr<HostCounter> CounterStream::GetCurrent(bool any_command_queued) {
|
||||
if (!current) {
|
||||
return nullptr;
|
||||
}
|
||||
EndQuery(any_command_queued);
|
||||
last = std::move(current);
|
||||
current = cache.GetHostCounter(last, type);
|
||||
return last;
|
||||
}
|
||||
|
||||
void CounterStream::Enable() {
|
||||
if (current) {
|
||||
return;
|
||||
}
|
||||
current = cache.GetHostCounter(last, type);
|
||||
}
|
||||
|
||||
void CounterStream::Disable(bool any_command_queued) {
|
||||
if (current) {
|
||||
EndQuery(any_command_queued);
|
||||
}
|
||||
last = std::exchange(current, nullptr);
|
||||
}
|
||||
|
||||
void CounterStream::EndQuery(bool any_command_queued) {
|
||||
if (!any_command_queued) {
|
||||
// There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
|
||||
// having any of these causes a lock. glFlush is considered a command, so we can safely wait
|
||||
// for this. Insert to the OpenGL command stream a flush.
|
||||
glFlush();
|
||||
}
|
||||
glEndQuery(target);
|
||||
}
|
||||
|
||||
QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& rasterizer)
|
||||
: system{system}, rasterizer{rasterizer}, streams{{CounterStream{*this,
|
||||
QueryType::SamplesPassed}}} {}
|
||||
QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer)
|
||||
: VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
|
||||
HostCounter>{system, static_cast<VideoCore::RasterizerInterface&>(
|
||||
gl_rasterizer)},
|
||||
gl_rasterizer{gl_rasterizer} {}
|
||||
|
||||
QueryCache::~QueryCache() = default;
|
||||
|
||||
void QueryCache::InvalidateRegion(CacheAddr addr, std::size_t size) {
|
||||
const u64 addr_begin = static_cast<u64>(addr);
|
||||
const u64 addr_end = addr_begin + static_cast<u64>(size);
|
||||
const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
|
||||
const u64 cache_begin = query.GetCacheAddr();
|
||||
const u64 cache_end = cache_begin + query.GetSizeInBytes();
|
||||
return cache_begin < addr_end && addr_begin < cache_end;
|
||||
};
|
||||
|
||||
const u64 page_end = addr_end >> PAGE_SHIFT;
|
||||
for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
|
||||
const auto& it = cached_queries.find(page);
|
||||
if (it == std::end(cached_queries)) {
|
||||
continue;
|
||||
}
|
||||
auto& contents = it->second;
|
||||
for (auto& query : contents) {
|
||||
if (!in_range(query)) {
|
||||
continue;
|
||||
}
|
||||
rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.GetSizeInBytes(), -1);
|
||||
Flush(query);
|
||||
}
|
||||
contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
|
||||
std::end(contents));
|
||||
}
|
||||
}
|
||||
|
||||
void QueryCache::FlushRegion(CacheAddr addr, std::size_t size) {
|
||||
// We can handle flushes in the same way as invalidations.
|
||||
InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
void QueryCache::Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) {
|
||||
auto& memory_manager = system.GPU().MemoryManager();
|
||||
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
|
||||
|
||||
CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
|
||||
if (!query) {
|
||||
const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
|
||||
ASSERT_OR_EXECUTE(cpu_addr, return;);
|
||||
|
||||
query = &Register(CachedQuery(type, *cpu_addr, host_ptr));
|
||||
}
|
||||
|
||||
query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued()), timestamp);
|
||||
}
|
||||
|
||||
void QueryCache::UpdateCounters() {
|
||||
auto& samples_passed = GetStream(QueryType::SamplesPassed);
|
||||
|
||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||
samples_passed.Update(regs.samplecnt_enable, rasterizer.AnyCommandQueued());
|
||||
}
|
||||
|
||||
void QueryCache::ResetCounter(QueryType type) {
|
||||
GetStream(type).Reset(rasterizer.AnyCommandQueued());
|
||||
}
|
||||
|
||||
void QueryCache::Reserve(QueryType type, OGLQuery&& query) {
|
||||
reserved_queries[static_cast<std::size_t>(type)].push_back(std::move(query));
|
||||
}
|
||||
|
||||
std::shared_ptr<HostCounter> QueryCache::GetHostCounter(std::shared_ptr<HostCounter> dependency,
|
||||
QueryType type) {
|
||||
auto& reserve = reserved_queries[static_cast<std::size_t>(type)];
|
||||
OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
|
||||
auto& reserve = queries_reserve[static_cast<std::size_t>(type)];
|
||||
OGLQuery query;
|
||||
if (reserve.empty()) {
|
||||
query.Create(GetTarget(type));
|
||||
} else {
|
||||
return query;
|
||||
}
|
||||
|
||||
query = std::move(reserve.back());
|
||||
reserve.pop_back();
|
||||
}
|
||||
|
||||
return std::make_shared<HostCounter>(*this, std::move(dependency), type, std::move(query));
|
||||
return query;
|
||||
}
|
||||
|
||||
CachedQuery& QueryCache::Register(CachedQuery&& cached_query) {
|
||||
const u64 page = static_cast<u64>(cached_query.GetCacheAddr()) >> PAGE_SHIFT;
|
||||
auto& stored_ref = cached_queries[page].emplace_back(std::move(cached_query));
|
||||
rasterizer.UpdatePagesCachedCount(stored_ref.GetCpuAddr(), stored_ref.GetSizeInBytes(), 1);
|
||||
return stored_ref;
|
||||
void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) {
|
||||
queries_reserve[static_cast<std::size_t>(type)].push_back(std::move(query));
|
||||
}
|
||||
|
||||
CachedQuery* QueryCache::TryGet(CacheAddr addr) {
|
||||
const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
|
||||
const auto it = cached_queries.find(page);
|
||||
if (it == std::end(cached_queries)) {
|
||||
return nullptr;
|
||||
}
|
||||
auto& contents = it->second;
|
||||
const auto found =
|
||||
std::find_if(std::begin(contents), std::end(contents),
|
||||
[addr](const auto& query) { return query.GetCacheAddr() == addr; });
|
||||
return found != std::end(contents) ? &*found : nullptr;
|
||||
bool QueryCache::AnyCommandQueued() const noexcept {
|
||||
return gl_rasterizer.AnyCommandQueued();
|
||||
}
|
||||
|
||||
void QueryCache::Flush(CachedQuery& cached_query) {
|
||||
auto& stream = GetStream(cached_query.GetType());
|
||||
|
||||
// Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
|
||||
// To avoid this disable and re-enable keeping the dependency stream.
|
||||
// But we only have to do this if we have pending waits to be done.
|
||||
const bool slice_counter = stream.IsEnabled() && cached_query.WaitPending();
|
||||
const bool any_command_queued = rasterizer.AnyCommandQueued();
|
||||
if (slice_counter) {
|
||||
stream.Update(false, any_command_queued);
|
||||
}
|
||||
|
||||
cached_query.Flush();
|
||||
|
||||
if (slice_counter) {
|
||||
stream.Update(true, any_command_queued);
|
||||
}
|
||||
}
|
||||
|
||||
CounterStream& QueryCache::GetStream(QueryType type) {
|
||||
return streams[static_cast<std::size_t>(type)];
|
||||
}
|
||||
|
||||
HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type,
|
||||
OGLQuery&& query_)
|
||||
: cache{cache}, type{type}, dependency{std::move(dependency)}, query{std::move(query_)} {
|
||||
HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
|
||||
VideoCore::QueryType type)
|
||||
: VideoCommon::HostCounterBase<QueryCache, HostCounter>{std::move(dependency)}, cache{cache},
|
||||
type{type}, query{cache.AllocateQuery(type)} {
|
||||
glBeginQuery(GetTarget(type), query.handle);
|
||||
}
|
||||
|
||||
|
|
@ -232,81 +70,50 @@ HostCounter::~HostCounter() {
|
|||
cache.Reserve(type, std::move(query));
|
||||
}
|
||||
|
||||
u64 HostCounter::Query() {
|
||||
if (result) {
|
||||
return *result;
|
||||
void HostCounter::EndQuery() {
|
||||
if (!cache.AnyCommandQueued()) {
|
||||
// There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
|
||||
// having any of these causes a lock. glFlush is considered a command, so we can safely wait
|
||||
// for this. Insert to the OpenGL command stream a flush.
|
||||
glFlush();
|
||||
}
|
||||
|
||||
u64 value;
|
||||
glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value);
|
||||
if (dependency) {
|
||||
value += dependency->Query();
|
||||
}
|
||||
|
||||
return *(result = value);
|
||||
glEndQuery(GetTarget(type));
|
||||
}
|
||||
|
||||
bool HostCounter::WaitPending() const noexcept {
|
||||
return result.has_value();
|
||||
u64 HostCounter::BlockingQuery() const {
|
||||
GLint64 value;
|
||||
glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
|
||||
return static_cast<u64>(value);
|
||||
}
|
||||
|
||||
CachedQuery::CachedQuery(QueryType type, VAddr cpu_addr, u8* host_ptr)
|
||||
: type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
|
||||
CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr)
|
||||
: VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {}
|
||||
|
||||
CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
|
||||
: type{rhs.type}, cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr},
|
||||
counter{std::move(rhs.counter)}, timestamp{rhs.timestamp} {}
|
||||
|
||||
CachedQuery::~CachedQuery() = default;
|
||||
: VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
|
||||
|
||||
CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
|
||||
VideoCommon::CachedQueryBase<HostCounter>::operator=(std::move(rhs));
|
||||
cache = rhs.cache;
|
||||
type = rhs.type;
|
||||
cpu_addr = rhs.cpu_addr;
|
||||
host_ptr = rhs.host_ptr;
|
||||
counter = std::move(rhs.counter);
|
||||
timestamp = rhs.timestamp;
|
||||
return *this;
|
||||
}
|
||||
|
||||
void CachedQuery::Flush() {
|
||||
// When counter is nullptr it means that it's just been reseted. We are supposed to write a zero
|
||||
// in these cases.
|
||||
const u64 value = counter ? counter->Query() : 0;
|
||||
std::memcpy(host_ptr, &value, sizeof(u64));
|
||||
|
||||
if (timestamp) {
|
||||
std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
|
||||
// Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
|
||||
// To avoid this disable and re-enable keeping the dependency stream.
|
||||
// But we only have to do this if we have pending waits to be done.
|
||||
auto& stream = cache->Stream(type);
|
||||
const bool slice_counter = WaitPending() && stream.IsEnabled();
|
||||
if (slice_counter) {
|
||||
stream.Update(false);
|
||||
}
|
||||
}
|
||||
|
||||
void CachedQuery::SetCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
|
||||
if (counter) {
|
||||
// If there's an old counter set it means the query is being rewritten by the game.
|
||||
// To avoid losing the data forever, flush here.
|
||||
Flush();
|
||||
VideoCommon::CachedQueryBase<HostCounter>::Flush();
|
||||
|
||||
if (slice_counter) {
|
||||
stream.Update(true);
|
||||
}
|
||||
counter = std::move(counter_);
|
||||
timestamp = timestamp_;
|
||||
}
|
||||
|
||||
bool CachedQuery::WaitPending() const noexcept {
|
||||
return counter && counter->WaitPending();
|
||||
}
|
||||
|
||||
QueryType CachedQuery::GetType() const noexcept {
|
||||
return type;
|
||||
}
|
||||
|
||||
VAddr CachedQuery::GetCpuAddr() const noexcept {
|
||||
return cpu_addr;
|
||||
}
|
||||
|
||||
CacheAddr CachedQuery::GetCacheAddr() const noexcept {
|
||||
return ToCacheAddr(host_ptr);
|
||||
}
|
||||
|
||||
u64 CachedQuery::GetSizeInBytes() const noexcept {
|
||||
return timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@
|
|||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/query_cache.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
|
||||
|
|
@ -24,134 +25,57 @@ namespace OpenGL {
|
|||
|
||||
class CachedQuery;
|
||||
class HostCounter;
|
||||
class RasterizerOpenGL;
|
||||
class QueryCache;
|
||||
class RasterizerOpenGL;
|
||||
|
||||
class CounterStream final {
|
||||
public:
|
||||
explicit CounterStream(QueryCache& cache, VideoCore::QueryType type);
|
||||
~CounterStream();
|
||||
using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
|
||||
|
||||
void Update(bool enabled, bool any_command_queued);
|
||||
|
||||
void Reset(bool any_command_queued);
|
||||
|
||||
std::shared_ptr<HostCounter> GetCurrent(bool any_command_queued);
|
||||
|
||||
bool IsEnabled() const {
|
||||
return current != nullptr;
|
||||
}
|
||||
|
||||
private:
|
||||
void Enable();
|
||||
|
||||
void Disable(bool any_command_queued);
|
||||
|
||||
void EndQuery(bool any_command_queued);
|
||||
|
||||
QueryCache& cache;
|
||||
|
||||
std::shared_ptr<HostCounter> current;
|
||||
std::shared_ptr<HostCounter> last;
|
||||
VideoCore::QueryType type;
|
||||
GLenum target;
|
||||
};
|
||||
|
||||
class QueryCache final {
|
||||
class QueryCache final
|
||||
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
|
||||
public:
|
||||
explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
|
||||
~QueryCache();
|
||||
|
||||
void InvalidateRegion(CacheAddr addr, std::size_t size);
|
||||
|
||||
void FlushRegion(CacheAddr addr, std::size_t size);
|
||||
|
||||
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp);
|
||||
|
||||
void UpdateCounters();
|
||||
|
||||
void ResetCounter(VideoCore::QueryType type);
|
||||
OGLQuery AllocateQuery(VideoCore::QueryType type);
|
||||
|
||||
void Reserve(VideoCore::QueryType type, OGLQuery&& query);
|
||||
|
||||
std::shared_ptr<HostCounter> GetHostCounter(std::shared_ptr<HostCounter> dependency,
|
||||
VideoCore::QueryType type);
|
||||
bool AnyCommandQueued() const noexcept;
|
||||
|
||||
private:
|
||||
CachedQuery& Register(CachedQuery&& cached_query);
|
||||
|
||||
CachedQuery* TryGet(CacheAddr addr);
|
||||
|
||||
void Flush(CachedQuery& cached_query);
|
||||
|
||||
CounterStream& GetStream(VideoCore::QueryType type);
|
||||
|
||||
Core::System& system;
|
||||
RasterizerOpenGL& rasterizer;
|
||||
|
||||
std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
|
||||
|
||||
std::array<CounterStream, VideoCore::NumQueryTypes> streams;
|
||||
std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> reserved_queries;
|
||||
RasterizerOpenGL& gl_rasterizer;
|
||||
std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> queries_reserve;
|
||||
};
|
||||
|
||||
class HostCounter final {
|
||||
class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
|
||||
public:
|
||||
explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
|
||||
VideoCore::QueryType type, OGLQuery&& query);
|
||||
VideoCore::QueryType type);
|
||||
~HostCounter();
|
||||
|
||||
/// Returns the current value of the query.
|
||||
u64 Query();
|
||||
|
||||
/// Returns true when querying this counter will potentially wait for OpenGL.
|
||||
bool WaitPending() const noexcept;
|
||||
void EndQuery();
|
||||
|
||||
private:
|
||||
u64 BlockingQuery() const override;
|
||||
|
||||
QueryCache& cache;
|
||||
VideoCore::QueryType type;
|
||||
|
||||
std::shared_ptr<HostCounter> dependency; ///< Counter queued before this one.
|
||||
OGLQuery query; ///< OpenGL query.
|
||||
std::optional<u64> result; ///< Added values of the counter.
|
||||
OGLQuery query;
|
||||
};
|
||||
|
||||
class CachedQuery final {
|
||||
class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
|
||||
public:
|
||||
explicit CachedQuery(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr);
|
||||
CachedQuery(CachedQuery&&) noexcept;
|
||||
CachedQuery(const CachedQuery&) = delete;
|
||||
~CachedQuery();
|
||||
explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr,
|
||||
u8* host_ptr);
|
||||
CachedQuery(CachedQuery&& rhs) noexcept;
|
||||
|
||||
CachedQuery& operator=(CachedQuery&&) noexcept;
|
||||
CachedQuery& operator=(CachedQuery&& rhs) noexcept;
|
||||
|
||||
/// Writes the counter value to host memory.
|
||||
void Flush();
|
||||
|
||||
/// Updates the counter this cached query registered in guest memory will write when requested.
|
||||
void SetCounter(std::shared_ptr<HostCounter> counter, std::optional<u64> timestamp);
|
||||
|
||||
/// Returns true when a flushing this query will potentially wait for OpenGL.
|
||||
bool WaitPending() const noexcept;
|
||||
|
||||
/// Returns the query type.
|
||||
VideoCore::QueryType GetType() const noexcept;
|
||||
|
||||
/// Returns the guest CPU address for this query.
|
||||
VAddr GetCpuAddr() const noexcept;
|
||||
|
||||
/// Returns the cache address for this query.
|
||||
CacheAddr GetCacheAddr() const noexcept;
|
||||
|
||||
/// Returns the number of cached bytes.
|
||||
u64 GetSizeInBytes() const noexcept;
|
||||
void Flush() override;
|
||||
|
||||
private:
|
||||
VideoCore::QueryType type; ///< Abstracted query type (e.g. samples passed).
|
||||
VAddr cpu_addr; ///< Guest CPU address.
|
||||
u8* host_ptr; ///< Writable host pointer.
|
||||
std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
|
||||
std::optional<u64> timestamp; ///< Timestamp to flush to guest memory.
|
||||
QueryCache* cache;
|
||||
VideoCore::QueryType type;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
|||
Loading…
Reference in New Issue