Pica: Cleanup clipping code and change screenspace z to range from -1..0.
The change in depth range seems to reflect better to what applications are expecting, and makes for cleaner code overall (hence is more likely to reflect hardware behavior).pull/1/head
parent
70a764d992
commit
365236fa4c
|
|
@ -15,30 +15,18 @@ namespace Clipper {
|
||||||
|
|
||||||
struct ClippingEdge {
|
struct ClippingEdge {
|
||||||
public:
|
public:
|
||||||
enum Type {
|
ClippingEdge(Math::Vec4<float24> coeffs,
|
||||||
POS_X = 0,
|
Math::Vec4<float24> bias = Math::Vec4<float24>(float24::FromFloat32(0),
|
||||||
NEG_X = 1,
|
float24::FromFloat32(0),
|
||||||
POS_Y = 2,
|
float24::FromFloat32(0),
|
||||||
NEG_Y = 3,
|
float24::FromFloat32(0)))
|
||||||
POS_Z = 4,
|
: coeffs(coeffs),
|
||||||
NEG_Z = 5,
|
bias(bias)
|
||||||
};
|
{
|
||||||
|
}
|
||||||
ClippingEdge(Type type, float24 position) : type(type), pos(position) {}
|
|
||||||
|
|
||||||
bool IsInside(const OutputVertex& vertex) const {
|
bool IsInside(const OutputVertex& vertex) const {
|
||||||
switch (type) {
|
return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0);
|
||||||
case POS_X: return vertex.pos.x <= pos * vertex.pos.w;
|
|
||||||
case NEG_X: return vertex.pos.x >= pos * vertex.pos.w;
|
|
||||||
case POS_Y: return vertex.pos.y <= pos * vertex.pos.w;
|
|
||||||
case NEG_Y: return vertex.pos.y >= pos * vertex.pos.w;
|
|
||||||
|
|
||||||
// TODO: Check z compares ... should be 0..1 instead?
|
|
||||||
case POS_Z: return vertex.pos.z <= pos * vertex.pos.w;
|
|
||||||
|
|
||||||
default:
|
|
||||||
case NEG_Z: return vertex.pos.z >= pos * vertex.pos.w;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsOutSide(const OutputVertex& vertex) const {
|
bool IsOutSide(const OutputVertex& vertex) const {
|
||||||
|
|
@ -46,31 +34,17 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const {
|
OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const {
|
||||||
auto dotpr = [this](const OutputVertex& vtx) {
|
float24 dp = Math::Dot(v0.pos + bias, coeffs);
|
||||||
switch (type) {
|
float24 dp_prev = Math::Dot(v1.pos + bias, coeffs);
|
||||||
case POS_X: return vtx.pos.x - vtx.pos.w;
|
|
||||||
case NEG_X: return -vtx.pos.x - vtx.pos.w;
|
|
||||||
case POS_Y: return vtx.pos.y - vtx.pos.w;
|
|
||||||
case NEG_Y: return -vtx.pos.y - vtx.pos.w;
|
|
||||||
|
|
||||||
// TODO: Verify z clipping
|
|
||||||
case POS_Z: return vtx.pos.z - vtx.pos.w;
|
|
||||||
|
|
||||||
default:
|
|
||||||
case NEG_Z: return -vtx.pos.w;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
float24 dp = dotpr(v0);
|
|
||||||
float24 dp_prev = dotpr(v1);
|
|
||||||
float24 factor = dp_prev / (dp_prev - dp);
|
float24 factor = dp_prev / (dp_prev - dp);
|
||||||
|
|
||||||
return OutputVertex::Lerp(factor, v0, v1);
|
return OutputVertex::Lerp(factor, v0, v1);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Type type;
|
|
||||||
float24 pos;
|
float24 pos;
|
||||||
|
Math::Vec4<float24> coeffs;
|
||||||
|
Math::Vec4<float24> bias;
|
||||||
};
|
};
|
||||||
|
|
||||||
static void InitScreenCoordinates(OutputVertex& vtx)
|
static void InitScreenCoordinates(OutputVertex& vtx)
|
||||||
|
|
@ -98,10 +72,9 @@ static void InitScreenCoordinates(OutputVertex& vtx)
|
||||||
vtx.tc2 *= inv_w;
|
vtx.tc2 *= inv_w;
|
||||||
vtx.pos.w = inv_w;
|
vtx.pos.w = inv_w;
|
||||||
|
|
||||||
// TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not
|
|
||||||
vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x;
|
vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x;
|
||||||
vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y;
|
vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y;
|
||||||
vtx.screenpos[2] = viewport.offset_z - vtx.pos.z * inv_w * viewport.zscale;
|
vtx.screenpos[2] = viewport.offset_z + vtx.pos.z * inv_w * viewport.zscale;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {
|
void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {
|
||||||
|
|
@ -117,14 +90,29 @@ void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {
|
||||||
auto* output_list = &buffer_a;
|
auto* output_list = &buffer_a;
|
||||||
auto* input_list = &buffer_b;
|
auto* input_list = &buffer_b;
|
||||||
|
|
||||||
|
// NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value.
|
||||||
|
// TODO: Not sure if this is a valid approach. Also should probably instead use the smallest
|
||||||
|
// epsilon possible within float24 accuracy.
|
||||||
|
static const float24 EPSILON = float24::FromFloat32(0.00001);
|
||||||
|
static const float24 f0 = float24::FromFloat32(0.0);
|
||||||
|
static const float24 f1 = float24::FromFloat32(1.0);
|
||||||
|
static const std::array<ClippingEdge, 7> clipping_edges = {{
|
||||||
|
{ Math::MakeVec( f1, f0, f0, -f1) }, // x = +w
|
||||||
|
{ Math::MakeVec(-f1, f0, f0, -f1) }, // x = -w
|
||||||
|
{ Math::MakeVec( f0, f1, f0, -f1) }, // y = +w
|
||||||
|
{ Math::MakeVec( f0, -f1, f0, -f1) }, // y = -w
|
||||||
|
{ Math::MakeVec( f0, f0, f1, f0) }, // z = 0
|
||||||
|
{ Math::MakeVec( f0, f0, -f1, -f1) }, // z = -w
|
||||||
|
{ Math::MakeVec( f0, f0, f0, -f1), Math::Vec4<float24>(f0, f0, f0, EPSILON) }, // w = EPSILON
|
||||||
|
}};
|
||||||
|
|
||||||
|
// TODO: If one vertex lies outside one of the depth clipping planes, some platforms (e.g. Wii)
|
||||||
|
// drop the whole primitive instead of clipping the primitive properly. We should test if
|
||||||
|
// this happens on the 3DS, too.
|
||||||
|
|
||||||
// Simple implementation of the Sutherland-Hodgman clipping algorithm.
|
// Simple implementation of the Sutherland-Hodgman clipping algorithm.
|
||||||
// TODO: Make this less inefficient (currently lots of useless buffering overhead happens here)
|
// TODO: Make this less inefficient (currently lots of useless buffering overhead happens here)
|
||||||
for (auto edge : { ClippingEdge(ClippingEdge::POS_X, float24::FromFloat32(+1.0)),
|
for (auto edge : clipping_edges) {
|
||||||
ClippingEdge(ClippingEdge::NEG_X, float24::FromFloat32(-1.0)),
|
|
||||||
ClippingEdge(ClippingEdge::POS_Y, float24::FromFloat32(+1.0)),
|
|
||||||
ClippingEdge(ClippingEdge::NEG_Y, float24::FromFloat32(-1.0)),
|
|
||||||
ClippingEdge(ClippingEdge::POS_Z, float24::FromFloat32(+1.0)),
|
|
||||||
ClippingEdge(ClippingEdge::NEG_Z, float24::FromFloat32(-1.0)) }) {
|
|
||||||
|
|
||||||
std::swap(input_list, output_list);
|
std::swap(input_list, output_list);
|
||||||
output_list->clear();
|
output_list->clear();
|
||||||
|
|
|
||||||
|
|
@ -106,16 +106,17 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
|
||||||
ScreenToRasterizerCoordinates(v1.screenpos),
|
ScreenToRasterizerCoordinates(v1.screenpos),
|
||||||
ScreenToRasterizerCoordinates(v2.screenpos) };
|
ScreenToRasterizerCoordinates(v2.screenpos) };
|
||||||
|
|
||||||
if (registers.cull_mode == Regs::CullMode::KeepClockWise) {
|
if (registers.cull_mode == Regs::CullMode::KeepCounterClockWise) {
|
||||||
// Reverse vertex order and use the CCW code path.
|
// Reverse vertex order and use the CW code path.
|
||||||
std::swap(vtxpos[1], vtxpos[2]);
|
std::swap(vtxpos[1], vtxpos[2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (registers.cull_mode != Regs::CullMode::KeepAll) {
|
if (registers.cull_mode != Regs::CullMode::KeepAll) {
|
||||||
// Cull away triangles which are wound clockwise.
|
// Cull away triangles which are wound counter-clockwise.
|
||||||
// TODO: A check for degenerate triangles ("== 0") should be considered for CullMode::KeepAll
|
|
||||||
if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0)
|
if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0)
|
||||||
return;
|
return;
|
||||||
|
} else {
|
||||||
|
// TODO: Consider A check for degenerate triangles ("SignedArea == 0")
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Proper scissor rect test!
|
// TODO: Proper scissor rect test!
|
||||||
|
|
@ -475,7 +476,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
|
||||||
|
|
||||||
// TODO: Does depth indeed only get written even if depth testing is enabled?
|
// TODO: Does depth indeed only get written even if depth testing is enabled?
|
||||||
if (registers.output_merger.depth_test_enable) {
|
if (registers.output_merger.depth_test_enable) {
|
||||||
u16 z = (u16)(-(v0.screenpos[2].ToFloat32() * w0 +
|
u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 +
|
||||||
v1.screenpos[2].ToFloat32() * w1 +
|
v1.screenpos[2].ToFloat32() * w1 +
|
||||||
v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
|
v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
|
||||||
u16 ref_z = GetDepth(x >> 4, y >> 4);
|
u16 ref_z = GetDepth(x >> 4, y >> 4);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue