Improve SSE usage in software transform.

The software transform's SSE usage is actually already pretty decent (unlike
the softgpu's), but there were a few places where it could use a bit of help.
This speeds things up when hardware transform is off, or in areas that need
to use software transform.
This commit is contained in:
Unknown W. Brackets 2014-03-17 23:05:48 -07:00
parent 416df17088
commit 678237aa6c
3 changed files with 22 additions and 32 deletions

View file

@ -318,8 +318,8 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
reader.Goto(index);
float v[3] = {0, 0, 0};
float c0[4] = {1, 1, 1, 1};
float c1[4] = {0, 0, 0, 0};
Vec4f c0 = Vec4f(1, 1, 1, 1);
Vec4f c1 = Vec4f(0, 0, 0, 0);
float uv[3] = {0, 0, 1};
float fogCoef = 1.0f;
@ -327,15 +327,10 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
// Do not touch the coordinates or the colors. No lighting.
reader.ReadPos(v);
if (reader.hasColor0()) {
reader.ReadColor0(c0);
for (int j = 0; j < 4; j++) {
c1[j] = 0.0f;
}
reader.ReadColor0(&c0.x);
// c1 is already 0.
} else {
c0[0] = gstate.getMaterialAmbientR() / 255.f;
c0[1] = gstate.getMaterialAmbientG() / 255.f;
c0[2] = gstate.getMaterialAmbientB() / 255.f;
c0[3] = gstate.getMaterialAmbientA() / 255.f;
c0 = Vec4f::FromRGBA(gstate.getMaterialAmbientRGBA());
}
if (reader.hasUV()) {
@ -389,18 +384,15 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
}
// Perform lighting here if enabled. don't need to check through, it's checked above.
float unlitColor[4] = {1, 1, 1, 1};
Vec4f unlitColor = Vec4f(1, 1, 1, 1);
if (reader.hasColor0()) {
reader.ReadColor0(unlitColor);
reader.ReadColor0(&unlitColor.x);
} else {
unlitColor[0] = gstate.getMaterialAmbientR() / 255.f;
unlitColor[1] = gstate.getMaterialAmbientG() / 255.f;
unlitColor[2] = gstate.getMaterialAmbientB() / 255.f;
unlitColor[3] = gstate.getMaterialAmbientA() / 255.f;
unlitColor = Vec4f::FromRGBA(gstate.getMaterialAmbientRGBA());
}
float litColor0[4];
float litColor1[4];
lighter.Light(litColor0, litColor1, unlitColor, out, normal);
lighter.Light(litColor0, litColor1, unlitColor.AsArray(), out, normal);
if (gstate.isLightingEnabled()) {
// Don't ignore gstate.lmode - we should send two colors in that case
@ -424,15 +416,10 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
c0[j] = unlitColor[j];
}
} else {
c0[0] = gstate.getMaterialAmbientR() / 255.f;
c0[1] = gstate.getMaterialAmbientG() / 255.f;
c0[2] = gstate.getMaterialAmbientB() / 255.f;
c0[3] = gstate.getMaterialAmbientA() / 255.f;
c0 = Vec4f::FromRGBA(gstate.getMaterialAmbientRGBA());
}
if (lmode) {
for (int j = 0; j < 4; j++) {
c1[j] = 0.0f;
}
// c1 is already 0.
}
}
@ -528,12 +515,8 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
if (gstate_c.flipTexture) {
transformed[index].v = 1.0f - transformed[index].v;
}
for (int i = 0; i < 4; i++) {
transformed[index].color0[i] = c0[i] * 255.0f;
}
for (int i = 0; i < 3; i++) {
transformed[index].color1[i] = c1[i] * 255.0f;
}
transformed[index].color0_32 = c0.ToRGBA();
transformed[index].color1_32 = c1.ToRGBA();
}
// Here's the best opportunity to try to detect rectangles used to clear the screen, and