[spline/bezier]Pre-convert control-points for the accumulation of SIMD optimization, and avoid conversion in hot loops.

# Conflicts:
#	GPU/Common/SplineCommon.cpp
#	GPU/Common/SplineCommon.h
This commit is contained in:
xebra 2018-01-31 11:34:37 +09:00
parent 62ad5fe546
commit 36ed61d08d
2 changed files with 34 additions and 15 deletions

View file

@ -366,9 +366,9 @@ static void _SplinePatchLowQuality(u8 *&dest, u16 *indices, int &count, const Sp
}
static inline void AccumulateWeighted(Vec3f &out, const Vec3Packedf &in, const Vec4f &w) {
static inline void AccumulateWeighted(Vec3f &out, const Vec3f &in, const Vec4f &w) {
#ifdef _M_SSE
out.vec = _mm_add_ps(out.vec, _mm_mul_ps(_mm_loadu_ps(in.AsArray()), w.vec));
out.vec = _mm_add_ps(out.vec, _mm_mul_ps(in.vec, w.vec));
#else
out += in * w.x;
#endif
@ -382,6 +382,14 @@ static inline void AccumulateWeighted(Vec4f &out, const Vec4f &in, const Vec4f &
#endif
}
static inline void AccumulateWeighted(Vec2f &out, const Vec2f &in, const Vec4f &w) {
#ifdef _M_SSE
out.vec = _mm_add_ps(out.vec, _mm_mul_ps(in.vec, w.vec));
#else
out += in * w;
#endif
}
template <bool origNrm, bool origCol, bool origTc, bool useSSE4>
static void SplinePatchFullQuality(u8 *&dest, u16 *indices, int &count, const SplinePatchLocal &spatch, u32 origVertType, int quality, int maxVertices) {
// Full (mostly) correct tessellation of spline patches.
@ -445,6 +453,7 @@ static void SplinePatchFullQuality(u8 *&dest, u16 *indices, int &count, const Sp
Vec3f du, dv;
du.SetZero();
dv.SetZero();
Vec2f vert_tex;
if (origNrm) {
vert_nrm.SetZero();
}
@ -454,8 +463,7 @@ static void SplinePatchFullQuality(u8 *&dest, u16 *indices, int &count, const Sp
memcpy(vert->color, spatch.points[0]->color, 4);
}
if (origTc) {
vert->uv[0] = 0.0f;
vert->uv[1] = 0.0f;
vert_tex.SetZero();
} else {
vert->uv[0] = tu_width * ((float)tile_u * one_over_patch_div_s);
vert->uv[1] = tv_height * ((float)tile_v * one_over_patch_div_t);
@ -503,19 +511,16 @@ static void SplinePatchFullQuality(u8 *&dest, u16 *indices, int &count, const Sp
OutputDebugStringA(temp);
Crash();
}*/
const SimpleVertex *a = spatch.points[idx];
AccumulateWeighted(vert_pos, a->pos, fv);
AccumulateWeighted(vert_pos, spatch.pos[idx], fv);
if (origTc) {
vert->uv[0] += a->uv[0] * f;
vert->uv[1] += a->uv[1] * f;
AccumulateWeighted(vert_tex, spatch.tex[idx], fv);
}
if (origCol) {
Vec4f a_color = Vec4f::FromRGBA(a->color_32);
AccumulateWeighted(vert_color, a_color, fv);
AccumulateWeighted(vert_color, spatch.col[idx], fv);
}
if (origNrm) {
AccumulateWeighted(du, a->pos, Vec4f::AssignToAll(u_derivs[ii] * v_weights[jj]));
AccumulateWeighted(dv, a->pos, Vec4f::AssignToAll(u_weights[ii] * v_derivs[jj]));
AccumulateWeighted(du, spatch.pos[idx], Vec4f::AssignToAll(u_derivs[ii] * v_weights[jj]));
AccumulateWeighted(dv, spatch.pos[idx], Vec4f::AssignToAll(u_weights[ii] * v_derivs[jj]));
}
}
}
@ -537,6 +542,9 @@ static void SplinePatchFullQuality(u8 *&dest, u16 *indices, int &count, const Sp
if (origCol) {
vert->color_32 = vert_color.ToRGBA();
}
if (origTc) {
vert_tex.Write(vert->uv);
}
}
}
@ -919,6 +927,14 @@ void DrawEngineCommon::SubmitSpline(const void *control_points, const void *indi
TessellateSplinePatchHardware(dest, quadIndices_, count, patch);
numPatches = (count_u - 3) * (count_v - 3);
} else {
patch.pos = (Vec3f *)managedBuf.Allocate(sizeof(Vec3f) * count_u * count_v);
patch.tex = (Vec2f *)managedBuf.Allocate(sizeof(Vec2f) * count_u * count_v);
patch.col = (Vec4f *)managedBuf.Allocate(sizeof(Vec4f) * count_u * count_v);
for (int idx = 0; idx < count_u * count_v; idx++) {
patch.pos[idx] = Vec3f(points[idx]->pos);
patch.tex[idx] = Vec2f(points[idx]->uv);
patch.col[idx] = Vec4f::FromRGBA(points[idx]->color_32);
}
int maxVertexCount = SPLINE_BUFFER_SIZE / vertexSize;
TessellateSplinePatch(dest, quadIndices_, count, patch, origVertType, maxVertexCount);
}