Math3D: Allow construction from NEON vectors.

This makes it match the SSE path and makes it easier to keep things generic.
Note that this will impact the alignment of non-packed Vec2/Vec3.
This commit is contained in:
Unknown W. Brackets 2021-11-27 20:57:56 -08:00
parent 96a7554053
commit 2d8fdd8cf4
2 changed files with 27 additions and 6 deletions

View file

@ -64,6 +64,9 @@ public:
#if defined(_M_SSE) #if defined(_M_SSE)
__m128i ivec; __m128i ivec;
__m128 vec; __m128 vec;
#elif PPSSPP_ARCH(ARM64)
int32x4_t ivec;
float32x4_t vec;
#endif #endif
}; };
@ -76,6 +79,11 @@ public:
#if defined(_M_SSE) #if defined(_M_SSE)
Vec2(const __m128 &_vec) : vec(_vec) {} Vec2(const __m128 &_vec) : vec(_vec) {}
Vec2(const __m128i &_ivec) : ivec(_ivec) {} Vec2(const __m128i &_ivec) : ivec(_ivec) {}
#elif PPSSPP_ARCH(ARM64)
Vec2(const float32x4_t &_vec) : vec(_vec) {}
#if !defined(_MSC_VER)
Vec2(const int32x4_t &_ivec) : ivec(_ivec) {}
#endif
#endif #endif
template<typename T2> template<typename T2>
@ -204,6 +212,9 @@ public:
#if defined(_M_SSE) #if defined(_M_SSE)
__m128i ivec; __m128i ivec;
__m128 vec; __m128 vec;
#elif PPSSPP_ARCH(ARM64)
int32x4_t ivec;
float32x4_t vec;
#endif #endif
}; };
@ -220,6 +231,14 @@ public:
Vec3(const Vec3Packed<T> &_xyz) { Vec3(const Vec3Packed<T> &_xyz) {
vec = _mm_loadu_ps(_xyz.AsArray()); vec = _mm_loadu_ps(_xyz.AsArray());
} }
#elif PPSSPP_ARCH(ARM64)
Vec3(const float32x4_t &_vec) : vec(_vec) {}
#if !defined(_MSC_VER)
Vec3(const int32x4_t &_ivec) : ivec(_ivec) {}
#endif
Vec3(const Vec3Packed<T> &_xyz) {
vec = vld1q_f32(_xyz.AsArray());
}
#else #else
Vec3(const Vec3Packed<T> &_xyz) : x(_xyz.x), y(_xyz.y), z(_xyz.z) {} Vec3(const Vec3Packed<T> &_xyz) : x(_xyz.x), y(_xyz.y), z(_xyz.z) {}
#endif #endif
@ -552,6 +571,9 @@ public:
#if defined(_M_SSE) #if defined(_M_SSE)
__m128i ivec; __m128i ivec;
__m128 vec; __m128 vec;
#elif PPSSPP_ARCH(ARM64)
int32x4_t ivec;
float32x4_t vec;
#endif #endif
}; };
@ -566,6 +588,11 @@ public:
#if defined(_M_SSE) #if defined(_M_SSE)
Vec4(const __m128 &_vec) : vec(_vec) {} Vec4(const __m128 &_vec) : vec(_vec) {}
Vec4(const __m128i &_ivec) : ivec(_ivec) {} Vec4(const __m128i &_ivec) : ivec(_ivec) {}
#elif PPSSPP_ARCH(ARM64)
Vec4(const float32x4_t &_vec) : vec(_vec) {}
#if !defined(_MSC_VER)
Vec4(const int32x4_t &_ivec) : ivec(_ivec) {}
#endif
#endif #endif
template<typename T2> template<typename T2>

View file

@ -379,13 +379,7 @@ static inline u32 ApplyLogicOp(GELogicOp op, u32 old_color, u32 new_color) {
template <bool clearMode, GEBufferFormat fbFormat> template <bool clearMode, GEBufferFormat fbFormat>
void SOFTRAST_CALL DrawSinglePixel(int x, int y, int z, int fog, Vec4IntArg color_in, const PixelFuncID &pixelID) { void SOFTRAST_CALL DrawSinglePixel(int x, int y, int z, int fog, Vec4IntArg color_in, const PixelFuncID &pixelID) {
#if PPSSPP_ARCH(ARM64)
Vec4<int> prim_color;
vst1q_s32(prim_color.AsArray(), color_in);
prim_color = prim_color.Clamp(0, 255);
#else
Vec4<int> prim_color = Vec4<int>(color_in).Clamp(0, 255); Vec4<int> prim_color = Vec4<int>(color_in).Clamp(0, 255);
#endif
// Depth range test - applied in clear mode, if not through mode. // Depth range test - applied in clear mode, if not through mode.
if (pixelID.applyDepthRange) if (pixelID.applyDepthRange)
if (z < gstate.getDepthRangeMin() || z > gstate.getDepthRangeMax()) if (z < gstate.getDepthRangeMin() || z > gstate.getDepthRangeMax())