2014-09-26 09:06:55 -07:00
// Copyright (c) 2013- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
2016-09-18 19:40:44 -07:00
# include <algorithm>
2021-05-01 07:15:04 -07:00
# include "Common/Data/Convert/ColorConv.h"
2020-10-04 10:04:01 +02:00
# include "Common/Profiler/Profiler.h"
2016-09-18 19:40:44 -07:00
# include "Core/Config.h"
2014-09-26 09:06:55 -07:00
# include "GPU/Common/DrawEngineCommon.h"
# include "GPU/Common/SplineCommon.h"
# include "GPU/Common/VertexDecoderCommon.h"
# include "GPU/ge_constants.h"
# include "GPU/GPUState.h"
2015-04-08 21:35:00 +02:00
// Presumably the upper limit on generated quad indices.
// NOTE(review): not referenced in the visible part of this file - confirm it is still used.
# define QUAD_INDICES_MAX 65536
2017-06-02 12:25:47 +02:00
// Size in bytes of a buffer holding VERTEX_BUFFER_MAX TransformedVertex entries.
// The constructor and destructor use it (and three times it) to size the page allocations.
enum {
TRANSFORMED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * sizeof ( TransformedVertex )
} ;
2017-08-20 19:10:52 +02:00
// Constructs the decoder map with the argument 16, creates the vertex decoder JIT cache and
// page-allocates the two read/write buffers used for transformed vertices.
DrawEngineCommon::DrawEngineCommon() : decoderMap_(16) {
	const auto readWrite = MEM_PROT_READ | MEM_PROT_WRITE;

	decJitCache_ = new VertexDecoderJitCache();

	transformed = (TransformedVertex *)AllocateMemoryPages(TRANSFORMED_VERTEX_BUFFER_SIZE, readWrite);
	// The expanded buffer is three times as large as the plain one.
	transformedExpanded = (TransformedVertex *)AllocateMemoryPages(3 * TRANSFORMED_VERTEX_BUFFER_SIZE, readWrite);
}
// Releases everything the constructor and GetVertexDecoder() allocated: both transformed-vertex
// page allocations, the JIT cache, every cached vertex decoder, and the spline/bezier weights.
DrawEngineCommon::~DrawEngineCommon() {
	// Sizes must match the ones passed to AllocateMemoryPages() in the constructor.
	FreeMemoryPages(transformed, TRANSFORMED_VERTEX_BUFFER_SIZE);
	FreeMemoryPages(transformedExpanded, 3 * TRANSFORMED_VERTEX_BUFFER_SIZE);

	delete decJitCache_;

	// The map stores raw owning pointers, so each decoder is deleted by hand.
	decoderMap_.Iterate([&](const uint32_t /*vtype*/, VertexDecoder *cachedDecoder) {
		delete cachedDecoder;
	});

	ClearSplineBezierWeights();
}
2021-05-08 09:10:23 -07:00
void DrawEngineCommon : : Init ( ) {
useHWTransform_ = g_Config . bHardwareTransform ;
useHWTessellation_ = UpdateUseHWTessellation ( g_Config . bHardwareTessellation ) ;
}
2015-04-08 21:35:00 +02:00
// Returns the vertex decoder cached for vtype, creating, configuring and caching a new one
// on first use. The returned pointer stays owned by decoderMap_.
VertexDecoder *DrawEngineCommon::GetVertexDecoder(u32 vtype) {
	VertexDecoder *decoder = decoderMap_.Get(vtype);
	if (!decoder) {
		// Cache miss: build a decoder for this vertex type and remember it.
		decoder = new VertexDecoder();
		decoder->SetVertexType(vtype, decOptions_, decJitCache_);
		decoderMap_.Insert(vtype, decoder);
	}
	return decoder;
}
2014-09-26 09:06:55 -07:00
2017-11-19 12:38:52 +01:00
int DrawEngineCommon : : ComputeNumVertsToDecode ( ) const {
int vertsToDecode = 0 ;
if ( drawCalls [ 0 ] . indexType = = GE_VTYPE_IDX_NONE > > GE_VTYPE_IDX_SHIFT ) {
for ( int i = 0 ; i < numDrawCalls ; i + + ) {
const DeferredDrawCall & dc = drawCalls [ i ] ;
vertsToDecode + = dc . vertexCount ;
}
} else {
// TODO: Share this computation with DecodeVertsStep?
for ( int i = 0 ; i < numDrawCalls ; i + + ) {
const DeferredDrawCall & dc = drawCalls [ i ] ;
int lastMatch = i ;
const int total = numDrawCalls ;
int indexLowerBound = dc . indexLowerBound ;
int indexUpperBound = dc . indexUpperBound ;
for ( int j = i + 1 ; j < total ; + + j ) {
if ( drawCalls [ j ] . verts ! = dc . verts )
break ;
indexLowerBound = std : : min ( indexLowerBound , ( int ) drawCalls [ j ] . indexLowerBound ) ;
indexUpperBound = std : : max ( indexUpperBound , ( int ) drawCalls [ j ] . indexUpperBound ) ;
lastMatch = j ;
}
vertsToDecode + = indexUpperBound - indexLowerBound + 1 ;
i = lastMatch ;
}
}
return vertsToDecode ;
}
void DrawEngineCommon : : DecodeVerts ( u8 * dest ) {
const UVScale origUV = gstate_c . uv ;
for ( ; decodeCounter_ < numDrawCalls ; decodeCounter_ + + ) {
2018-03-05 12:24:02 +01:00
gstate_c . uv = drawCalls [ decodeCounter_ ] . uvScale ;
2017-11-19 12:38:52 +01:00
DecodeVertsStep ( dest , decodeCounter_ , decodedVerts_ ) ; // NOTE! DecodeVertsStep can modify decodeCounter_!
}
gstate_c . uv = origUV ;
// Sanity check
if ( indexGen . Prim ( ) < 0 ) {
ERROR_LOG_REPORT ( G3D , " DecodeVerts: Failed to deduce prim: %i " , indexGen . Prim ( ) ) ;
// Force to points (0)
2020-09-24 09:24:03 +02:00
indexGen . AddPrim ( GE_PRIM_POINTS , 0 , true ) ;
2017-11-19 12:38:52 +01:00
}
}
2015-10-21 23:06:32 +02:00
std : : vector < std : : string > DrawEngineCommon : : DebugGetVertexLoaderIDs ( ) {
std : : vector < std : : string > ids ;
2017-08-20 19:10:52 +02:00
decoderMap_ . Iterate ( [ & ] ( const uint32_t vtype , VertexDecoder * decoder ) {
2015-10-21 23:06:32 +02:00
std : : string id ;
2017-08-20 19:10:52 +02:00
id . resize ( sizeof ( vtype ) ) ;
memcpy ( & id [ 0 ] , & vtype , sizeof ( vtype ) ) ;
2015-10-21 23:06:32 +02:00
ids . push_back ( id ) ;
2017-08-20 19:10:52 +02:00
} ) ;
2015-10-21 23:06:32 +02:00
return ids ;
}
std : : string DrawEngineCommon : : DebugGetVertexLoaderString ( std : : string id , DebugShaderStringType stringType ) {
u32 mapId ;
memcpy ( & mapId , & id [ 0 ] , sizeof ( mapId ) ) ;
2017-08-20 19:10:52 +02:00
VertexDecoder * dec = decoderMap_ . Get ( mapId ) ;
return dec ? dec - > GetString ( stringType ) : " N/A " ;
2015-10-21 23:06:32 +02:00
}
2014-09-26 09:06:55 -07:00
// A plane stored as the four coefficients of x*X + y*Y + z*Z + w.
struct Plane {
	float x, y, z, w;

	// Assigns all four coefficients.
	void Set(float _x, float _y, float _z, float _w) {
		x = _x;
		y = _y;
		z = _z;
		w = _w;
	}

	// Evaluates the plane equation at the 3D point f. TestBoundingBox() treats a negative
	// result as "outside".
	float Test(float f[3]) const {
		return x * f[0] + y * f[1] + z * f[2] + w;
	}
};
// Derives six frustum planes from a 4x4 matrix by adding or subtracting one of the elements
// at indices {0,4,8,12}, {1,5,9,13} or {2,6,10,14} to/from the elements at {3,7,11,15}.
// Output order: right, left, bottom, top, near, far.
static void PlanesFromMatrix(float mtx[16], Plane planes[6]) {
	const float wx = mtx[3];
	const float wy = mtx[7];
	const float wz = mtx[11];
	const float ww = mtx[15];

	planes[0].Set(wx - mtx[0], wy - mtx[4], wz - mtx[8], ww - mtx[12]);   // Right
	planes[1].Set(wx + mtx[0], wy + mtx[4], wz + mtx[8], ww + mtx[12]);   // Left
	planes[2].Set(wx + mtx[1], wy + mtx[5], wz + mtx[9], ww + mtx[13]);   // Bottom
	planes[3].Set(wx - mtx[1], wy - mtx[5], wz - mtx[9], ww - mtx[13]);   // Top
	planes[4].Set(wx + mtx[2], wy + mtx[6], wz + mtx[10], ww + mtx[14]);  // Near
	planes[5].Set(wx - mtx[2], wy - mtx[6], wz - mtx[10], ww - mtx[14]);  // Far
}
// Applies the perspective divide and the current viewport scale/center to clip-space coords.
// X and Y of the result are additionally multiplied by 16; Z is returned unscaled.
static Vec3f ClipToScreen(const Vec4f &coords) {
	const float screenX = coords.x * gstate.getViewportXScale() / coords.w + gstate.getViewportXCenter();
	const float screenY = coords.y * gstate.getViewportYScale() / coords.w + gstate.getViewportYCenter();
	const float screenZ = coords.z * gstate.getViewportZScale() / coords.w + gstate.getViewportZCenter();

	// 16 = 0xFFFF / 4095.9375
	return Vec3f(screenX * 16, screenY * 16, screenZ);
}
// Converts screen coordinates (as produced by ClipToScreen) to drawing coordinates:
// subtracts the X/Y offset reported by gstate and divides by 16. Z passes through unchanged.
static Vec3f ScreenToDrawing(const Vec3f &coords) {
	const float kSixteenth = 1.0f / 16.0f;

	Vec3f drawing;
	drawing.z = coords.z;
	drawing.y = (coords.y - gstate.getOffsetY16()) * kSixteenth;
	drawing.x = (coords.x - gstate.getOffsetX16()) * kSixteenth;
	return drawing;
}
2017-03-17 10:27:49 +01:00
void DrawEngineCommon : : Resized ( ) {
decJitCache_ - > Clear ( ) ;
lastVType_ = - 1 ;
dec_ = nullptr ;
2017-08-20 19:10:52 +02:00
decoderMap_ . Iterate ( [ & ] ( const uint32_t vtype , VertexDecoder * decoder ) {
delete decoder ;
} ) ;
decoderMap_ . Clear ( ) ;
2017-03-17 10:27:49 +01:00
ClearTrackedVertexArrays ( ) ;
2020-04-04 11:21:22 -07:00
useHWTransform_ = g_Config . bHardwareTransform ;
2020-04-04 11:52:32 -07:00
useHWTessellation_ = UpdateUseHWTessellation ( g_Config . bHardwareTessellation ) ;
2017-03-17 10:27:49 +01:00
}
2017-11-17 13:36:48 +01:00
// Convenience overload: picks the vertex decoder for vertType (combined with the current
// UV generation mode in the top byte) and forwards to the decoder-taking overload.
// If vertexSize is non-null it receives the decoder's VertexSize().
u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType, int *vertexSize) {
	const u32 decoderKey = (vertType & 0xFFFFFF) | (gstate.getUVGenMode() << 24);
	VertexDecoder *decoder = GetVertexDecoder(decoderKey);

	if (vertexSize != nullptr) {
		*vertexSize = decoder->VertexSize();
	}

	// Note: the original vertType (without the UV gen bits) is what gets forwarded.
	return DrawEngineCommon::NormalizeVertices(outPtr, bufPtr, inPtr, decoder, lowerBound, upperBound, vertType);
}
2017-06-02 12:03:46 +02:00
// This code has plenty of potential for optimization.
2014-09-26 09:06:55 -07:00
//
// It does the simplest and safest test possible: if all points of a bbox are outside any single one of
// our clipping planes, we reject the box. Tighter bounds would be desirable but would take more calculations.
2017-11-17 13:36:48 +01:00
bool DrawEngineCommon : : TestBoundingBox ( void * control_points , int vertexCount , u32 vertType , int * bytesRead ) {
2014-09-26 09:06:55 -07:00
SimpleVertex * corners = ( SimpleVertex * ) ( decoded + 65536 * 12 ) ;
float * verts = ( float * ) ( decoded + 65536 * 18 ) ;
// Try to skip NormalizeVertices if it's pure positions. No need to bother with a vertex decoder
// and a large vertex format.
if ( ( vertType & 0xFFFFFF ) = = GE_VTYPE_POS_FLOAT ) {
verts = ( float * ) control_points ;
2017-11-17 13:36:48 +01:00
* bytesRead = 3 * sizeof ( float ) * vertexCount ;
2014-09-26 09:06:55 -07:00
} else if ( ( vertType & 0xFFFFFF ) = = GE_VTYPE_POS_8BIT ) {
const s8 * vtx = ( const s8 * ) control_points ;
for ( int i = 0 ; i < vertexCount * 3 ; i + + ) {
verts [ i ] = vtx [ i ] * ( 1.0f / 128.0f ) ;
}
2017-11-17 13:36:48 +01:00
* bytesRead = 3 * sizeof ( s8 ) * vertexCount ;
2014-09-26 09:06:55 -07:00
} else if ( ( vertType & 0xFFFFFF ) = = GE_VTYPE_POS_16BIT ) {
const s16 * vtx = ( const s16 * ) control_points ;
for ( int i = 0 ; i < vertexCount * 3 ; i + + ) {
verts [ i ] = vtx [ i ] * ( 1.0f / 32768.0f ) ;
}
2017-11-17 13:36:48 +01:00
* bytesRead = 3 * sizeof ( s16 ) * vertexCount ;
2014-09-26 09:06:55 -07:00
} else {
// Simplify away bones and morph before proceeding
u8 * temp_buffer = decoded + 65536 * 24 ;
2017-11-17 13:36:48 +01:00
int vertexSize = 0 ;
NormalizeVertices ( ( u8 * ) corners , temp_buffer , ( u8 * ) control_points , 0 , vertexCount , vertType , & vertexSize ) ;
2014-09-26 09:06:55 -07:00
for ( int i = 0 ; i < vertexCount ; i + + ) {
verts [ i * 3 ] = corners [ i ] . pos . x ;
verts [ i * 3 + 1 ] = corners [ i ] . pos . y ;
verts [ i * 3 + 2 ] = corners [ i ] . pos . z ;
}
2017-11-17 13:36:48 +01:00
* bytesRead = vertexSize * vertexCount ;
2014-09-26 09:06:55 -07:00
}
Plane planes [ 6 ] ;
float world [ 16 ] ;
float view [ 16 ] ;
float worldview [ 16 ] ;
float worldviewproj [ 16 ] ;
ConvertMatrix4x3To4x4 ( world , gstate . worldMatrix ) ;
ConvertMatrix4x3To4x4 ( view , gstate . viewMatrix ) ;
Matrix4ByMatrix4 ( worldview , world , view ) ;
Matrix4ByMatrix4 ( worldviewproj , worldview , gstate . projMatrix ) ;
PlanesFromMatrix ( worldviewproj , planes ) ;
for ( int plane = 0 ; plane < 6 ; plane + + ) {
int inside = 0 ;
int out = 0 ;
for ( int i = 0 ; i < vertexCount ; i + + ) {
// Here we can test against the frustum planes!
float value = planes [ plane ] . Test ( verts + i * 3 ) ;
if ( value < 0 )
out + + ;
else
inside + + ;
}
if ( inside = = 0 ) {
// All out
return false ;
}
// Any out. For testing that the planes are in the right locations.
// if (out != 0) return false;
}
return true ;
}
// TODO: This probably is not the best interface.
bool DrawEngineCommon : : GetCurrentSimpleVertices ( int count , std : : vector < GPUDebugVertex > & vertices , std : : vector < u16 > & indices ) {
// This is always for the current vertices.
u16 indexLowerBound = 0 ;
u16 indexUpperBound = count - 1 ;
2015-02-27 20:45:21 -08:00
if ( ! Memory : : IsValidAddress ( gstate_c . vertexAddr ) )
return false ;
2014-09-26 09:06:55 -07:00
bool savedVertexFullAlpha = gstate_c . vertexFullAlpha ;
if ( ( gstate . vertType & GE_VTYPE_IDX_MASK ) ! = GE_VTYPE_IDX_NONE ) {
const u8 * inds = Memory : : GetPointer ( gstate_c . indexAddr ) ;
2021-02-18 22:25:24 -08:00
const u16_le * inds16 = ( const u16_le * ) inds ;
const u32_le * inds32 = ( const u32_le * ) inds ;
2014-09-26 09:06:55 -07:00
if ( inds ) {
GetIndexBounds ( inds , count , gstate . vertType , & indexLowerBound , & indexUpperBound ) ;
indices . resize ( count ) ;
switch ( gstate . vertType & GE_VTYPE_IDX_MASK ) {
2016-04-10 01:52:51 -07:00
case GE_VTYPE_IDX_8BIT :
for ( int i = 0 ; i < count ; + + i ) {
indices [ i ] = inds [ i ] ;
}
break ;
2014-09-26 09:06:55 -07:00
case GE_VTYPE_IDX_16BIT :
for ( int i = 0 ; i < count ; + + i ) {
indices [ i ] = inds16 [ i ] ;
}
break ;
2016-04-10 01:52:51 -07:00
case GE_VTYPE_IDX_32BIT :
WARN_LOG_REPORT_ONCE ( simpleIndexes32 , G3D , " SimpleVertices: Decoding 32-bit indexes " ) ;
2014-09-26 09:06:55 -07:00
for ( int i = 0 ; i < count ; + + i ) {
2016-04-10 01:52:51 -07:00
// These aren't documented and should be rare. Let's bounds check each one.
if ( inds32 [ i ] ! = ( u16 ) inds32 [ i ] ) {
ERROR_LOG_REPORT_ONCE ( simpleIndexes32Bounds , G3D , " SimpleVertices: Index outside 16-bit range " ) ;
}
indices [ i ] = ( u16 ) inds32 [ i ] ;
2014-09-26 09:06:55 -07:00
}
break ;
}
} else {
indices . clear ( ) ;
}
} else {
indices . clear ( ) ;
}
static std : : vector < u32 > temp_buffer ;
static std : : vector < SimpleVertex > simpleVertices ;
temp_buffer . resize ( std : : max ( ( int ) indexUpperBound , 8192 ) * 128 / sizeof ( u32 ) ) ;
simpleVertices . resize ( indexUpperBound + 1 ) ;
NormalizeVertices ( ( u8 * ) ( & simpleVertices [ 0 ] ) , ( u8 * ) ( & temp_buffer [ 0 ] ) , Memory : : GetPointer ( gstate_c . vertexAddr ) , indexLowerBound , indexUpperBound , gstate . vertType ) ;
float world [ 16 ] ;
float view [ 16 ] ;
float worldview [ 16 ] ;
float worldviewproj [ 16 ] ;
ConvertMatrix4x3To4x4 ( world , gstate . worldMatrix ) ;
ConvertMatrix4x3To4x4 ( view , gstate . viewMatrix ) ;
Matrix4ByMatrix4 ( worldview , world , view ) ;
Matrix4ByMatrix4 ( worldviewproj , worldview , gstate . projMatrix ) ;
vertices . resize ( indexUpperBound + 1 ) ;
2018-02-26 16:39:38 +01:00
uint32_t vertType = gstate . vertType ;
2014-09-26 09:06:55 -07:00
for ( int i = indexLowerBound ; i < = indexUpperBound ; + + i ) {
const SimpleVertex & vert = simpleVertices [ i ] ;
2018-02-26 16:39:38 +01:00
if ( ( vertType & GE_VTYPE_THROUGH ) ! = 0 ) {
if ( vertType & GE_VTYPE_TC_MASK ) {
2014-09-26 09:06:55 -07:00
vertices [ i ] . u = vert . uv [ 0 ] ;
vertices [ i ] . v = vert . uv [ 1 ] ;
} else {
vertices [ i ] . u = 0.0f ;
vertices [ i ] . v = 0.0f ;
}
vertices [ i ] . x = vert . pos . x ;
vertices [ i ] . y = vert . pos . y ;
vertices [ i ] . z = vert . pos . z ;
2018-02-26 16:39:38 +01:00
if ( vertType & GE_VTYPE_COL_MASK ) {
2014-09-26 09:06:55 -07:00
memcpy ( vertices [ i ] . c , vert . color , sizeof ( vertices [ i ] . c ) ) ;
} else {
memset ( vertices [ i ] . c , 0 , sizeof ( vertices [ i ] . c ) ) ;
}
2017-07-02 22:51:52 +02:00
vertices [ i ] . nx = 0 ; // No meaningful normals in through mode
vertices [ i ] . ny = 0 ;
vertices [ i ] . nz = 1.0f ;
2014-09-26 09:06:55 -07:00
} else {
float clipPos [ 4 ] ;
Vec3ByMatrix44 ( clipPos , vert . pos . AsArray ( ) , worldviewproj ) ;
Vec3f screenPos = ClipToScreen ( clipPos ) ;
Vec3f drawPos = ScreenToDrawing ( screenPos ) ;
2018-02-26 16:39:38 +01:00
if ( vertType & GE_VTYPE_TC_MASK ) {
2015-01-03 17:15:16 -08:00
vertices [ i ] . u = vert . uv [ 0 ] * ( float ) gstate . getTextureWidth ( 0 ) ;
vertices [ i ] . v = vert . uv [ 1 ] * ( float ) gstate . getTextureHeight ( 0 ) ;
2014-09-26 09:06:55 -07:00
} else {
vertices [ i ] . u = 0.0f ;
vertices [ i ] . v = 0.0f ;
}
2017-07-02 22:51:52 +02:00
// Should really have separate coordinates for before and after transform.
2014-09-26 09:06:55 -07:00
vertices [ i ] . x = drawPos . x ;
vertices [ i ] . y = drawPos . y ;
vertices [ i ] . z = drawPos . z ;
2018-02-26 16:39:38 +01:00
if ( vertType & GE_VTYPE_COL_MASK ) {
2014-09-26 09:06:55 -07:00
memcpy ( vertices [ i ] . c , vert . color , sizeof ( vertices [ i ] . c ) ) ;
} else {
memset ( vertices [ i ] . c , 0 , sizeof ( vertices [ i ] . c ) ) ;
}
2017-07-02 22:51:52 +02:00
vertices [ i ] . nx = vert . nrm . x ;
vertices [ i ] . ny = vert . nrm . y ;
vertices [ i ] . nz = vert . nrm . z ;
2014-09-26 09:06:55 -07:00
}
}
gstate_c . vertexFullAlpha = savedVertexFullAlpha ;
return true ;
}
// This normalizes a set of vertices in any format to SimpleVertex format, by processing away morphing AND skinning.
// The rest of the transform pipeline like lighting will go as normal, either hardware or software.
// The implementation is initially a bit inefficient but shouldn't be a big deal.
// An intermediate buffer of not-easy-to-predict size is stored at bufPtr.
// Decodes vertices [lowerBound, upperBound] (inclusive) from inPtr with dec and writes them as
// SimpleVertex entries.
//   outPtr: SimpleVertex array, indexed by the absolute vertex index (entry i for vertex i).
//   bufPtr: intermediate buffer receiving the decoder output, indexed relative to lowerBound.
// Returns the vertex type describing the SimpleVertex layout, keeping the index and through
// bits of vertType.
u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, VertexDecoder *dec, int lowerBound, int upperBound, u32 vertType) {
	// First, decode the vertices into a GPU compatible format. This step can be eliminated but will need a separate
	// implementation of the vertex decoder.
	dec->DecodeVerts(bufPtr, inPtr, lowerBound, upperBound);

	// OK, morphing eliminated but bones still remain to be taken care of.
	// Let's do a partial software transform where we only do skinning.

	VertexReader reader(bufPtr, dec->GetDecVtxFmt(), vertType);

	SimpleVertex *sverts = (SimpleVertex *)outPtr;

	// Used for vertices that carry no color of their own.
	const u8 defaultColor[4] = {
		(u8)gstate.getMaterialAmbientR(),
		(u8)gstate.getMaterialAmbientG(),
		(u8)gstate.getMaterialAmbientB(),
		(u8)gstate.getMaterialAmbientA(),
	};

	// Let's have two separate loops, one for non skinning and one for skinning.
	if (!g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE) {
		int numBoneWeights = vertTypeGetNumBoneWeights(vertType);
		for (int i = lowerBound; i <= upperBound; i++) {
			reader.Goto(i - lowerBound);
			SimpleVertex &sv = sverts[i];
			if (vertType & GE_VTYPE_TC_MASK) {
				reader.ReadUV(sv.uv);
			} else {
				// Match the non-skinning loop below: never leave uv holding stale data.
				sv.uv[0] = 0.0f;
				sv.uv[1] = 0.0f;
			}

			if (vertType & GE_VTYPE_COL_MASK) {
				reader.ReadColor0_8888(sv.color);
			} else {
				memcpy(sv.color, defaultColor, 4);
			}

			float nrm[3], pos[3];
			float bnrm[3], bpos[3];

			if (vertType & GE_VTYPE_NRM_MASK) {
				// Normals are generated during tessellation anyway, not sure if any need to supply
				reader.ReadNrm(nrm);
			} else {
				nrm[0] = 0;
				nrm[1] = 0;
				nrm[2] = 1.0f;
			}
			reader.ReadPos(pos);

			// Apply skinning transform directly
			float weights[8];
			reader.ReadWeights(weights);
			// Skinning: weighted sum of the position/normal transformed by each bone matrix.
			Vec3Packedf psum(0, 0, 0);
			Vec3Packedf nsum(0, 0, 0);
			for (int w = 0; w < numBoneWeights; w++) {
				// Bones with zero weight contribute nothing, so skip the matrix math.
				if (weights[w] != 0.0f) {
					Vec3ByMatrix43(bpos, pos, gstate.boneMatrix + w * 12);
					Vec3Packedf tpos(bpos);
					psum += tpos * weights[w];

					Norm3ByMatrix43(bnrm, nrm, gstate.boneMatrix + w * 12);
					Vec3Packedf tnorm(bnrm);
					nsum += tnorm * weights[w];
				}
			}
			sv.pos = psum;
			sv.nrm = nsum;
		}
	} else {
		for (int i = lowerBound; i <= upperBound; i++) {
			reader.Goto(i - lowerBound);
			SimpleVertex &sv = sverts[i];
			if (vertType & GE_VTYPE_TC_MASK) {
				reader.ReadUV(sv.uv);
			} else {
				sv.uv[0] = 0.0f;  // This will get filled in during tessellation
				sv.uv[1] = 0.0f;
			}
			if (vertType & GE_VTYPE_COL_MASK) {
				reader.ReadColor0_8888(sv.color);
			} else {
				memcpy(sv.color, defaultColor, 4);
			}
			if (vertType & GE_VTYPE_NRM_MASK) {
				// Normals are generated during tessellation anyway, not sure if any need to supply
				reader.ReadNrm((float *)&sv.nrm);
			} else {
				sv.nrm.x = 0.0f;
				sv.nrm.y = 0.0f;
				sv.nrm.z = 1.0f;
			}
			reader.ReadPos((float *)&sv.pos);
		}
	}

	// Okay, there we are! Return the new type (but keep the index bits)
	return GE_VTYPE_TC_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_NRM_FLOAT | GE_VTYPE_POS_FLOAT | (vertType & (GE_VTYPE_IDX_MASK | GE_VTYPE_THROUGH));
}
2017-02-17 12:21:27 +01:00
2020-11-08 13:14:23 +01:00
bool DrawEngineCommon : : ApplyFramebufferRead ( bool * fboTexNeedsBind ) {
2017-12-21 18:50:41 -08:00
if ( gstate_c . Supports ( GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH ) ) {
2020-11-08 13:14:23 +01:00
* fboTexNeedsBind = false ;
2017-02-17 12:21:27 +01:00
return true ;
}
static const int MAX_REASONABLE_BLITS_PER_FRAME = 24 ;
static int lastFrameBlit = - 1 ;
static int blitsThisFrame = 0 ;
if ( lastFrameBlit ! = gpuStats . numFlips ) {
if ( blitsThisFrame > MAX_REASONABLE_BLITS_PER_FRAME ) {
WARN_LOG_REPORT_ONCE ( blendingBlit , G3D , " Lots of blits needed for obscure blending: %d per frame, blend %d/%d/%d " , blitsThisFrame , gstate . getBlendFuncA ( ) , gstate . getBlendFuncB ( ) , gstate . getBlendEq ( ) ) ;
}
blitsThisFrame = 0 ;
lastFrameBlit = gpuStats . numFlips ;
}
+ + blitsThisFrame ;
if ( blitsThisFrame > MAX_REASONABLE_BLITS_PER_FRAME * 2 ) {
WARN_LOG_ONCE ( blendingBlit2 , G3D , " Skipping additional blits needed for obscure blending: %d per frame, blend %d/%d/%d " , blitsThisFrame , gstate . getBlendFuncA ( ) , gstate . getBlendFuncB ( ) , gstate . getBlendEq ( ) ) ;
return false ;
}
2020-11-08 13:14:23 +01:00
* fboTexNeedsBind = true ;
2017-02-17 12:21:27 +01:00
gstate_c . Dirty ( DIRTY_SHADERBLEND ) ;
return true ;
}
2017-06-02 12:03:46 +02:00
void DrawEngineCommon : : DecodeVertsStep ( u8 * dest , int & i , int & decodedVerts ) {
PROFILE_THIS_SCOPE ( " vertdec " ) ;
const DeferredDrawCall & dc = drawCalls [ i ] ;
indexGen . SetIndex ( decodedVerts ) ;
int indexLowerBound = dc . indexLowerBound ;
int indexUpperBound = dc . indexUpperBound ;
if ( dc . indexType = = GE_VTYPE_IDX_NONE > > GE_VTYPE_IDX_SHIFT ) {
// Decode the verts and apply morphing. Simple.
dec_ - > DecodeVerts ( dest + decodedVerts * ( int ) dec_ - > GetDecVtxFmt ( ) . stride ,
dc . verts , indexLowerBound , indexUpperBound ) ;
decodedVerts + = indexUpperBound - indexLowerBound + 1 ;
2018-06-03 22:29:50 +08:00
bool clockwise = true ;
2018-08-23 12:46:09 +08:00
if ( gstate . isCullEnabled ( ) & & gstate . getCullMode ( ) ! = dc . cullMode ) {
2018-06-03 22:29:50 +08:00
clockwise = false ;
}
indexGen . AddPrim ( dc . prim , dc . vertexCount , clockwise ) ;
2017-06-02 12:03:46 +02:00
} else {
// It's fairly common that games issue long sequences of PRIM calls, with differing
// inds pointer but the same base vertex pointer. We'd like to reuse vertices between
// these as much as possible, so we make sure here to combine as many as possible
// into one nice big drawcall, sharing data.
// 1. Look ahead to find the max index, only looking as "matching" drawcalls.
// Expand the lower and upper bounds as we go.
int lastMatch = i ;
const int total = numDrawCalls ;
for ( int j = i + 1 ; j < total ; + + j ) {
if ( drawCalls [ j ] . verts ! = dc . verts )
break ;
indexLowerBound = std : : min ( indexLowerBound , ( int ) drawCalls [ j ] . indexLowerBound ) ;
indexUpperBound = std : : max ( indexUpperBound , ( int ) drawCalls [ j ] . indexUpperBound ) ;
lastMatch = j ;
}
// 2. Loop through the drawcalls, translating indices as we go.
switch ( dc . indexType ) {
case GE_VTYPE_IDX_8BIT > > GE_VTYPE_IDX_SHIFT :
for ( int j = i ; j < = lastMatch ; j + + ) {
2018-04-27 21:49:43 +08:00
bool clockwise = true ;
2018-08-23 12:46:09 +08:00
if ( gstate . isCullEnabled ( ) & & gstate . getCullMode ( ) ! = drawCalls [ j ] . cullMode ) {
2018-04-27 21:49:43 +08:00
clockwise = false ;
2018-04-27 16:33:35 +08:00
}
2018-04-27 21:49:43 +08:00
indexGen . TranslatePrim ( drawCalls [ j ] . prim , drawCalls [ j ] . vertexCount , ( const u8 * ) drawCalls [ j ] . inds , indexLowerBound , clockwise ) ;
2017-06-02 12:03:46 +02:00
}
break ;
case GE_VTYPE_IDX_16BIT > > GE_VTYPE_IDX_SHIFT :
for ( int j = i ; j < = lastMatch ; j + + ) {
2018-04-27 21:49:43 +08:00
bool clockwise = true ;
2018-08-23 12:46:09 +08:00
if ( gstate . isCullEnabled ( ) & & gstate . getCullMode ( ) ! = drawCalls [ j ] . cullMode ) {
2018-04-27 21:49:43 +08:00
clockwise = false ;
2018-04-27 16:33:35 +08:00
}
2018-04-27 21:49:43 +08:00
indexGen . TranslatePrim ( drawCalls [ j ] . prim , drawCalls [ j ] . vertexCount , ( const u16_le * ) drawCalls [ j ] . inds , indexLowerBound , clockwise ) ;
2017-06-02 12:03:46 +02:00
}
break ;
case GE_VTYPE_IDX_32BIT > > GE_VTYPE_IDX_SHIFT :
for ( int j = i ; j < = lastMatch ; j + + ) {
2018-04-27 21:49:43 +08:00
bool clockwise = true ;
2018-08-23 12:46:09 +08:00
if ( gstate . isCullEnabled ( ) & & gstate . getCullMode ( ) ! = drawCalls [ j ] . cullMode ) {
2018-04-27 21:49:43 +08:00
clockwise = false ;
2018-04-27 16:33:35 +08:00
}
2018-04-27 21:49:43 +08:00
indexGen . TranslatePrim ( drawCalls [ j ] . prim , drawCalls [ j ] . vertexCount , ( const u32_le * ) drawCalls [ j ] . inds , indexLowerBound , clockwise ) ;
2017-06-02 12:03:46 +02:00
}
break ;
}
const int vertexCount = indexUpperBound - indexLowerBound + 1 ;
// This check is a workaround for Pangya Fantasy Golf, which sends bogus index data when switching items in "My Room" sometimes.
if ( decodedVerts + vertexCount > VERTEX_BUFFER_MAX ) {
return ;
}
// 3. Decode that range of vertex data.
dec_ - > DecodeVerts ( dest + decodedVerts * ( int ) dec_ - > GetDecVtxFmt ( ) . stride ,
dc . verts , indexLowerBound , indexUpperBound ) ;
decodedVerts + = vertexCount ;
// 4. Advance indexgen vertex counter.
indexGen . Advance ( vertexCount ) ;
i = lastMatch ;
}
}
// Computes a cheap, sampled hash of the sz bytes at ptr.
// Ranges longer than 100 aligned words are sampled with 100-byte windows spaced a quarter of
// the range apart; shorter ranges use the sum of the first and last aligned word.
// Returns 0 when the range contains no complete 4-byte-aligned word.
inline u32 ComputeMiniHashRange(const void *ptr, size_t sz) {
	// Switch to u32 units, and round up to avoid unaligned accesses.
	// Probably doesn't matter if we skip the first few bytes in some cases.
	const uintptr_t unaligned = (uintptr_t)ptr;
	const uintptr_t aligned = (unaligned + 3) & ~(uintptr_t)3;
	const size_t skipped = (size_t)(aligned - unaligned);

	// Nothing to hash if not even one aligned word fits. Without this guard the
	// "first + last word" path below would index with sz - 1 == SIZE_MAX.
	if (sz < skipped + sizeof(u32))
		return 0;

	const u32 *p = (const u32 *)aligned;
	// Count only whole words that lie inside the range after the alignment skip.
	sz = (sz - skipped) >> 2;

	if (sz > 100) {
		const size_t sampleBytes = 100;
		const size_t sampleWords = sampleBytes / sizeof(u32);
		size_t step = sz / 4;
		u32 hash = 0;
		for (size_t i = 0; i < sz; i += step) {
			// Pull the final window back so it never reads past the end of the range.
			const size_t start = (i + sampleWords <= sz) ? i : sz - sampleWords;
			hash += XXH3_64bits(p + start, sampleBytes);
		}
		return hash;
	} else {
		return p[0] + p[sz - 1];
	}
}
// Computes a quick hash over a subset of the queued draw calls using ComputeMiniHashRange().
// Every call is visited when there are fewer than 3, every 4th when fewer than 8, and
// every (numDrawCalls / 8)th otherwise.
u32 DrawEngineCommon::ComputeMiniHash() {
	const int vertexSize = dec_->GetDecVtxFmt().stride;
	const int indexSize = IndexSize(dec_->VertexType());

	const int stride = numDrawCalls < 3 ? 1 : (numDrawCalls < 8 ? 4 : numDrawCalls / 8);

	u32 combined = 0;
	for (int n = 0; n < numDrawCalls; n += stride) {
		const DeferredDrawCall &call = drawCalls[n];
		if (call.inds) {
			// Indexed: hash the referenced vertex range, then the index data itself.
			const int lowest = call.indexLowerBound;
			const int highest = call.indexUpperBound;
			combined += ComputeMiniHashRange((const u8 *)call.verts + vertexSize * lowest, vertexSize * (highest - lowest));
			combined += ComputeMiniHashRange(call.inds, indexSize * call.vertexCount);
		} else {
			combined += ComputeMiniHashRange(call.verts, vertexSize * call.vertexCount);
		}
	}
	return combined;
}
2020-08-27 20:37:49 -07:00
uint64_t DrawEngineCommon : : ComputeHash ( ) {
uint64_t fullhash = 0 ;
2017-06-02 12:03:46 +02:00
const int vertexSize = dec_ - > GetDecVtxFmt ( ) . stride ;
const int indexSize = IndexSize ( dec_ - > VertexType ( ) ) ;
// TODO: Add some caps both for numDrawCalls and num verts to check?
// It is really very expensive to check all the vertex data so often.
for ( int i = 0 ; i < numDrawCalls ; i + + ) {
const DeferredDrawCall & dc = drawCalls [ i ] ;
if ( ! dc . inds ) {
2020-08-27 20:37:49 -07:00
fullhash + = XXH3_64bits ( ( const char * ) dc . verts , vertexSize * dc . vertexCount ) ;
2017-06-02 12:03:46 +02:00
} else {
int indexLowerBound = dc . indexLowerBound , indexUpperBound = dc . indexUpperBound ;
int j = i + 1 ;
int lastMatch = i ;
while ( j < numDrawCalls ) {
if ( drawCalls [ j ] . verts ! = dc . verts )
break ;
indexLowerBound = std : : min ( indexLowerBound , ( int ) dc . indexLowerBound ) ;
indexUpperBound = std : : max ( indexUpperBound , ( int ) dc . indexUpperBound ) ;
lastMatch = j ;
j + + ;
}
// This could get seriously expensive with sparse indices. Need to combine hashing ranges the same way
// we do when drawing.
2020-08-27 20:37:49 -07:00
fullhash + = XXH3_64bits ( ( const char * ) dc . verts + vertexSize * indexLowerBound ,
vertexSize * ( indexUpperBound - indexLowerBound ) ) ;
2017-06-02 12:03:46 +02:00
// Hm, we will miss some indices when combining above, but meh, it should be fine.
2020-08-27 20:37:49 -07:00
fullhash + = XXH3_64bits ( ( const char * ) dc . inds , indexSize * dc . vertexCount ) ;
2017-06-02 12:03:46 +02:00
i = lastMatch ;
}
}
2020-08-27 20:37:49 -07:00
fullhash + = XXH3_64bits ( & drawCalls [ 0 ] . uvScale , sizeof ( drawCalls [ 0 ] . uvScale ) * numDrawCalls ) ;
2017-06-02 12:03:46 +02:00
return fullhash ;
}
2018-02-26 11:02:52 +01:00
2018-03-05 12:24:02 +01:00
// vertTypeID is the vertex type but with the UVGen mode smashed into the top bits.
2018-04-27 16:33:35 +08:00
void DrawEngineCommon : : SubmitPrim ( void * verts , void * inds , GEPrimitiveType prim , int vertexCount , u32 vertTypeID , int cullMode , int * bytesRead ) {
2018-02-26 11:02:52 +01:00
if ( ! indexGen . PrimCompatible ( prevPrim_ , prim ) | | numDrawCalls > = MAX_DEFERRED_DRAW_CALLS | | vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX ) {
DispatchFlush ( ) ;
}
// TODO: Is this the right thing to do?
if ( prim = = GE_PRIM_KEEP_PREVIOUS ) {
prim = prevPrim_ ! = GE_PRIM_INVALID ? prevPrim_ : GE_PRIM_POINTS ;
} else {
prevPrim_ = prim ;
}
2018-03-02 12:10:32 +01:00
// If vtype has changed, setup the vertex decoder.
if ( vertTypeID ! = lastVType_ ) {
dec_ = GetVertexDecoder ( vertTypeID ) ;
lastVType_ = vertTypeID ;
}
2018-02-26 11:02:52 +01:00
* bytesRead = vertexCount * dec_ - > VertexSize ( ) ;
2020-09-20 20:35:42 +02:00
// Check that we have enough vertices to form the requested primitive.
2018-02-26 11:02:52 +01:00
if ( ( vertexCount < 2 & & prim > 0 ) | | ( vertexCount < 3 & & prim > 2 & & prim ! = GE_PRIM_RECTANGLES ) )
return ;
if ( g_Config . bVertexCache ) {
u32 dhash = dcid_ ;
2018-02-26 16:39:38 +01:00
dhash = __rotl ( dhash ^ ( u32 ) ( uintptr_t ) verts , 13 ) ;
dhash = __rotl ( dhash ^ ( u32 ) ( uintptr_t ) inds , 13 ) ;
2018-03-05 12:24:02 +01:00
dhash = __rotl ( dhash ^ ( u32 ) vertTypeID , 13 ) ;
2018-02-26 16:39:38 +01:00
dhash = __rotl ( dhash ^ ( u32 ) vertexCount , 13 ) ;
2018-02-27 20:40:44 +01:00
dcid_ = dhash ^ ( u32 ) prim ;
2018-02-26 11:02:52 +01:00
}
2018-03-05 12:24:02 +01:00
DeferredDrawCall & dc = drawCalls [ numDrawCalls ] ;
dc . verts = verts ;
dc . inds = inds ;
dc . indexType = ( vertTypeID & GE_VTYPE_IDX_MASK ) > > GE_VTYPE_IDX_SHIFT ;
dc . prim = prim ;
dc . vertexCount = vertexCount ;
dc . uvScale = gstate_c . uv ;
2018-04-27 16:33:35 +08:00
dc . cullMode = cullMode ;
2018-03-05 12:24:02 +01:00
2018-02-26 11:02:52 +01:00
if ( inds ) {
2018-03-05 12:24:02 +01:00
GetIndexBounds ( inds , vertexCount , vertTypeID , & dc . indexLowerBound , & dc . indexUpperBound ) ;
2018-02-26 11:02:52 +01:00
} else {
dc . indexLowerBound = 0 ;
dc . indexUpperBound = vertexCount - 1 ;
}
numDrawCalls + + ;
vertexCountInDrawCalls_ + = vertexCount ;
2018-04-10 12:22:02 +02:00
if ( g_Config . bSoftwareSkinning & & ( vertTypeID & GE_VTYPE_WEIGHT_MASK ) ) {
2018-02-26 11:02:52 +01:00
DecodeVertsStep ( decoded , decodeCounter_ , decodedVerts_ ) ;
decodeCounter_ + + ;
}
if ( prim = = GE_PRIM_RECTANGLES & & ( gstate . getTextureAddress ( 0 ) & 0x3FFFFFFF ) = = ( gstate . getFrameBufAddress ( ) & 0x3FFFFFFF ) ) {
2019-02-08 15:02:31 +01:00
// Rendertarget == texture? Shouldn't happen. Still, try some mitigations.
gstate_c . Dirty ( DIRTY_TEXTURE_PARAMS ) ;
DispatchFlush ( ) ;
2018-02-26 11:02:52 +01:00
}
}
2018-06-28 01:41:16 +09:00
2020-04-04 11:14:32 -07:00
bool DrawEngineCommon : : CanUseHardwareTransform ( int prim ) {
2020-04-04 11:21:22 -07:00
if ( ! useHWTransform_ )
2020-04-04 11:14:32 -07:00
return false ;
return ! gstate . isModeThrough ( ) & & prim ! = GE_PRIM_RECTANGLES ;
}
bool DrawEngineCommon : : CanUseHardwareTessellation ( GEPatchPrimType prim ) {
2020-04-04 11:52:32 -07:00
if ( useHWTessellation_ ) {
2020-04-04 11:14:32 -07:00
return CanUseHardwareTransform ( PatchPrimToPrim ( prim ) ) ;
}
return false ;
}
2018-07-13 18:35:44 +09:00
void TessellationDataTransfer : : CopyControlPoints ( float * pos , float * tex , float * col , int posStride , int texStride , int colStride , const SimpleVertex * const * points , int size , u32 vertType ) {
2018-06-28 01:41:16 +09:00
bool hasColor = ( vertType & GE_VTYPE_COL_MASK ) ! = 0 ;
bool hasTexCoord = ( vertType & GE_VTYPE_TC_MASK ) ! = 0 ;
for ( int i = 0 ; i < size ; + + i ) {
memcpy ( pos , points [ i ] - > pos . AsArray ( ) , 3 * sizeof ( float ) ) ;
pos + = posStride ;
}
if ( hasTexCoord ) {
for ( int i = 0 ; i < size ; + + i ) {
memcpy ( tex , points [ i ] - > uv , 2 * sizeof ( float ) ) ;
tex + = texStride ;
}
}
if ( hasColor ) {
for ( int i = 0 ; i < size ; + + i ) {
memcpy ( col , Vec4f : : FromRGBA ( points [ i ] - > color_32 ) . AsArray ( ) , 4 * sizeof ( float ) ) ;
col + = colStride ;
}
}
}