Compare commits

..

4 commits

Author SHA1 Message Date
Henrik Rydgård
1987169c81 OpenGL: When possible, avoid rebinding vertex arrays between glDrawArrays
Profitable optimization in DrawArrays-heavy games like GTA.
2023-05-17 18:42:23 +02:00
Henrik Rydgård
a9f2a7d7cd OpenGL: For contiguous DrawArrays, avoid re-binding the vertex buffer if possible. 2023-05-17 17:57:47 +02:00
Henrik Rydgård
78834a7424 Break out EnableDisableVertexArrays 2023-05-17 17:47:00 +02:00
Henrik Rydgård
1409d2dec4 Remove vestigial support for multiple vertex streams in OpenGL renderer
Unused, and made things more complex. Might do this later for all
backends.
2023-05-17 17:44:08 +02:00
878 changed files with 32516 additions and 62369 deletions

View file

@ -139,6 +139,12 @@ jobs:
cxx: clang++
args: cd android && ./ab.sh -j2 APP_ABI=armeabi-v7a UNITTEST=1 HEADLESS=1
id: android-arm32
- os: ubuntu-latest
extra: android
cc: clang
cxx: clang++
args: cd android && ./ab.sh -j2 APP_ABI=x86 UNITTEST=1 HEADLESS=1
id: android-x86_32
- os: ubuntu-latest
extra: android
cc: clang
@ -149,8 +155,14 @@ jobs:
extra: android
cc: clang
cxx: clang++
args: cd android && ./ab.sh -j2 APP_ABI=arm64-v8a OPENXR=1
id: android-vr
args: cd android && ./ab.sh -j2 APP_ABI=arm64-v8a OPENXR=1 OPENXR_PLATFORM_QUEST=1
id: android-vr-quest
- os: ubuntu-latest
extra: android
cc: clang
cxx: clang++
args: cd android && ./ab.sh -j2 APP_ABI=arm64-v8a OPENXR=1 OPENXR_PLATFORM_PICO=1
id: android-vr-pico
- os: ubuntu-latest
extra: android
cc: clang
@ -219,7 +231,7 @@ jobs:
run: |
sudo add-apt-repository -y "deb http://archive.ubuntu.com/ubuntu `lsb_release -sc` main universe restricted multiverse"
sudo apt-get update -y -qq
sudo apt-get install libsdl2-dev libgl1-mesa-dev libglu1-mesa-dev libsdl2-ttf-dev libfontconfig1-dev
sudo apt-get install libsdl2-dev libgl1-mesa-dev libglu1-mesa-dev
- name: Create macOS git-version.cpp for tagged release
if: startsWith(github.ref, 'refs/tags/') && runner.os == 'macOS' && matrix.extra == 'test'
@ -229,8 +241,6 @@ jobs:
- name: Setup ccache
uses: hendrikmuhs/ccache-action@v1.2
# Disable ccache on macos for now, it's become buggy for some reason.
if: matrix.id != 'macos'
with:
key: ${{ matrix.id }}
@ -333,7 +343,7 @@ jobs:
run: |
sudo add-apt-repository -y "deb http://archive.ubuntu.com/ubuntu `lsb_release -sc` main universe restricted multiverse"
sudo apt-get update -y -qq
sudo apt-get install libsdl2-dev libgl1-mesa-dev libglu1-mesa-dev libsdl2-ttf-dev libfontconfig1-dev
sudo apt-get install libsdl2-dev libgl1-mesa-dev libglu1-mesa-dev
- name: Install macOS dependencies
if: runner.os == 'macOS'

5
.gitignore vendored
View file

@ -105,7 +105,6 @@ Windows/*.ipch
# For vim
*.swp
tags
*.ctags
# Other VCS
.bzr/
@ -118,8 +117,6 @@ debian/ppsspp/
# Libretro build
*.o
*.tmp
*.a
# YouCompleteMe file
.ycm_extra_conf.pyc
@ -134,5 +131,3 @@ CMakeFiles
.cache/
build
libretro/obj/local
ppsspp_retroachievements.dat

View file

@ -158,4 +158,4 @@ libretro-build-tvos-arm64:
- .core-defs
- .cmake-defs
variables:
CORE_ARGS: -DIOS_PLATFORM=TVOS -DCMAKE_TOOLCHAIN_FILE=cmake/Toolchains/ios.cmake -DLIBRETRO=ON
CORE_ARGS: -DIOS_PLATFORM=TVOS -DUSE_FFMPEG=NO -DCMAKE_TOOLCHAIN_FILE=cmake/Toolchains/ios.cmake -DLIBRETRO=ON

9
.gitmodules vendored
View file

@ -44,12 +44,3 @@
[submodule "cpu_features"]
path = ext/cpu_features
url = https://github.com/google/cpu_features.git
[submodule "ext/rcheevos"]
path = ext/rcheevos
url = https://github.com/RetroAchievements/rcheevos.git
[submodule "ext/naett"]
path = ext/naett
url = https://github.com/erkkah/naett.git
[submodule "ext/libchdr"]
path = ext/libchdr
url = https://github.com/rtissera/libchdr.git

View file

@ -67,8 +67,6 @@ if(CMAKE_SYSTEM_PROCESSOR)
set(MIPS_DEVICE ON)
elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^riscv64")
set(RISCV64_DEVICE ON)
elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^loongarch64")
set(LOONGARCH64_DEVICE ON)
else()
message("Unknown CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
endif()
@ -141,7 +139,6 @@ option(ARMV7 "Set to ON if targeting an ARMv7 processor" ${ARMV7_DEVICE})
option(ARM "Set to ON if targeting an ARM processor" ${ARM_DEVICE})
option(MIPS "Set to ON if targeting a MIPS processor" ${MIPS_DEVICE})
option(RISCV64 "Set to ON if targeting a RISCV64 processor" ${RISCV64_DEVICE})
option(LOONGARCH64 "Set to ON if targeting a LOONGARCH64 processor" ${LOONGARCH64_DEVICE})
option(X86 "Set to ON if targeting an X86 processor" ${X86_DEVICE})
option(X86_64 "Set to ON if targeting an X86_64 processor" ${X86_64_DEVICE})
# :: Environments
@ -149,7 +146,7 @@ option(USING_EGL "Set to ON if target environment uses EGL" ${USING_EGL})
option(USING_FBDEV "Set to ON if target environment uses fbdev (eg. Pandora)" ${USING_FBDEV})
option(USING_GLES2 "Set to ON if target device uses OpenGL ES 2.0" ${USING_GLES2})
option(USING_X11_VULKAN "Set to OFF if target environment doesn't use X11 for Vulkan" ON)
option(USE_WAYLAND_WSI "Enable or disable Wayland WSI support for Vulkan" ON)
option(USE_WAYLAND_WSI "Enable or disable Wayland WSI support for Vulkan" ${USE_WAYLAND_WSI})
option(USE_VULKAN_DISPLAY_KHR "Enable or disable full screen display of Vulkan" ${USE_VULKAN_DISPLAY_KHR})
# :: Frontends
option(USING_QT_UI "Set to ON if you wish to use the Qt frontend wrapper" ${USING_QT_UI})
@ -177,20 +174,20 @@ option(USE_UBSAN "Use undefined behaviour sanitizer" OFF)
if(UNIX AND NOT (APPLE OR ANDROID) AND VULKAN)
if(USING_X11_VULKAN)
message("Using X11 for Vulkan")
find_package(X11)
include_directories(${X11_Xlib_INCLUDE_PATH})
add_definitions(-DVK_USE_PLATFORM_XLIB_KHR)
else()
message("NOT using X11 for Vulkan")
endif()
# add_definitions(-DVK_USE_PLATFORM_XCB_KHR)
find_package(Wayland)
if(NOT WAYLAND_FOUND)
message(STATUS "Could not find Wayland libraries, disabling Wayland WSI support for Vulkan.")
elseif(USE_WAYLAND_WSI)
include_directories(${WAYLAND_INCLUDE_DIR})
add_definitions(-DVK_USE_PLATFORM_WAYLAND_KHR)
if(USE_WAYLAND_WSI)
find_package(Wayland)
if(NOT WAYLAND_FOUND)
message(STATUS "Could not find Wayland libraries, disabling Wayland WSI support for Vulkan.")
else()
include_directories(${WAYLAND_INCLUDE_DIR})
add_definitions(-DVK_USE_PLATFORM_WAYLAND_KHR)
endif()
endif()
if(USE_VULKAN_DISPLAY_KHR)
@ -220,16 +217,6 @@ else()
set(CoreLinkType STATIC)
endif()
if(NOT ANDROID AND NOT WIN32 AND (NOT APPLE OR IOS))
set(HTTPS_NOT_AVAILABLE ON)
endif()
# Made this flag negative because it's hopefully quite temporary and didn't
# want to have to update all build systems.
if(HTTPS_NOT_AVAILABLE)
add_definitions(-DHTTPS_NOT_AVAILABLE)
endif()
# Work around for some misfeature of the current glslang build system
include_directories(ext/glslang)
@ -259,22 +246,11 @@ endif()
if(NOT LIBRETRO AND NOT IOS AND NOT MACOSX)
find_package(SDL2)
find_package(SDL2_ttf)
find_package(Fontconfig)
# TODO: this can be removed once CI supports newer SDL2_ttf
if (NOT SDL2_ttf_FOUND)
find_package(PkgConfig)
if(PkgConfig_FOUND)
pkg_check_modules(SDL2_ttf_PKGCONFIG IMPORTED_TARGET SDL2_ttf)
endif()
endif()
endif()
if(MACOSX AND NOT IOS)
if(USE_SYSTEM_LIBSDL2)
find_package(SDL2)
find_package(SDL2_ttf)
else()
find_library(SDL2Fwk SDL2 REQUIRED PATHS SDL/macOS)
message(STATUS "found SDL2Fwk=${SDL2Fwk}")
@ -328,9 +304,6 @@ endif()
if(RISCV64)
message("Generating for RISCV64, ${CMAKE_BUILD_TYPE}")
endif()
if(LOONGARCH64)
message("Generating for LOONGARCH64, ${CMAKE_BUILD_TYPE}")
endif()
if(X86)
message("Generating for x86, ${CMAKE_BUILD_TYPE}")
endif()
@ -406,8 +379,6 @@ if(NOT MSVC)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -parallel -fopenmp")
endif()
add_definitions(-fno-math-errno)
if(X86 OR X86_64)
# enable sse2 code generation
add_definitions(-msse2)
@ -433,13 +404,8 @@ if(NOT MSVC)
add_definitions(-Wno-psabi)
endif()
add_definitions(-D_XOPEN_SOURCE=700)
add_definitions(-D_XOPEN_SOURCE_EXTENDED -D__BSD_VISIBLE=1 -D_BSD_SOURCE -D_DEFAULT_SOURCE)
if(CMAKE_SYSTEM_NAME MATCHES "Linux|SunOS" OR MINGW)
add_definitions(-D_LARGEFILE64_SOURCE=1 -D_FILE_OFFSET_BITS=64)
endif()
if(${CMAKE_SYSTEM_NAME} STREQUAL "NetBSD")
add_definitions(-D_NETBSD_SOURCE)
endif()
add_definitions(-D_XOPEN_SOURCE_EXTENDED -D__BSD_VISIBLE=1)
add_definitions(-D_LARGEFILE64_SOURCE=1 -D_FILE_OFFSET_BITS=64)
elseif(ANDROID)
add_definitions(-fsigned-char)
endif()
@ -451,7 +417,6 @@ else()
endif()
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_DEBUG")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -D_NDEBUG")
set(CMAKE_EXE_LINKER_FLAGS /NODEFAULTLIB:"libcmt.lib")
endif()
if(WIN32)
@ -544,14 +509,6 @@ set(CommonRISCV64
)
source_group(RISCV64 FILES ${CommonRISCV64})
set(CommonLOONGARCH64
${CommonJIT}
Common/LoongArchCPUDetect.cpp
Core/MIPS/fake/FakeJit.cpp
Core/MIPS/fake/FakeJit.h
)
source_group(LOONGARCH64 FILES ${CommonLOONGARCH64})
if(WIN32)
set(CommonD3D
Common/GPU/D3D9/D3D9ShaderCompiler.cpp
@ -590,7 +547,6 @@ add_library(Common STATIC
${CommonARM64}
${CommonMIPS}
${CommonRISCV64}
${CommonLOONGARCH64}
${CommonD3D}
${CommonVR}
Common/Serialize/Serializer.cpp
@ -610,7 +566,6 @@ add_library(Common STATIC
Common/Data/Collections/FixedSizeQueue.h
Common/Data/Collections/Hashmaps.h
Common/Data/Collections/TinySet.h
Common/Data/Collections/FastVec.h
Common/Data/Collections/ThreadSafeList.h
Common/Data/Color/RGBAUtil.cpp
Common/Data/Color/RGBAUtil.h
@ -675,8 +630,6 @@ add_library(Common STATIC
Common/File/FileDescriptor.h
Common/GPU/DataFormat.h
Common/GPU/MiscTypes.h
Common/GPU/GPUBackendCommon.cpp
Common/GPU/GPUBackendCommon.h
Common/GPU/thin3d.cpp
Common/GPU/thin3d.h
Common/GPU/thin3d_create.h
@ -698,8 +651,6 @@ add_library(Common STATIC
Common/GPU/OpenGL/GLFrameData.cpp
Common/GPU/OpenGL/GLFrameData.h
Common/GPU/OpenGL/thin3d_gl.cpp
Common/GPU/OpenGL/GLMemory.cpp
Common/GPU/OpenGL/GLMemory.h
Common/GPU/OpenGL/GLRenderManager.cpp
Common/GPU/OpenGL/GLRenderManager.h
Common/GPU/OpenGL/GLQueueRunner.cpp
@ -712,8 +663,6 @@ add_library(Common STATIC
Common/GPU/Vulkan/VulkanDebug.h
Common/GPU/Vulkan/VulkanContext.cpp
Common/GPU/Vulkan/VulkanContext.h
Common/GPU/Vulkan/VulkanDescSet.cpp
Common/GPU/Vulkan/VulkanDescSet.h
Common/GPU/Vulkan/VulkanFramebuffer.cpp
Common/GPU/Vulkan/VulkanFramebuffer.h
Common/GPU/Vulkan/VulkanImage.cpp
@ -753,10 +702,6 @@ add_library(Common STATIC
Common/Net/HTTPClient.h
Common/Net/HTTPHeaders.cpp
Common/Net/HTTPHeaders.h
Common/Net/HTTPNaettRequest.cpp
Common/Net/HTTPNaettRequest.h
Common/Net/HTTPRequest.cpp
Common/Net/HTTPRequest.h
Common/Net/HTTPServer.cpp
Common/Net/HTTPServer.h
Common/Net/NetBuffer.cpp
@ -781,8 +726,6 @@ add_library(Common STATIC
Common/Render/Text/draw_text.h
Common/Render/Text/draw_text_android.cpp
Common/Render/Text/draw_text_android.h
Common/Render/Text/draw_text_sdl.cpp
Common/Render/Text/draw_text_sdl.h
Common/Render/Text/draw_text_win.cpp
Common/Render/Text/draw_text_win.h
Common/Render/Text/draw_text_uwp.cpp
@ -793,8 +736,6 @@ add_library(Common STATIC
Common/System/NativeApp.h
Common/System/Request.cpp
Common/System/Request.h
Common/System/OSD.cpp
Common/System/OSD.h
Common/Thread/Channel.h
Common/Thread/ParallelLoop.cpp
Common/Thread/ParallelLoop.h
@ -813,8 +754,6 @@ add_library(Common STATIC
Common/UI/UI.h
Common/UI/Context.cpp
Common/UI/Context.h
Common/UI/IconCache.cpp
Common/UI/IconCache.h
Common/UI/UIScreen.cpp
Common/UI/UIScreen.h
Common/UI/Tween.cpp
@ -852,10 +791,8 @@ add_library(Common STATIC
Common/MemArenaDarwin.cpp
Common/MemArenaPosix.cpp
Common/MemArenaWin32.cpp
Common/MemArenaHorizon.cpp
Common/MemArena.h
Common/MemoryUtil.cpp
Common/MemoryUtilHorizon.cpp
Common/MemoryUtil.h
Common/OSVersion.cpp
Common/OSVersion.h
@ -901,11 +838,7 @@ if(USE_FFMPEG)
set(PLATFORM_ARCH "android/x86")
endif()
elseif(IOS)
if(IOS_PLATFORM STREQUAL "TVOS")
set(PLATFORM_ARCH "tvos/arm64")
else()
set(PLATFORM_ARCH "ios/universal")
endif()
set(PLATFORM_ARCH "ios/universal")
elseif(MACOSX)
set(PLATFORM_ARCH "macosx/universal")
elseif(LINUX)
@ -919,8 +852,6 @@ if(USE_FFMPEG)
set(PLATFORM_ARCH "linux/mips32")
elseif(RISCV64)
set(PLATFORM_ARCH "linux/riscv64")
elseif(LOONGARCH64)
set(PLATFORM_ARCH "linux/loongarch64")
elseif(CMAKE_SIZEOF_VOID_P EQUAL 8)
set(PLATFORM_ARCH "linux/x86_64")
elseif(X86)
@ -1001,7 +932,6 @@ endif()
find_package(LIBZIP)
if(LIBZIP_FOUND AND USE_SYSTEM_LIBZIP)
include_directories(${LIBZIP_INCLUDE_DIRS})
add_definitions(-DSHARED_LIBZIP)
else()
add_library(libzip STATIC
@ -1219,6 +1149,15 @@ set(nativeExtra)
set(nativeExtraLibs)
if(ANDROID)
set(nativeExtra ${nativeExtra}
Common/GL/GLInterface/EGLAndroid.cpp
Common/GL/GLInterface/EGLAndroid.h
Common/GL/GLInterface/EGL.cpp
Common/GL/GLInterface/EGL.h
Common/GL/GLInterface/GLInterface.cpp
Common/GL/GLInterfaceBase.h
)
set(NativeAppSource ${NativeAppSource}
android/jni/app-android.cpp
android/jni/AndroidJavaGLContext.cpp
@ -1349,22 +1288,6 @@ else()
SDL/SDLVulkanGraphicsContext.cpp
)
endif()
if(SDL2_ttf_FOUND OR
(SDL2_ttf_PKGCONFIG_FOUND AND
SDL2_ttf_PKGCONFIG_VERSION VERSION_GREATER_EQUAL "2.0.18"))
add_definitions(-DUSE_SDL2_TTF)
if(FONTCONFIG_FOUND)
add_definitions(-DUSE_SDL2_TTF_FONTCONFIG)
set(nativeExtraLibs ${nativeExtraLibs} Fontconfig::Fontconfig)
endif()
elseif(SDL2_ttf_PKGCONFIG_FOUND)
message(WARNING "Found SDL2_ttf <2.0.18 - this is too old, falling back to atlas")
endif()
if(SDL2_ttf_FOUND)
set(nativeExtraLibs ${nativeExtraLibs} SDL2_ttf::SDL2_ttf)
elseif(SDL2_ttf_PKGCONFIG_FOUND)
set(nativeExtraLibs ${nativeExtraLibs} PkgConfig::SDL2_ttf_PKGCONFIG)
endif()
if(APPLE)
set(nativeExtra ${nativeExtra}
SDL/SDLMain.h
@ -1426,8 +1349,6 @@ list(APPEND NativeAppSource
UI/BackgroundAudio.cpp
UI/ChatScreen.h
UI/ChatScreen.cpp
UI/DebugOverlay.cpp
UI/DebugOverlay.h
UI/DevScreens.cpp
UI/DevScreens.h
UI/DisplayLayoutScreen.cpp
@ -1442,8 +1363,6 @@ list(APPEND NativeAppSource
UI/MiscScreens.cpp
UI/PauseScreen.h
UI/PauseScreen.cpp
UI/TabbedDialogScreen.h
UI/TabbedDialogScreen.cpp
UI/GameScreen.h
UI/GameScreen.cpp
UI/GameSettingsScreen.h
@ -1484,8 +1403,6 @@ list(APPEND NativeAppSource
UI/CustomButtonMappingScreen.cpp
UI/Theme.h
UI/Theme.cpp
UI/RetroAchievementScreens.cpp
UI/RetroAchievementScreens.h
)
if(ANDROID)
@ -1519,7 +1436,7 @@ if(LINUX AND NOT ANDROID)
endif()
set(ATOMIC_LIB)
if(ANDROID OR (LINUX AND ARM_DEVICE) OR (LINUX AND RISCV64) OR (LINUX AND LOONGARCH64))
if(ANDROID OR (LINUX AND ARM_DEVICE) OR (LINUX AND RISCV64))
set(ATOMIC_LIB atomic)
endif()
@ -1576,8 +1493,6 @@ set(CoreExtra)
set(CoreExtraLibs)
set(CoreExtra ${CoreExtra}
Core/MIPS/IR/IRAnalysis.cpp
Core/MIPS/IR/IRAnalysis.h
Core/MIPS/IR/IRCompALU.cpp
Core/MIPS/IR/IRCompBranch.cpp
Core/MIPS/IR/IRCompFPU.cpp
@ -1591,8 +1506,6 @@ set(CoreExtra ${CoreExtra}
Core/MIPS/IR/IRInterpreter.h
Core/MIPS/IR/IRJit.cpp
Core/MIPS/IR/IRJit.h
Core/MIPS/IR/IRNativeCommon.cpp
Core/MIPS/IR/IRNativeCommon.h
Core/MIPS/IR/IRPassSimplify.cpp
Core/MIPS/IR/IRPassSimplify.h
Core/MIPS/IR/IRRegCache.cpp
@ -1632,17 +1545,6 @@ list(APPEND CoreExtra
Core/MIPS/ARM64/Arm64RegCache.h
Core/MIPS/ARM64/Arm64RegCacheFPU.cpp
Core/MIPS/ARM64/Arm64RegCacheFPU.h
Core/MIPS/ARM64/Arm64IRAsm.cpp
Core/MIPS/ARM64/Arm64IRCompALU.cpp
Core/MIPS/ARM64/Arm64IRCompBranch.cpp
Core/MIPS/ARM64/Arm64IRCompFPU.cpp
Core/MIPS/ARM64/Arm64IRCompLoadStore.cpp
Core/MIPS/ARM64/Arm64IRCompSystem.cpp
Core/MIPS/ARM64/Arm64IRCompVec.cpp
Core/MIPS/ARM64/Arm64IRJit.cpp
Core/MIPS/ARM64/Arm64IRJit.h
Core/MIPS/ARM64/Arm64IRRegCache.cpp
Core/MIPS/ARM64/Arm64IRRegCache.h
GPU/Common/VertexDecoderArm64.cpp
Core/Util/DisArm64.cpp
)
@ -1663,17 +1565,6 @@ list(APPEND CoreExtra
Core/MIPS/x86/RegCache.h
Core/MIPS/x86/RegCacheFPU.cpp
Core/MIPS/x86/RegCacheFPU.h
Core/MIPS/x86/X64IRAsm.cpp
Core/MIPS/x86/X64IRCompALU.cpp
Core/MIPS/x86/X64IRCompBranch.cpp
Core/MIPS/x86/X64IRCompFPU.cpp
Core/MIPS/x86/X64IRCompLoadStore.cpp
Core/MIPS/x86/X64IRCompSystem.cpp
Core/MIPS/x86/X64IRCompVec.cpp
Core/MIPS/x86/X64IRJit.cpp
Core/MIPS/x86/X64IRJit.h
Core/MIPS/x86/X64IRRegCache.cpp
Core/MIPS/x86/X64IRRegCache.h
GPU/Common/VertexDecoderX86.cpp
GPU/Software/DrawPixelX86.cpp
GPU/Software/SamplerX86.cpp
@ -1685,17 +1576,6 @@ list(APPEND CoreExtra
)
list(APPEND CoreExtra
Core/MIPS/RiscV/RiscVAsm.cpp
Core/MIPS/RiscV/RiscVCompALU.cpp
Core/MIPS/RiscV/RiscVCompBranch.cpp
Core/MIPS/RiscV/RiscVCompFPU.cpp
Core/MIPS/RiscV/RiscVCompLoadStore.cpp
Core/MIPS/RiscV/RiscVCompSystem.cpp
Core/MIPS/RiscV/RiscVCompVec.cpp
Core/MIPS/RiscV/RiscVJit.cpp
Core/MIPS/RiscV/RiscVJit.h
Core/MIPS/RiscV/RiscVRegCache.cpp
Core/MIPS/RiscV/RiscVRegCache.h
GPU/Common/VertexDecoderRiscV.cpp
)
@ -1916,8 +1796,6 @@ add_library(${CoreLibName} ${CoreLinkType}
Core/CoreTiming.h
Core/CwCheat.cpp
Core/CwCheat.h
Core/FrameTiming.cpp
Core/FrameTiming.h
Core/HDRemaster.cpp
Core/HDRemaster.h
Core/Instance.cpp
@ -1926,8 +1804,6 @@ add_library(${CoreLibName} ${CoreLinkType}
Core/KeyMap.h
Core/KeyMapDefaults.cpp
Core/KeyMapDefaults.h
Core/RetroAchievements.h
Core/RetroAchievements.cpp
Core/ThreadEventQueue.h
Core/TiltEventProcessor.h
Core/TiltEventProcessor.cpp
@ -1954,8 +1830,6 @@ add_library(${CoreLibName} ${CoreLinkType}
Core/Debugger/WebSocket/GameBroadcaster.h
Core/Debugger/WebSocket/GameSubscriber.cpp
Core/Debugger/WebSocket/GameSubscriber.h
Core/Debugger/WebSocket/ClientConfigSubscriber.cpp
Core/Debugger/WebSocket/ClientConfigSubscriber.h
Core/Debugger/WebSocket/GPUBufferSubscriber.cpp
Core/Debugger/WebSocket/GPUBufferSubscriber.h
Core/Debugger/WebSocket/GPURecordSubscriber.cpp
@ -2265,8 +2139,6 @@ add_library(${CoreLibName} ${CoreLinkType}
Core/Util/AudioFormat.h
Core/Util/GameManager.cpp
Core/Util/GameManager.h
Core/Util/GameDB.cpp
Core/Util/GameDB.h
Core/Util/PortManager.cpp
Core/Util/PortManager.h
Core/Util/BlockAllocator.cpp
@ -2325,18 +2197,9 @@ else()
include_directories(ext/zstd/lib)
endif()
include_directories(ext/libchdr/include)
target_link_libraries(${CoreLibName} Common native chdr kirk cityhash sfmt19937 xbrz xxhash rcheevos ${GlslangLibs}
target_link_libraries(${CoreLibName} Common native kirk cityhash sfmt19937 xbrz xxhash ${GlslangLibs}
${CoreExtraLibs} ${OPENGL_LIBRARIES} ${X11_LIBRARIES} ${CMAKE_DL_LIBS})
if(NOT HTTPS_NOT_AVAILABLE)
target_link_libraries(${CoreLibName} naett)
if(WIN32)
target_link_libraries(${CoreLibName} winhttp)
endif()
endif()
target_compile_features(${CoreLibName} PUBLIC cxx_std_17)
if(FFmpeg_FOUND)
@ -2483,12 +2346,11 @@ set(WindowsFiles
Windows/Debugger/Debugger_MemoryDlg.h
Windows/Debugger/Debugger_Lists.cpp
Windows/Debugger/Debugger_Lists.h
Windows/Debugger/Debugger_SymbolMap.h
Windows/Debugger/Debugger_VFPUDlg.cpp
Windows/Debugger/Debugger_VFPUDlg.h
Windows/Debugger/WatchItemWindow.cpp
Windows/Debugger/WatchItemWindow.h
Windows/Debugger/EditSymbolsWindow.cpp
Windows/Debugger/EditSymbolsWindow.h
Windows/GEDebugger/CtrlDisplayListView.cpp
Windows/GEDebugger/SimpleGLWindow.cpp
Windows/GEDebugger/TabState.cpp
@ -2532,13 +2394,6 @@ set(WindowsFiles
Windows/W32Util/ShellUtil.h
Windows/W32Util/TabControl.cpp
Windows/W32Util/TabControl.h
Windows/W32Util/IatHook.h
Windows/W32Util/ContextMenu.h
Windows/W32Util/ContextMenu.cpp
Windows/W32Util/DarkMode.h
Windows/W32Util/DarkMode.cpp
Windows/W32Util/UAHMenuBar.h
Windows/W32Util/UAHMenuBar.cpp
Windows/WindowsHost.cpp
Windows/WindowsHost.h
Windows/MainWindow.cpp
@ -2562,7 +2417,7 @@ set(WindowsFiles
list(APPEND LinkCommon ${CoreLibName} ${CMAKE_THREAD_LIBS_INIT})
if(WIN32)
list(APPEND LinkCommon kernel32 user32 gdi32 shell32 comctl32 dsound xinput d3d9 winmm dinput8 ole32 winspool ksuser mf uxtheme mfplat mfreadwrite mfuuid shlwapi)
list(APPEND LinkCommon kernel32 user32 gdi32 shell32 comctl32 dsound xinput d3d9 winmm dinput8 ole32 winspool ksuser mf mfplat mfreadwrite mfuuid shlwapi)
#setup_target_project(${TargetBin} Windows)
list(APPEND NativeAppSource ${WindowsFiles})
endif()

View file

@ -315,14 +315,6 @@ const u8* ARM64XEmitter::AlignCodePage()
return m_code;
}
const u8 *ARM64XEmitter::NopAlignCode16() {
int bytes = ((-(intptr_t)m_code) & 15);
for (int i = 0; i < bytes / 4; i++) {
Write32(0xD503201F); // official nop instruction
}
return m_code;
}
void ARM64XEmitter::FlushIcache()
{
FlushIcacheSection(m_lastCacheFlushEnd, m_code);
@ -514,7 +506,7 @@ void ARM64XEmitter::EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const voi
distance >>= 2;
_assert_msg_(distance >= -0x2000 && distance <= 0x1FFF, "%s: Received too large distance: %llx", __FUNCTION__, distance);
_assert_msg_(distance >= -0x1FFF && distance < 0x1FFF, "%s: Received too large distance: %llx", __FUNCTION__, distance);
Rt = DecodeReg(Rt);
Write32((b64Bit << 31) | (0x36 << 24) | (op << 24) | \
@ -2128,13 +2120,6 @@ void ARM64FloatEmitter::EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd
(1 << 10) | (Rn << 5) | Rd);
}
void ARM64FloatEmitter::EmitScalarPairwise(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn) {
Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn);
Write32((1 << 30) | (U << 29) | (0b111100011 << 20) | (size << 22) | (opcode << 12) | (1 << 11) | (Rn << 5) | Rd);
}
void ARM64FloatEmitter::Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
{
_assert_msg_(!IsSingle(Rd), "%s doesn't support singles!", __FUNCTION__);
@ -2234,45 +2219,6 @@ void ARM64FloatEmitter::FCVTU(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round) {
EmitConvertScalarToInt(Rd, Rn, round, true);
}
void ARM64FloatEmitter::FCVTZS(ARM64Reg Rd, ARM64Reg Rn, int scale) {
if (IsScalar(Rd)) {
int imm = (IsDouble(Rn) ? 64 : 32) * 2 - scale;
Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn);
Write32((1 << 30) | (0 << 29) | (0x1F << 24) | (imm << 16) | (0x1F << 11) | (1 << 10) | (Rn << 5) | Rd);
} else {
bool sf = Is64Bit(Rd);
u32 type = 0;
if (IsDouble(Rd))
type = 1;
int rmode = 3;
int opcode = 0;
Write32((sf << 31) | (0 << 29) | (0x1E << 24) | (type << 22) | (rmode << 19) | (opcode << 16) | (scale << 10) | (Rn << 5) | Rd);
}
}
void ARM64FloatEmitter::FCVTZU(ARM64Reg Rd, ARM64Reg Rn, int scale) {
if (IsScalar(Rd)) {
int imm = (IsDouble(Rn) ? 64 : 32) * 2 - scale;
Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn);
Write32((1 << 30) | (1 << 29) | (0x1F << 24) | (imm << 16) | (0x1F << 11) | (1 << 10) | (Rn << 5) | Rd);
} else {
bool sf = Is64Bit(Rd);
u32 type = 0;
if (IsDouble(Rd))
type = 1;
int rmode = 3;
int opcode = 1;
Write32((sf << 31) | (0 << 29) | (0x1E << 24) | (type << 22) | (rmode << 19) | (opcode << 16) | (scale << 10) | (Rn << 5) | Rd);
}
}
void ARM64FloatEmitter::EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, u32 opcode, int scale, ARM64Reg Rd, ARM64Reg Rn)
{
Rd = DecodeReg(Rd);
@ -2307,17 +2253,6 @@ void ARM64FloatEmitter::EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd
(cond << 12) | (3 << 10) | (Rn << 5) | Rd);
}
void ARM64FloatEmitter::EmitCondCompare(bool M, bool S, CCFlags cond, int op, u8 nzcv, ARM64Reg Rn, ARM64Reg Rm) {
_assert_msg_(!IsQuad(Rn), "%s doesn't support vector!", __FUNCTION__);
bool is_double = IsDouble(Rn);
Rn = DecodeReg(Rn);
Rm = DecodeReg(Rm);
Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (Rm << 16) | \
(cond << 12) | (1 << 10) | (Rn << 5) | (op << 4) | nzcv);
}
void ARM64FloatEmitter::EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
_assert_msg_(!IsSingle(Rd), "%s doesn't support singles!", __FUNCTION__);
@ -2963,22 +2898,6 @@ void ARM64FloatEmitter::FSQRT(ARM64Reg Rd, ARM64Reg Rn)
EmitScalar1Source(0, 0, IsDouble(Rd), 3, Rd, Rn);
}
// Scalar - pairwise
void ARM64FloatEmitter::FADDP(ARM64Reg Rd, ARM64Reg Rn) {
EmitScalarPairwise(1, IsDouble(Rd), 0b01101, Rd, Rn);
}
void ARM64FloatEmitter::FMAXP(ARM64Reg Rd, ARM64Reg Rn) {
EmitScalarPairwise(1, IsDouble(Rd), 0b01111, Rd, Rn);
}
void ARM64FloatEmitter::FMINP(ARM64Reg Rd, ARM64Reg Rn) {
EmitScalarPairwise(1, IsDouble(Rd) ? 3 : 2, 0b01111, Rd, Rn);
}
void ARM64FloatEmitter::FMAXNMP(ARM64Reg Rd, ARM64Reg Rn) {
EmitScalarPairwise(1, IsDouble(Rd), 0b01100, Rd, Rn);
}
void ARM64FloatEmitter::FMINNMP(ARM64Reg Rd, ARM64Reg Rn) {
EmitScalarPairwise(1, IsDouble(Rd) ? 3 : 2, 0b01100, Rd, Rn);
}
// Scalar - 2 Source
void ARM64FloatEmitter::FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
@ -3061,12 +2980,6 @@ void ARM64FloatEmitter::BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
EmitThreeSame(1, 1, 3, Rd, Rn, Rm);
}
void ARM64FloatEmitter::BIT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
EmitThreeSame(1, 2, 3, Rd, Rn, Rm);
}
void ARM64FloatEmitter::BIF(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
EmitThreeSame(1, 3, 3, Rd, Rn, Rm);
}
void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index)
{
u32 imm5 = 0;
@ -3102,9 +3015,6 @@ void ARM64FloatEmitter::FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
EmitThreeSame(0, size >> 6, 0x1A, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FADDP(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
EmitThreeSame(1, size >> 6, 0x1A, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
EmitThreeSame(0, size >> 6, 0x1E, Rd, Rn, Rm);
@ -3137,14 +3047,6 @@ void ARM64FloatEmitter::FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn)
{
Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0x1B, Rd, Rn);
}
void ARM64FloatEmitter::FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale) {
int imm = size * 2 - scale;
EmitShiftImm(IsQuad(Rd), false, imm >> 3, imm & 7, 0x1F, Rd, Rn);
}
void ARM64FloatEmitter::FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale) {
int imm = size * 2 - scale;
EmitShiftImm(IsQuad(Rd), true, imm >> 3, imm & 7, 0x1F, Rd, Rn);
}
void ARM64FloatEmitter::FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
EmitThreeSame(1, size >> 6, 0x1F, Rd, Rn, Rm);
@ -3248,61 +3150,6 @@ void ARM64FloatEmitter::XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
Emit2RegMisc(true, 0, dest_size >> 4, 0x12, Rd, Rn);
}
void ARM64FloatEmitter::CMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
EmitThreeSame(true, size >> 4, 0b10001, Rd, Rn, Rm);
}
void ARM64FloatEmitter::CMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
EmitThreeSame(false, size >> 4, 0b00111, Rd, Rn, Rm);
}
void ARM64FloatEmitter::CMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
EmitThreeSame(false, size >> 4, 0b00110, Rd, Rn, Rm);
}
void ARM64FloatEmitter::CMHI(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
EmitThreeSame(true, size >> 4, 0b00110, Rd, Rn, Rm);
}
void ARM64FloatEmitter::CMHS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
EmitThreeSame(true, size >> 4, 0b00111, Rd, Rn, Rm);
}
void ARM64FloatEmitter::CMTST(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
EmitThreeSame(false, size >> 4, 0b10001, Rd, Rn, Rm);
}
void ARM64FloatEmitter::CMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
Emit2RegMisc(IsQuad(Rd), false, size >> 4, 0b01001, Rd, Rn);
}
void ARM64FloatEmitter::CMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
Emit2RegMisc(IsQuad(Rd), true, size >> 4, 0b01000, Rd, Rn);
}
void ARM64FloatEmitter::CMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
Emit2RegMisc(IsQuad(Rd), false, size >> 4, 0b01000, Rd, Rn);
}
void ARM64FloatEmitter::CMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
Emit2RegMisc(IsQuad(Rd), true, size >> 4, 0b01001, Rd, Rn);
}
void ARM64FloatEmitter::CMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
Emit2RegMisc(IsQuad(Rd), false, size >> 4, 0b01010, Rd, Rn);
}
// Move
void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn)
{
@ -3435,95 +3282,6 @@ void ARM64FloatEmitter::SMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index)
EmitCopy(b64Bit, 0, imm5, 5, Rd, Rn);
}
void ARM64FloatEmitter::EncodeModImm(bool Q, u8 op, u8 cmode, u8 o2, ARM64Reg Rd, u8 abcdefgh) {
Rd = DecodeReg(Rd);
u8 abc = abcdefgh >> 5;
u8 defgh = abcdefgh & 0x1F;
Write32((Q << 30) | (op << 29) | (0xF << 24) | (abc << 16) | (cmode << 12) | (o2 << 11) | (1 << 10) | (defgh << 5) | Rd);
}
void ARM64FloatEmitter::FMOV(u8 size, ARM64Reg Rd, u8 imm8) {
_assert_msg_(!IsSingle(Rd), "%s doesn't support singles", __FUNCTION__);
_assert_msg_(size == 32 || size == 64, "%s: unsupported size", __FUNCTION__);
_assert_msg_(IsQuad(Rd) || size == 32, "Use non-SIMD FMOV to load one double imm8");
EncodeModImm(IsQuad(Rd), size >> 6, 0b1111, 0, Rd, imm8);
}
void ARM64FloatEmitter::MOVI(u8 size, ARM64Reg Rd, u8 imm8, u8 shift, bool MSL) {
_assert_msg_(!IsSingle(Rd), "%s doesn't support singles", __FUNCTION__);
_assert_msg_(size == 8 || size == 16 || size == 32 || size == 64, "%s: unsupported size %d", __FUNCTION__, size);
_assert_msg_((shift & 7) == 0 && shift < size, "%s: unsupported shift %d", __FUNCTION__, shift);
_assert_msg_(!MSL || (size == 32 && shift > 0 && shift <= 16), "MOVI MSL shift requires size 32, shift must be 8 or 16");
_assert_msg_(size != 64 || shift == 0, "MOVI 64-bit imm cannot be shifted");
u8 cmode = 0;
if (size == 8)
cmode = 0b1110;
else if (size == 16)
cmode = 0b1000 | (shift >> 2);
else if (MSL)
cmode = 0b1100 | (shift >> 3);
else if (size == 32)
cmode = (shift >> 2);
else if (size == 64)
cmode = 0b1110;
else
_assert_msg_(false, "%s: unhandled case", __FUNCTION__);
EncodeModImm(IsQuad(Rd), size >> 6, cmode, 0, Rd, imm8);
}
void ARM64FloatEmitter::MVNI(u8 size, ARM64Reg Rd, u8 imm8, u8 shift, bool MSL) {
_assert_msg_(!IsSingle(Rd), "%s doesn't support singles", __FUNCTION__);
_assert_msg_(size == 16 || size == 32, "%s: unsupported size %d", __FUNCTION__, size);
_assert_msg_((shift & 7) == 0 && shift < size, "%s: unsupported shift %d", __FUNCTION__, shift);
_assert_msg_(!MSL || (size == 32 && shift > 0 && shift <= 16), "MVNI MSL shift requires size 32, shift must be 8 or 16");
u8 cmode = 0;
if (size == 16)
cmode = 0b1000 | (shift >> 2);
else if (MSL)
cmode = 0b1100 | (shift >> 3);
else if (size == 32)
cmode = (shift >> 2);
else
_assert_msg_(false, "%s: unhandled case", __FUNCTION__);
EncodeModImm(IsQuad(Rd), 1, cmode, 0, Rd, imm8);
}
void ARM64FloatEmitter::ORR(u8 size, ARM64Reg Rd, u8 imm8, u8 shift) {
_assert_msg_(!IsSingle(Rd), "%s doesn't support singles", __FUNCTION__);
_assert_msg_(size == 16 || size == 32, "%s: unsupported size %d", __FUNCTION__, size);
_assert_msg_((shift & 7) == 0 && shift < size, "%s: unsupported shift %d", __FUNCTION__, shift);
u8 cmode = 0;
if (size == 16)
cmode = 0b1001 | (shift >> 2);
else if (size == 32)
cmode = 0b0001 | (shift >> 2);
else
_assert_msg_(false, "%s: unhandled case", __FUNCTION__);
EncodeModImm(IsQuad(Rd), 0, cmode, 0, Rd, imm8);
}
void ARM64FloatEmitter::BIC(u8 size, ARM64Reg Rd, u8 imm8, u8 shift) {
_assert_msg_(!IsSingle(Rd), "%s doesn't support singles", __FUNCTION__);
_assert_msg_(size == 16 || size == 32, "%s: unsupported size %d", __FUNCTION__, size);
_assert_msg_((shift & 7) == 0 && shift < size, "%s: unsupported shift %d", __FUNCTION__, shift);
u8 cmode = 0;
if (size == 16)
cmode = 0b1001 | (shift >> 2);
else if (size == 32)
cmode = 0b0001 | (shift >> 2);
else
_assert_msg_(false, "%s: unhandled case", __FUNCTION__);
EncodeModImm(IsQuad(Rd), 1, cmode, 0, Rd, imm8);
}
// One source
void ARM64FloatEmitter::FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn)
{
@ -3586,38 +3344,22 @@ void ARM64FloatEmitter::UCVTF(ARM64Reg Rd, ARM64Reg Rn)
void ARM64FloatEmitter::SCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale)
{
if (IsScalar(Rn)) {
int imm = (IsDouble(Rn) ? 64 : 32) * 2 - scale;
Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn);
bool sf = Is64Bit(Rn);
u32 type = 0;
if (IsDouble(Rd))
type = 1;
Write32((1 << 30) | (0 << 29) | (0x1F << 24) | (imm << 16) | (0x1C << 11) | (1 << 10) | (Rn << 5) | Rd);
} else {
bool sf = Is64Bit(Rn);
u32 type = 0;
if (IsDouble(Rd))
type = 1;
EmitConversion2(sf, 0, false, type, 0, 2, 64 - scale, Rd, Rn);
}
EmitConversion2(sf, 0, false, type, 0, 2, 64 - scale, Rd, Rn);
}
void ARM64FloatEmitter::UCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale)
{
if (IsScalar(Rn)) {
int imm = (IsDouble(Rn) ? 64 : 32) * 2 - scale;
Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn);
bool sf = Is64Bit(Rn);
u32 type = 0;
if (IsDouble(Rd))
type = 1;
Write32((1 << 30) | (1 << 29) | (0x1F << 24) | (imm << 16) | (0x1C << 11) | (1 << 10) | (Rn << 5) | Rd);
} else {
bool sf = Is64Bit(Rn);
u32 type = 0;
if (IsDouble(Rd))
type = 1;
EmitConversion2(sf, 0, false, type, 0, 3, 64 - scale, Rd, Rn);
}
EmitConversion2(sf, 0, false, type, 0, 3, 64 - scale, Rd, Rn);
}
void ARM64FloatEmitter::FCMP(ARM64Reg Rn, ARM64Reg Rm)
@ -3674,14 +3416,6 @@ void ARM64FloatEmitter::FCSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags con
EmitCondSelect(0, 0, cond, Rd, Rn, Rm);
}
// Floating-point conditional compare (quiet): if `cond` holds, NZCV is set from
// comparing Rn with Rm; otherwise NZCV is loaded with the literal `nzcv` value.
void ARM64FloatEmitter::FCCMP(ARM64Reg Rn, ARM64Reg Rm, u8 nzcv, CCFlags cond) {
	EmitCondCompare(0, 0, cond, 0, nzcv, Rn, Rm);  // op = 0: quiet (non-signaling) compare
}
// Floating-point conditional compare, signaling variant: if `cond` holds, NZCV is
// set from comparing Rn with Rm (raising Invalid Operation on any NaN input);
// otherwise NZCV is loaded with the literal `nzcv` value.
void ARM64FloatEmitter::FCCMPE(ARM64Reg Rn, ARM64Reg Rm, u8 nzcv, CCFlags cond) {
	EmitCondCompare(0, 0, cond, 1, nzcv, Rn, Rm);  // op = 1: signaling compare
}
// Permute
void ARM64FloatEmitter::UZP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
@ -3708,20 +3442,6 @@ void ARM64FloatEmitter::ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
EmitPermute(size, 7, Rd, Rn, Rm);
}
// EXT: extract a vector from the register pair Rn:Rm, starting at byte `index`.
// The index is always a byte offset, regardless of element arrangement; for
// 64-bit (non-quad) operands only indices 0-7 are valid.
void ARM64FloatEmitter::EXT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, int index) {
	_assert_msg_(!IsSingle(Rd), "%s doesn't support singles!", __FUNCTION__);

	const bool isQuad = IsQuad(Rd);
	_assert_msg_(index >= 0 && index < 16 && (isQuad || index < 8), "%s start index out of bounds", __FUNCTION__);
	_assert_msg_(IsQuad(Rd) == IsQuad(Rn) && IsQuad(Rd) == IsQuad(Rm), "%s operands not same size", __FUNCTION__);

	// Strip the size/type bits down to raw register numbers before encoding.
	ARM64Reg rd = DecodeReg(Rd);
	ARM64Reg rn = DecodeReg(Rn);
	ARM64Reg rm = DecodeReg(Rm);
	Write32((isQuad << 30) | (0x17 << 25) | (rm << 16) | (index << 11) | (rn << 5) | rd);
}
// Shift by immediate
void ARM64FloatEmitter::SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
{
@ -3747,12 +3467,6 @@ void ARM64FloatEmitter::USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
{
USHLL(src_size, Rd, Rn, shift, true);
}
// SHLL: widening shift-left-long where the shift amount equals the source
// element width (src_size). Operates on the lower half of Rn.
void ARM64FloatEmitter::SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn) {
	SHLL(src_size, Rd, Rn, false);  // upper = false: lower-half variant
}
// SHLL2: widening shift-left-long where the shift amount equals the source
// element width (src_size). Operates on the upper half of Rn.
void ARM64FloatEmitter::SHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn) {
	SHLL(src_size, Rd, Rn, true);  // upper = true: upper-half (the "2") variant
}
void ARM64FloatEmitter::SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
{
SXTL(src_size, Rd, Rn, false);
@ -3792,11 +3506,6 @@ void ARM64FloatEmitter::USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift,
EmitShiftImm(upper, 1, imm >> 3, imm & 7, 0x14, Rd, Rn);
}
// Shared encoder for SHLL/SHLL2 (vector shift left long by the element width).
// `upper` selects the SHLL2 form, which reads the upper half of Rn.
void ARM64FloatEmitter::SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper) {
	_assert_msg_(src_size <= 32, "%s shift amount cannot be 64", __FUNCTION__);
	// src_size >> 4 maps 8 -> 0, 16 -> 1, 32 -> 2 for the size field.
	Emit2RegMisc(upper, 1, src_size >> 4, 0b10011, Rd, Rn);
}
void ARM64FloatEmitter::SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper)
{
_assert_msg_(shift > 0, "%s shift amount must be greater than zero!", __FUNCTION__);
@ -4187,131 +3896,20 @@ void ARM64FloatEmitter::MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch, bool
}
// TODO: Quite a few values could be generated easily using the MOVI instruction and friends.
void ARM64FloatEmitter::MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch, bool negate) {
_assert_msg_(!IsSingle(Rd), "%s doesn't support singles", __FUNCTION__);
void ARM64FloatEmitter::MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch) {
// TODO: Make it work with more element sizes
// TODO: Optimize - there are shorter solution for many values
ARM64Reg s = (ARM64Reg)(S0 + DecodeReg(Rd));
int ival;
memcpy(&ival, &value, 4);
uint8_t imm8;
if (ival == 0) { // Make sure to not catch negative zero here
// Prefer MOVI 0, which may have no latency on some CPUs.
MOVI(32, Rd, 0);
if (negate)
FNEG(32, Rd, Rd);
} else if (negate && FPImm8FromFloat(-value, &imm8)) {
FMOV(32, Rd, imm8);
} else if (FPImm8FromFloat(value, &imm8)) {
FMOV(32, Rd, imm8);
if (negate) {
FNEG(32, Rd, Rd);
}
} else if (TryAnyMOVI(32, Rd, ival)) {
if (negate) {
FNEG(32, Rd, Rd);
}
} else if (TryAnyMOVI(32, Rd, ival ^ 0x80000000)) {
if (!negate) {
FNEG(32, Rd, Rd);
}
EOR(Rd, Rd, Rd);
} else {
_assert_msg_(scratch != INVALID_REG, "Failed to find a way to generate FP immediate %f without scratch", value);
if (negate) {
ival ^= 0x80000000;
}
m_emit->MOVI2R(scratch, ival);
DUP(32, Rd, scratch);
MOVI2F(s, value, scratch);
DUP(32, Rd, Rd, 0);
}
}
// Attempts to materialize `elementValue`, replicated across elements of the given
// size, using a single MOVI or MVNI instruction. Returns true and emits exactly
// one instruction if an encoding exists; returns false and emits nothing otherwise.
// Only the low `size` bits of elementValue are meaningful.
bool ARM64FloatEmitter::TryMOVI(u8 size, ARM64Reg Rd, uint64_t elementValue) {
	if (size == 8) {
		// Can always do 8: imm8 holds the whole element.
		MOVI(size, Rd, elementValue & 0xFF);
		return true;
	} else if (size == 16) {
		// One byte must be the immediate and the other all-zeros (MOVI) or all-ones (MVNI).
		if ((elementValue & 0xFF00) == 0) {
			MOVI(size, Rd, elementValue & 0xFF, 0);
			return true;
		} else if ((elementValue & 0x00FF) == 0) {
			MOVI(size, Rd, (elementValue >> 8) & 0xFF, 8);
			return true;
		} else if ((elementValue & 0xFF00) == 0xFF00) {
			MVNI(size, Rd, ~elementValue & 0xFF, 0);
			return true;
		} else if ((elementValue & 0x00FF) == 0x00FF) {
			MVNI(size, Rd, (~elementValue >> 8) & 0xFF, 8);
			return true;
		}

		return false;
	} else if (size == 32) {
		// One byte may be the immediate; the other three must be all-zeros (MOVI)
		// or all-ones (MVNI).
		for (int shift = 0; shift < 32; shift += 8) {
			uint32_t mask = 0xFFFFFFFFu & ~(0xFFu << shift);
			if ((elementValue & mask) == 0) {
				MOVI(size, Rd, (elementValue >> shift) & 0xFF, shift);
				return true;
			} else if ((elementValue & mask) == mask) {
				MVNI(size, Rd, (~elementValue >> shift) & 0xFF, shift);
				return true;
			}
		}

		// Maybe an MSL shift will work?  MSL shifts in ones below the immediate:
		// MOVI gives (imm8 << shift) | ones, MVNI gives the complement of that.
		for (int shift = 8; shift <= 16; shift += 8) {
			uint32_t mask = 0xFFFFFFFFu & ~(0xFFu << shift);
			uint32_t ones = (1u << shift) - 1;
			uint32_t notOnes = 0xFFFFFF00u << shift;
			if ((elementValue & mask) == ones) {
				MOVI(size, Rd, (elementValue >> shift) & 0xFF, shift, true);
				return true;
			} else if ((elementValue & mask) == notOnes) {
				// Fix: MVNI inverts the expanded immediate, so imm8 must be taken
				// from ~elementValue (as in every other MVNI case above); using
				// elementValue directly encoded the wrong constant.
				MVNI(size, Rd, (~elementValue >> shift) & 0xFF, shift, true);
				return true;
			}
		}

		return false;
	} else if (size == 64) {
		// The 64-bit form expands each bit of imm8 to a full byte, so every byte
		// of the element must be 0x00 or 0xFF.
		uint8_t imm8 = 0;
		for (int i = 0; i < 8; ++i) {
			uint8_t byte = (elementValue >> (i * 8)) & 0xFF;
			if (byte != 0 && byte != 0xFF)
				return false;

			if (byte == 0xFF)
				imm8 |= 1 << i;
		}

		// Didn't run into any partial bytes, so size 64 is doable.
		MOVI(size, Rd, imm8);
		return true;
	}
	return false;
}
// Attempts to materialize the replicated constant with a single MOVI/MVNI,
// retrying at other element sizes when the requested size has no encoding.
// Returns true and emits one instruction on success; emits nothing on failure.
bool ARM64FloatEmitter::TryAnyMOVI(u8 size, ARM64Reg Rd, uint64_t elementValue) {
	// Try the original size first in case that's more optimal.
	if (TryMOVI(size, Rd, elementValue))
		return true;

	// Build the full 64-bit pattern produced by replicating the element.
	// Fixes vs. the previous version: 1ULL avoids undefined behavior for
	// size == 32 (1 << 32 overflows int); masking first discards stray high
	// bits (e.g. sign extension when callers pass an int); and the loop steps
	// by `size` instead of by 1, which smeared bits across the pattern.
	uint64_t value = elementValue;
	if (size != 64) {
		const uint64_t element = elementValue & ((1ULL << size) - 1);
		value = 0;
		for (int i = 0; i < 64; i += size)
			value |= element << i;
	}

	for (int attempt = 8; attempt <= 64; attempt += attempt) {
		// Original size was already attempted above.
		if (attempt == size)
			continue;

		// A different element size is only usable if the 64-bit pattern actually
		// repeats at that granularity — MOVI/MVNI replicate their element, and
		// TryMOVI(8, ...) in particular always succeeds, so calling it with a
		// non-uniform pattern would emit the wrong constant.
		const uint64_t attemptMask = attempt == 64 ? ~(uint64_t)0 : (1ULL << attempt) - 1;
		const uint64_t low = value & attemptMask;
		bool repeats = true;
		for (int i = attempt; i < 64 && repeats; i += attempt) {
			if (((value >> i) & attemptMask) != low)
				repeats = false;
		}

		if (repeats && TryMOVI(attempt, Rd, low))
			return true;
	}

	return false;
}
void ARM64XEmitter::SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
u32 val;
bool shift;

View file

@ -94,7 +94,7 @@ enum ARM64Reg
// R19-R28. R29 (FP), R30 (LR) are always saved and FP updated appropriately.
const u32 ALL_CALLEE_SAVED = 0x1FF80000;
const u32 ALL_CALLEE_SAVED_FP = 0x0000FF00; // q8-q15
const u32 ALL_CALLEE_SAVED_FP = 0x0000FF00; // d8-d15
inline bool Is64Bit(ARM64Reg reg) { return (reg & 0x20) != 0; }
inline bool IsSingle(ARM64Reg reg) { return (reg & 0xC0) == 0x40; }
@ -290,23 +290,6 @@ public:
}
m_shifttype = ST_LSL;
}
	// Extended-register operand, used for register-offset load/store addressing:
	// Rd is zero-extended (UXTW/UXTX) or sign-extended (SXTW) from 32 bits as its
	// width dictates, optionally scaled when `index` is true.
	ArithOption(ARM64Reg Rd, bool index, bool signExtend) {
		// NOTE(review): m_shift = 4 appears to flag "scale by the access size"
		// rather than a literal shift of 4 — confirm against the load/store
		// encoders that consume this option.
		if (index)
			m_shift = 4;
		else
			m_shift = 0;

		m_destReg = Rd;
		m_type = TYPE_EXTENDEDREG;
		if (Is64Bit(Rd)) {
			// 64-bit registers need no extension; UXTX is the identity choice.
			m_width = WIDTH_64BIT;
			m_extend = EXTEND_UXTX;
		} else {
			// 32-bit registers are widened to 64 bits, signed or unsigned.
			m_width = WIDTH_32BIT;
			m_extend = signExtend ? EXTEND_SXTW : EXTEND_UXTW;
		}
		m_shifttype = ST_LSL;
	}
ArithOption(ARM64Reg Rd, ShiftType shift_type, u32 shift)
{
m_destReg = Rd;
@ -418,7 +401,6 @@ public:
void ReserveCodeSpace(u32 bytes);
const u8* AlignCode16();
const u8* AlignCodePage();
const u8 *NopAlignCode16();
void FlushIcache();
void FlushIcacheSection(const u8* start, const u8* end);
u8* GetWritableCodePtr();
@ -820,13 +802,6 @@ public:
void FSQRT(ARM64Reg Rd, ARM64Reg Rn);
void FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top = false); // Also generalized move between GPR/FP
// Scalar - pairwise
void FADDP(ARM64Reg Rd, ARM64Reg Rn);
void FMAXP(ARM64Reg Rd, ARM64Reg Rn);
void FMINP(ARM64Reg Rd, ARM64Reg Rn);
void FMAXNMP(ARM64Reg Rd, ARM64Reg Rn);
void FMINNMP(ARM64Reg Rd, ARM64Reg Rn);
// Scalar - 2 Source
void FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
@ -851,12 +826,9 @@ public:
void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void BIT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void BIF(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
void FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FADDP(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FMLS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
@ -866,8 +838,6 @@ public:
void FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
void FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FNEG(u8 size, ARM64Reg Rd, ARM64Reg Rn);
@ -898,18 +868,6 @@ public:
void XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void CMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CMHI(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CMHS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CMTST(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void CMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void CMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void CMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void CMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void CMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn);
// Move
void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void INS(u8 size, ARM64Reg Rd, u8 index, ARM64Reg Rn);
@ -917,18 +875,6 @@ public:
void UMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
void SMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
// Vector immediates
void FMOV(u8 size, ARM64Reg Rd, u8 imm8);
// MSL means bits shifted in are 1s. For size=64, each bit of imm8 is expanded to 8 actual bits.
void MOVI(u8 size, ARM64Reg Rd, u8 imm8, u8 shift = 0, bool MSL = false);
void MVNI(u8 size, ARM64Reg Rd, u8 imm8, u8 shift = 0, bool MSL = false);
void ORR(u8 size, ARM64Reg Rd, u8 imm8, u8 shift = 0);
void BIC(u8 size, ARM64Reg Rd, u8 imm8, u8 shift = 0);
bool TryMOVI(u8 size, ARM64Reg Rd, uint64_t value);
// Allow using a different size. Unclear if there's a penalty.
bool TryAnyMOVI(u8 size, ARM64Reg Rd, uint64_t value);
// One source
void FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn);
@ -937,8 +883,6 @@ public:
// and one that outputs to a scalar fp register.
void FCVTS(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round);
void FCVTU(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round);
void FCVTZS(ARM64Reg Rd, ARM64Reg Rn, int scale);
void FCVTZU(ARM64Reg Rd, ARM64Reg Rn, int scale);
// Scalar convert int to float. No rounding mode specifier necessary.
void SCVTF(ARM64Reg Rd, ARM64Reg Rn);
@ -965,10 +909,6 @@ public:
// Conditional select
void FCSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
// Conditional compare
void FCCMP(ARM64Reg Rn, ARM64Reg Rm, u8 nzcv, CCFlags cond);
void FCCMPE(ARM64Reg Rn, ARM64Reg Rm, u8 nzcv, CCFlags cond);
// Permute
void UZP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void TRN1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
@ -976,17 +916,12 @@ public:
void UZP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void TRN2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
// Related to permute, extract vector from pair (always by byte arrangement.)
void EXT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, int index);
// Shift by immediate
void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void SSHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
// Shift == src_size for these.
void SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
void SHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void SHRN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
@ -1003,7 +938,7 @@ public:
void FMLA(u8 esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
void MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG, bool negate = false);
void MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG, bool negate = false);
void MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG);
// ABI related
void ABI_PushRegisters(uint32_t gpr_registers, uint32_t fp_registers);
@ -1018,7 +953,6 @@ private:
void EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);
void EmitScalarPairwise(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn);
void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
@ -1027,7 +961,6 @@ private:
void EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, u32 opcode, int scale, ARM64Reg Rd, ARM64Reg Rn);
void EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm);
void EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitCondCompare(bool M, bool S, CCFlags cond, int op, u8 nzcv, ARM64Reg Rn, ARM64Reg Rm);
void EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm8);
void EmitShiftImm(bool Q, bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
@ -1041,11 +974,9 @@ private:
void EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode);
void EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
void EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void EncodeModImm(bool Q, u8 op, u8 cmode, u8 o2, ARM64Reg Rd, u8 abcdefgh);
void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
void SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);

View file

@ -32,15 +32,14 @@
#if defined(CPU_FEATURES_OS_LINUX)
#define USE_CPU_FEATURES 1
#endif
#elif PPSSPP_ARCH(ARM64)
#elif PPSSPP_ARCH(ARM64) && defined(__aarch64__)
#include "ext/cpu_features/include/cpuinfo_aarch64.h"
#if defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) || defined(CPU_FEATURES_OS_WINDOWS)
#if defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID)
#define USE_CPU_FEATURES 1
#endif
#endif
#include <cstring>
#include <ctype.h>
#include "Common/CommonTypes.h"
@ -55,7 +54,7 @@
std::string GetCPUBrandString();
#else
// No CPUID on ARM, so we'll have to read the registry
#include "Common/CommonWindows.h"
#include <windows.h>
std::string GetCPUBrandString() {
std::string cpu_string;

View file

@ -613,14 +613,6 @@ const u8 *ARMXEmitter::AlignCode16()
return code;
}
// Pads the code stream with NOP instructions up to the next 16-byte boundary
// and returns the aligned code pointer. Each ARM instruction is 4 bytes.
const u8 *ARMXEmitter::NopAlignCode16() {
	int remaining = (-(intptr_t)code) & 15;
	while (remaining >= 4) {
		Write32(0xE320F000);  // ARM NOP (one of several possible encodings)
		remaining -= 4;
	}
	return code;
}
const u8 *ARMXEmitter::AlignCodePage()
{
ReserveCodeSpace((-(intptr_t)code) & 4095);

View file

@ -446,8 +446,6 @@ public:
void ReserveCodeSpace(u32 bytes);
const u8 *AlignCode16();
const u8 *AlignCodePage();
const u8 *NopAlignCode16();
void FlushIcache();
void FlushIcacheSection(u8 *start, u8 *end);
u8 *GetWritableCodePtr();

View file

@ -17,7 +17,7 @@
// Reference : https://stackoverflow.com/questions/6121792/how-to-check-if-a-cpu-supports-the-sse3-instruction-set
#include "ppsspp_config.h"
#if (PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)) && !defined(__EMSCRIPTEN__)
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
#include "ext/cpu_features/include/cpuinfo_x86.h"

View file

@ -109,28 +109,13 @@ struct CPUInfo {
bool RiscV_D;
bool RiscV_C;
bool RiscV_V;
bool RiscV_B;
bool RiscV_Zicsr;
bool RiscV_Zba;
bool RiscV_Zbb;
bool RiscV_Zbc;
bool RiscV_Zbs;
// LoongArch specific extension flags.
bool LOONGARCH_CPUCFG;
bool LOONGARCH_LAM;
bool LOONGARCH_UAL;
bool LOONGARCH_FPU;
bool LOONGARCH_LSX;
bool LOONGARCH_LASX;
bool LOONGARCH_CRC32;
bool LOONGARCH_COMPLEX;
bool LOONGARCH_CRYPTO;
bool LOONGARCH_LVZ;
bool LOONGARCH_LBT_X86;
bool LOONGARCH_LBT_ARM;
bool LOONGARCH_LBT_MIPS;
bool LOONGARCH_PTW;
// Quirks
struct {
// Samsung Galaxy S7 devices (Exynos 8890) have a big.LITTLE configuration where the cacheline size differs between big and LITTLE.

View file

@ -10,11 +10,6 @@
#include "Common/Log.h"
#include "Common/MemoryUtil.h"
#if PPSSPP_PLATFORM(SWITCH)
#include <cstdio>
#include <switch.h>
#endif // PPSSPP_PLATFORM(SWITCH)
// Everything that needs to generate code should inherit from this.
// You get memory management for free, plus, you can use all emitter functions without
// having to prefix them with gen-> or something similar.
@ -32,7 +27,7 @@ public:
virtual const u8 *GetCodePtr() const = 0;
u8 *GetBasePtr() const {
u8 *GetBasePtr() {
return region;
}
@ -70,20 +65,9 @@ public:
// Call this before you generate any code.
void AllocCodeSpace(int size) {
region_size = size;
#if PPSSPP_PLATFORM(SWITCH)
Result rc = jitCreate(&jitController, size);
if(R_FAILED(rc)) {
printf("Failed to create Jitbuffer of size 0x%x err: 0x%x\n", size, rc);
}
printf("[NXJIT]: Initialized RX: %p RW: %p\n", jitController.rx_addr, jitController.rw_addr);
region = (u8 *)jitController.rx_addr;
writableRegion = (u8 *)jitController.rw_addr;
#else // PPSSPP_PLATFORM(SWITCH)
// The protection will be set to RW if PlatformIsWXExclusive.
region = (u8 *)AllocateExecutableMemory(region_size);
writableRegion = region;
#endif // !PPSSPP_PLATFORM(SWITCH)
T::SetCodePointer(region, writableRegion);
}
@ -151,13 +135,8 @@ public:
// Call this when shutting down. Don't rely on the destructor, even though it'll do the job.
void FreeCodeSpace() {
#if !PPSSPP_PLATFORM(SWITCH)
ProtectMemoryPages(region, region_size, MEM_PROT_READ | MEM_PROT_WRITE);
FreeExecutableMemory(region, region_size);
#else // !PPSSPP_PLATFORM(SWITCH)
jitClose(&jitController);
printf("[NXJIT]: Jit closed\n");
#endif // PPSSPP_PLATFORM(SWITCH)
region = nullptr;
writableRegion = nullptr;
region_size = 0;
@ -197,7 +176,5 @@ private:
const uint8_t *writeStart_ = nullptr;
uint8_t *writableRegion = nullptr;
size_t writeEstimated_ = 0;
#if PPSSPP_PLATFORM(SWITCH)
Jit jitController;
#endif // PPSSPP_PLATFORM(SWITCH)
};

View file

@ -399,7 +399,6 @@
<ClInclude Include="..\ext\libpng17\pnglibconf.h" />
<ClInclude Include="..\ext\libpng17\pngpriv.h" />
<ClInclude Include="..\ext\libpng17\pngstruct.h" />
<ClInclude Include="..\ext\naett\naett.h" />
<ClInclude Include="..\ext\vma\vk_mem_alloc.h" />
<ClInclude Include="ABI.h" />
<ClInclude Include="Arm64Emitter.h" />
@ -451,13 +450,11 @@
<ClInclude Include="GPU\D3D9\D3D9ShaderCompiler.h" />
<ClInclude Include="GPU\D3D9\D3D9StateCache.h" />
<ClInclude Include="GPU\DataFormat.h" />
<ClInclude Include="GPU\GPUBackendCommon.h" />
<ClInclude Include="GPU\MiscTypes.h" />
<ClInclude Include="GPU\OpenGL\DataFormatGL.h" />
<ClInclude Include="GPU\OpenGL\gl3stub.h" />
<ClInclude Include="GPU\OpenGL\GLFeatures.h" />
<ClInclude Include="GPU\OpenGL\GLFrameData.h" />
<ClInclude Include="GPU\OpenGL\GLMemory.h" />
<ClInclude Include="GPU\OpenGL\GLQueueRunner.h" />
<ClInclude Include="GPU\OpenGL\GLRenderManager.h" />
<ClInclude Include="GPU\OpenGL\GLSLProgram.h" />
@ -472,7 +469,6 @@
<ClInclude Include="GPU\Vulkan\VulkanBarrier.h" />
<ClInclude Include="GPU\Vulkan\VulkanContext.h" />
<ClInclude Include="GPU\Vulkan\VulkanDebug.h" />
<ClInclude Include="GPU\Vulkan\VulkanDescSet.h" />
<ClInclude Include="GPU\Vulkan\VulkanFramebuffer.h" />
<ClInclude Include="GPU\Vulkan\VulkanFrameData.h" />
<ClInclude Include="GPU\Vulkan\VulkanImage.h" />
@ -492,12 +488,10 @@
<ClInclude Include="Math\lin\vec3.h" />
<ClInclude Include="Math\math_util.h" />
<ClInclude Include="Math\Statistics.h" />
<ClInclude Include="Net\HTTPNaettRequest.h" />
<ClInclude Include="Net\NetBuffer.h" />
<ClInclude Include="Net\HTTPClient.h" />
<ClInclude Include="Net\HTTPHeaders.h" />
<ClInclude Include="Net\HTTPServer.h" />
<ClInclude Include="Net\HTTPRequest.h" />
<ClInclude Include="Net\Resolve.h" />
<ClInclude Include="Net\Sinks.h" />
<ClInclude Include="Net\URL.h" />
@ -509,7 +503,6 @@
<ClInclude Include="Render\Text\draw_text.h" />
<ClInclude Include="Render\Text\draw_text_android.h" />
<ClInclude Include="Render\Text\draw_text_qt.h" />
<ClInclude Include="Render\Text\draw_text_sdl.h" />
<ClInclude Include="Render\Text\draw_text_uwp.h" />
<ClInclude Include="Render\Text\draw_text_win.h" />
<ClInclude Include="LogReporting.h" />
@ -532,6 +525,27 @@
<ClInclude Include="Crypto\sha256.h" />
<ClInclude Include="DbgNew.h" />
<ClInclude Include="ExceptionHandlerSetup.h" />
<ClInclude Include="GL\GLInterfaceBase.h" />
<ClInclude Include="GL\GLInterface\EGL.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="GL\GLInterface\EGLAndroid.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="GraphicsContext.h" />
<ClInclude Include="Log.h" />
<ClInclude Include="LogManager.h" />
@ -545,7 +559,6 @@
<ClInclude Include="Swap.h" />
<ClInclude Include="SysError.h" />
<ClInclude Include="System\Display.h" />
<ClInclude Include="System\OSD.h" />
<ClInclude Include="System\Request.h" />
<ClInclude Include="System\NativeApp.h" />
<ClInclude Include="System\System.h" />
@ -561,7 +574,6 @@
<ClInclude Include="TimeUtil.h" />
<ClInclude Include="UI\AsyncImageFileView.h" />
<ClInclude Include="UI\Context.h" />
<ClInclude Include="UI\IconCache.h" />
<ClInclude Include="UI\PopupScreens.h" />
<ClInclude Include="UI\Root.h" />
<ClInclude Include="UI\Screen.h" />
@ -831,7 +843,6 @@
<ForcedIncludeFiles Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
</ForcedIncludeFiles>
</ClCompile>
<ClCompile Include="..\ext\naett\naett.c" />
<ClCompile Include="..\ext\vma\vk_mem_alloc.cpp" />
<ClCompile Include="ABI.cpp" />
<ClCompile Include="Arm64Emitter.cpp" />
@ -847,7 +858,6 @@
</ClCompile>
<ClCompile Include="ArmEmitter.cpp" />
<ClCompile Include="Buffer.cpp" />
<ClCompile Include="Data\Collections\FastVec.h" />
<ClCompile Include="Data\Color\RGBAUtil.cpp" />
<ClCompile Include="Data\Convert\SmallDataConvert.cpp" />
<ClCompile Include="Data\Encoding\Base64.cpp" />
@ -882,12 +892,10 @@
<ClCompile Include="GPU\D3D9\D3D9ShaderCompiler.cpp" />
<ClCompile Include="GPU\D3D9\D3D9StateCache.cpp" />
<ClCompile Include="GPU\D3D9\thin3d_d3d9.cpp" />
<ClCompile Include="GPU\GPUBackendCommon.cpp" />
<ClCompile Include="GPU\OpenGL\DataFormatGL.cpp" />
<ClCompile Include="GPU\OpenGL\gl3stub.c" />
<ClCompile Include="GPU\OpenGL\GLFeatures.cpp" />
<ClCompile Include="GPU\OpenGL\GLFrameData.cpp" />
<ClCompile Include="GPU\OpenGL\GLMemory.cpp" />
<ClCompile Include="GPU\OpenGL\GLQueueRunner.cpp" />
<ClCompile Include="GPU\OpenGL\GLRenderManager.cpp" />
<ClCompile Include="GPU\OpenGL\GLSLProgram.cpp" />
@ -901,7 +909,6 @@
<ClCompile Include="GPU\Vulkan\VulkanBarrier.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanContext.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanDebug.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanDescSet.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanFramebuffer.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanFrameData.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanImage.cpp" />
@ -920,12 +927,10 @@
<ClCompile Include="Math\lin\vec3.cpp" />
<ClCompile Include="Math\math_util.cpp" />
<ClCompile Include="Math\Statistics.cpp" />
<ClCompile Include="Net\HTTPNaettRequest.cpp" />
<ClCompile Include="Net\NetBuffer.cpp" />
<ClCompile Include="Net\HTTPClient.cpp" />
<ClCompile Include="Net\HTTPHeaders.cpp" />
<ClCompile Include="Net\HTTPServer.cpp" />
<ClCompile Include="Net\HTTPRequest.cpp" />
<ClCompile Include="Net\Resolve.cpp" />
<ClCompile Include="Net\Sinks.cpp" />
<ClCompile Include="Net\URL.cpp" />
@ -937,13 +942,11 @@
<ClCompile Include="Render\Text\draw_text.cpp" />
<ClCompile Include="Render\Text\draw_text_android.cpp" />
<ClCompile Include="Render\Text\draw_text_qt.cpp" />
<ClCompile Include="Render\Text\draw_text_sdl.cpp" />
<ClCompile Include="Render\Text\draw_text_uwp.cpp" />
<ClCompile Include="Render\Text\draw_text_win.cpp" />
<ClCompile Include="LogReporting.cpp" />
<ClCompile Include="RiscVCPUDetect.cpp" />
<ClCompile Include="RiscVEmitter.cpp" />
<ClCompile Include="LoongArchCPUDetect.cpp" />
<ClCompile Include="Serialize\Serializer.cpp" />
<ClCompile Include="Data\Convert\ColorConv.cpp" />
<ClCompile Include="ConsoleListener.cpp" />
@ -972,6 +975,36 @@
<ClCompile Include="Crypto\sha1.cpp" />
<ClCompile Include="Crypto\sha256.cpp" />
<ClCompile Include="ExceptionHandlerSetup.cpp" />
<ClCompile Include="GL\GLInterface\EGL.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="GL\GLInterface\EGLAndroid.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="GL\GLInterface\GLInterface.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="LogManager.cpp" />
<ClCompile Include="MemArenaAndroid.cpp" />
<ClCompile Include="MemArenaPosix.cpp" />
@ -983,7 +1016,6 @@
<ClCompile Include="OSVersion.cpp" />
<ClCompile Include="StringUtils.cpp" />
<ClCompile Include="System\Display.cpp" />
<ClCompile Include="System\OSD.cpp" />
<ClCompile Include="System\Request.cpp" />
<ClCompile Include="Thread\ParallelLoop.cpp" />
<ClCompile Include="Thread\ThreadManager.cpp" />
@ -992,7 +1024,6 @@
<ClCompile Include="TimeUtil.cpp" />
<ClCompile Include="UI\AsyncImageFileView.cpp" />
<ClCompile Include="UI\Context.cpp" />
<ClCompile Include="UI\IconCache.cpp" />
<ClCompile Include="UI\PopupScreens.cpp" />
<ClCompile Include="UI\Root.cpp" />
<ClCompile Include="UI\Screen.cpp" />

View file

@ -32,6 +32,15 @@
<ClInclude Include="ArmCommon.h" />
<ClInclude Include="BitSet.h" />
<ClInclude Include="CodeBlock.h" />
<ClInclude Include="GL\GLInterface\EGL.h">
<Filter>GL\GLInterface</Filter>
</ClInclude>
<ClInclude Include="GL\GLInterface\EGLAndroid.h">
<Filter>GL\GLInterface</Filter>
</ClInclude>
<ClInclude Include="GL\GLInterfaceBase.h">
<Filter>GL</Filter>
</ClInclude>
<ClInclude Include="GraphicsContext.h" />
<ClInclude Include="DbgNew.h" />
<ClInclude Include="OSVersion.h" />
@ -440,6 +449,9 @@
<ClInclude Include="Render\ManagedTexture.h">
<Filter>Render</Filter>
</ClInclude>
<ClInclude Include="GPU\MiscTypes.h">
<Filter>GPU</Filter>
</ClInclude>
<ClInclude Include="GPU\Vulkan\VulkanFramebuffer.h">
<Filter>GPU\Vulkan</Filter>
</ClInclude>
@ -488,36 +500,6 @@
<ClInclude Include="File\AndroidContentURI.h">
<Filter>File</Filter>
</ClInclude>
<ClInclude Include="GPU\OpenGL\GLMemory.h">
<Filter>GPU\OpenGL</Filter>
</ClInclude>
<ClInclude Include="GPU\GPUBackendCommon.h">
<Filter>GPU</Filter>
</ClInclude>
<ClInclude Include="GPU\MiscTypes.h">
<Filter>GPU</Filter>
</ClInclude>
<ClInclude Include="UI\IconCache.h">
<Filter>UI</Filter>
</ClInclude>
<ClInclude Include="System\OSD.h">
<Filter>System</Filter>
</ClInclude>
<ClInclude Include="Net\HTTPRequest.h">
<Filter>Net</Filter>
</ClInclude>
<ClInclude Include="..\ext\naett\naett.h">
<Filter>ext\naett</Filter>
</ClInclude>
<ClInclude Include="Net\HTTPNaettRequest.h">
<Filter>Net</Filter>
</ClInclude>
<ClInclude Include="Render\Text\draw_text_sdl.h">
<Filter>Render\Text</Filter>
</ClInclude>
<ClInclude Include="GPU\Vulkan\VulkanDescSet.h">
<Filter>GPU\Vulkan</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="ABI.cpp" />
@ -544,6 +526,15 @@
</ClCompile>
<ClCompile Include="MipsEmitter.cpp" />
<ClCompile Include="Arm64Emitter.cpp" />
<ClCompile Include="GL\GLInterface\EGL.cpp">
<Filter>GL\GLInterface</Filter>
</ClCompile>
<ClCompile Include="GL\GLInterface\EGLAndroid.cpp">
<Filter>GL\GLInterface</Filter>
</ClCompile>
<ClCompile Include="GL\GLInterface\GLInterface.cpp">
<Filter>GL\GLInterface</Filter>
</ClCompile>
<ClCompile Include="MemArenaPosix.cpp" />
<ClCompile Include="MemArenaWin32.cpp" />
<ClCompile Include="MemArenaAndroid.cpp" />
@ -878,7 +869,6 @@
<Filter>GPU\Vulkan</Filter>
</ClCompile>
<ClCompile Include="RiscVEmitter.cpp" />
<ClCompile Include="LoongArchCPUDetect.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanFrameData.cpp">
<Filter>GPU\Vulkan</Filter>
</ClCompile>
@ -942,41 +932,17 @@
<ClCompile Include="File\AndroidContentURI.cpp">
<Filter>File</Filter>
</ClCompile>
<ClCompile Include="GPU\OpenGL\GLMemory.cpp">
<Filter>GPU\OpenGL</Filter>
</ClCompile>
<ClCompile Include="Data\Collections\FastVec.h">
<Filter>Data\Collections</Filter>
</ClCompile>
<ClCompile Include="GPU\GPUBackendCommon.cpp">
<Filter>GPU</Filter>
</ClCompile>
<ClCompile Include="UI\IconCache.cpp">
<Filter>UI</Filter>
</ClCompile>
<ClCompile Include="System\OSD.cpp">
<Filter>System</Filter>
</ClCompile>
<ClCompile Include="Net\HTTPRequest.cpp">
<Filter>Net</Filter>
</ClCompile>
<ClCompile Include="..\ext\naett\naett.c">
<Filter>ext\naett</Filter>
</ClCompile>
<ClCompile Include="Net\HTTPNaettRequest.cpp">
<Filter>Net</Filter>
</ClCompile>
<ClCompile Include="Render\Text\draw_text_sdl.cpp">
<Filter>Render\Text</Filter>
</ClCompile>
<ClCompile Include="GPU\Vulkan\VulkanDescSet.cpp">
<Filter>GPU\Vulkan</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<Filter Include="Crypto">
<UniqueIdentifier>{1b593f03-7b28-4707-9228-4981796f5589}</UniqueIdentifier>
</Filter>
<Filter Include="GL">
<UniqueIdentifier>{2f2ca112-9e26-499e-9cb9-38a78b4ac09d}</UniqueIdentifier>
</Filter>
<Filter Include="GL\GLInterface">
<UniqueIdentifier>{2c723cf4-75b6-406a-90c0-ebb7a13ba476}</UniqueIdentifier>
</Filter>
<Filter Include="Serialize">
<UniqueIdentifier>{7be79ad5-3520-46a1-a370-dce2a943978c}</UniqueIdentifier>
</Filter>
@ -1076,12 +1042,6 @@
<Filter Include="ext\basis_universal">
<UniqueIdentifier>{d6d5f6e0-1c72-496b-af11-6d52d5123033}</UniqueIdentifier>
</Filter>
<Filter Include="ext\naett">
<UniqueIdentifier>{34f45db9-5c08-49cb-b349-b9e760ce3213}</UniqueIdentifier>
</Filter>
<Filter Include="ext\libchdr">
<UniqueIdentifier>{b681797d-7747-487f-b448-5ef5b2d2805b}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<Text Include="..\ext\libpng17\CMakeLists.txt">

View file

@ -30,11 +30,8 @@
#include <unistd.h>
#include <errno.h>
#if (PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)) && !defined(__EMSCRIPTEN__)
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
#define Crash() {asm ("int $3");}
#elif PPSSPP_PLATFORM(SWITCH)
// TODO: Implement Crash() for Switch, lets not use breakpoint for the time being
#define Crash() {*((volatile u32 *)0x0) = 0xDEADC0DE;}
#elif PPSSPP_ARCH(ARM)
#define Crash() {asm ("bkpt #0");}
#elif PPSSPP_ARCH(ARM64)

View file

@ -36,39 +36,6 @@ typedef signed __int64 s64;
#else
#ifdef __SWITCH__
// Some HID conflicts
#define KEY_UP PKEY_UP
#define KEY_DOWN PKEY_DOWN
// Other conflicts
#define Event _Event
#define Framebuffer _Framebuffer
#define Waitable _Waitable
#define ThreadContext _ThreadContext
#include <switch.h>
// Cleanup
#undef KEY_UP
#undef KEY_DOWN
#undef Event
#undef Framebuffer
#undef Waitable
#undef ThreadContext
// Conflicting types with libnx
#ifndef _u64
#define u64 _u64
#endif // _u64
#ifndef s64
#define s64 _s64
#endif // _s64
typedef unsigned char u_char;
typedef unsigned short u_short;
typedef unsigned int u_int;
typedef unsigned long u_long;
#endif // __SWITCH__
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;

View file

@ -358,7 +358,7 @@ void ConsoleListener::LogWriterThread()
for (char *Text = logLocal, *End = logLocal + logLocalSize; Text < End; )
{
LogLevel Level = LogLevel::LINFO;
LogTypes::LOG_LEVELS Level = LogTypes::LINFO;
char *next = (char *) memchr(Text + 1, '\033', End - Text);
size_t Len = next - Text;
@ -367,7 +367,7 @@ void ConsoleListener::LogWriterThread()
if (Text[0] == '\033' && Text + 1 < End)
{
Level = (LogLevel)(Text[1] - '0');
Level = (LogTypes::LOG_LEVELS) (Text[1] - '0');
Len -= 2;
Text += 2;
}
@ -384,7 +384,7 @@ void ConsoleListener::LogWriterThread()
delete [] logLocal;
}
void ConsoleListener::SendToThread(LogLevel Level, const char *Text)
void ConsoleListener::SendToThread(LogTypes::LOG_LEVELS Level, const char *Text)
{
// Oops, we're already quitting. Just do nothing.
if (logPendingWritePos == (u32) -1)
@ -462,7 +462,7 @@ void ConsoleListener::SendToThread(LogLevel Level, const char *Text)
SetEvent(hTriggerEvent);
}
void ConsoleListener::WriteToConsole(LogLevel Level, const char *Text, size_t Len)
void ConsoleListener::WriteToConsole(LogTypes::LOG_LEVELS Level, const char *Text, size_t Len)
{
_dbg_assert_msg_(IsOpen(), "Don't call this before opening the console.");
@ -479,20 +479,21 @@ void ConsoleListener::WriteToConsole(LogLevel Level, const char *Text, size_t Le
WORD Color;
static wchar_t tempBuf[2048];
switch (Level) {
case LogLevel::LNOTICE: // light green
switch (Level)
{
case NOTICE_LEVEL: // light green
Color = FOREGROUND_GREEN | FOREGROUND_INTENSITY;
break;
case LogLevel::LERROR: // light red
case ERROR_LEVEL: // light red
Color = FOREGROUND_RED | FOREGROUND_INTENSITY;
break;
case LogLevel::LWARNING: // light yellow
case WARNING_LEVEL: // light yellow
Color = FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_INTENSITY;
break;
case LogLevel::LINFO: // cyan
case INFO_LEVEL: // cyan
Color = FOREGROUND_GREEN | FOREGROUND_BLUE | FOREGROUND_INTENSITY;
break;
case LogLevel::LDEBUG: // gray
case DEBUG_LEVEL: // gray
Color = FOREGROUND_INTENSITY;
break;
default: // off-white
@ -592,7 +593,7 @@ void ConsoleListener::PixelSpace(int Left, int Top, int Width, int Height, bool
COORD Coo = GetCoordinates(OldCursor, LBufWidth);
SetConsoleCursorPosition(hConsole, Coo);
// if (SLog.length() > 0) Log(LogLevel::LNOTICE, SLog.c_str());
// if (SLog.length() > 0) Log(LogTypes::LNOTICE, SLog.c_str());
// Resize the window too
if (Resize) MoveWindow(GetConsoleWindow(), Left,Top, (Width + 100),Height, true);
@ -614,16 +615,18 @@ void ConsoleListener::Log(const LogMessage &msg) {
char ColorAttr[16] = "";
char ResetAttr[16] = "";
if (bUseColor) {
if (bUseColor)
{
strcpy(ResetAttr, "\033[0m");
switch (msg.level) {
case LogLevel::LNOTICE: // light green
switch (msg.level)
{
case NOTICE_LEVEL: // light green
strcpy(ColorAttr, "\033[92m");
break;
case LogLevel::LERROR: // light red
case ERROR_LEVEL: // light red
strcpy(ColorAttr, "\033[91m");
break;
case LogLevel::LWARNING: // light yellow
case WARNING_LEVEL: // light yellow
strcpy(ColorAttr, "\033[93m");
break;
default:
@ -653,3 +656,5 @@ void ConsoleListener::ClearScreen(bool Cursor)
if (Cursor) SetConsoleCursorPosition(hConsole, coordScreen);
#endif
}

View file

@ -54,8 +54,8 @@ private:
static unsigned int WINAPI RunThread(void *lpParam);
void LogWriterThread();
void SendToThread(LogLevel Level, const char *Text);
void WriteToConsole(LogLevel Level, const char *Text, size_t Len);
void SendToThread(LogTypes::LOG_LEVELS Level, const char *Text);
void WriteToConsole(LogTypes::LOG_LEVELS Level, const char *Text, size_t Len);
static int refCount;
static HANDLE hThread;

View file

@ -62,7 +62,7 @@
/*
* MD5 context setup
*/
void ppsspp_md5_starts( md5_context *ctx )
void md5_starts( md5_context *ctx )
{
ctx->total[0] = 0;
ctx->total[1] = 0;
@ -73,7 +73,7 @@ void ppsspp_md5_starts( md5_context *ctx )
ctx->state[3] = 0x10325476;
}
static void ppsspp_md5_process( md5_context *ctx, unsigned char data[64] )
static void md5_process( md5_context *ctx, unsigned char data[64] )
{
unsigned long X[16], A, B, C, D;
@ -199,7 +199,7 @@ static void ppsspp_md5_process( md5_context *ctx, unsigned char data[64] )
/*
* MD5 process buffer
*/
void ppsspp_md5_update( md5_context *ctx, unsigned char *input, int ilen )
void md5_update( md5_context *ctx, unsigned char *input, int ilen )
{
int fill;
unsigned long left;
@ -220,7 +220,7 @@ void ppsspp_md5_update( md5_context *ctx, unsigned char *input, int ilen )
{
memcpy( (void *) (ctx->buffer + left),
(void *) input, fill );
ppsspp_md5_process( ctx, ctx->buffer );
md5_process( ctx, ctx->buffer );
input += fill;
ilen -= fill;
left = 0;
@ -228,7 +228,7 @@ void ppsspp_md5_update( md5_context *ctx, unsigned char *input, int ilen )
while( ilen >= 64 )
{
ppsspp_md5_process( ctx, input );
md5_process( ctx, input );
input += 64;
ilen -= 64;
}
@ -251,7 +251,7 @@ static const unsigned char md5_padding[64] =
/*
* MD5 final digest
*/
void ppsspp_md5_finish( md5_context *ctx, unsigned char output[16] )
void md5_finish( md5_context *ctx, unsigned char output[16] )
{
unsigned long last, padn;
unsigned long high, low;
@ -267,8 +267,8 @@ void ppsspp_md5_finish( md5_context *ctx, unsigned char output[16] )
last = ctx->total[0] & 0x3F;
padn = ( last < 56 ) ? ( 56 - last ) : ( 120 - last );
ppsspp_md5_update( ctx, (unsigned char *) md5_padding, padn );
ppsspp_md5_update( ctx, msglen, 8 );
md5_update( ctx, (unsigned char *) md5_padding, padn );
md5_update( ctx, msglen, 8 );
PUT_ULONG_LE( ctx->state[0], output, 0 );
PUT_ULONG_LE( ctx->state[1], output, 4 );
@ -279,13 +279,13 @@ void ppsspp_md5_finish( md5_context *ctx, unsigned char output[16] )
/*
* output = MD5( input buffer )
*/
void ppsspp_md5( unsigned char *input, int ilen, unsigned char output[16] )
void md5( unsigned char *input, int ilen, unsigned char output[16] )
{
md5_context ctx;
ppsspp_md5_starts( &ctx );
ppsspp_md5_update( &ctx, input, ilen );
ppsspp_md5_finish( &ctx, output );
md5_starts( &ctx );
md5_update( &ctx, input, ilen );
md5_finish( &ctx, output );
memset( &ctx, 0, sizeof( md5_context ) );
}
@ -293,14 +293,14 @@ void ppsspp_md5( unsigned char *input, int ilen, unsigned char output[16] )
/*
* MD5 HMAC context setup
*/
void ppsspp_md5_hmac_starts( md5_context *ctx, unsigned char *key, int keylen )
void md5_hmac_starts( md5_context *ctx, unsigned char *key, int keylen )
{
int i;
unsigned char sum[16];
if( keylen > 64 )
{
ppsspp_md5( key, keylen, sum );
md5( key, keylen, sum );
keylen = 16;
key = sum;
}
@ -314,8 +314,8 @@ void ppsspp_md5_hmac_starts( md5_context *ctx, unsigned char *key, int keylen )
ctx->opad[i] = (unsigned char)( ctx->opad[i] ^ key[i] );
}
ppsspp_md5_starts( ctx );
ppsspp_md5_update( ctx, ctx->ipad, 64 );
md5_starts( ctx );
md5_update( ctx, ctx->ipad, 64 );
memset( sum, 0, sizeof( sum ) );
}
@ -323,23 +323,23 @@ void ppsspp_md5_hmac_starts( md5_context *ctx, unsigned char *key, int keylen )
/*
* MD5 HMAC process buffer
*/
void ppsspp_md5_hmac_update( md5_context *ctx, unsigned char *input, int ilen )
void md5_hmac_update( md5_context *ctx, unsigned char *input, int ilen )
{
ppsspp_md5_update( ctx, input, ilen );
md5_update( ctx, input, ilen );
}
/*
* MD5 HMAC final digest
*/
void ppsspp_md5_hmac_finish( md5_context *ctx, unsigned char output[16] )
void md5_hmac_finish( md5_context *ctx, unsigned char output[16] )
{
unsigned char tmpbuf[16];
ppsspp_md5_finish( ctx, tmpbuf );
ppsspp_md5_starts( ctx );
ppsspp_md5_update( ctx, ctx->opad, 64 );
ppsspp_md5_update( ctx, tmpbuf, 16 );
ppsspp_md5_finish( ctx, output );
md5_finish( ctx, tmpbuf );
md5_starts( ctx );
md5_update( ctx, ctx->opad, 64 );
md5_update( ctx, tmpbuf, 16 );
md5_finish( ctx, output );
memset( tmpbuf, 0, sizeof( tmpbuf ) );
}
@ -347,14 +347,14 @@ void ppsspp_md5_hmac_finish( md5_context *ctx, unsigned char output[16] )
/*
* output = HMAC-MD5( hmac key, input buffer )
*/
void ppsspp_md5_hmac( unsigned char *key, int keylen, unsigned char *input, int ilen,
void md5_hmac( unsigned char *key, int keylen, unsigned char *input, int ilen,
unsigned char output[16] )
{
md5_context ctx;
ppsspp_md5_hmac_starts( &ctx, key, keylen );
ppsspp_md5_hmac_update( &ctx, input, ilen );
ppsspp_md5_hmac_finish( &ctx, output );
md5_hmac_starts( &ctx, key, keylen );
md5_hmac_update( &ctx, input, ilen );
md5_hmac_finish( &ctx, output );
memset( &ctx, 0, sizeof( md5_context ) );
}
@ -464,7 +464,7 @@ static const unsigned char md5_hmac_test_sum[7][16] =
/*
* Checkup routine
*/
int ppsspp_md5_self_test( int verbose )
int md5_self_test( int verbose )
{
int i, buflen;
unsigned char buf[1024];

View file

@ -46,7 +46,7 @@ extern "C" {
*
* \param ctx context to be initialized
*/
void ppsspp_md5_starts( md5_context *ctx );
void md5_starts( md5_context *ctx );
/**
* \brief MD5 process buffer
@ -55,7 +55,7 @@ void ppsspp_md5_starts( md5_context *ctx );
* \param input buffer holding the data
* \param ilen length of the input data
*/
void ppsspp_md5_update( md5_context *ctx, unsigned char *input, int ilen );
void md5_update( md5_context *ctx, unsigned char *input, int ilen );
/**
* \brief MD5 final digest
@ -63,7 +63,7 @@ void ppsspp_md5_update( md5_context *ctx, unsigned char *input, int ilen );
* \param ctx MD5 context
* \param output MD5 checksum result
*/
void ppsspp_md5_finish( md5_context *ctx, unsigned char output[16] );
void md5_finish( md5_context *ctx, unsigned char output[16] );
/**
* \brief Output = MD5( input buffer )
@ -72,7 +72,7 @@ void ppsspp_md5_finish( md5_context *ctx, unsigned char output[16] );
* \param ilen length of the input data
* \param output MD5 checksum result
*/
void ppsspp_md5( unsigned char *input, int ilen, unsigned char output[16] );
void md5( unsigned char *input, int ilen, unsigned char output[16] );
/**
* \brief Output = MD5( file contents )
@ -83,7 +83,7 @@ void ppsspp_md5( unsigned char *input, int ilen, unsigned char output[16] );
* \return 0 if successful, 1 if fopen failed,
* or 2 if fread failed
*/
int ppsspp_md5_file( char *path, unsigned char output[16] );
int md5_file( char *path, unsigned char output[16] );
/**
* \brief MD5 HMAC context setup
@ -92,7 +92,7 @@ int ppsspp_md5_file( char *path, unsigned char output[16] );
* \param key HMAC secret key
* \param keylen length of the HMAC key
*/
void ppsspp_md5_hmac_starts( md5_context *ctx, unsigned char *key, int keylen );
void md5_hmac_starts( md5_context *ctx, unsigned char *key, int keylen );
/**
* \brief MD5 HMAC process buffer
@ -101,7 +101,7 @@ void ppsspp_md5_hmac_starts( md5_context *ctx, unsigned char *key, int keylen );
* \param input buffer holding the data
* \param ilen length of the input data
*/
void ppsspp_md5_hmac_update( md5_context *ctx, unsigned char *input, int ilen );
void md5_hmac_update( md5_context *ctx, unsigned char *input, int ilen );
/**
* \brief MD5 HMAC final digest
@ -109,7 +109,7 @@ void ppsspp_md5_hmac_update( md5_context *ctx, unsigned char *input, int ilen );
* \param ctx HMAC context
* \param output MD5 HMAC checksum result
*/
void ppsspp_md5_hmac_finish( md5_context *ctx, unsigned char output[16] );
void md5_hmac_finish( md5_context *ctx, unsigned char output[16] );
/**
* \brief Output = HMAC-MD5( hmac key, input buffer )
@ -120,7 +120,7 @@ void ppsspp_md5_hmac_finish( md5_context *ctx, unsigned char output[16] );
* \param ilen length of the input data
* \param output HMAC-MD5 result
*/
void ppsspp_md5_hmac( unsigned char *key, int keylen,
void md5_hmac( unsigned char *key, int keylen,
unsigned char *input, int ilen,
unsigned char output[16] );
@ -129,7 +129,7 @@ void ppsspp_md5_hmac( unsigned char *key, int keylen,
*
* \return 0 if successful, or 1 if the test failed
*/
int ppsspp_md5_self_test( int verbose );
int md5_self_test( int verbose );
#ifdef __cplusplus
}

View file

@ -1,225 +0,0 @@
#pragma once
// Yet another replacement for std::vector, this time for use in graphics queues.
// Its major difference is that you can append uninitialized structures and initialize them after.
// This is not allowed by std::vector but is very useful for our sometimes oversized unions.
// Also, copies during resize are done by memcpy, not by any move constructor or similar.
#include <cstdlib>
#include <cstring>
#ifdef _DEBUG
#include "Common/Log.h"
#endif
// Vector-like container for trivially copyable types. Unlike std::vector it can
// hand out uninitialized slots to be filled in afterwards (useful for oversized
// unions), and all growth copies are done with memcpy/memmove, never constructors.
template<class T>
class FastVec {
public:
	FastVec() {}
	FastVec(size_t initialCapacity) {
		capacity_ = initialCapacity;
		data_ = (T *)malloc(initialCapacity * sizeof(T));
	}
	~FastVec() { if (data_) free(data_); }

	// Appends one uninitialized element and returns a reference so the caller
	// can fill it in place.
	T &push_uninitialized() {
		if (size_ < capacity_) {
			size_++;
			return data_[size_ - 1];
		} else {
			ExtendByOne();
			return data_[size_ - 1];
		}
	}

	void push_back(const T &t) {
		T &dest = push_uninitialized();
		dest = t;
	}

	// Move constructor - steals the other vector's storage.
	FastVec(FastVec &&other) {
		data_ = other.data_;
		size_ = other.size_;
		capacity_ = other.capacity_;
		other.data_ = nullptr;
		other.size_ = 0;
		other.capacity_ = 0;
	}

	FastVec &operator=(FastVec &&other) {
		if (this != &other) {
			// Storage is allocated with malloc(), so it must be released with
			// free(). (Previously this used delete[], which is undefined
			// behavior on malloc'd memory.)
			if (data_)
				free(data_);
			data_ = other.data_;
			size_ = other.size_;
			capacity_ = other.capacity_;
			other.data_ = nullptr;
			other.size_ = 0;
			other.capacity_ = 0;
		}
		return *this;
	}

	// No copy constructor.
	FastVec(const FastVec &other) = delete;
	FastVec &operator=(const FastVec &other) = delete;

	size_t size() const { return size_; }
	size_t capacity() const { return capacity_; }
	void clear() { size_ = 0; }  // Does not free or shrink the storage.
	bool empty() const { return size_ == 0; }

	const T *data() { return data_; }

	T *begin() { return data_; }
	T *end() { return data_ + size_; }
	const T *begin() const { return data_; }
	const T *end() const { return data_ + size_; }

	// Out of bounds (past size() - 1) is undefined behavior.
	T &operator[] (const size_t index) { return data_[index]; }
	const T &operator[] (const size_t index) const { return data_[index]; }
	T &at(const size_t index) { return data_[index]; }
	const T &at(const size_t index) const { return data_[index]; }

	// These two are invalid if empty().
	const T &back() const { return (*this)[size() - 1]; }
	const T &front() const { return (*this)[0]; }

	// Limited functionality for inserts and similar, add as needed.
	// Inserts one uninitialized element before *iter and returns a reference to it.
	T &insert(T *iter) {
		int pos = iter - data_;  // Grab the index before ExtendByOne possibly reallocates.
		ExtendByOne();  // NOTE: this increments size_.
		// There are (size_ - 1 - pos) pre-existing elements to shift up by one slot.
		// (Moving size_ - pos would read/write one element past the old end.)
		if (pos + 1 < (int)size_) {
			memmove(data_ + pos + 1, data_ + pos, (size_ - pos - 1) * sizeof(T));
		}
		return data_[pos];
	}

	void insert(T *destIter, const T *beginIter, const T *endIter) {
		int pos = destIter - data_;  // Index survives the possible reallocation below.
		if (beginIter == endIter)
			return;
		size_t newItems = endIter - beginIter;
		IncreaseCapacityTo(size_ + newItems);
		memmove(data_ + pos + newItems, data_ + pos, (size_ - pos) * sizeof(T));
		memcpy(data_ + pos, beginIter, newItems * sizeof(T));
		size_ += newItems;
	}

	// Shrinks or grows the element count. Elements gained by growing are left
	// uninitialized, consistent with the rest of this class.
	void resize(size_t size) {
		if (size < size_) {
			size_ = size;
		} else if (size > size_) {
			extend_uninitialized(size - size_);
		}
	}

	void reserve(size_t newCapacity) {
		IncreaseCapacityTo(newCapacity);
	}

	// Appends count elements copied (by memcpy) from newData.
	void extend(const T *newData, size_t count) {
		IncreaseCapacityTo(size_ + count);
		memcpy(data_ + size_, newData, count * sizeof(T));
		size_ += count;
	}

	// Appends count uninitialized elements, returns a pointer to the first one.
	T *extend_uninitialized(size_t count) {
		size_t sz = size_;
		if (size_ + count <= capacity_) {
			size_ += count;
			return &data_[sz];
		} else {
			size_t newCapacity = size_ + count * 2;  // Leave some extra room when growing in all cases
			if (newCapacity < capacity_ * 2) {
				// Standard amortized O(1).
				newCapacity = capacity_ * 2;
			}
			IncreaseCapacityTo(newCapacity);
			size_ += count;
			return &data_[sz];
		}
	}

	// Debug-only guard: asserts if the capacity is grown after this call.
	void LockCapacity() {
#ifdef _DEBUG
		capacityLocked_ = true;
#endif
	}

private:
	void IncreaseCapacityTo(size_t newCapacity) {
#ifdef _DEBUG
		_dbg_assert_(!capacityLocked_);
#endif
		if (newCapacity <= capacity_)
			return;
		T *oldData = data_;
		data_ = (T *)malloc(sizeof(T) * newCapacity);
		if (capacity_ != 0) {
			memcpy(data_, oldData, sizeof(T) * size_);
			free(oldData);
		}
		capacity_ = newCapacity;
	}

	void ExtendByOne() {
		// Doubling growth with a reasonable minimum, so tiny vectors don't
		// reallocate on every push.
		size_t newCapacity = capacity_ * 2;
		if (newCapacity < 16) {
			newCapacity = 16;
		}
		IncreaseCapacityTo(newCapacity);
		size_++;
	}

	size_t size_ = 0;
	size_t capacity_ = 0;
	T *data_ = nullptr;

#ifdef _DEBUG
	bool capacityLocked_ = false;
#endif
};
// Simple cyclical vector.
// Fixed-capacity ring buffer addressed by an ever-growing external index.
// Slots wrap around modulo `size`; the highest index seen is tracked so that
// entries can also be read relative to the newest one via Back().
template <class T, size_t size>
class HistoryBuffer {
public:
	// Clears the slot for `index`, bumps the newest-index watermark if needed,
	// and returns a reference to the slot for the caller to fill in.
	T &Add(size_t index) {
#ifdef _DEBUG
		_dbg_assert_((int64_t)index >= 0);
#endif
		if (newest_ < index) {
			newest_ = index;
		}
		T &slot = ring_[index % size];
		slot = T{};
		return slot;
	}

	// Reads `index` steps back from the newest entry (0 == newest).
	const T &Back(size_t index) const {
#ifdef _DEBUG
		_dbg_assert_(index < newest_ && index < size);
#endif
		const size_t slot = (newest_ - index) % size;
		return ring_[slot];
	}

	// Out of bounds (past size() - 1) is undefined behavior.
	T &operator[] (const size_t index) {
#ifdef _DEBUG
		_dbg_assert_(index <= newest_);
#endif
		return ring_[index % size];
	}

	const T &operator[] (const size_t index) const {
#ifdef _DEBUG
		_dbg_assert_(index <= newest_);
#endif
		return ring_[index % size];
	}

	size_t MaxIndex() const {
		return newest_;
	}

private:
	T ring_[size]{};
	size_t newest_ = 0;
};

View file

@ -222,3 +222,4 @@ private:
volatile int curReadBlock;
volatile int curWriteBlock;
};

View file

@ -29,7 +29,7 @@ enum class BucketState : uint8_t {
// we always use very small values, so it's probably better to have them in the same
// cache-line as the corresponding key.
// Enforces that values are pointers to make sure that combined storage makes sense.
template <class Key, class Value>
template <class Key, class Value, Value NullValue>
class DenseHashMap {
public:
DenseHashMap(int initialCapacity) : capacity_(initialCapacity) {
@ -37,44 +37,23 @@ public:
state.resize(initialCapacity);
}
// Returns true if the entry was found, and writes the entry to *value.
// Returns false and does not write to value if no entry was found.
// Note that nulls can be stored.
bool Get(const Key &key, Value *value) const {
// Returns nullptr if no entry was found.
Value Get(const Key &key) {
uint32_t mask = capacity_ - 1;
uint32_t pos = HashKey(key) & mask;
// No? Let's go into search mode. Linear probing.
uint32_t p = pos;
while (true) {
if (state[p] == BucketState::TAKEN && KeyEquals(key, map[p].key)) {
*value = map[p].value;
return true;
} else if (state[p] == BucketState::FREE) {
return false;
}
if (state[p] == BucketState::TAKEN && KeyEquals(key, map[p].key))
return map[p].value;
else if (state[p] == BucketState::FREE)
return NullValue;
p = (p + 1) & mask; // If the state is REMOVED, we just keep on walking.
if (p == pos) {
// We looped around the whole map.
_assert_msg_(false, "DenseHashMap: Hit full on Get()");
}
}
return false;
}
// Only works if Value can be nullptr
Value GetOrNull(const Key &key) const {
Value value;
if (Get(key, &value)) {
return value;
} else {
return (Value)nullptr;
}
}
bool ContainsKey(const Key &key) const {
// Slightly wasteful, though compiler might optimize it.
Value value;
return Get(key, &value);
return NullValue;
}
// Asserts if we already had the key!
@ -135,7 +114,6 @@ public:
return false;
}
// This will never crash if you call it without locking - but, the value might not be right.
size_t size() const {
return count_;
}
@ -212,7 +190,7 @@ private:
// Like the above, uses linear probing for cache-friendliness.
// Does not perform hashing at all so expects well-distributed keys.
template <class Value>
template <class Value, Value NullValue>
class PrehashMap {
public:
PrehashMap(int initialCapacity) : capacity_(initialCapacity) {
@ -221,24 +199,22 @@ public:
}
// Returns nullptr if no entry was found.
bool Get(uint32_t hash, Value *value) {
Value Get(uint32_t hash) {
uint32_t mask = capacity_ - 1;
uint32_t pos = hash & mask;
// No? Let's go into search mode. Linear probing.
uint32_t p = pos;
while (true) {
if (state[p] == BucketState::TAKEN && hash == map[p].hash) {
*value = map[p].value;
return true;
} else if (state[p] == BucketState::FREE) {
return false;
}
if (state[p] == BucketState::TAKEN && hash == map[p].hash)
return map[p].value;
else if (state[p] == BucketState::FREE)
return NullValue;
p = (p + 1) & mask; // If the state is REMOVED, we just keep on walking.
if (p == pos) {
_assert_msg_(false, "PrehashMap: Hit full on Get()");
}
}
return false;
return NullValue;
}
// Returns false if we already had the key! Which is a bit different.

View file

@ -187,7 +187,7 @@ struct FixedTinyVec {
bool operator == (const FixedTinyVec<T, MaxSize> &other) const {
if (count_ != other.count_)
return false;
for (int i = 0; i < count_; i++) {
for (size_t i = 0; i < count_; i++) {
if (!(data_[i] == other.data_[i])) {
return false;
}

View file

@ -617,7 +617,6 @@ void ConvertRGB565ToBGR565(u16 *dst, const u16 *src, u32 numPixels) {
u32 i = 0;
#endif
// TODO: Add a 64-bit loop too.
const u32 *src32 = (const u32 *)src;
u32 *dst32 = (u32 *)dst;
for (; i < numPixels / 2; i++) {

View file

@ -219,15 +219,13 @@ int u8_strlen(const char *s)
}
/* reads the next utf-8 sequence out of a string, updating an index */
uint32_t u8_nextchar(const char *s, int *index) {
uint32_t u8_nextchar(const char *s, int *i) {
uint32_t ch = 0;
int sz = 0;
int i = *index;
do {
ch = (ch << 6) + (unsigned char)s[i++];
ch = (ch << 6) + (unsigned char)s[(*i)++];
sz++;
} while (s[i] && ((s[i]) & 0xC0) == 0x80);
*index = i;
} while (s[*i] && ((s[*i]) & 0xC0) == 0x80);
return ch - offsetsFromUTF8[sz - 1];
}
@ -428,17 +426,6 @@ int u8_is_locale_utf8(const char *locale)
return 0;
}
bool AnyEmojiInString(const char *s, size_t byteCount) {
int i = 0;
while (i < byteCount) {
uint32_t c = u8_nextchar(s, &i);
if (CodepointIsProbablyEmoji(c)) {
return true;
}
}
return false;
}
int UTF8StringNonASCIICount(const char *utf8string) {
UTF8 utf(utf8string);
int count = 0;
@ -571,12 +558,6 @@ std::u16string ConvertUTF8ToUCS2(const std::string &source) {
return dst;
}
std::string CodepointToUTF8(uint32_t codePoint) {
char temp[16]{};
UTF8::encode(temp, codePoint);
return std::string(temp);
}
#ifndef _WIN32
// Replacements for the Win32 wstring functions. Not to be used from emulation code!

View file

@ -26,15 +26,6 @@ int u8_strlen(const char *s);
void u8_inc(const char *s, int *i);
void u8_dec(const char *s, int *i);
inline bool CodepointIsProbablyEmoji(uint32_t c) {
// Original check was some ranges grabbed from https://stackoverflow.com/a/62898106.
// But let's just go with checking if outside the BMP, it's not a big deal if we accidentally
// switch to color when not needed if someone uses a weird glyph.
return c > 0xFFFF;
}
bool AnyEmojiInString(const char *s, size_t byteCount);
class UTF8 {
public:
static const uint32_t INVALID = (uint32_t)-1;
@ -98,8 +89,6 @@ bool UTF8StringHasNonASCII(const char *utf8string);
// Removes overlong encodings and similar.
std::string SanitizeUTF8(const std::string &utf8string);
std::string CodepointToUTF8(uint32_t codePoint);
// UTF8 to Win32 UTF-16
// Should be used when calling Win32 api calls

View file

@ -7,7 +7,6 @@
#include <inttypes.h>
// Hm, what's this for?
#ifndef _MSC_VER
#include <strings.h>
#endif
@ -20,17 +19,17 @@
#include <vector>
#include "Common/Data/Format/IniFile.h"
#include "Common/Data/Text/Parsers.h"
#include "Common/File/VFS/VFS.h"
#include "Common/File/FileUtil.h"
#include "Common/Log.h"
#include "Common/Math/math_util.h"
#include "Common/Data/Text/Parsers.h"
#ifdef _WIN32
#include "Common/Data/Encoding/Utf8.h"
#endif
#include "Common/StringUtils.h"
// This unescapes # signs.
// NOTE: These parse functions can make better use of the string_view - the pos argument should not be needed, for example.
static bool ParseLineKey(std::string_view line, size_t &pos, std::string *keyOut) {
static bool ParseLineKey(const std::string &line, size_t &pos, std::string *keyOut) {
std::string key = "";
while (pos < line.size()) {
@ -45,8 +44,7 @@ static bool ParseLineKey(std::string_view line, size_t &pos, std::string *keyOut
}
// Escaped.
key += line.substr(pos, next - pos - 1);
key.push_back('#');
key += line.substr(pos, next - pos - 1) + "#";
pos = next + 1;
} else if (line[next] == '=') {
// Hurray, done.
@ -62,11 +60,11 @@ static bool ParseLineKey(std::string_view line, size_t &pos, std::string *keyOut
return true;
}
static bool ParseLineValue(std::string_view line, size_t &pos, std::string *valueOut) {
static bool ParseLineValue(const std::string &line, size_t &pos, std::string *valueOut) {
std::string value = "";
std::string_view strippedLine = StripSpaces(line.substr(pos));
if (strippedLine.size() >= 2 && strippedLine[0] == '"' && strippedLine[strippedLine.size() - 1] == '"') {
std::string strippedLine = StripSpaces(line.substr(pos));
if (strippedLine[0] == '"' && strippedLine[strippedLine.size()-1] == '"') {
// Don't remove comment if is surrounded by " "
value += line.substr(pos);
pos = line.npos; // Won't enter the while below
@ -86,8 +84,7 @@ static bool ParseLineValue(std::string_view line, size_t &pos, std::string *valu
break;
} else {
// Escaped.
value += line.substr(pos, next - pos - 1);
value.push_back('#');
value += line.substr(pos, next - pos - 1) + "#";
pos = next + 1;
}
}
@ -99,7 +96,7 @@ static bool ParseLineValue(std::string_view line, size_t &pos, std::string *valu
return true;
}
static bool ParseLineComment(std::string_view line, size_t &pos, std::string *commentOut) {
static bool ParseLineComment(const std::string& line, size_t &pos, std::string *commentOut) {
// Don't bother with anything if we don't need the comment data.
if (commentOut) {
// Include any whitespace/formatting in the comment.
@ -120,7 +117,8 @@ static bool ParseLineComment(std::string_view line, size_t &pos, std::string *co
return true;
}
static bool ParseLine(std::string_view line, std::string* keyOut, std::string* valueOut, std::string* commentOut)
// Ugh, this is ugly.
static bool ParseLine(const std::string& line, std::string* keyOut, std::string* valueOut, std::string* commentOut)
{
// Rules:
// 1. A line starting with ; is commented out.
@ -144,7 +142,7 @@ static bool ParseLine(std::string_view line, std::string* keyOut, std::string* v
return true;
}
static std::string EscapeHash(std::string_view value) {
static std::string EscapeComments(const std::string &value) {
std::string result = "";
for (size_t pos = 0; pos < value.size(); ) {
@ -153,8 +151,7 @@ static std::string EscapeHash(std::string_view value) {
result += value.substr(pos);
pos = value.npos;
} else {
result += value.substr(pos, next - pos);
result += "\\#";
result += value.substr(pos, next - pos) + "\\#";
pos = next + 1;
}
}
@ -162,56 +159,34 @@ static std::string EscapeHash(std::string_view value) {
return result;
}
void ParsedIniLine::ParseFrom(std::string_view line) {
line = StripSpaces(line);
if (line.empty()) {
key.clear();
value.clear();
comment.clear();
} else if (line[0] == '#') {
key.clear();
value.clear();
comment = line;
} else {
ParseLine(line, &key, &value, &comment);
}
}
void ParsedIniLine::Reconstruct(std::string *output) const {
if (!key.empty()) {
*output = EscapeHash(key) + " = " + EscapeHash(value) + comment;
} else {
*output = comment;
}
}
void Section::Clear() {
lines_.clear();
lines.clear();
}
bool Section::GetKeys(std::vector<std::string> &keys) const {
keys.clear();
for (auto liter = lines_.begin(); liter != lines_.end(); ++liter) {
if (!liter->Key().empty())
keys.push_back(std::string(liter->Key()));
}
return true;
}
ParsedIniLine *Section::GetLine(const char *key) {
for (auto &line : lines_) {
if (equalsNoCase(line.Key(), key))
std::string* Section::GetLine(const char* key, std::string* valueOut, std::string* commentOut)
{
for (std::vector<std::string>::iterator iter = lines.begin(); iter != lines.end(); ++iter)
{
std::string& line = *iter;
std::string lineKey;
ParseLine(line, &lineKey, valueOut, commentOut);
if (!strcasecmp(lineKey.c_str(), key))
return &line;
}
return nullptr;
return 0;
}
const ParsedIniLine *Section::GetLine(const char* key) const {
for (auto &line : lines_) {
if (equalsNoCase(line.Key(), key))
const std::string* Section::GetLine(const char* key, std::string* valueOut, std::string* commentOut) const
{
for (std::vector<std::string>::const_iterator iter = lines.begin(); iter != lines.end(); ++iter)
{
const std::string& line = *iter;
std::string lineKey;
ParseLine(line, &lineKey, valueOut, commentOut);
if (!strcasecmp(lineKey.c_str(), key))
return &line;
}
return nullptr;
return 0;
}
void Section::Set(const char* key, uint32_t newValue) {
@ -223,7 +198,6 @@ void Section::Set(const char* key, uint64_t newValue) {
}
void Section::Set(const char* key, float newValue) {
_dbg_assert_(!my_isnanorinf(newValue));
Set(key, StringFromFormat("%f", newValue).c_str());
}
@ -235,13 +209,19 @@ void Section::Set(const char* key, int newValue) {
Set(key, StringFromInt(newValue).c_str());
}
void Section::Set(const char* key, const char* newValue) {
ParsedIniLine *line = GetLine(key);
if (line) {
line->SetValue(newValue);
} else {
void Section::Set(const char* key, const char* newValue)
{
std::string value, commented;
std::string* line = GetLine(key, &value, &commented);
if (line)
{
// Change the value - keep the key and comment
*line = StripSpaces(key) + " = " + EscapeComments(newValue) + commented;
}
else
{
// The key did not already exist in this section - let's add it.
lines_.emplace_back(ParsedIniLine(key, newValue));
lines.emplace_back(std::string(key) + " = " + EscapeComments(newValue));
}
}
@ -253,15 +233,16 @@ void Section::Set(const char* key, const std::string& newValue, const std::strin
Delete(key);
}
bool Section::Get(const char* key, std::string* value, const char* defaultValue) const {
const ParsedIniLine *line = GetLine(key);
if (!line) {
if (defaultValue) {
bool Section::Get(const char* key, std::string* value, const char* defaultValue) const
{
const std::string* line = GetLine(key, value, 0);
if (!line)
{
if (defaultValue)
{
*value = defaultValue;
}
return false;
} else {
*value = line->Value();
}
return true;
}
@ -306,7 +287,7 @@ void Section::Set(const char* key, const std::vector<std::string>& newValues)
}
void Section::AddComment(const std::string &comment) {
lines_.emplace_back(ParsedIniLine::CommentOnly("# " + comment));
lines.emplace_back("# " + comment);
}
bool Section::Get(const char* key, std::vector<std::string>& values) const
@ -341,7 +322,7 @@ bool Section::Get(const char* key, int* value, int defaultValue) const
{
std::string temp;
bool retval = Get(key, &temp, 0);
if (retval && TryParse(temp, value))
if (retval && TryParse(temp.c_str(), value))
return true;
*value = defaultValue;
return false;
@ -371,7 +352,7 @@ bool Section::Get(const char* key, bool* value, bool defaultValue) const
{
std::string temp;
bool retval = Get(key, &temp, 0);
if (retval && TryParse(temp, value))
if (retval && TryParse(temp.c_str(), value))
return true;
*value = defaultValue;
return false;
@ -381,7 +362,7 @@ bool Section::Get(const char* key, float* value, float defaultValue) const
{
std::string temp;
bool retval = Get(key, &temp, 0);
if (retval && TryParse(temp, value))
if (retval && TryParse(temp.c_str(), value))
return true;
*value = defaultValue;
return false;
@ -391,35 +372,46 @@ bool Section::Get(const char* key, double* value, double defaultValue) const
{
std::string temp;
bool retval = Get(key, &temp, 0);
if (retval && TryParse(temp, value))
if (retval && TryParse(temp.c_str(), value))
return true;
*value = defaultValue;
return false;
}
bool Section::Exists(const char *key) const {
for (auto &line : lines_) {
if (equalsNoCase(key, line.Key()))
bool Section::Exists(const char *key) const
{
for (std::vector<std::string>::const_iterator iter = lines.begin(); iter != lines.end(); ++iter)
{
std::string lineKey;
ParseLine(*iter, &lineKey, NULL, NULL);
if (!strcasecmp(lineKey.c_str(), key))
return true;
}
return false;
}
std::map<std::string, std::string> Section::ToMap() const {
std::map<std::string, std::string> Section::ToMap() const
{
std::map<std::string, std::string> outMap;
for (auto &line : lines_) {
if (!line.Key().empty()) {
outMap[std::string(line.Key())] = line.Value();
for (std::vector<std::string>::const_iterator iter = lines.begin(); iter != lines.end(); ++iter)
{
std::string lineKey, lineValue;
if (ParseLine(*iter, &lineKey, &lineValue, NULL)) {
outMap[lineKey] = lineValue;
}
}
return outMap;
}
bool Section::Delete(const char *key) {
ParsedIniLine *line = GetLine(key);
for (auto liter = lines_.begin(); liter != lines_.end(); ++liter) {
if (line == &*liter) {
lines_.erase(liter);
bool Section::Delete(const char *key)
{
std::string* line = GetLine(key, 0, 0);
for (std::vector<std::string>::iterator liter = lines.begin(); liter != lines.end(); ++liter)
{
if (line == &*liter)
{
lines.erase(liter);
return true;
}
}
@ -428,36 +420,42 @@ bool Section::Delete(const char *key) {
// IniFile
const Section* IniFile::GetSection(const char* sectionName) const {
for (const auto &iter : sections)
const Section* IniFile::GetSection(const char* sectionName) const
{
for (std::vector<Section>::const_iterator iter = sections.begin(); iter != sections.end(); ++iter)
if (!strcasecmp(iter->name().c_str(), sectionName))
return iter.get();
return nullptr;
return (&(*iter));
return 0;
}
Section* IniFile::GetSection(const char* sectionName) {
for (const auto &iter : sections)
Section* IniFile::GetSection(const char* sectionName)
{
for (std::vector<Section>::iterator iter = sections.begin(); iter != sections.end(); ++iter)
if (!strcasecmp(iter->name().c_str(), sectionName))
return iter.get();
return nullptr;
return (&(*iter));
return 0;
}
Section* IniFile::GetOrCreateSection(const char* sectionName) {
Section* IniFile::GetOrCreateSection(const char* sectionName)
{
Section* section = GetSection(sectionName);
if (!section) {
sections.push_back(std::unique_ptr<Section>(new Section(sectionName)));
section = sections.back().get();
if (!section)
{
sections.push_back(Section(sectionName));
section = &sections[sections.size() - 1];
}
return section;
}
bool IniFile::DeleteSection(const char* sectionName) {
bool IniFile::DeleteSection(const char* sectionName)
{
Section* s = GetSection(sectionName);
if (!s)
return false;
for (auto iter = sections.begin(); iter != sections.end(); ++iter) {
if (iter->get() == s) {
for (std::vector<Section>::iterator iter = sections.begin(); iter != sections.end(); ++iter)
{
if (&(*iter) == s)
{
sections.erase(iter);
return true;
}
@ -465,21 +463,35 @@ bool IniFile::DeleteSection(const char* sectionName) {
return false;
}
bool IniFile::Exists(const char* sectionName, const char* key) const {
bool IniFile::Exists(const char* sectionName, const char* key) const
{
const Section* section = GetSection(sectionName);
if (!section)
return false;
return section->Exists(key);
}
bool IniFile::DeleteKey(const char* sectionName, const char* key) {
void IniFile::SetLines(const char* sectionName, const std::vector<std::string> &lines)
{
Section* section = GetOrCreateSection(sectionName);
section->lines.clear();
for (std::vector<std::string>::const_iterator iter = lines.begin(); iter != lines.end(); ++iter)
{
section->lines.push_back(*iter);
}
}
bool IniFile::DeleteKey(const char* sectionName, const char* key)
{
Section* section = GetSection(sectionName);
if (!section)
return false;
ParsedIniLine *line = section->GetLine(key);
for (auto liter = section->lines_.begin(); liter != section->lines_.end(); ++liter) {
if (line == &(*liter)) {
section->lines_.erase(liter);
std::string* line = section->GetLine(key, 0, 0);
for (std::vector<std::string>::iterator liter = section->lines.begin(); liter != section->lines.end(); ++liter)
{
if (line == &(*liter))
{
section->lines.erase(liter);
return true;
}
}
@ -487,13 +499,55 @@ bool IniFile::DeleteKey(const char* sectionName, const char* key) {
}
// Return a list of all keys in a section
bool IniFile::GetKeys(const char* sectionName, std::vector<std::string>& keys) const {
const Section *section = GetSection(sectionName);
bool IniFile::GetKeys(const char* sectionName, std::vector<std::string>& keys) const
{
const Section* section = GetSection(sectionName);
if (!section)
return false;
return section->GetKeys(keys);
keys.clear();
for (std::vector<std::string>::const_iterator liter = section->lines.begin(); liter != section->lines.end(); ++liter)
{
std::string key;
ParseLine(*liter, &key, 0, 0);
if (!key.empty())
keys.push_back(key);
}
return true;
}
// Return a list of all lines in a section
bool IniFile::GetLines(const char* sectionName, std::vector<std::string>& lines, const bool remove_comments) const
{
const Section* section = GetSection(sectionName);
if (!section)
return false;
lines.clear();
for (std::vector<std::string>::const_iterator iter = section->lines.begin(); iter != section->lines.end(); ++iter)
{
std::string line = StripSpaces(*iter);
if (remove_comments)
{
int commentPos = (int)line.find('#');
if (commentPos == 0)
{
continue;
}
if (commentPos != (int)std::string::npos)
{
line = StripSpaces(line.substr(0, commentPos));
}
}
lines.push_back(line);
}
return true;
}
void IniFile::SortSections()
{
std::sort(sections.begin(), sections.end());
@ -502,7 +556,7 @@ void IniFile::SortSections()
bool IniFile::Load(const Path &path)
{
sections.clear();
sections.push_back(std::unique_ptr<Section>(new Section("")));
sections.push_back(Section(""));
// first section consists of the comments before the first real section
// Open file
@ -558,18 +612,16 @@ bool IniFile::Load(std::istream &in) {
if (sectionNameEnd != std::string::npos) {
// New section!
std::string sub = line.substr(1, sectionNameEnd - 1);
sections.push_back(std::unique_ptr<Section>(new Section(sub)));
sections.push_back(Section(sub));
if (sectionNameEnd + 1 < line.size()) {
sections.back()->comment = line.substr(sectionNameEnd + 1);
sections[sections.size() - 1].comment = line.substr(sectionNameEnd + 1);
}
} else {
if (sections.empty()) {
sections.push_back(std::unique_ptr<Section>(new Section("")));
sections.push_back(Section(""));
}
ParsedIniLine parsedLine;
parsedLine.ParseFrom(line);
sections.back()->lines_.push_back(parsedLine);
sections[sections.size() - 1].lines.push_back(line);
}
}
}
@ -589,14 +641,13 @@ bool IniFile::Save(const Path &filename)
// TODO: Do we still need this? It's annoying.
fprintf(file, "\xEF\xBB\xBF");
for (const auto &section : sections) {
if (!section->name().empty() && (!section->lines_.empty() || !section->comment.empty())) {
fprintf(file, "[%s]%s\n", section->name().c_str(), section->comment.c_str());
for (const Section &section : sections) {
if (!section.name().empty() && (!section.lines.empty() || !section.comment.empty())) {
fprintf(file, "[%s]%s\n", section.name().c_str(), section.comment.c_str());
}
for (const auto &line : section->lines_) {
std::string buffer;
line.Reconstruct(&buffer);
fprintf(file, "%s\n", buffer.c_str());
for (const std::string &s : section.lines) {
fprintf(file, "%s\n", s.c_str());
}
}

View file

@ -5,10 +5,8 @@
#pragma once
#include <istream>
#include <memory>
#include <map>
#include <string>
#include <string_view>
#include <vector>
#include <cstdint>
@ -16,39 +14,6 @@
class VFSInterface;
class ParsedIniLine {
public:
ParsedIniLine() {}
ParsedIniLine(std::string_view key, std::string_view value) {
this->key = key;
this->value = value;
}
ParsedIniLine(std::string_view key, std::string_view value, std::string_view comment) {
this->key = key;
this->value = value;
this->comment = comment;
}
static ParsedIniLine CommentOnly(std::string_view comment) {
return ParsedIniLine(std::string_view(), std::string_view(), comment);
}
// Comments only come from "ParseFrom".
void ParseFrom(std::string_view line);
void Reconstruct(std::string *output) const;
// Having these as views allows a more efficient internal representation, like one joint string.
std::string_view Key() const { return key; }
std::string_view Value() const { return value; }
std::string_view Comment() const { return comment; }
void SetValue(std::string_view newValue) { value = newValue; }
private:
std::string key;
std::string value;
std::string comment;
};
class Section {
friend class IniFile;
@ -63,8 +28,8 @@ public:
std::map<std::string, std::string> ToMap() const;
ParsedIniLine *GetLine(const char *key);
const ParsedIniLine *GetLine(const char *key) const;
std::string *GetLine(const char* key, std::string* valueOut, std::string* commentOut);
const std::string *GetLine(const char* key, std::string* valueOut, std::string* commentOut) const;
void Set(const char* key, const char* newValue);
void Set(const char* key, const std::string& newValue, const std::string& defaultValue);
@ -105,9 +70,6 @@ public:
bool Get(const char* key, double* value, double defaultValue = false) const;
bool Get(const char* key, std::vector<std::string>& values) const;
// Return a list of all keys in this section
bool GetKeys(std::vector<std::string> &keys) const;
bool operator < (const Section& other) const {
return name_ < other.name_;
}
@ -117,7 +79,7 @@ public:
}
protected:
std::vector<ParsedIniLine> lines_;
std::vector<std::string> lines;
std::string name_;
std::string comment;
};
@ -125,10 +87,12 @@ protected:
class IniFile {
public:
bool Load(const Path &path);
bool Load(const std::string &filename) { return Load(Path(filename)); }
bool Load(std::istream &istream);
bool LoadFromVFS(VFSInterface &vfs, const std::string &filename);
bool Save(const Path &path);
bool Save(const std::string &filename) { return Save(Path(filename)); }
// Returns true if key exists in section
bool Exists(const char* sectionName, const char* key) const;
@ -173,19 +137,21 @@ public:
bool GetKeys(const char* sectionName, std::vector<std::string>& keys) const;
void SetLines(const char* sectionName, const std::vector<std::string> &lines);
bool GetLines(const char* sectionName, std::vector<std::string>& lines, const bool remove_comments = true) const;
bool DeleteKey(const char* sectionName, const char* key);
bool DeleteSection(const char* sectionName);
void SortSections();
std::vector<std::unique_ptr<Section>> &Sections() { return sections; }
std::vector<Section> &Sections() { return sections; }
bool HasSection(const char *section) { return GetSection(section) != 0; }
Section* GetOrCreateSection(const char* section);
private:
std::vector<std::unique_ptr<Section>> sections;
std::vector<Section> sections;
const Section* GetSection(const char* section) const;
Section* GetSection(const char* section);

View file

@ -1,5 +1,3 @@
#pragma once
#include <cstring>
#include <string>
#include <vector>

View file

@ -9,8 +9,6 @@
//
// Zero dependencies apart from stdlib (if you remove the vhjson usage.)
#pragma once
#include <string>
#include <vector>
#include <sstream>

View file

@ -40,8 +40,6 @@ static const char * const g_categoryNames[(size_t)I18NCat::CATEGORY_COUNT] = {
"UI Elements",
"Upgrade",
"VR",
"Achievements",
"PSPSettings",
};
I18NRepo g_i18nrepo;
@ -140,13 +138,13 @@ bool I18NRepo::LoadIni(const std::string &languageID, const Path &overridePath)
Clear();
const std::vector<std::unique_ptr<Section>> &sections = ini.Sections();
const std::vector<Section> &sections = ini.Sections();
std::lock_guard<std::mutex> guard(catsLock_);
for (auto &section : sections) {
for (size_t i = 0; i < (size_t)I18NCat::CATEGORY_COUNT; i++) {
if (!strcmp(section->name().c_str(), g_categoryNames[i])) {
cats_[i].reset(new I18NCategory(*section.get()));
if (!strcmp(section.name().c_str(), g_categoryNames[i])) {
cats_[i].reset(new I18NCategory(section));
}
}
}

View file

@ -56,8 +56,6 @@ enum class I18NCat : uint8_t {
UI_ELEMENTS,
UPGRADE,
VR,
ACHIEVEMENTS,
PSPSETTINGS,
CATEGORY_COUNT,
NONE = CATEGORY_COUNT,
};
@ -116,9 +114,8 @@ public:
std::string LanguageID();
std::shared_ptr<I18NCategory> GetCategory(I18NCat category);
std::shared_ptr<I18NCategory> GetCategoryByName(const char *name);
// Translate the string, by looking up "key" in the file, and falling back to either def or key, in that order, if the lookup fails.
// def can (and usually is) set to nullptr.
const char *T(I18NCat category, const char *key, const char *def = nullptr) {
if (category == I18NCat::NONE)
return def ? def : key;

View file

@ -19,7 +19,7 @@ void NiceSizeFormat(uint64_t size, char *out, size_t bufSize) {
if (s == 0)
snprintf(out, bufSize, "%d B", (int)size);
else
snprintf(out, bufSize, "%3.2f %s", f, sizes[s]);
snprintf(out, bufSize, "%3.1f %s", f, sizes[s]);
}
std::string NiceSizeFormat(uint64_t size) {

View file

@ -177,18 +177,20 @@ void WordWrapper::AppendWord(int endIndex, int lastChar, bool addNewline) {
}
void WordWrapper::Wrap() {
out_.clear();
// First, let's check if it fits as-is.
size_t len = strlen(str_);
// We know it'll be approximately this size. It's fine if the guess is a little off.
out_.reserve(len + len / 16);
if (MeasureWidth(str_, len) <= maxW_) {
// If it fits, we don't need to go through each character.
out_ = str_;
return;
}
out_.clear();
// We know it'll be approximately this size. It's fine if the guess is a little off.
out_.reserve(len + len / 16);
if (flags_ & FLAG_ELLIPSIZE_TEXT) {
ellipsisWidth_ = MeasureWidth("...", 3);
}

View file

@ -18,14 +18,12 @@
#include "ppsspp_config.h"
#if PPSSPP_ARCH(ARM) || PPSSPP_ARCH(ARM64)
#define REAL_CPUDETECT_AVAIL 1
#elif (PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)) && !defined(__EMSCRIPTEN__)
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
#define REAL_CPUDETECT_AVAIL 1
#elif PPSSPP_ARCH(MIPS) || PPSSPP_ARCH(MIPS64)
#define REAL_CPUDETECT_AVAIL 1
#elif PPSSPP_ARCH(RISCV64)
#define REAL_CPUDETECT_AVAIL 1
#elif PPSSPP_ARCH(LOONGARCH64)
#define REAL_CPUDETECT_AVAIL 1
#endif
#ifndef REAL_CPUDETECT_AVAIL

View file

@ -1,14 +1,14 @@
#include "Common/File/AndroidContentURI.h"
bool AndroidContentURI::Parse(std::string_view path) {
bool AndroidContentURI::Parse(const std::string &path) {
const char *prefix = "content://";
if (!startsWith(path, prefix)) {
return false;
}
std::string_view components = path.substr(strlen(prefix));
std::string components = path.substr(strlen(prefix));
std::vector<std::string_view> parts;
std::vector<std::string> parts;
SplitString(components, '/', parts);
if (parts.size() == 3) {
// Single file URI.
@ -60,7 +60,7 @@ AndroidContentURI AndroidContentURI::WithRootFilePath(const std::string &filePat
return uri;
}
AndroidContentURI AndroidContentURI::WithComponent(std::string_view filePath) {
AndroidContentURI AndroidContentURI::WithComponent(const std::string &filePath) {
AndroidContentURI uri = *this;
if (uri.file.empty()) {
// Not sure what to do.
@ -68,17 +68,16 @@ AndroidContentURI AndroidContentURI::WithComponent(std::string_view filePath) {
}
if (uri.file.back() == ':') {
// Special case handling for Document URIs: Treat the ':' as a directory separator too (but preserved in the filename).
uri.file.append(filePath);
uri.file = uri.file + filePath;
} else {
uri.file.push_back('/');
uri.file.append(filePath);
uri.file = uri.file + "/" + filePath;
}
return uri;
}
AndroidContentURI AndroidContentURI::WithExtraExtension(std::string_view extension) {
AndroidContentURI AndroidContentURI::WithExtraExtension(const std::string &extension) {
AndroidContentURI uri = *this;
uri.file.append(extension);
uri.file = uri.file + extension;
return uri;
}

View file

@ -23,15 +23,15 @@ private:
std::string file;
public:
AndroidContentURI() {}
explicit AndroidContentURI(std::string_view path) {
explicit AndroidContentURI(const std::string &path) {
Parse(path);
}
bool Parse(std::string_view path);
bool Parse(const std::string &path);
AndroidContentURI WithRootFilePath(const std::string &filePath);
AndroidContentURI WithComponent(std::string_view filePath);
AndroidContentURI WithExtraExtension(std::string_view extension); // The ext string contains the dot.
AndroidContentURI WithComponent(const std::string &filePath);
AndroidContentURI WithExtraExtension(const std::string &extension);
AndroidContentURI WithReplacedExtension(const std::string &oldExtension, const std::string &newExtension) const;
AndroidContentURI WithReplacedExtension(const std::string &newExtension) const;

View file

@ -61,16 +61,16 @@ void Android_RegisterStorageCallbacks(JNIEnv * env, jobject obj) {
_dbg_assert_(computeRecursiveDirectorySize);
}
bool Android_IsContentUri(std::string_view filename) {
bool Android_IsContentUri(const std::string &filename) {
return startsWith(filename, "content://");
}
int Android_OpenContentUriFd(std::string_view filename, Android_OpenContentUriMode mode) {
int Android_OpenContentUriFd(const std::string &filename, Android_OpenContentUriMode mode) {
if (!g_nativeActivity) {
return -1;
}
std::string fname(filename);
std::string fname = filename;
// PPSSPP adds an ending slash to directories before looking them up.
// TODO: Fix that in the caller (or don't call this for directories).
if (fname.back() == '/')

View file

@ -2,7 +2,6 @@
#include <vector>
#include <string>
#include <string_view>
#include "Common/File/DirListing.h"
@ -40,8 +39,8 @@ extern std::string g_externalDir;
void Android_StorageSetNativeActivity(jobject nativeActivity);
bool Android_IsContentUri(std::string_view uri);
int Android_OpenContentUriFd(std::string_view uri, const Android_OpenContentUriMode mode);
bool Android_IsContentUri(const std::string &uri);
int Android_OpenContentUriFd(const std::string &uri, const Android_OpenContentUriMode mode);
StorageError Android_CreateDirectory(const std::string &parentTreeUri, const std::string &dirName);
StorageError Android_CreateFile(const std::string &parentTreeUri, const std::string &fileName);
StorageError Android_MoveFile(const std::string &fileUri, const std::string &srcParentUri, const std::string &destParentUri);
@ -64,8 +63,8 @@ void Android_RegisterStorageCallbacks(JNIEnv * env, jobject obj);
// Stub out the Android Storage wrappers, so that we can avoid ifdefs everywhere.
inline bool Android_IsContentUri(std::string_view uri) { return false; }
inline int Android_OpenContentUriFd(std::string_view uri, const Android_OpenContentUriMode mode) { return -1; }
inline bool Android_IsContentUri(const std::string &uri) { return false; }
inline int Android_OpenContentUriFd(const std::string &uri, const Android_OpenContentUriMode mode) { return -1; }
inline StorageError Android_CreateDirectory(const std::string &parentTreeUri, const std::string &dirName) { return StorageError::UNKNOWN; }
inline StorageError Android_CreateFile(const std::string &parentTreeUri, const std::string &fileName) { return StorageError::UNKNOWN; }
inline StorageError Android_MoveFile(const std::string &fileUri, const std::string &srcParentUri, const std::string &destParentUri) { return StorageError::UNKNOWN; }

View file

@ -6,7 +6,6 @@
#include <direct.h>
#if PPSSPP_PLATFORM(UWP)
#include <fileapifromapp.h>
#include <UWP/UWPHelpers/StorageManager.h>
#endif
#else
#include <strings.h>
@ -184,7 +183,7 @@ bool GetFilesInDir(const Path &directory, std::vector<FileInfo> *files, const ch
std::string tmp;
while (*filter) {
if (*filter == ':') {
filters.insert(tmp);
filters.insert(std::move(tmp));
tmp.clear();
} else {
tmp.push_back(*filter);
@ -192,7 +191,7 @@ bool GetFilesInDir(const Path &directory, std::vector<FileInfo> *files, const ch
filter++;
}
if (!tmp.empty())
filters.insert(tmp);
filters.insert(std::move(tmp));
}
#if PPSSPP_PLATFORM(WINDOWS)
@ -221,13 +220,6 @@ bool GetFilesInDir(const Path &directory, std::vector<FileInfo> *files, const ch
HANDLE hFind = FindFirstFileEx((directory.ToWString() + L"\\*").c_str(), FindExInfoStandard, &ffd, FindExSearchNameMatch, NULL, 0);
#endif
if (hFind == INVALID_HANDLE_VALUE) {
#if PPSSPP_PLATFORM(UWP)
// This step just to avoid empty results by adding fake folders
// it will help also to navigate back between selected folder
// we must ignore this function for any request other than UI navigation
if (GetFakeFolders(directory, files, filter, filters))
return true;
#endif
return false;
}
do {

View file

@ -23,19 +23,9 @@
#include "Common/File/AndroidStorage.h"
#include "Common/Data/Encoding/Utf8.h"
#if PPSSPP_PLATFORM(UWP)
#include "UWP/UWPHelpers/StorageManager.h"
#endif
bool free_disk_space(const Path &path, int64_t &space) {
#ifdef _WIN32
ULARGE_INTEGER free;
#if PPSSPP_PLATFORM(UWP)
if (GetDriveFreeSpace(path, space)) {
return true;
}
else
#endif
if (GetDiskFreeSpaceExW(path.ToWString().c_str(), &free, nullptr, nullptr)) {
space = free.QuadPart;
return true;

View file

@ -54,7 +54,6 @@
#include <direct.h> // getcwd
#if PPSSPP_PLATFORM(UWP)
#include <fileapifromapp.h>
#include "UWP/UWPHelpers/StorageManager.h"
#endif
#else
#include <sys/param.h>
@ -137,20 +136,14 @@ FILE *OpenCFile(const Path &path, const char *mode) {
}
// TODO: Support append modes and stuff... For now let's go with the most common one.
Android_OpenContentUriMode openMode = Android_OpenContentUriMode::READ_WRITE_TRUNCATE;
const char *fmode = "wb";
if (!strcmp(mode, "at") || !strcmp(mode, "a")) {
openMode = Android_OpenContentUriMode::READ_WRITE;
fmode = "ab";
}
int descriptor = Android_OpenContentUriFd(path.ToString(), openMode);
int descriptor = Android_OpenContentUriFd(path.ToString(), Android_OpenContentUriMode::READ_WRITE_TRUNCATE);
if (descriptor < 0) {
INFO_LOG(COMMON, "Opening '%s' for write failed", path.ToString().c_str());
return nullptr;
}
FILE *f = fdopen(descriptor, fmode);
FILE *f = fdopen(descriptor, "wb");
if (f && (!strcmp(mode, "at") || !strcmp(mode, "a"))) {
// Append mode - not sure we got a "true" append mode, so seek to the end.
// Append mode.
fseek(f, 0, SEEK_END);
}
return f;
@ -165,18 +158,7 @@ FILE *OpenCFile(const Path &path, const char *mode) {
}
#if defined(_WIN32) && defined(UNICODE)
#if PPSSPP_PLATFORM(UWP) && !defined(__LIBRETRO__)
// We shouldn't use _wfopen here,
// this function is not allowed to read outside Local and Installation folders
// FileSystem (broadFileSystemAccess) doesn't apply on _wfopen
// if we have custom memory stick location _wfopen will return null
// 'GetFileStreamFromApp' will convert 'mode' to [access, share, creationDisposition]
// then it will call 'CreateFile2FromAppW' -> convert HANDLE to FILE*
FILE* file = GetFileStreamFromApp(path.ToString(), mode);
return file;
#else
return _wfopen(path.ToWString().c_str(), ConvertUTF8ToWString(mode).c_str());
#endif
#else
return fopen(path.c_str(), mode);
#endif
@ -592,7 +574,7 @@ bool CreateFullPath(const Path &path) {
return false;
}
std::vector<std::string_view> parts;
std::vector<std::string> parts;
SplitString(diff, '/', parts);
// Probably not necessary sanity check, ported from the old code.
@ -602,7 +584,7 @@ bool CreateFullPath(const Path &path) {
}
Path curPath = root;
for (auto part : parts) {
for (auto &part : parts) {
curPath /= part;
if (!File::Exists(curPath)) {
File::CreateDir(curPath);
@ -677,15 +659,10 @@ bool Rename(const Path &srcFilename, const Path &destFilename) {
INFO_LOG(COMMON, "Rename: %s --> %s", srcFilename.c_str(), destFilename.c_str());
#if defined(_WIN32) && defined(UNICODE)
#if PPSSPP_PLATFORM(UWP)
if (MoveFileFromAppW(srcFilename.ToWString().c_str(), destFilename.ToWString().c_str()))
return true;
#else
std::wstring srcw = srcFilename.ToWString();
std::wstring destw = destFilename.ToWString();
if (_wrename(srcw.c_str(), destw.c_str()) == 0)
return true;
#endif
#else
if (rename(srcFilename.c_str(), destFilename.c_str()) == 0)
return true;
@ -969,7 +946,7 @@ bool OpenFileInEditor(const Path &fileName) {
#if PPSSPP_PLATFORM(WINDOWS)
#if PPSSPP_PLATFORM(UWP)
OpenFile(fileName.ToString());
// Do nothing.
#else
ShellExecuteW(nullptr, L"open", fileName.ToWString().c_str(), nullptr, nullptr, SW_SHOW);
#endif
@ -1181,7 +1158,6 @@ uint8_t *ReadLocalFile(const Path &filename, size_t *size) {
return nullptr;
}
fseek(file, 0, SEEK_SET);
// NOTE: If you find ~10 memory leaks from here, with very varying sizes, it might be the VFPU LUTs.
uint8_t *contents = new uint8_t[f_size + 1];
if (fread(contents, 1, f_size, file) != f_size) {
delete[] contents;

View file

@ -12,17 +12,13 @@
#include "android/jni/app-android.h"
#if PPSSPP_PLATFORM(UWP) && !defined(__LIBRETRO__)
#include "UWP/UWPHelpers/StorageManager.h"
#endif
#if HOST_IS_CASE_SENSITIVE
#include <dirent.h>
#include <unistd.h>
#include <sys/stat.h>
#endif
Path::Path(std::string_view str) {
Path::Path(const std::string &str) {
Init(str);
}
@ -33,7 +29,7 @@ Path::Path(const std::wstring &str) {
}
#endif
void Path::Init(std::string_view str) {
void Path::Init(const std::string &str) {
if (str.empty()) {
type_ = PathType::UNDEFINED;
path_.clear();
@ -81,7 +77,7 @@ void Path::Init(std::string_view str) {
// We always use forward slashes internally, we convert to backslash only when
// converted to a wstring.
Path Path::operator /(std::string_view subdir) const {
Path Path::operator /(const std::string &subdir) const {
if (type_ == PathType::CONTENT_URI) {
AndroidContentURI uri(path_);
return Path(uri.WithComponent(subdir).ToString());
@ -104,18 +100,18 @@ Path Path::operator /(std::string_view subdir) const {
return Path(fullPath);
}
void Path::operator /=(std::string_view subdir) {
void Path::operator /=(const std::string &subdir) {
*this = *this / subdir;
}
Path Path::WithExtraExtension(std::string_view ext) const {
Path Path::WithExtraExtension(const std::string &ext) const {
if (type_ == PathType::CONTENT_URI) {
AndroidContentURI uri(path_);
return Path(uri.WithExtraExtension(ext).ToString());
}
_dbg_assert_(!ext.empty() && ext[0] == '.');
return Path(path_ + std::string(ext));
return Path(path_ + ext);
}
Path Path::WithReplacedExtension(const std::string &oldExtension, const std::string &newExtension) const {
@ -161,7 +157,7 @@ std::string Path::GetFilename() const {
return path_;
}
std::string GetExtFromString(std::string_view str) {
std::string GetExtFromString(const std::string &str) {
size_t pos = str.rfind(".");
if (pos == std::string::npos) {
return "";
@ -171,7 +167,7 @@ std::string GetExtFromString(std::string_view str) {
// Don't want to detect "df/file" from "/as.df/file"
return "";
}
std::string ext(str.substr(pos));
std::string ext = str.substr(pos);
for (size_t i = 0; i < ext.size(); i++) {
ext[i] = tolower(ext[i]);
}
@ -262,15 +258,6 @@ std::wstring Path::ToWString() const {
}
return w;
}
std::string Path::ToCString() const {
std::string w = path_;
for (size_t i = 0; i < w.size(); i++) {
if (w[i] == '/') {
w[i] = '\\';
}
}
return w;
}
#endif
std::string Path::ToVisualString(const char *relativeRoot) const {
@ -278,9 +265,6 @@ std::string Path::ToVisualString(const char *relativeRoot) const {
return AndroidContentURI(path_).ToVisualString();
#if PPSSPP_PLATFORM(WINDOWS)
} else if (type_ == PathType::NATIVE) {
#if PPSSPP_PLATFORM(UWP) && !defined(__LIBRETRO__)
return GetPreviewPath(path_);
#else
// It can be useful to show the path as relative to the memstick
if (relativeRoot) {
std::string root = ReplaceAll(relativeRoot, "/", "\\");
@ -293,7 +277,6 @@ std::string Path::ToVisualString(const char *relativeRoot) const {
} else {
return ReplaceAll(path_, "/", "\\");
}
#endif
#else
if (relativeRoot) {
std::string root = relativeRoot;

View file

@ -3,7 +3,6 @@
#include "ppsspp_config.h"
#include <string>
#include <string_view>
#if defined(__APPLE__)
@ -37,11 +36,11 @@ enum class PathType {
class Path {
private:
void Init(std::string_view str);
void Init(const std::string &str);
public:
Path() : type_(PathType::UNDEFINED) {}
explicit Path(std::string_view str);
explicit Path(const std::string &str);
#if PPSSPP_PLATFORM(WINDOWS)
explicit Path(const std::wstring &str);
@ -72,13 +71,13 @@ public:
bool IsAbsolute() const;
// Returns a path extended with a subdirectory.
Path operator /(std::string_view subdir) const;
Path operator /(const std::string &subdir) const;
// Navigates down into a subdir.
void operator /=(std::string_view subdir);
void operator /=(const std::string &subdir);
// File extension manipulation.
Path WithExtraExtension(std::string_view ext) const;
Path WithExtraExtension(const std::string &ext) const;
Path WithReplacedExtension(const std::string &oldExtension, const std::string &newExtension) const;
Path WithReplacedExtension(const std::string &newExtension) const;
@ -91,11 +90,6 @@ public:
#if PPSSPP_PLATFORM(WINDOWS)
std::wstring ToWString() const;
std::string ToCString() const; // Flips the slashes back to Windows standard, but string still UTF-8.
#else
std::string ToCString() const {
return ToString();
}
#endif
// Pass in a relative root to turn the path into a relative path - if it is one!
@ -140,7 +134,7 @@ private:
};
// Utility function for parsing out file extensions.
std::string GetExtFromString(std::string_view str);
std::string GetExtFromString(const std::string &str);
// Utility function for fixing the case of paths. Only present on Unix-like systems.

View file

@ -38,7 +38,7 @@ bool LoadRemoteFileList(const Path &url, const std::string &userAgent, bool *can
http::RequestParams req(baseURL.Resource(), "text/plain, text/html; q=0.9, */*; q=0.8");
if (http.Resolve(baseURL.Host().c_str(), baseURL.Port())) {
if (http.Connect(2, 20.0, cancel)) {
net::RequestProgress progress(cancel);
http::RequestProgress progress(cancel);
code = http.GET(req, &result, responseHeaders, &progress);
http.Disconnect();
}
@ -78,7 +78,7 @@ bool LoadRemoteFileList(const Path &url, const std::string &userAgent, bool *can
return false;
}
for (auto &item : items) {
for (std::string item : items) {
// Apply some workarounds.
if (item.empty())
continue;
@ -210,7 +210,7 @@ std::string PathBrowser::GetFriendlyPath() const {
bool PathBrowser::GetListing(std::vector<File::FileInfo> &fileInfo, const char *filter, bool *cancel) {
std::unique_lock<std::mutex> guard(pendingLock_);
while (!IsListingReady() && (!cancel || !*cancel)) {
// In case cancel changes, just sleep. TODO: Replace with condition variable.
// In case cancel changes, just sleep.
guard.unlock();
sleep_ms(50);
guard.lock();
@ -221,6 +221,14 @@ bool PathBrowser::GetListing(std::vector<File::FileInfo> &fileInfo, const char *
}
bool PathBrowser::CanNavigateUp() {
/* Leaving this commented out, not sure if there's a use in UWP for navigating up from the user data folder.
#if PPSSPP_PLATFORM(UWP)
// Can't navigate up from memstick folder :(
if (path_ == GetSysDirectory(DIRECTORY_MEMSTICK_ROOT)) {
return false;
}
#endif
*/
return path_.CanNavigateUp();
}

View file

@ -4,7 +4,6 @@
#include "Common/File/VFS/VFS.h"
#include "Common/File/FileUtil.h"
#include "Common/File/AndroidStorage.h"
#include "Common/StringUtils.h"
VFS g_VFS;
@ -28,7 +27,7 @@ void VFS::Clear() {
static bool IsLocalAbsolutePath(const char *path) {
bool isUnixLocal = path[0] == '/';
#ifdef _WIN32
bool isWindowsLocal = (isalpha(path[0]) && path[1] == ':') || startsWith(path, "\\\\") || startsWith(path, "//");
bool isWindowsLocal = isalpha(path[0]) && path[1] == ':';
#else
bool isWindowsLocal = false;
#endif

View file

@ -272,7 +272,6 @@ VFSOpenFile *ZipFileReader::OpenFileForRead(VFSFileReference *vfsReference, size
zip_stat_t zstat;
if (zip_stat_index(zip_file_, reference->zi, 0, &zstat) != 0) {
lock_.unlock();
delete openFile;
return nullptr;
}

View file

@ -0,0 +1,402 @@
// Copyright 2012 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <array>
#include <cstdlib>
#include "Common/Log.h"
#include "Common/GL/GLInterface/EGL.h"
// Present the rendered frame to the window surface.
// Presents the back buffer by swapping the EGL surface.
void cInterfaceEGL::Swap() {
	eglSwapBuffers(egl_dpy, egl_surf);
}
// Sets the minimum number of vblanks between swaps (0 = unthrottled).
void cInterfaceEGL::SwapInterval(int Interval) {
	eglSwapInterval(egl_dpy, Interval);
}
// Resolves a GL/GLES entry point by name through eglGetProcAddress.
void* cInterfaceEGL::GetFuncAddress(const std::string& name) {
	return (void*)eglGetProcAddress(name.c_str());
}
void cInterfaceEGL::DetectMode() {
EGLint num_configs;
bool supportsGL = false, supportsGLES2 = false, supportsGLES3 = false;
static const int renderable_types[3] = {
EGL_OPENGL_BIT,
(1 << 6), /* EGL_OPENGL_ES3_BIT_KHR */
EGL_OPENGL_ES2_BIT,
};
static const char *renderable_names[3] = {
"OpenGL", "OpenGL ES 3", "OpenGL ES 2"
};
for (int i = 0; i < 3; i++) {
int renderable_type = renderable_types[i];
const char *renderable_name = renderable_names[i];
// attributes for a visual in RGBA format with at least
// 8 bits per color
int attribs[] = {
EGL_RENDERABLE_TYPE, renderable_type,
EGL_RED_SIZE, 8,
EGL_GREEN_SIZE, 8,
EGL_BLUE_SIZE, 8,
EGL_ALPHA_SIZE, 8,
EGL_DEPTH_SIZE, 16,
EGL_STENCIL_SIZE, 8,
EGL_SURFACE_TYPE, EGL_WINDOW_BIT,
EGL_TRANSPARENT_TYPE, EGL_NONE,
EGL_SAMPLES, 0,
EGL_NONE
};
// Get how many configs there are
if (!eglChooseConfig( egl_dpy, attribs, nullptr, 0, &num_configs)) {
EGL_ILOG("DetectMode: couldn't get an EGL visual config with renderable_type=%s", renderable_name);
continue;
}
EGL_ILOG("DetectMode: got an EGL visual config with renderable_type=%s", renderable_name);
EGLConfig* config = new EGLConfig[num_configs];
// Get all the configurations
if (!eglChooseConfig(egl_dpy, attribs, config, num_configs, &num_configs)) {
EGL_ILOG("DetectMode: couldn't choose an EGL visual config\n");
delete[] config;
continue;
}
for (int i = 0; i < num_configs; ++i) {
EGLint attribVal;
bool ret;
ret = eglGetConfigAttrib(egl_dpy, config[i], EGL_RENDERABLE_TYPE, &attribVal);
if (ret) {
if ((attribVal & EGL_OPENGL_BIT) && s_opengl_mode != GLInterfaceMode::MODE_DETECT_ES)
supportsGL = true;
if (attribVal & (1 << 6)) /* EGL_OPENGL_ES3_BIT_KHR */
supportsGLES3 = true; // Apparently, this cannot be completely trusted so we implement a fallback to ES 2.0 below.
if (attribVal & EGL_OPENGL_ES2_BIT)
supportsGLES2 = true;
}
}
delete[] config;
}
if (supportsGL)
s_opengl_mode = GLInterfaceMode::MODE_OPENGL;
else if (supportsGLES3)
s_opengl_mode = GLInterfaceMode::MODE_OPENGLES3;
else if (supportsGLES2)
s_opengl_mode = GLInterfaceMode::MODE_OPENGLES2;
if (s_opengl_mode == GLInterfaceMode::MODE_DETECT) // Errored before we found a mode
s_opengl_mode = GLInterfaceMode::MODE_OPENGL; // Fall back to OpenGL
}
// Dumps the interesting attributes of an EGL config to the info log.
// Fixes vs. previous version: removed a block of unused locals, removed the
// duplicated EGL_MIN_SWAP_INTERVAL table entry, and no longer logs an
// uninitialized value when eglGetConfigAttrib fails.
static void LogEGLConfig(EGLDisplay egl_dpy, EGLConfig config) {
	static const struct {
		EGLint attrib;
		const char *name;
	} vals[] = {
		{ EGL_RED_SIZE, "EGL_RED_SIZE" },
		{ EGL_GREEN_SIZE, "EGL_GREEN_SIZE" },
		{ EGL_BLUE_SIZE, "EGL_BLUE_SIZE" },
		{ EGL_ALPHA_SIZE, "EGL_ALPHA_SIZE" },
		{ EGL_DEPTH_SIZE, "EGL_DEPTH_SIZE" },
		{ EGL_STENCIL_SIZE, "EGL_STENCIL_SIZE" },
		{ EGL_NATIVE_VISUAL_ID, "EGL_NATIVE_VISUAL_ID" },
		{ EGL_NATIVE_VISUAL_TYPE, "EGL_NATIVE_VISUAL_TYPE" },
		{ EGL_MAX_SWAP_INTERVAL, "EGL_MAX_SWAP_INTERVAL" },
		{ EGL_MIN_SWAP_INTERVAL, "EGL_MIN_SWAP_INTERVAL" },
		{ EGL_NATIVE_RENDERABLE, "EGL_NATIVE_RENDERABLE" },
		{ EGL_COLOR_BUFFER_TYPE, "EGL_COLOR_BUFFER_TYPE" },
		{ EGL_BUFFER_SIZE, "EGL_BUFFER_SIZE" },
		{ EGL_CONFIG_ID, "EGL_CONFIG_ID" },
		{ EGL_SAMPLES, "EGL_SAMPLES" },
	};
	for (int i = 0; i < (int)(sizeof(vals)/sizeof(vals[0])); i++) {
		EGLint value = 0;
		if (eglGetConfigAttrib(egl_dpy, config, vals[i].attrib, &value)) {
			EGL_ILOG(" %s = %d", vals[i].name, value);
		} else {
			EGL_ILOG(" %s = (query failed)", vals[i].name);
		}
	}
}
// Translates an EGL error code into its symbolic name for logging.
// Unknown codes map to "(UNKNOWN)".
const char *cInterfaceEGL::EGLGetErrorString(EGLint error) {
	static const struct {
		EGLint code;
		const char *name;
	} kErrorNames[] = {
		{ EGL_SUCCESS, "EGL_SUCCESS" },
		{ EGL_NOT_INITIALIZED, "EGL_NOT_INITIALIZED" },
		{ EGL_BAD_ACCESS, "EGL_BAD_ACCESS" },
		{ EGL_BAD_ALLOC, "EGL_BAD_ALLOC" },
		{ EGL_BAD_ATTRIBUTE, "EGL_BAD_ATTRIBUTE" },
		{ EGL_BAD_CONTEXT, "EGL_BAD_CONTEXT" },
		{ EGL_BAD_CONFIG, "EGL_BAD_CONFIG" },
		{ EGL_BAD_CURRENT_SURFACE, "EGL_BAD_CURRENT_SURFACE" },
		{ EGL_BAD_DISPLAY, "EGL_BAD_DISPLAY" },
		{ EGL_BAD_SURFACE, "EGL_BAD_SURFACE" },
		{ EGL_BAD_MATCH, "EGL_BAD_MATCH" },
		{ EGL_BAD_PARAMETER, "EGL_BAD_PARAMETER" },
		{ EGL_BAD_NATIVE_PIXMAP, "EGL_BAD_NATIVE_PIXMAP" },
		{ EGL_BAD_NATIVE_WINDOW, "EGL_BAD_NATIVE_WINDOW" },
		{ EGL_CONTEXT_LOST, "EGL_CONTEXT_LOST" },
	};
	for (const auto &entry : kErrorNames) {
		if (entry.code == error)
			return entry.name;
	}
	return "(UNKNOWN)";
}
bool cInterfaceEGL::ChooseAndCreate(void *window_handle, bool core, bool use565) {
int attribs32[] = {
EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, // Keep this first!
EGL_RED_SIZE, 8,
EGL_GREEN_SIZE, 8,
EGL_BLUE_SIZE, 8,
EGL_ALPHA_SIZE, 8,
EGL_DEPTH_SIZE, 16,
EGL_STENCIL_SIZE, 8,
EGL_TRANSPARENT_TYPE, EGL_NONE,
EGL_NONE, 0
};
int attribs16[] = {
EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, // Keep this first!
EGL_RED_SIZE, 5,
EGL_GREEN_SIZE, 6,
EGL_BLUE_SIZE, 5,
EGL_ALPHA_SIZE, 0,
EGL_DEPTH_SIZE, 16,
EGL_STENCIL_SIZE, 8,
EGL_TRANSPARENT_TYPE, EGL_NONE,
EGL_NONE, 0
};
int attribsFallback32[] = {
EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, // Keep this first!
EGL_RED_SIZE, 8,
EGL_GREEN_SIZE, 8,
EGL_BLUE_SIZE, 8,
EGL_ALPHA_SIZE, 8,
EGL_DEPTH_SIZE, 16,
EGL_NONE, 0
};
int attribsFallback16[] = {
EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, // Keep this first!
EGL_RED_SIZE, 5,
EGL_GREEN_SIZE, 6,
EGL_BLUE_SIZE, 5,
EGL_ALPHA_SIZE, 0,
EGL_DEPTH_SIZE, 16,
EGL_NONE, 0
};
int *attribs = attribs32;
int *attribsFallback = attribsFallback32;
if (use565) {
attribs = attribs16;
attribsFallback = attribsFallback16;
}
EGLint ctx_attribs[] = {
EGL_CONTEXT_CLIENT_VERSION, 2,
EGL_NONE, 0,
EGL_NONE, 0,
EGL_NONE, 0,
EGL_NONE, 0,
};
switch (s_opengl_mode) {
case MODE_OPENGL:
EGL_ILOG("Setting RENDERABLE_TYPE to EGL_OPENGL_BIT");
attribs[1] = EGL_OPENGL_BIT;
// 1 will be major version, and 3 the minor version.
ctx_attribs[2] = 0x30FB; /* EGL_CONTEXT_MINOR_VERSION_KHR */
// Let's always use a core profile here.
ctx_attribs[4] = 0x30FD; /* EGL_CONTEXT_OPENGL_PROFILE_MASK_KHR */
ctx_attribs[5] = 1; /* EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT_KHR */
break;
case MODE_OPENGLES2:
EGL_ILOG("Setting RENDERABLE_TYPE to EGL_OPENGL_ES2_BIT");
attribs[1] = EGL_OPENGL_ES2_BIT;
ctx_attribs[1] = 2;
break;
case MODE_OPENGLES3:
EGL_ILOG("Setting RENDERABLE_TYPE to EGL_OPENGL_ES3_BIT_KHR");
attribs[1] = (1 << 6); /* EGL_OPENGL_ES3_BIT_KHR */
ctx_attribs[1] = 3;
break;
default:
EGL_ELOG("Unknown OpenGL mode set\n");
return false;
break;
}
EGL_ILOG("Calling eglChooseConfig to get number of configs (use16bit=%d)...", (int)use565);
EGLConfig *configs;
EGLint num_configs = 0;
if (!eglChooseConfig(egl_dpy, attribs, NULL, 0, &num_configs) || num_configs == 0) {
EGL_ILOG("Error: couldn't get a number of configs. Trying with fallback config (no stencil, not specifying transparent:none)\n");
attribsFallback[1] = attribs[1];
attribs = attribsFallback;
if (!eglChooseConfig(egl_dpy, attribs, NULL, 0, &num_configs) || num_configs == 0) {
eglTerminate(egl_dpy);
return false;
}
}
EGL_ILOG("Got %d configs. Now choosing...", num_configs);
configs = new EGLConfig[num_configs];
if (!eglChooseConfig(egl_dpy, attribs, configs, num_configs, &num_configs)) {
EGL_ELOG("Error: couldn't get an EGL visual config (num_configs=%d)! Terminating EGL.\n", num_configs);
eglTerminate(egl_dpy);
return false;
}
int chosenConfig = -1;
// Find our ideal config in the list. If it's there, use it, otherwise pick whatever the device wanted (#0)
int wantedAlpha = 8;
// Requiring alpha seems to be a problem on older devices. Let's see if this helps...
if (attribs[1] == EGL_OPENGL_ES2_BIT)
wantedAlpha = 0;
for (int i = 0; i < num_configs; i++) {
EGL_ILOG("Config %d:", i);
LogEGLConfig(egl_dpy, configs[i]);
int red, green, blue, alpha, depth, stencil;
eglGetConfigAttrib(egl_dpy, configs[i], EGL_RED_SIZE, &red);
eglGetConfigAttrib(egl_dpy, configs[i], EGL_GREEN_SIZE, &green);
eglGetConfigAttrib(egl_dpy, configs[i], EGL_BLUE_SIZE, &blue);
eglGetConfigAttrib(egl_dpy, configs[i], EGL_ALPHA_SIZE, &alpha);
eglGetConfigAttrib(egl_dpy, configs[i], EGL_DEPTH_SIZE, &depth);
eglGetConfigAttrib(egl_dpy, configs[i], EGL_STENCIL_SIZE, &stencil);
if (chosenConfig == -1 && red == 8 && green == 8 && blue == 8 && alpha == wantedAlpha && depth == 24 && stencil == 8) {
chosenConfig = i;
}
}
if (chosenConfig == -1)
chosenConfig = 0;
EGL_ILOG("eglChooseConfig successful: num_configs=%d, choosing config %d", num_configs, chosenConfig);
if (s_opengl_mode == MODE_OPENGL) {
EGL_ILOG("eglBindAPI(OPENGL)");
eglBindAPI(EGL_OPENGL_API);
} else {
EGL_ILOG("eglBindAPI(OPENGL_ES)");
eglBindAPI(EGL_OPENGL_ES_API);
}
EGLNativeWindowType host_window = (EGLNativeWindowType)window_handle;
EGLNativeWindowType native_window = InitializePlatform(host_window, configs[chosenConfig]);
const char *s = eglQueryString(egl_dpy, EGL_VERSION);
EGL_ILOG("EGL_VERSION = %s\n", s);
s = eglQueryString(egl_dpy, EGL_VENDOR);
EGL_ILOG("EGL_VENDOR = %s\n", s);
s = eglQueryString(egl_dpy, EGL_EXTENSIONS);
EGL_ILOG("EGL_EXTENSIONS = %s\n", s);
s = eglQueryString(egl_dpy, EGL_CLIENT_APIS);
EGL_ILOG("EGL_CLIENT_APIS = %s\n", s);
if (s_opengl_mode == MODE_OPENGL) {
EGL_ILOG("Finding a good GL version");
egl_ctx = nullptr;
for (int minor = 6; minor >= 0 && !egl_ctx; --minor) {
ctx_attribs[1] = 4;
ctx_attribs[3] = minor;
egl_ctx = eglCreateContext(egl_dpy, configs[chosenConfig], EGL_NO_CONTEXT, ctx_attribs);
}
if (!egl_ctx) {
ctx_attribs[1] = 3;
ctx_attribs[3] = 3;
egl_ctx = eglCreateContext(egl_dpy, configs[chosenConfig], EGL_NO_CONTEXT, ctx_attribs);
}
} else {
egl_ctx = eglCreateContext(egl_dpy, configs[chosenConfig], EGL_NO_CONTEXT, ctx_attribs);
}
if (!egl_ctx) {
EGL_ILOG("Error: eglCreateContext failed: %s\n", EGLGetErrorString(eglGetError()));
delete[] configs;
return false;
}
EGL_ILOG("Successfully created EGL context.\n");
egl_surf = eglCreateWindowSurface(egl_dpy, configs[chosenConfig], native_window, nullptr);
if (!egl_surf) {
EGL_ILOG("Error: eglCreateWindowSurface failed: native_window=%p error=%s ctx_attribs[1]==%d\n", native_window, EGLGetErrorString(eglGetError()), ctx_attribs[1]);
eglDestroyContext(egl_dpy, egl_ctx);
delete[] configs;
return false;
}
EGL_ILOG("Successfully created EGL window surface (window=%p).\n", native_window);
delete[] configs;
return true;
}
// Create rendering window.
// Create rendering window: initializes EGL, detects the best mode if asked,
// and creates the context/surface, falling back to GLES 2.0 when a GL or
// GLES 3 attempt fails.
// Fix vs. previous version: if ChooseAndCreate failed while already in a
// mode with no fallback (e.g. MODE_OPENGLES2), this returned true with no
// context created. It now reports failure and terminates the display.
bool cInterfaceEGL::Create(void *window_handle, bool core, bool use565) {
	EGLint egl_major, egl_minor;

	egl_dpy = OpenDisplay();
	if (!egl_dpy) {
		EGL_ILOG("Error: eglGetDisplay() failed\n");
		return false;
	}

	if (!eglInitialize(egl_dpy, &egl_major, &egl_minor)) {
		EGL_ILOG("Error: eglInitialize() failed\n");
		return false;
	}
	EGL_ILOG("eglInitialize() succeeded (use565=%d)\n", (int)use565);

	if (s_opengl_mode == MODE_DETECT || s_opengl_mode == MODE_DETECT_ES)
		DetectMode();

	if (!ChooseAndCreate(window_handle, core, use565)) {
		if (s_opengl_mode == MODE_OPENGLES3 || s_opengl_mode == MODE_OPENGL) {
			// Fallback to ES 2.0 and try again.
			s_opengl_mode = MODE_OPENGLES2;
			if (!ChooseAndCreate(window_handle, core, use565)) {
				eglTerminate(egl_dpy);
				egl_dpy = nullptr;
				return false;
			}
		} else {
			// No fallback available from this mode - report the failure.
			eglTerminate(egl_dpy);
			egl_dpy = nullptr;
			return false;
		}
	}
	return true;
}
// Binds the EGL context and surface to the calling thread.
bool cInterfaceEGL::MakeCurrent() {
	return eglMakeCurrent(egl_dpy, egl_surf, egl_surf, egl_ctx);
}
// Unbinds any context/surface from the calling thread.
bool cInterfaceEGL::ClearCurrent() {
	return eglMakeCurrent(egl_dpy, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
}
// Destroys the surface, context and display connection created by Create().
void cInterfaceEGL::Shutdown() {
	ShutdownPlatform();
	// NOTE(review): this call *binds* the context rather than releasing it,
	// while the log message says "release" - presumably some drivers want the
	// context current during teardown; confirm the intent.
	if (egl_ctx && !eglMakeCurrent(egl_dpy, egl_surf, egl_surf, egl_ctx)) {
		NOTICE_LOG(G3D, "Could not release drawing context.");
	}
	if (egl_ctx) {
		// Unbind first, then destroy surface, context and display in order.
		eglMakeCurrent(egl_dpy, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
		if (!eglDestroySurface(egl_dpy, egl_surf))
			NOTICE_LOG(G3D, "Could not destroy window surface.");
		if (!eglDestroyContext(egl_dpy, egl_ctx))
			NOTICE_LOG(G3D, "Could not destroy drawing context.");
		if (!eglTerminate(egl_dpy))
			NOTICE_LOG(G3D, "Could not destroy display connection.");
		egl_ctx = nullptr;
		egl_dpy = nullptr;
		egl_surf = nullptr;
	}
}

View file

@ -0,0 +1,42 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <string>
#include <EGL/egl.h>
#include "Common/Log.h"
#include "Common/GL/GLInterfaceBase.h"
#define EGL_ILOG(...) INFO_LOG(G3D, __VA_ARGS__)
// Route error-level EGL messages to the error log; previously EGL_ELOG was
// aliased to INFO_LOG, which buried genuine failures at info verbosity.
#define EGL_ELOG(...) ERROR_LOG(G3D, __VA_ARGS__)
// EGL implementation of the GL context interface. Platform specifics
// (display/window handling) are delegated to subclasses via the pure
// virtual OpenDisplay/InitializePlatform/ShutdownPlatform hooks.
class cInterfaceEGL : public cInterfaceBase {
public:
	void SwapInterval(int Interval) override;
	void Swap() override;
	void SetMode(u32 mode) override { s_opengl_mode = mode; }
	void* GetFuncAddress(const std::string& name) override;
	bool Create(void *window_handle, bool core, bool use565) override;
	bool MakeCurrent() override;
	bool ClearCurrent() override;
	void Shutdown() override;

protected:
	// Initialized to the EGL null handles so Shutdown() is safe to call even
	// if Create() was never called or failed early (previously these were
	// left uninitialized).
	EGLSurface egl_surf = EGL_NO_SURFACE;
	EGLContext egl_ctx = EGL_NO_CONTEXT;
	EGLDisplay egl_dpy = EGL_NO_DISPLAY;

	// Platform hooks implemented by cInterfaceEGLAndroid / cInterfaceEGLSwitch etc.
	virtual EGLDisplay OpenDisplay() = 0;
	virtual EGLNativeWindowType InitializePlatform(EGLNativeWindowType host_window, EGLConfig config) = 0;
	virtual void ShutdownPlatform() = 0;
	virtual void SetInternalResolution(int internalWidth, int internalHeight) {}

	// Maps an EGL error code to its symbolic name for logging.
	const char *EGLGetErrorString(EGLint error);

private:
	bool ChooseAndCreate(void *window_handle, bool core, bool use565);
	void DetectMode();
};

View file

@ -0,0 +1,31 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <android/native_window.h>
#include "Common/Log.h"
#include "Common/GL/GLInterface/EGLAndroid.h"
// Android uses the default EGL display.
EGLDisplay cInterfaceEGLAndroid::OpenDisplay() {
	return eglGetDisplay(EGL_DEFAULT_DISPLAY);
}
// Configures the ANativeWindow's buffer geometry to match the chosen EGL
// config's native visual format and the requested internal resolution, then
// records the window's actual dimensions as the back buffer size.
// Returns the window itself as the native surface target, or NULL if the
// config's native visual id could not be queried.
EGLNativeWindowType cInterfaceEGLAndroid::InitializePlatform(EGLNativeWindowType host_window, EGLConfig config) {
	EGLint format;
	if (eglGetConfigAttrib(egl_dpy, config, EGL_NATIVE_VISUAL_ID, &format) == EGL_FALSE) {
		EGL_ELOG("Failed getting EGL_NATIVE_VISUAL_ID: error %s", EGLGetErrorString(eglGetError()));
		return NULL;
	}

	const int32_t geomResult = ANativeWindow_setBuffersGeometry(host_window, internalWidth_, internalHeight_, format);
	EGL_ILOG("ANativeWindow_setBuffersGeometry returned %d", geomResult);

	const int actualWidth = ANativeWindow_getWidth(host_window);
	const int actualHeight = ANativeWindow_getHeight(host_window);
	SetBackBufferDimensions(actualWidth, actualHeight);
	return host_window;
}
// Nothing platform-specific to tear down on Android.
void cInterfaceEGLAndroid::ShutdownPlatform() {
}

View file

@ -0,0 +1,24 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Common/GL/GLInterface/EGL.h"
// Android EGL backend: renders into an ANativeWindow, optionally at a
// reduced internal resolution set via OverrideBackbufferDimensions.
class cInterfaceEGLAndroid : public cInterfaceEGL {
public:
	cInterfaceEGLAndroid() {}

protected:
	EGLDisplay OpenDisplay() override;
	EGLNativeWindowType InitializePlatform(EGLNativeWindowType host_window, EGLConfig config) override;
	void ShutdownPlatform() override;

	// Records the requested internal rendering size, applied in
	// InitializePlatform via ANativeWindow_setBuffersGeometry.
	void OverrideBackbufferDimensions(int internalWidth, int internalHeight) override {
		internalWidth_ = internalWidth;
		internalHeight_ = internalHeight;
	}

private:
	int internalWidth_ = 0;
	int internalHeight_ = 0;
};

View file

@ -0,0 +1,21 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "ppsspp_config.h"
#if PPSSPP_PLATFORM(SWITCH)
#include <switch.h>
#include "Common/Log.h"
#include "Common/GL/GLInterface/EGLSwitch.h"
// The Switch uses the default EGL display.
EGLDisplay cInterfaceEGLSwitch::OpenDisplay() {
	return eglGetDisplay(EGL_DEFAULT_DISPLAY);
}
// Always renders to the libnx default nwindow; host_window and config are unused.
EGLNativeWindowType cInterfaceEGLSwitch::InitializePlatform(EGLNativeWindowType host_window, EGLConfig config) {
	return nwindowGetDefault();
}
// Nothing platform-specific to tear down on the Switch.
void cInterfaceEGLSwitch::ShutdownPlatform() {
}
#endif

View file

@ -0,0 +1,24 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Common/GL/GLInterface/EGL.h"
// Nintendo Switch EGL backend: renders to the libnx default nwindow.
class cInterfaceEGLSwitch : public cInterfaceEGL {
public:
	cInterfaceEGLSwitch() {}
protected:
	EGLDisplay OpenDisplay() override;
	EGLNativeWindowType InitializePlatform(EGLNativeWindowType host_window, EGLConfig config) override;
	void ShutdownPlatform() override;
	// NOTE(review): internalWidth_/internalHeight_ are stored here but never
	// read on Switch (InitializePlatform ignores them) - confirm whether
	// internal-resolution override is meant to be supported on this platform.
	void OverrideBackbufferDimensions(int internalWidth, int internalHeight) override {
		internalWidth_ = internalWidth;
		internalHeight_ = internalHeight;
	}
private:
	int internalWidth_ = 0;
	int internalHeight_ = 0;
};

View file

@ -0,0 +1,44 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "ppsspp_config.h"
#include "Common/GL/GLInterfaceBase.h"
#ifdef __ANDROID__
#include "Common/GL/GLInterface/EGLAndroid.h"
#elif PPSSPP_PLATFORM(SWITCH)
#include "Common/GL/GLInterface/EGLSwitch.h"
#elif defined(__APPLE__)
#include "Common/GL/GLInterface/AGL.h"
#elif defined(_WIN32)
#include "Common/GL/GLInterface/WGL.h"
#elif HAVE_X11
#if defined(USE_EGL) && USE_EGL
#include "Common/GL/GLInterface/EGLX11.h"
#else
#include "Common/GL/GLInterface/GLX.h"
#endif
#else
#error Platform doesnt have a GLInterface
#endif
// Factory: returns the GL context interface matching the build's platform,
// or nullptr when no backend is available for this configuration.
// Caller owns the returned object.
cInterfaceBase* HostGL_CreateGLInterface() {
#ifdef __ANDROID__
	return new cInterfaceEGLAndroid;
#elif PPSSPP_PLATFORM(SWITCH)
	return new cInterfaceEGLSwitch;
#elif defined(__APPLE__)
	return new cInterfaceAGL;
#elif defined(_WIN32)
	return new cInterfaceWGL;
#elif defined(HAVE_X11) && HAVE_X11
#if defined(USE_EGL) && USE_EGL
	return new cInterfaceEGLX11;
#else
	return new cInterfaceGLX;
#endif
#else
	return nullptr;
#endif
}

View file

@ -0,0 +1,50 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <string>
#include "Common/CommonTypes.h"
// Requested or detected GL flavor used when creating the context.
enum GLInterfaceMode {
	MODE_DETECT = 0,   // Probe for the best available API.
	MODE_DETECT_ES,    // Probe, but only consider OpenGL ES variants.
	MODE_OPENGL,       // Desktop OpenGL.
	MODE_OPENGLES2,    // OpenGL ES 2.0.
	MODE_OPENGLES3,    // OpenGL ES 3.x.
};
// Abstract base for platform GL context interfaces (EGL, WGL, AGL, GLX).
class cInterfaceBase {
protected:
	// Window dimensions.
	u32 s_backbuffer_width;
	u32 s_backbuffer_height;
	// One of GLInterfaceMode; starts in MODE_DETECT.
	u32 s_opengl_mode;
public:
	cInterfaceBase() : s_backbuffer_width(0), s_backbuffer_height(0), s_opengl_mode(MODE_DETECT) {}
	virtual ~cInterfaceBase() {}
	// Presents the rendered frame.
	virtual void Swap() {}
	// NOTE(review): the base implementation ignores 'mode' and forces
	// MODE_OPENGL; subclasses (e.g. cInterfaceEGL) override it to honor the
	// argument. Confirm this is intentional for non-EGL backends.
	virtual void SetMode(u32 mode) { s_opengl_mode = GLInterfaceMode::MODE_OPENGL; }
	virtual u32 GetMode() { return s_opengl_mode; }
	// Resolves a GL entry point by name; base returns nullptr.
	virtual void* GetFuncAddress(const std::string& name) { return nullptr; }
	// Creates the rendering context for the given native window handle.
	virtual bool Create(void *window_handle, bool core = true, bool use16bit = false) = 0;
	virtual bool MakeCurrent() { return true; }
	virtual bool ClearCurrent() { return true; }
	virtual void Shutdown() {}
	virtual void SwapInterval(int Interval) { }
	virtual u32 GetBackBufferWidth() { return s_backbuffer_width; }
	virtual u32 GetBackBufferHeight() { return s_backbuffer_height; }
	// Requests rendering at an internal resolution differing from the window's.
	virtual void OverrideBackbufferDimensions(int w, int h) = 0;
	virtual void SetBackBufferDimensions(u32 W, u32 H) {s_backbuffer_width = W; s_backbuffer_height = H; }
	virtual void Update() { }
	virtual bool PeekMessages() { return false; }
};
cInterfaceBase* HostGL_CreateGLInterface();

View file

@ -11,6 +11,7 @@ static HMODULE g_D3DCompileModule;
LPCREATEDXGIFACTORY ptr_CreateDXGIFactory;
LPD3D11CREATEDEVICE ptr_D3D11CreateDevice;
LPD3D11CREATEDEVICEANDSWAPCHAIN ptr_D3D11CreateDeviceAndSwapChain;
pD3DCompile ptr_D3DCompile;
LoadD3D11Error LoadD3D11() {
@ -21,6 +22,7 @@ LoadD3D11Error LoadD3D11() {
g_D3D11Module = LoadLibrary(L"d3d11.dll");
if (g_D3D11Module) {
ptr_D3D11CreateDevice = (LPD3D11CREATEDEVICE)GetProcAddress(g_D3D11Module, "D3D11CreateDevice");
ptr_D3D11CreateDeviceAndSwapChain = (LPD3D11CREATEDEVICEANDSWAPCHAIN)GetProcAddress(g_D3D11Module, "D3D11CreateDeviceAndSwapChain");
} else {
return LoadD3D11Error::FAIL_NO_D3D11;
}

View file

@ -14,10 +14,12 @@
#endif
typedef HRESULT (WINAPI *LPCREATEDXGIFACTORY)(REFIID, void **);
typedef HRESULT (WINAPI *LPD3D11CREATEDEVICEANDSWAPCHAIN)(__in_opt IDXGIAdapter *pAdapter, D3D_DRIVER_TYPE DriverType, HMODULE Software, UINT Flags, __in_ecount_opt(FeatureLevels) CONST D3D_FEATURE_LEVEL *pFeatureLevels, UINT FeatureLevels, UINT SDKVersion, __in_opt CONST DXGI_SWAP_CHAIN_DESC *pSwapChainDesc, __out_opt IDXGISwapChain **ppSwapChain, __out_opt ID3D11Device **ppDevice, __out_opt D3D_FEATURE_LEVEL *pFeatureLevel, __out_opt ID3D11DeviceContext **ppImmediateContext);
typedef HRESULT (WINAPI *LPD3D11CREATEDEVICE)(IDXGIAdapter *, D3D_DRIVER_TYPE, HMODULE, UINT32, D3D_FEATURE_LEVEL *, UINT, UINT32, ID3D11Device **, D3D_FEATURE_LEVEL *, ID3D11DeviceContext **);
extern LPCREATEDXGIFACTORY ptr_CreateDXGIFactory;
extern LPD3D11CREATEDEVICE ptr_D3D11CreateDevice;
extern LPD3D11CREATEDEVICEANDSWAPCHAIN ptr_D3D11CreateDeviceAndSwapChain;
extern pD3DCompile ptr_D3DCompile;
enum class LoadD3D11Error {

View file

@ -11,7 +11,6 @@
#include "Common/Data/Convert/ColorConv.h"
#include "Common/Data/Convert/SmallDataConvert.h"
#include "Common/Data/Encoding/Utf8.h"
#include "Common/TimeUtil.h"
#include "Common/Log.h"
#include <map>
@ -63,7 +62,7 @@ public:
class D3D11DrawContext : public DrawContext {
public:
D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *deviceContext, ID3D11Device1 *device1, ID3D11DeviceContext1 *deviceContext1, IDXGISwapChain *swapChain, D3D_FEATURE_LEVEL featureLevel, HWND hWnd, std::vector<std::string> deviceList, int maxInflightFrames);
D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *deviceContext, ID3D11Device1 *device1, ID3D11DeviceContext1 *deviceContext1, D3D_FEATURE_LEVEL featureLevel, HWND hWnd, std::vector<std::string> deviceList);
~D3D11DrawContext();
const DeviceCaps &GetDeviceCaps() const override {
@ -76,6 +75,10 @@ public:
return (uint32_t)ShaderLanguage::HLSL_D3D11;
}
uint32_t GetDataFormatSupport(DataFormat fmt) const override;
PresentationMode GetPresentationMode() const override {
// TODO: Fix. Not yet used.
return PresentationMode::FIFO;
}
InputLayout *CreateInputLayout(const InputLayoutDesc &desc) override;
DepthStencilState *CreateDepthStencilState(const DepthStencilStateDesc &desc) override;
@ -89,7 +92,6 @@ public:
Framebuffer *CreateFramebuffer(const FramebufferDesc &desc) override;
void UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t offset, size_t size, UpdateBufferFlags flags) override;
void UpdateTextureLevels(Texture *texture, const uint8_t **data, TextureCallback initDataCallback, int numLevels) override;
void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits, const char *tag) override;
bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter, const char *tag) override;
@ -106,7 +108,7 @@ public:
void BindTextures(int start, int count, Texture **textures, TextureBindFlags flags) override;
void BindNativeTexture(int index, void *nativeTexture) override;
void BindSamplerStates(int start, int count, SamplerState **states) override;
void BindVertexBuffer(Buffer *buffers, int offset) override;
void BindVertexBuffers(int start, int count, Buffer **buffers, const int *offsets) override;
void BindIndexBuffer(Buffer *indexBuffer, int offset) override;
void BindPipeline(Pipeline *pipeline) override;
@ -128,17 +130,14 @@ public:
stencilDirty_ = true;
}
void EndFrame() override;
void Draw(int vertexCount, int offset) override;
void DrawIndexed(int vertexCount, int offset) override;
void DrawUP(const void *vdata, int vertexCount) override;
void Clear(int mask, uint32_t colorval, float depthVal, int stencilVal) override;
void BeginFrame(DebugFlags debugFlags) override;
void EndFrame() override;
void Present(PresentMode presentMode, int vblanks) override;
int GetFrameCount() override { return frameCount_; }
void BeginFrame() override;
std::string GetInfoString(InfoField info) const override {
switch (info) {
@ -179,10 +178,9 @@ private:
HWND hWnd_;
ID3D11Device *device_;
ID3D11Device1 *device1_;
ID3D11DeviceContext *context_;
ID3D11Device1 *device1_;
ID3D11DeviceContext1 *context1_;
IDXGISwapChain *swapChain_;
ID3D11Texture2D *bbRenderTargetTex_ = nullptr; // NOT OWNED
ID3D11RenderTargetView *bbRenderTargetView_ = nullptr;
@ -214,15 +212,14 @@ private:
ID3D11GeometryShader *curGS_ = nullptr;
D3D11_PRIMITIVE_TOPOLOGY curTopology_ = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED;
ID3D11Buffer *nextVertexBuffer_ = nullptr;
UINT nextVertexBufferOffset_ = 0;
ID3D11Buffer *nextVertexBuffers_[4]{};
int nextVertexBufferOffsets_[4]{};
bool dirtyIndexBuffer_ = false;
ID3D11Buffer *nextIndexBuffer_ = nullptr;
UINT nextIndexBufferOffset_ = 0;
int nextIndexBufferOffset_ = 0;
InvalidationCallback invalidationCallback_;
int frameCount_ = FRAME_TIME_HISTORY_LENGTH;
// Dynamic state
float blendFactor_[4]{};
@ -243,14 +240,13 @@ private:
std::vector<std::string> deviceList_;
};
D3D11DrawContext::D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *deviceContext, ID3D11Device1 *device1, ID3D11DeviceContext1 *deviceContext1, IDXGISwapChain *swapChain, D3D_FEATURE_LEVEL featureLevel, HWND hWnd, std::vector<std::string> deviceList, int maxInflightFrames)
D3D11DrawContext::D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *deviceContext, ID3D11Device1 *device1, ID3D11DeviceContext1 *deviceContext1, D3D_FEATURE_LEVEL featureLevel, HWND hWnd, std::vector<std::string> deviceList)
: hWnd_(hWnd),
device_(device),
context_(deviceContext1),
device1_(device1),
context1_(deviceContext1),
featureLevel_(featureLevel),
swapChain_(swapChain),
deviceList_(deviceList) {
// We no longer support Windows Phone.
@ -281,10 +277,6 @@ D3D11DrawContext::D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *de
caps_.blendMinMaxSupported = true;
caps_.multiSampleLevelsMask = 1; // More could be supported with some work.
caps_.presentInstantModeChange = true;
caps_.presentMaxInterval = 4;
caps_.presentModesSupported = PresentMode::FIFO | PresentMode::IMMEDIATE;
D3D11_FEATURE_DATA_D3D11_OPTIONS options{};
HRESULT result = device_->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS, &options, sizeof(options));
if (SUCCEEDED(result)) {
@ -350,13 +342,6 @@ D3D11DrawContext::D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *de
const size_t UP_MAX_BYTES = 65536 * 24;
upBuffer_ = CreateBuffer(UP_MAX_BYTES, BufferUsageFlag::DYNAMIC | BufferUsageFlag::VERTEXDATA);
IDXGIDevice1 *dxgiDevice1 = nullptr;
hr = device_->QueryInterface(__uuidof(IDXGIDevice), reinterpret_cast<void **>(&dxgiDevice1));
if (SUCCEEDED(hr)) {
caps_.setMaxFrameLatencySupported = true;
dxgiDevice1->SetMaximumFrameLatency(maxInflightFrames);
}
}
D3D11DrawContext::~D3D11DrawContext() {
@ -427,31 +412,18 @@ void D3D11DrawContext::HandleEvent(Event ev, int width, int height, void *param1
curRTHeight_ = height;
break;
}
case Event::PRESENTED:
// Make sure that we don't eliminate the next time the render target is set.
curRenderTargetView_ = nullptr;
curDepthStencilView_ = nullptr;
break;
}
}
void D3D11DrawContext::EndFrame() {
// Fake a submit time.
frameTimeHistory_[frameCount_].firstSubmit = time_now_d();
curPipeline_ = nullptr;
}
void D3D11DrawContext::Present(PresentMode presentMode, int vblanks) {
frameTimeHistory_[frameCount_].queuePresent = time_now_d();
int interval = vblanks;
if (presentMode != PresentMode::FIFO) {
interval = 0;
}
// Safety for libretro
if (swapChain_) {
swapChain_->Present(interval, 0);
}
curRenderTargetView_ = nullptr;
curDepthStencilView_ = nullptr;
frameCount_++;
}
void D3D11DrawContext::SetViewport(const Viewport &viewport) {
DisplayRect<float> rc{ viewport.TopLeftX , viewport.TopLeftY, viewport.Width, viewport.Height };
if (curRenderTargetView_ == bbRenderTargetView_) // Only the backbuffer is actually rotated wrong!
@ -725,7 +697,7 @@ public:
D3D11InputLayout() {}
InputLayoutDesc desc;
std::vector<D3D11_INPUT_ELEMENT_DESC> elements;
UINT stride; // type to match function parameter
std::vector<int> strides;
};
const char *semanticToD3D11(int semantic, UINT *index) {
@ -752,13 +724,15 @@ InputLayout *D3D11DrawContext::CreateInputLayout(const InputLayoutDesc &desc) {
D3D11_INPUT_ELEMENT_DESC el;
el.AlignedByteOffset = desc.attributes[i].offset;
el.Format = dataFormatToD3D11(desc.attributes[i].format);
el.InstanceDataStepRate = 0;
el.InputSlot = 0;
el.InstanceDataStepRate = desc.bindings[desc.attributes[i].binding].instanceRate ? 1 : 0;
el.InputSlot = desc.attributes[i].binding;
el.SemanticName = semanticToD3D11(desc.attributes[i].location, &el.SemanticIndex);
el.InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
el.InputSlotClass = desc.bindings[desc.attributes[i].binding].instanceRate ? D3D11_INPUT_PER_INSTANCE_DATA : D3D11_INPUT_PER_VERTEX_DATA;
inputLayout->elements.push_back(el);
}
inputLayout->stride = desc.stride;
for (size_t i = 0; i < desc.bindings.size(); i++) {
inputLayout->strides.push_back(desc.bindings[i].stride);
}
return inputLayout;
}
@ -821,83 +795,35 @@ public:
width_ = desc.width;
height_ = desc.height;
depth_ = desc.depth;
format_ = desc.format;
mipLevels_ = desc.mipLevels;
}
~D3D11Texture() {
if (tex_)
tex_->Release();
if (stagingTex_)
stagingTex_->Release();
if (view_)
view_->Release();
if (tex)
tex->Release();
if (stagingTex)
stagingTex->Release();
if (view)
view->Release();
}
bool Create(ID3D11DeviceContext *context, ID3D11Device *device, const TextureDesc &desc, bool generateMips);
bool CreateStagingTexture(ID3D11Device *device);
void UpdateTextureLevels(ID3D11DeviceContext *context, ID3D11Device *device, Texture *texture, const uint8_t *const *data, TextureCallback initDataCallback, int numLevels);
ID3D11ShaderResourceView *View() { return view_; }
private:
bool FillLevel(ID3D11DeviceContext *context, int level, int w, int h, int d, const uint8_t *const *data, TextureCallback initDataCallback);
ID3D11Texture2D *tex_ = nullptr;
ID3D11Texture2D *stagingTex_ = nullptr;
ID3D11ShaderResourceView *view_ = nullptr;
int mipLevels_ = 0;
ID3D11Texture2D *tex = nullptr;
ID3D11Texture2D *stagingTex = nullptr;
ID3D11ShaderResourceView *view = nullptr;
};
bool D3D11Texture::FillLevel(ID3D11DeviceContext *context, int level, int w, int h, int d, const uint8_t *const *data, TextureCallback initDataCallback) {
D3D11_MAPPED_SUBRESOURCE mapped;
HRESULT hr = context->Map(stagingTex_, level, D3D11_MAP_WRITE, 0, &mapped);
if (!SUCCEEDED(hr)) {
tex_->Release();
tex_ = nullptr;
return false;
Texture *D3D11DrawContext::CreateTexture(const TextureDesc &desc) {
if (!(GetDataFormatSupport(desc.format) & FMT_TEXTURE)) {
// D3D11 does not support this format as a texture format.
return nullptr;
}
if (!initDataCallback((uint8_t *)mapped.pData, data[level], w, h, d, mapped.RowPitch, mapped.DepthPitch)) {
for (int s = 0; s < d; ++s) {
for (int y = 0; y < h; ++y) {
void *dest = (uint8_t *)mapped.pData + mapped.DepthPitch * s + mapped.RowPitch * y;
uint32_t byteStride = w * (uint32_t)DataFormatSizeInBytes(format_);
const void *src = data[level] + byteStride * (y + h * s);
memcpy(dest, src, byteStride);
}
}
D3D11Texture *tex = new D3D11Texture(desc);
bool generateMips = desc.generateMips;
if (desc.generateMips && !(GetDataFormatSupport(desc.format) & FMT_AUTOGEN_MIPS)) {
// D3D11 does not support autogenerating mipmaps for this format.
generateMips = false;
}
context->Unmap(stagingTex_, level);
return true;
}
bool D3D11Texture::CreateStagingTexture(ID3D11Device *device) {
if (stagingTex_)
return true;
D3D11_TEXTURE2D_DESC descColor{};
descColor.Width = width_;
descColor.Height = height_;
descColor.MipLevels = mipLevels_;
descColor.ArraySize = 1;
descColor.Format = dataFormatToD3D11(format_);
descColor.SampleDesc.Count = 1;
descColor.SampleDesc.Quality = 0;
descColor.Usage = D3D11_USAGE_STAGING;
descColor.BindFlags = 0;
descColor.MiscFlags = 0;
descColor.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
HRESULT hr = device->CreateTexture2D(&descColor, nullptr, &stagingTex_);
if (!SUCCEEDED(hr)) {
stagingTex_->Release();
stagingTex_ = nullptr;
return false;
}
return true;
}
bool D3D11Texture::Create(ID3D11DeviceContext *context, ID3D11Device *device, const TextureDesc &desc, bool generateMips) {
D3D11_TEXTURE2D_DESC descColor{};
descColor.Width = desc.width;
descColor.Height = desc.height;
@ -906,16 +832,25 @@ bool D3D11Texture::Create(ID3D11DeviceContext *context, ID3D11Device *device, co
descColor.Format = dataFormatToD3D11(desc.format);
descColor.SampleDesc.Count = 1;
descColor.SampleDesc.Quality = 0;
if (desc.initDataCallback) {
descColor.Usage = D3D11_USAGE_STAGING;
descColor.BindFlags = 0;
descColor.MiscFlags = 0;
descColor.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
HRESULT hr = device_->CreateTexture2D(&descColor, nullptr, &tex->stagingTex);
if (!SUCCEEDED(hr)) {
delete tex;
return nullptr;
}
}
descColor.Usage = D3D11_USAGE_DEFAULT;
descColor.BindFlags = generateMips ? (D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) : D3D11_BIND_SHADER_RESOURCE;
descColor.MiscFlags = generateMips ? D3D11_RESOURCE_MISC_GENERATE_MIPS : 0;
descColor.CPUAccessFlags = 0;
// Make sure we have a staging texture if we'll need it.
if (desc.initDataCallback && !CreateStagingTexture(device)) {
return false;
}
D3D11_SUBRESOURCE_DATA *initDataParam = nullptr;
D3D11_SUBRESOURCE_DATA initData[12]{};
std::vector<uint8_t> initDataBuffer[12];
@ -935,39 +870,62 @@ bool D3D11Texture::Create(ID3D11DeviceContext *context, ID3D11Device *device, co
initDataParam = initData;
}
HRESULT hr = device->CreateTexture2D(&descColor, initDataParam, &tex_);
HRESULT hr = device_->CreateTexture2D(&descColor, initDataParam, &tex->tex);
if (!SUCCEEDED(hr)) {
tex_ = nullptr;
return false;
delete tex;
return nullptr;
}
hr = device->CreateShaderResourceView(tex_, nullptr, &view_);
hr = device_->CreateShaderResourceView(tex->tex, nullptr, &tex->view);
if (!SUCCEEDED(hr)) {
return false;
delete tex;
return nullptr;
}
auto populateLevelCallback = [&](int level, int w, int h, int d) {
D3D11_MAPPED_SUBRESOURCE mapped;
hr = context_->Map(tex->stagingTex, level, D3D11_MAP_WRITE, 0, &mapped);
if (!SUCCEEDED(hr)) {
return false;
}
if (!desc.initDataCallback((uint8_t *)mapped.pData, desc.initData[level], w, h, d, mapped.RowPitch, mapped.DepthPitch)) {
for (int s = 0; s < d; ++s) {
for (int y = 0; y < h; ++y) {
void *dest = (uint8_t *)mapped.pData + mapped.DepthPitch * s + mapped.RowPitch * y;
uint32_t byteStride = w * (uint32_t)DataFormatSizeInBytes(desc.format);
const void *src = desc.initData[level] + byteStride * (y + h * d);
memcpy(dest, src, byteStride);
}
}
}
context_->Unmap(tex->stagingTex, level);
return true;
};
if (generateMips && desc.initData.size() >= 1) {
if (desc.initDataCallback) {
if (!FillLevel(context, 0, desc.width, desc.height, desc.depth, desc.initData.data(), desc.initDataCallback)) {
tex_->Release();
return false;
if (!populateLevelCallback(0, desc.width, desc.height, desc.depth)) {
delete tex;
return nullptr;
}
context->CopyResource(tex_, stagingTex_);
stagingTex_->Release();
stagingTex_ = nullptr;
context_->CopyResource(tex->stagingTex, tex->stagingTex);
tex->stagingTex->Release();
tex->stagingTex = nullptr;
} else {
uint32_t byteStride = desc.width * (uint32_t)DataFormatSizeInBytes(desc.format);
context->UpdateSubresource(tex_, 0, nullptr, desc.initData[0], byteStride, 0);
context_->UpdateSubresource(tex->tex, 0, nullptr, desc.initData[0], byteStride, 0);
}
context->GenerateMips(view_);
context_->GenerateMips(tex->view);
} else if (desc.initDataCallback) {
int w = desc.width;
int h = desc.height;
int d = desc.depth;
for (int i = 0; i < (int)desc.initData.size(); i++) {
if (!FillLevel(context, i, w, h, d, desc.initData.data(), desc.initDataCallback)) {
if (!populateLevelCallback(i, desc.width, desc.height, desc.depth)) {
if (i == 0) {
return false;
delete tex;
return nullptr;
} else {
break;
}
@ -978,62 +936,13 @@ bool D3D11Texture::Create(ID3D11DeviceContext *context, ID3D11Device *device, co
d = (d + 1) / 2;
}
context->CopyResource(tex_, stagingTex_);
stagingTex_->Release();
stagingTex_ = nullptr;
context_->CopyResource(tex->tex, tex->stagingTex);
tex->stagingTex->Release();
tex->stagingTex = nullptr;
}
return true;
}
void D3D11Texture::UpdateTextureLevels(ID3D11DeviceContext *context, ID3D11Device *device, Texture *texture, const uint8_t * const*data, TextureCallback initDataCallback, int numLevels) {
if (!CreateStagingTexture(device)) {
return;
}
int w = width_;
int h = height_;
int d = depth_;
for (int i = 0; i < (int)numLevels; i++) {
if (!FillLevel(context, i, w, h, d, data, initDataCallback)) {
break;
}
w = (w + 1) / 2;
h = (h + 1) / 2;
d = (d + 1) / 2;
}
context->CopyResource(tex_, stagingTex_);
stagingTex_->Release();
stagingTex_ = nullptr;
}
Texture *D3D11DrawContext::CreateTexture(const TextureDesc &desc) {
if (!(GetDataFormatSupport(desc.format) & FMT_TEXTURE)) {
// D3D11 does not support this format as a texture format.
return nullptr;
}
D3D11Texture *tex = new D3D11Texture(desc);
bool generateMips = desc.generateMips;
if (desc.generateMips && !(GetDataFormatSupport(desc.format) & FMT_AUTOGEN_MIPS)) {
// D3D11 does not support autogenerating mipmaps for this format.
generateMips = false;
}
if (!tex->Create(context_, device_, desc, generateMips)) {
tex->Release();
return nullptr;
}
return tex;
}
void D3D11DrawContext::UpdateTextureLevels(Texture *texture, const uint8_t **data, TextureCallback initDataCallback, int numLevels) {
D3D11Texture *tex = (D3D11Texture *)texture;
tex->UpdateTextureLevels(context_, device_, texture, data, initDataCallback, numLevels);
}
ShaderModule *D3D11DrawContext::CreateShaderModule(ShaderStage stage, ShaderLanguage language, const uint8_t *data, size_t dataSize, const char *tag) {
if (language != ShaderLanguage::HLSL_D3D11) {
ERROR_LOG(G3D, "Unsupported shader language");
@ -1251,7 +1160,8 @@ void D3D11DrawContext::ApplyCurrentState() {
}
if (curPipeline_->input != nullptr) {
context_->IASetVertexBuffers(0, 1, &nextVertexBuffer_, &curPipeline_->input->stride, &nextVertexBufferOffset_);
int numVBs = (int)curPipeline_->input->strides.size();
context_->IASetVertexBuffers(0, numVBs, nextVertexBuffers_, (UINT *)curPipeline_->input->strides.data(), (UINT *)nextVertexBufferOffsets_);
}
if (dirtyIndexBuffer_) {
context_->IASetIndexBuffer(nextIndexBuffer_, DXGI_FORMAT_R16_UINT, nextIndexBufferOffset_);
@ -1320,11 +1230,14 @@ void D3D11DrawContext::UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t
context_->UpdateSubresource(buf->buf, 0, &box, data, 0, 0);
}
void D3D11DrawContext::BindVertexBuffer(Buffer *buffer, int offset) {
void D3D11DrawContext::BindVertexBuffers(int start, int count, Buffer **buffers, const int *offsets) {
_assert_(start + count <= ARRAY_SIZE(nextVertexBuffers_));
// Lazy application
D3D11Buffer *buf = (D3D11Buffer *)buffer;
nextVertexBuffer_ = buf->buf;
nextVertexBufferOffset_ = offset;
for (int i = 0; i < count; i++) {
D3D11Buffer *buf = (D3D11Buffer *)buffers[i];
nextVertexBuffers_[start + i] = buf->buf;
nextVertexBufferOffsets_[start + i] = offsets ? offsets[i] : 0;
}
}
void D3D11DrawContext::BindIndexBuffer(Buffer *indexBuffer, int offset) {
@ -1348,10 +1261,10 @@ void D3D11DrawContext::DrawIndexed(int indexCount, int offset) {
void D3D11DrawContext::DrawUP(const void *vdata, int vertexCount) {
ApplyCurrentState();
int byteSize = vertexCount * curPipeline_->input->stride;
int byteSize = vertexCount * curPipeline_->input->strides[0];
UpdateBuffer(upBuffer_, (const uint8_t *)vdata, 0, byteSize, Draw::UPDATE_DISCARD);
BindVertexBuffer(upBuffer_, 0);
BindVertexBuffers(0, 1, &upBuffer_, nullptr);
int offset = 0;
Draw(vertexCount, offset);
}
@ -1498,7 +1411,7 @@ void D3D11DrawContext::BindTextures(int start, int count, Texture **textures, Te
_assert_(start + count <= ARRAY_SIZE(views));
for (int i = 0; i < count; i++) {
D3D11Texture *tex = (D3D11Texture *)textures[i];
views[i] = tex ? tex->View() : nullptr;
views[i] = tex ? tex->view : nullptr;
}
context_->PSSetShaderResources(start, count, views);
}
@ -1535,11 +1448,7 @@ void D3D11DrawContext::Clear(int mask, uint32_t colorval, float depthVal, int st
}
}
void D3D11DrawContext::BeginFrame(DebugFlags debugFlags) {
FrameTimeData &frameTimeData = frameTimeHistory_.Add(frameCount_);
frameTimeData.afterFenceWait = time_now_d();
frameTimeData.frameBegin = frameTimeData.afterFenceWait;
void D3D11DrawContext::BeginFrame() {
context_->OMSetRenderTargets(1, &curRenderTargetView_, curDepthStencilView_);
if (curBlend_ != nullptr) {
@ -1559,7 +1468,7 @@ void D3D11DrawContext::BeginFrame(DebugFlags debugFlags) {
context_->IASetPrimitiveTopology(curTopology_);
}
if (curPipeline_ != nullptr) {
context_->IASetVertexBuffers(0, 1, &nextVertexBuffer_, &curPipeline_->input->stride, &nextVertexBufferOffset_);
context_->IASetVertexBuffers(0, 1, nextVertexBuffers_, (UINT *)curPipeline_->input->strides.data(), (UINT *)nextVertexBufferOffsets_);
context_->IASetIndexBuffer(nextIndexBuffer_, DXGI_FORMAT_R16_UINT, nextIndexBufferOffset_);
if (curPipeline_->dynamicUniforms) {
context_->VSSetConstantBuffers(0, 1, &curPipeline_->dynamicUniforms);
@ -1862,7 +1771,7 @@ uint64_t D3D11DrawContext::GetNativeObject(NativeObject obj, void *srcObject) {
case NativeObject::FEATURE_LEVEL:
return (uint64_t)(uintptr_t)featureLevel_;
case NativeObject::TEXTURE_VIEW:
return (uint64_t)(((D3D11Texture *)srcObject)->View());
return (uint64_t)(((D3D11Texture *)srcObject)->view);
default:
return 0;
}
@ -1879,8 +1788,8 @@ void D3D11DrawContext::GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h
}
}
DrawContext *T3DCreateD3D11Context(ID3D11Device *device, ID3D11DeviceContext *context, ID3D11Device1 *device1, ID3D11DeviceContext1 *context1, IDXGISwapChain *swapChain, D3D_FEATURE_LEVEL featureLevel, HWND hWnd, std::vector<std::string> adapterNames, int maxInflightFrames) {
return new D3D11DrawContext(device, context, device1, context1, swapChain, featureLevel, hWnd, adapterNames, maxInflightFrames);
DrawContext *T3DCreateD3D11Context(ID3D11Device *device, ID3D11DeviceContext *context, ID3D11Device1 *device1, ID3D11DeviceContext1 *context1, D3D_FEATURE_LEVEL featureLevel, HWND hWnd, std::vector<std::string> adapterNames) {
return new D3D11DrawContext(device, context, device1, context1, featureLevel, hWnd, adapterNames);
}
} // namespace Draw

View file

@ -25,7 +25,6 @@
#include "Common/GPU/D3D9/D3D9StateCache.h"
#include "Common/OSVersion.h"
#include "Common/StringUtils.h"
#include "Common/TimeUtil.h"
#include "Common/Log.h"
@ -231,14 +230,14 @@ public:
decl_->Release();
}
}
int GetStride() const { return stride_; }
int GetStride(int binding) const { return stride_[binding]; }
void Apply(LPDIRECT3DDEVICE9 device) {
device->SetVertexDeclaration(decl_);
}
private:
LPDIRECT3DVERTEXDECLARATION9 decl_;
int stride_;
int stride_[4];
};
class D3D9ShaderModule : public ShaderModule {
@ -309,14 +308,14 @@ public:
return nullptr;
}
}
void UpdateTextureLevels(const uint8_t * const *data, int numLevels, TextureCallback initDataCallback);
private:
void SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data, TextureCallback initDataCallback);
void SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data, TextureCallback callback);
bool Create(const TextureDesc &desc);
LPDIRECT3DDEVICE9 device_;
LPDIRECT3DDEVICE9EX deviceEx_;
TextureType type_;
DataFormat format_;
D3DFORMAT d3dfmt_;
LPDIRECT3DTEXTURE9 tex_ = nullptr;
LPDIRECT3DVOLUMETEXTURE9 volTex_ = nullptr;
@ -375,31 +374,27 @@ bool D3D9Texture::Create(const TextureDesc &desc) {
break;
}
if (FAILED(hr)) {
ERROR_LOG(G3D, "D3D9 Texture creation failed");
ERROR_LOG(G3D, "Texture creation failed");
return false;
}
if (desc.initData.size()) {
// In D3D9, after setting D3DUSAGE_AUTOGENMIPS, we can only access the top layer. The rest will be
// automatically generated.
int numLevels = desc.generateMips ? 1 : (int)desc.initData.size();
UpdateTextureLevels(desc.initData.data(), numLevels, desc.initDataCallback);
int maxLevel = desc.generateMips ? 1 : (int)desc.initData.size();
int w = desc.width;
int h = desc.height;
int d = desc.depth;
for (int i = 0; i < maxLevel; i++) {
SetImageData(0, 0, 0, w, h, d, i, 0, desc.initData[i], desc.initDataCallback);
w = (w + 1) / 2;
h = (h + 1) / 2;
d = (d + 1) / 2;
}
}
return true;
}
void D3D9Texture::UpdateTextureLevels(const uint8_t * const *data, int numLevels, TextureCallback initDataCallback) {
int w = width_;
int h = height_;
int d = depth_;
for (int i = 0; i < numLevels; i++) {
SetImageData(0, 0, 0, w, h, d, i, 0, data[i], initDataCallback);
w = (w + 1) / 2;
h = (h + 1) / 2;
d = (d + 1) / 2;
}
}
// Just switches R and G.
inline uint32_t Shuffle8888(uint32_t x) {
return (x & 0xFF00FF00) | ((x >> 16) & 0xFF) | ((x << 16) & 0xFF0000);
@ -519,6 +514,10 @@ public:
return (uint32_t)ShaderLanguage::HLSL_D3D9;
}
uint32_t GetDataFormatSupport(DataFormat fmt) const override;
PresentationMode GetPresentationMode() const override {
// TODO: Fix. Not yet used.
return PresentationMode::FIFO;
}
ShaderModule *CreateShaderModule(ShaderStage stage, ShaderLanguage language, const uint8_t *data, size_t dataSize, const char *tag) override;
DepthStencilState *CreateDepthStencilState(const DepthStencilStateDesc &desc) override;
@ -533,7 +532,6 @@ public:
Framebuffer *CreateFramebuffer(const FramebufferDesc &desc) override;
void UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t offset, size_t size, UpdateBufferFlags flags) override;
void UpdateTextureLevels(Texture *texture, const uint8_t **data, TextureCallback initDataCallback, int numLevels) override;
void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits, const char *tag) override {
// Not implemented
@ -560,9 +558,12 @@ public:
s->Apply(device_, start + i);
}
}
void BindVertexBuffer(Buffer *vertexBuffer, int offset) override {
curVBuffer_ = (D3D9Buffer *)vertexBuffer;
curVBufferOffset_ = offset;
void BindVertexBuffers(int start, int count, Buffer **buffers, const int *offsets) override {
_assert_(start + count <= ARRAY_SIZE(curVBuffers_));
for (int i = 0; i < count; i++) {
curVBuffers_[i + start] = (D3D9Buffer *)buffers[i];
curVBufferOffsets_[i + start] = offsets ? offsets[i] : 0;
}
}
void BindIndexBuffer(Buffer *indexBuffer, int offset) override {
curIBuffer_ = (D3D9Buffer *)indexBuffer;
@ -573,11 +574,7 @@ public:
curPipeline_ = (D3D9Pipeline *)pipeline;
}
void BeginFrame(Draw::DebugFlags debugFlags) override;
void EndFrame() override;
void Present(PresentMode presentMode, int vblanks) override;
int GetFrameCount() override { return frameCount_; }
void UpdateDynamicUniformBuffer(const void *ub, size_t size) override;
@ -638,12 +635,11 @@ private:
D3DCAPS9 d3dCaps_;
char shadeLangVersion_[64]{};
DeviceCaps caps_{};
int frameCount_ = FRAME_TIME_HISTORY_LENGTH;
// Bound state
AutoRef<D3D9Pipeline> curPipeline_;
AutoRef<D3D9Buffer> curVBuffer_;
int curVBufferOffset_ = 0;
AutoRef<D3D9Buffer> curVBuffers_[4];
int curVBufferOffsets_[4]{};
AutoRef<D3D9Buffer> curIBuffer_;
int curIBufferOffset_ = 0;
AutoRef<Framebuffer> curRenderTarget_;
@ -780,9 +776,6 @@ D3D9Context::D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, ID
caps_.multiSampleLevelsMask = 1; // More could be supported with some work.
caps_.clipPlanesSupported = caps.MaxUserClipPlanes;
caps_.presentInstantModeChange = false;
caps_.presentMaxInterval = 1;
caps_.presentModesSupported = PresentMode::FIFO;
if ((caps.RasterCaps & D3DPRASTERCAPS_ANISOTROPY) != 0 && caps.MaxAnisotropy > 1) {
caps_.anisoSupported = true;
@ -941,12 +934,6 @@ Texture *D3D9Context::CreateTexture(const TextureDesc &desc) {
return tex;
}
void D3D9Context::UpdateTextureLevels(Texture *texture, const uint8_t **data, TextureCallback initDataCallback, int numLevels) {
D3D9Texture *tex = (D3D9Texture *)texture;
tex->UpdateTextureLevels(data, numLevels, initDataCallback);
}
void D3D9Context::BindTextures(int start, int count, Texture **textures, TextureBindFlags flags) {
_assert_(start + count <= MAX_BOUND_TEXTURES);
for (int i = start; i < start + count; i++) {
@ -964,31 +951,10 @@ void D3D9Context::BindNativeTexture(int index, void *nativeTexture) {
device_->SetTexture(index, texture);
}
void D3D9Context::BeginFrame(Draw::DebugFlags debugFlags) {
FrameTimeData frameTimeData = frameTimeHistory_.Add(frameCount_);
frameTimeData.frameBegin = time_now_d();
frameTimeData.afterFenceWait = frameTimeData.frameBegin; // no fence wait
}
void D3D9Context::EndFrame() {
frameTimeHistory_[frameCount_].firstSubmit = time_now_d();
curPipeline_ = nullptr;
}
void D3D9Context::Present(PresentMode presentMode, int vblanks) {
frameTimeHistory_[frameCount_].queuePresent = time_now_d();
if (deviceEx_) {
deviceEx_->EndScene();
deviceEx_->PresentEx(NULL, NULL, NULL, NULL, 0);
deviceEx_->BeginScene();
} else {
device_->EndScene();
device_->Present(NULL, NULL, NULL, NULL);
device_->BeginScene();
}
frameCount_++;
}
static void SemanticToD3D9UsageAndIndex(int semantic, BYTE *usage, BYTE *index) {
*index = 0;
switch (semantic) {
@ -1025,7 +991,7 @@ D3D9InputLayout::D3D9InputLayout(LPDIRECT3DDEVICE9 device, const InputLayoutDesc
D3DVERTEXELEMENT9 *elements = new D3DVERTEXELEMENT9[desc.attributes.size() + 1];
size_t i;
for (i = 0; i < desc.attributes.size(); i++) {
elements[i].Stream = 0;
elements[i].Stream = desc.attributes[i].binding;
elements[i].Offset = desc.attributes[i].offset;
elements[i].Method = D3DDECLMETHOD_DEFAULT;
SemanticToD3D9UsageAndIndex(desc.attributes[i].location, &elements[i].Usage, &elements[i].UsageIndex);
@ -1035,7 +1001,9 @@ D3D9InputLayout::D3D9InputLayout(LPDIRECT3DDEVICE9 device, const InputLayoutDesc
// Zero the last one.
memcpy(&elements[i], &end, sizeof(elements[i]));
stride_ = desc.stride;
for (i = 0; i < desc.bindings.size(); i++) {
stride_[i] = desc.bindings[i].stride;
}
HRESULT hr = device->CreateVertexDeclaration(elements, &decl_);
if (FAILED(hr)) {
@ -1169,7 +1137,7 @@ inline int D3DPrimCount(D3DPRIMITIVETYPE prim, int size) {
}
void D3D9Context::Draw(int vertexCount, int offset) {
device_->SetStreamSource(0, curVBuffer_->vbuffer_, curVBufferOffset_, curPipeline_->inputLayout->GetStride());
device_->SetStreamSource(0, curVBuffers_[0]->vbuffer_, curVBufferOffsets_[0], curPipeline_->inputLayout->GetStride(0));
curPipeline_->inputLayout->Apply(device_);
curPipeline_->Apply(device_, stencilRef_, stencilWriteMask_, stencilCompareMask_);
ApplyDynamicState();
@ -1180,7 +1148,7 @@ void D3D9Context::DrawIndexed(int vertexCount, int offset) {
curPipeline_->inputLayout->Apply(device_);
curPipeline_->Apply(device_, stencilRef_, stencilWriteMask_, stencilCompareMask_);
ApplyDynamicState();
device_->SetStreamSource(0, curVBuffer_->vbuffer_, curVBufferOffset_, curPipeline_->inputLayout->GetStride());
device_->SetStreamSource(0, curVBuffers_[0]->vbuffer_, curVBufferOffsets_[0], curPipeline_->inputLayout->GetStride(0));
device_->SetIndices(curIBuffer_->ibuffer_);
device_->DrawIndexedPrimitive(curPipeline_->prim, 0, 0, vertexCount, offset, D3DPrimCount(curPipeline_->prim, vertexCount));
}
@ -1190,7 +1158,7 @@ void D3D9Context::DrawUP(const void *vdata, int vertexCount) {
curPipeline_->Apply(device_, stencilRef_, stencilWriteMask_, stencilCompareMask_);
ApplyDynamicState();
device_->DrawPrimitiveUP(curPipeline_->prim, D3DPrimCount(curPipeline_->prim, vertexCount), vdata, curPipeline_->inputLayout->GetStride());
device_->DrawPrimitiveUP(curPipeline_->prim, D3DPrimCount(curPipeline_->prim, vertexCount), vdata, curPipeline_->inputLayout->GetStride(0));
}
static uint32_t SwapRB(uint32_t c) {

View file

@ -1,28 +0,0 @@
#include <mutex>
#include <set>
#include "Common/GPU/GPUBackendCommon.h"
// Global push buffer tracker for GPU memory profiling.
// Don't want to manually dig up all the active push buffers.
static std::mutex g_pushBufferListMutex;
static std::set<GPUMemoryManager *> g_pushBuffers;
std::vector<GPUMemoryManager *> GetActiveGPUMemoryManagers() {
std::vector<GPUMemoryManager *> buffers;
std::lock_guard<std::mutex> guard(g_pushBufferListMutex);
for (auto iter : g_pushBuffers) {
buffers.push_back(iter);
}
return buffers;
}
void RegisterGPUMemoryManager(GPUMemoryManager *manager) {
std::lock_guard<std::mutex> guard(g_pushBufferListMutex);
g_pushBuffers.insert(manager);
}
void UnregisterGPUMemoryManager(GPUMemoryManager *manager) {
std::lock_guard<std::mutex> guard(g_pushBufferListMutex);
g_pushBuffers.erase(manager);
}

View file

@ -1,17 +0,0 @@
#pragma once
#include <vector>
// Just an abstract thing to get debug information.
class GPUMemoryManager {
public:
virtual ~GPUMemoryManager() {}
virtual void GetDebugString(char *buffer, size_t bufSize) const = 0;
virtual const char *Name() const = 0; // for sorting
};
std::vector<GPUMemoryManager *> GetActiveGPUMemoryManagers();
void RegisterGPUMemoryManager(GPUMemoryManager *manager);
void UnregisterGPUMemoryManager(GPUMemoryManager *manager);

View file

@ -2,8 +2,6 @@
#include "Common/Common.h"
// Flags and structs shared between backends that haven't found a good home.
enum class InvalidationFlags {
CACHED_RENDER_STATE = 1,
};
@ -16,22 +14,3 @@ enum class InvalidationCallbackFlags {
ENUM_CLASS_BITOPS(InvalidationCallbackFlags);
typedef std::function<void(InvalidationCallbackFlags)> InvalidationCallback;
// These are separate from FrameData because we store some history of these.
// Also, this might be joined with more non-GPU timing information later.
struct FrameTimeData {
uint64_t frameId;
int waitCount;
double frameBegin;
double afterFenceWait;
double firstSubmit;
double queuePresent;
double actualPresent;
double desiredPresentTime;
double earliestPresentTime;
double presentMargin;
};
constexpr size_t FRAME_TIME_HISTORY_LENGTH = 32;

View file

@ -55,7 +55,7 @@ bool Thin3DFormatToGLFormatAndType(DataFormat fmt, GLuint &internalFormat, GLuin
internalFormat = GL_RGB;
format = GL_RGB;
type = GL_UNSIGNED_BYTE;
alignment = 3;
alignment = 1;
break;
case DataFormat::R4G4B4A4_UNORM_PACK16:
@ -146,14 +146,12 @@ bool Thin3DFormatToGLFormatAndType(DataFormat fmt, GLuint &internalFormat, GLuin
alignment = 16;
break;
#ifdef GL_COMPRESSED_RGBA_ASTC_4x4_KHR
case DataFormat::ASTC_4x4_UNORM_BLOCK:
internalFormat = GL_COMPRESSED_RGBA_ASTC_4x4_KHR;
format = GL_RGBA;
type = GL_FLOAT;
alignment = 16;
break;
#endif
default:
return false;

View file

@ -592,9 +592,7 @@ bool CheckGLExtensions() {
for (int i = 0; i < numCompressedFormats; i++) {
switch (compressedFormats[i]) {
case GL_COMPRESSED_RGB8_ETC2: gl_extensions.supportsETC2 = true; break;
#ifdef GL_COMPRESSED_RGBA_ASTC_4x4_KHR
case GL_COMPRESSED_RGBA_ASTC_4x4_KHR: gl_extensions.supportsASTC = true; break;
#endif
#ifndef USING_GLES2
case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: gl_extensions.supportsBC123 = true; break;
case GL_COMPRESSED_RGBA_BPTC_UNORM: gl_extensions.supportsBC7 = true; break;
@ -628,13 +626,11 @@ bool CheckGLExtensions() {
}
void SetGLCoreContext(bool flag) {
if (!extensionsDone) {
useCoreContext = flag;
// For convenience, it'll get reset later.
gl_extensions.IsCoreContext = useCoreContext;
} else {
_assert_(flag == useCoreContext);
}
_assert_msg_(!extensionsDone, "SetGLCoreContext() after CheckGLExtensions()");
useCoreContext = flag;
// For convenience, it'll get reset later.
gl_extensions.IsCoreContext = useCoreContext;
}
void ResetGLExtensions() {

View file

@ -18,7 +18,6 @@ enum {
GPU_VENDOR_BROADCOM = 7, // Raspberry PI etc
GPU_VENDOR_VIVANTE = 8,
GPU_VENDOR_APPLE = 9,
GPU_VENDOR_MESA = 10,
GPU_VENDOR_UNKNOWN = 0,
};

View file

@ -32,25 +32,25 @@ void GLDeleter::Perform(GLRenderManager *renderManager, bool skipGLCalls) {
}
pushBuffers.clear();
for (auto shader : shaders) {
if (skipGLCalls && shader)
if (skipGLCalls)
shader->shader = 0; // prevent the glDeleteShader
delete shader;
}
shaders.clear();
for (auto program : programs) {
if (skipGLCalls && program)
if (skipGLCalls)
program->program = 0; // prevent the glDeleteProgram
delete program;
}
programs.clear();
for (auto buffer : buffers) {
if (skipGLCalls && buffer)
if (skipGLCalls)
buffer->buffer_ = 0;
delete buffer;
}
buffers.clear();
for (auto texture : textures) {
if (skipGLCalls && texture)
if (skipGLCalls)
texture->texture = 0;
delete texture;
}

View file

@ -3,7 +3,6 @@
#include <mutex>
#include <condition_variable>
#include <vector>
#include <string>
#include <set>
#include "Common/GPU/OpenGL/GLCommon.h"
@ -40,8 +39,7 @@ struct GLQueueProfileContext {
bool enabled;
double cpuStartTime;
double cpuEndTime;
std::string passesString;
int commandCounts[25]; // Can't grab count from the enum as it would mean a circular include. Might clean this up later.
int drawArraysRebindsAvoided;
};
@ -49,10 +47,6 @@ struct GLQueueProfileContext {
struct GLFrameData {
bool skipSwap = false;
// Frames need unique IDs to wait for present on, let's keep them here.
// Also used for indexing into the frame timing history buffer.
uint64_t frameId;
std::mutex fenceMutex;
std::condition_variable fenceCondVar;
bool readyForFence = true;

View file

@ -1,288 +0,0 @@
#include "Common/MemoryUtil.h"
#include "Common/GPU/OpenGL/GLMemory.h"
#include "Common/GPU/OpenGL/GLRenderManager.h"
#include "Common/GPU/OpenGL/GLFeatures.h"
#include "Common/Data/Text/Parsers.h"
extern std::thread::id renderThreadId;
#if MAX_LOGLEVEL >= DEBUG_LEVEL
static bool OnRenderThread() {
return std::this_thread::get_id() == renderThreadId;
}
#endif
void *GLRBuffer::Map(GLBufferStrategy strategy) {
_assert_(buffer_ != 0);
GLbitfield access = GL_MAP_WRITE_BIT;
if ((strategy & GLBufferStrategy::MASK_FLUSH) != 0) {
access |= GL_MAP_FLUSH_EXPLICIT_BIT;
}
if ((strategy & GLBufferStrategy::MASK_INVALIDATE) != 0) {
access |= GL_MAP_INVALIDATE_BUFFER_BIT;
}
void *p = nullptr;
bool allowNativeBuffer = strategy != GLBufferStrategy::SUBDATA;
if (allowNativeBuffer) {
glBindBuffer(target_, buffer_);
if (gl_extensions.ARB_buffer_storage || gl_extensions.EXT_buffer_storage) {
#if !PPSSPP_PLATFORM(IOS)
if (!hasStorage_) {
GLbitfield storageFlags = access & ~(GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_FLUSH_EXPLICIT_BIT);
#ifdef USING_GLES2
#ifdef GL_EXT_buffer_storage
glBufferStorageEXT(target_, size_, nullptr, storageFlags);
#endif
#else
glBufferStorage(target_, size_, nullptr, storageFlags);
#endif
hasStorage_ = true;
}
#endif
p = glMapBufferRange(target_, 0, size_, access);
} else if (gl_extensions.VersionGEThan(3, 0, 0)) {
// GLES3 or desktop 3.
p = glMapBufferRange(target_, 0, size_, access);
} else if (!gl_extensions.IsGLES) {
#ifndef USING_GLES2
p = glMapBuffer(target_, GL_READ_WRITE);
#endif
}
}
mapped_ = p != nullptr;
return p;
}
bool GLRBuffer::Unmap() {
glBindBuffer(target_, buffer_);
mapped_ = false;
return glUnmapBuffer(target_) == GL_TRUE;
}
GLPushBuffer::GLPushBuffer(GLRenderManager *render, GLuint target, size_t size, const char *tag) : render_(render), size_(size), target_(target), tag_(tag) {
bool res = AddBuffer();
_assert_(res);
RegisterGPUMemoryManager(this);
}
GLPushBuffer::~GLPushBuffer() {
UnregisterGPUMemoryManager(this);
Destroy(true);
}
void GLPushBuffer::Map() {
_assert_(!writePtr_);
auto &info = buffers_[buf_];
writePtr_ = info.deviceMemory ? info.deviceMemory : info.localMemory;
info.flushOffset = 0;
// Force alignment. This is needed for PushAligned() to work as expected.
while ((intptr_t)writePtr_ & 15) {
writePtr_++;
offset_++;
info.flushOffset++;
}
_assert_(writePtr_);
}
void GLPushBuffer::Unmap() {
_assert_(writePtr_);
if (!buffers_[buf_].deviceMemory) {
// Here we simply upload the data to the last buffer.
// Might be worth trying with size_ instead of offset_, so the driver can replace
// the whole buffer. At least if it's close.
render_->BufferSubdata(buffers_[buf_].buffer, 0, offset_, buffers_[buf_].localMemory, false);
} else {
buffers_[buf_].flushOffset = offset_;
}
writePtr_ = nullptr;
}
void GLPushBuffer::Flush() {
// Must be called from the render thread.
_dbg_assert_(OnRenderThread());
if (buf_ >= buffers_.size()) {
_dbg_assert_msg_(false, "buf_ somehow got out of sync: %d vs %d", (int)buf_, (int)buffers_.size());
return;
}
buffers_[buf_].flushOffset = offset_;
if (!buffers_[buf_].deviceMemory && writePtr_) {
auto &info = buffers_[buf_];
if (info.flushOffset != 0) {
_assert_(info.buffer->buffer_);
glBindBuffer(target_, info.buffer->buffer_);
glBufferSubData(target_, 0, info.flushOffset, info.localMemory);
}
// Here we will submit all the draw calls, with the already known buffer and offsets.
// Might as well reset the write pointer here and start over the current buffer.
writePtr_ = info.localMemory;
offset_ = 0;
info.flushOffset = 0;
}
// For device memory, we flush all buffers here.
if ((strategy_ & GLBufferStrategy::MASK_FLUSH) != 0) {
for (auto &info : buffers_) {
if (info.flushOffset == 0 || !info.deviceMemory)
continue;
glBindBuffer(target_, info.buffer->buffer_);
glFlushMappedBufferRange(target_, 0, info.flushOffset);
info.flushOffset = 0;
}
}
}
bool GLPushBuffer::AddBuffer() {
// INFO_LOG(G3D, "GLPushBuffer(%s): Allocating %d bytes", tag_, size_);
BufInfo info;
info.localMemory = (uint8_t *)AllocateAlignedMemory(size_, 16);
if (!info.localMemory)
return false;
info.buffer = render_->CreateBuffer(target_, size_, GL_DYNAMIC_DRAW);
info.size = size_;
buf_ = buffers_.size();
buffers_.push_back(info);
return true;
}
void GLPushBuffer::Destroy(bool onRenderThread) {
if (buf_ == -1)
return; // Already destroyed
for (BufInfo &info : buffers_) {
// This will automatically unmap device memory, if needed.
// NOTE: We immediately delete the buffer, don't go through the deleter, if we're on the render thread.
if (onRenderThread) {
delete info.buffer;
} else {
render_->DeleteBuffer(info.buffer);
}
FreeAlignedMemory(info.localMemory);
}
buffers_.clear();
buf_ = -1;
}
// Advances to the next buffer in the list, creating a new (possibly larger)
// one if we've run out or the request doesn't fit in size_.
void GLPushBuffer::NextBuffer(size_t minSize) {
	// Unmap whatever we were writing to before switching.
	Unmap();
	buf_++;
	const bool needNewBuffer = buf_ >= buffers_.size() || minSize > size_;
	if (needNewBuffer) {
		// Grow size_ geometrically until the request fits.
		while (size_ < minSize) {
			size_ <<= 1;
		}
		bool ok = AddBuffer();
		_assert_(ok);
		if (!ok) {
			// Last-ditch attempt to avoid crashing outright.
			buf_ = 0;
		}
	}
	// Start writing from the top of the (new) current buffer.
	offset_ = 0;
	Map();
}
// Collapses multiple per-frame buffers into one large buffer so subsequent
// frames don't need to switch mid-frame. Must run off the render thread
// (deletions are queued through the render manager).
void GLPushBuffer::Defragment() {
	_dbg_assert_msg_(!OnRenderThread(), "Defragment must not run on the render thread");
	if (buffers_.size() <= 1) {
		// Nothing to merge. Still a good moment to drop CPU-side copies
		// that device-mapped buffers don't need.
		for (auto &info : buffers_) {
			if (info.deviceMemory) {
				FreeAlignedMemory(info.localMemory);
				info.localMemory = nullptr;
			}
		}
		return;
	}
	// More than one buffer: replace them all with a single one big enough for
	// everything. Buffer sizes can differ (size_ may have grown mid-frame when
	// AddBuffer was called), so sum the actual sizes rather than multiplying.
	size_t combined = 0;
	for (const auto &info : buffers_) {
		combined += info.size;
	}
	Destroy(false);
	// Clamp to a sane range. If there's another spike, we'll just allocate more anyway.
	size_ = std::min(std::max(combined, (size_t)65536), (size_t)(512 * 1024 * 1024));
	bool ok = AddBuffer();
	_assert_msg_(ok, "AddBuffer failed");
}
// Returns the number of bytes written so far this frame: all fully-used
// buffers plus the write offset into the current (last) one. Buffers can
// have different sizes since size_ may grow mid-frame, so sum explicitly.
size_t GLPushBuffer::GetTotalSize() const {
	size_t total = offset_;
	// All buffers except the last are fully consumed.
	for (size_t i = 0; i + 1 < buffers_.size(); i++) {
		total += buffers_[i].size;
	}
	return total;
}
// Maps all GL buffers into CPU-visible device memory according to the given
// strategy (no-op for SUBDATA, which uploads via glBufferSubData instead).
// If the mapping state changed while a Map() was active, remaps so writePtr_
// points at valid memory again.
void GLPushBuffer::MapDevice(GLBufferStrategy strategy) {
	_dbg_assert_msg_(OnRenderThread(), "MapDevice must run on render thread");
	strategy_ = strategy;
	if (strategy_ == GLBufferStrategy::SUBDATA) {
		// SUBDATA never maps - data stays in localMemory until Flush/Unmap uploads it.
		return;
	}
	bool mapChanged = false;
	for (auto &info : buffers_) {
		if (!info.buffer->buffer_ || info.deviceMemory) {
			// Can't map - no device buffer associated yet or already mapped.
			continue;
		}
		info.deviceMemory = (uint8_t *)info.buffer->Map(strategy_);
		mapChanged = mapChanged || info.deviceMemory != nullptr;
		if (!info.deviceMemory && !info.localMemory) {
			// Somehow it failed, let's dodge crashing.
			info.localMemory = (uint8_t *)AllocateAlignedMemory(info.buffer->size_, 16);
			mapChanged = true;
		}
		_dbg_assert_msg_(info.localMemory || info.deviceMemory, "Local or device memory must succeed");
	}
	if (writePtr_ && mapChanged) {
		// This can happen during a sync. Remap.
		writePtr_ = nullptr;
		Map();
	}
}
// Unmaps every buffer that is currently mapped into device memory.
void GLPushBuffer::UnmapDevice() {
	_dbg_assert_msg_(OnRenderThread(), "UnmapDevice must run on render thread");
	for (auto &info : buffers_) {
		if (!info.deviceMemory)
			continue;
		// TODO: Technically this can return false?
		info.buffer->Unmap();
		info.deviceMemory = nullptr;
	}
}
void GLPushBuffer::GetDebugString(char *buffer, size_t bufSize) const {
snprintf(buffer, bufSize, "%s: %s/%s (%d)", tag_, NiceSizeFormat(this->offset_).c_str(), NiceSizeFormat(this->size_).c_str(), (int)buffers_.size());
}

View file

@ -1,185 +0,0 @@
#pragma once
#include <vector>
#include <cstdint>
#include <cstring>
#include "Common/GPU/GPUBackendCommon.h"
#include "Common/GPU/OpenGL/GLCommon.h"
#include "Common/Log.h"
// How push-buffer data gets from CPU to GPU. The two MASK_ bits can be tested
// with the operator& below; the named combinations are the values actually used.
enum class GLBufferStrategy {
	SUBDATA = 0,

	MASK_FLUSH = 0x10,
	MASK_INVALIDATE = 0x20,

	// Map/unmap the buffer each frame.
	FRAME_UNMAP = 1,
	// Map/unmap and also invalidate the buffer on map.
	INVALIDATE_UNMAP = MASK_INVALIDATE,
	// Map/unmap and explicitly flushed changed ranges.
	FLUSH_UNMAP = MASK_FLUSH,
	// Map/unmap, invalidate on map, and explicit flush.
	FLUSH_INVALIDATE_UNMAP = MASK_FLUSH | MASK_INVALIDATE,
};

// Bit-test helper for the MASK_ flags above. Returns a plain int so callers
// can write `(strategy & GLBufferStrategy::MASK_FLUSH) != 0`.
static inline int operator &(const GLBufferStrategy &lhs, const GLBufferStrategy &rhs) {
	// static_cast instead of C-style casts: greppable and intent-revealing.
	return static_cast<int>(lhs) & static_cast<int>(rhs);
}
// Thin RAII wrapper around a GL buffer object. The GL name (buffer_) is
// created elsewhere (by the queue runner's init steps - this class only
// deletes it in the destructor). Must be destroyed on the render thread
// since the destructor issues GL calls.
class GLRBuffer {
public:
	GLRBuffer(GLuint target, size_t size) : target_(target), size_((int)size) {}
	~GLRBuffer() {
		if (buffer_) {
			glDeleteBuffers(1, &buffer_);
		}
	}

	// Maps the buffer into CPU-visible memory per the strategy; returns
	// nullptr on failure. Defined out of line.
	void *Map(GLBufferStrategy strategy);
	bool Unmap();

	// True while the buffer is mapped (between a successful Map and Unmap).
	bool Mapped() const {
		return mapped_;
	}

	GLuint buffer_ = 0;  // GL buffer object name; 0 until created.
	GLuint target_;      // e.g. GL_ARRAY_BUFFER / GL_ELEMENT_ARRAY_BUFFER.
	int size_;           // Size in bytes.

private:
	bool mapped_ = false;
	bool hasStorage_ = false;  // presumably set when glBufferStorage is used - confirm in Map()
};
class GLRenderManager;
// Similar to VulkanPushBuffer but is currently less efficient - it collects all the data in
// RAM then does a big memcpy/buffer upload at the end of the frame. This is at least a lot
// faster than the hundreds of buffer uploads or memory array buffers we used before.
// On modern GL we could avoid the copy using glBufferStorage but not sure it's worth the
// trouble.
// We need to manage the lifetime of this together with the other resources so its destructor
// runs on the render thread.
class GLPushBuffer : public GPUMemoryManager {
public:
	friend class GLRenderManager;

	// One backing buffer: a GL buffer object plus either a CPU-side staging
	// copy (localMemory) or a persistent device mapping (deviceMemory), or
	// both as a fallback.
	struct BufInfo {
		GLRBuffer *buffer = nullptr;
		uint8_t *localMemory = nullptr;   // CPU staging copy (SUBDATA path).
		uint8_t *deviceMemory = nullptr;  // Mapped GPU memory, if mapping succeeded.
		size_t flushOffset = 0;           // How much has been written and needs flushing.
		size_t size;                      // Capacity in bytes.
	};

	GLPushBuffer(GLRenderManager *render, GLuint target, size_t size, const char *tag);
	~GLPushBuffer();

	// Rewinds the write position without unmapping or freeing anything.
	void Reset() { offset_ = 0; }

	void GetDebugString(char *buffer, size_t bufSize) const override;

	const char *Name() const override { return tag_; };  // for sorting

	// Utility for users of this class, not used internally.
	enum { INVALID_OFFSET = 0xFFFFFFFF };

private:
	// Needs context in case of defragment.
	void Begin() {
		buf_ = 0;
		offset_ = 0;
		// Note: we must defrag because some buffers may be smaller than size_.
		Defragment();
		Map();
		_dbg_assert_(writePtr_);
	}

	// Like Begin() but keeps the current position - used mid-frame.
	void BeginNoReset() {
		Map();
	}

	void End() {
		Unmap();
	}

public:
	void Map();
	void Unmap();

	// True while mapped, i.e. between Map() and Unmap().
	bool IsReady() const {
		return writePtr_ != nullptr;
	}

	// Recommended - lets you write directly into the buffer through the returned pointer.
	// If you didn't end up using all the memory you grabbed here, then before calling Allocate or Push
	// again, call Rewind (see below).
	// On return, *buf and *bindOffset identify where the data will live on the GPU.
	uint8_t *Allocate(uint32_t numBytes, uint32_t alignment, GLRBuffer **buf, uint32_t *bindOffset) {
		// Round the current offset up to the requested alignment (power of two assumed).
		uint32_t offset = ((uint32_t)offset_ + alignment - 1) & ~(alignment - 1);
		if (offset + numBytes <= size_) {
			// Common path.
			offset_ = offset + numBytes;
			*buf = buffers_[buf_].buffer;
			*bindOffset = offset;
			return writePtr_ + offset;
		}

		// Doesn't fit - move to (or create) the next buffer, which starts empty.
		NextBuffer(numBytes);
		*bindOffset = 0;
		*buf = buffers_[buf_].buffer;
		// Need to mark the allocated range used in the new buffer. How did things work before this?
		offset_ = numBytes;
		return writePtr_;
	}

	// For convenience if all you'll do is to copy.
	// Returns the bind offset of the copied data.
	uint32_t Push(const void *data, uint32_t numBytes, int alignment, GLRBuffer **buf) {
		uint32_t bindOffset;
		uint8_t *ptr = Allocate(numBytes, alignment, buf, &bindOffset);
		memcpy(ptr, data, numBytes);
		return bindOffset;
	}

	// Translates a bind offset in the CURRENT buffer back to a write pointer.
	uint8_t *GetPtr(uint32_t offset) {
		return writePtr_ + offset;
	}

	// If you didn't use all of the previous allocation you just made (obviously can't be another one),
	// you can return memory to the buffer by specifying the offset up until which you wrote data.
	// Pass in the buffer you got last time. If that buffer has been filled already, no rewind can be safely done.
	// (well technically would be possible but not worth the trouble).
	void Rewind(GLRBuffer *buffer, uint32_t offset) {
		if (buffer == buffers_[buf_].buffer) {
			_dbg_assert_(offset != INVALID_OFFSET);
			_dbg_assert_(offset <= offset_);
			offset_ = offset;
		}
	}

	size_t GetOffset() const { return offset_; }
	size_t GetTotalSize() const;

	void Destroy(bool onRenderThread);
	void Flush();

protected:
	// Render-thread only: establish/tear down persistent device mappings.
	void MapDevice(GLBufferStrategy strategy);
	void UnmapDevice();

private:
	bool AddBuffer();
	void NextBuffer(size_t minSize);
	void Defragment();

	GLRenderManager *render_;
	std::vector<BufInfo> buffers_;
	size_t buf_ = 0;      // Index of the buffer currently being written.
	size_t offset_ = 0;   // Write offset within the current buffer.
	size_t size_ = 0;     // Size used for newly allocated buffers; can grow.
	uint8_t *writePtr_ = nullptr;  // Non-null while mapped; points into local or device memory.
	GLuint target_;       // GL buffer target, e.g. GL_ARRAY_BUFFER.
	GLBufferStrategy strategy_ = GLBufferStrategy::SUBDATA;
	const char *tag_;     // Debug name; not owned.
};

View file

@ -73,9 +73,7 @@ void GLQueueRunner::CreateDeviceObjects() {
populate(GL_SHADING_LANGUAGE_VERSION);
CHECK_GL_ERROR_IF_DEBUG();
#if !PPSSPP_ARCH(X86) // Doesn't work on AMD for some reason. See issue #17787
useDebugGroups_ = !gl_extensions.IsGLES && gl_extensions.VersionGEThan(4, 3);
#endif
}
void GLQueueRunner::DestroyDeviceObjects() {
@ -120,7 +118,7 @@ static std::string GetStereoBufferLayout(const char *uniformName) {
else return "undefined";
}
void GLQueueRunner::RunInitSteps(const FastVec<GLRInitStep> &steps, bool skipGLCalls) {
void GLQueueRunner::RunInitSteps(const std::vector<GLRInitStep> &steps, bool skipGLCalls) {
if (skipGLCalls) {
// Some bookkeeping still needs to be done.
for (size_t i = 0; i < steps.size(); i++) {
@ -334,10 +332,10 @@ void GLQueueRunner::RunInitSteps(const FastVec<GLRInitStep> &steps, bool skipGLC
step.create_shader.shader->desc.c_str(),
infoLog.c_str(),
LineNumberString(code).c_str());
std::vector<std::string_view> lines;
std::vector<std::string> lines;
SplitString(errorString, '\n', lines);
for (auto line : lines) {
ERROR_LOG(G3D, "%.*s", (int)line.size(), line.data());
for (auto &line : lines) {
ERROR_LOG(G3D, "%s", line.c_str());
}
if (errorCallback_) {
std::string desc = StringFromFormat("Shader compilation failed: %s", step.create_shader.stage == GL_VERTEX_SHADER ? "vertex" : "fragment");
@ -653,7 +651,7 @@ retry_depth:
currentReadHandle_ = fbo->handle;
}
void GLQueueRunner::RunSteps(const std::vector<GLRStep *> &steps, GLFrameData &frameData, bool skipGLCalls, bool keepSteps, bool useVR) {
void GLQueueRunner::RunSteps(const std::vector<GLRStep *> &steps, bool skipGLCalls, bool keepSteps, bool useVR, GLQueueProfileContext &profile) {
if (skipGLCalls) {
if (keepSteps) {
return;
@ -702,7 +700,7 @@ void GLQueueRunner::RunSteps(const std::vector<GLRStep *> &steps, GLFrameData &f
CHECK_GL_ERROR_IF_DEBUG();
size_t renderCount = 0;
for (size_t i = 0; i < steps.size(); i++) {
GLRStep &step = *steps[i];
const GLRStep &step = *steps[i];
#if !defined(USING_GLES2)
if (useDebugGroups_)
@ -713,10 +711,11 @@ void GLQueueRunner::RunSteps(const std::vector<GLRStep *> &steps, GLFrameData &f
case GLRStepType::RENDER:
renderCount++;
if (IsVREnabled()) {
PreprocessStepVR(&step);
PerformRenderPass(step, renderCount == 1, renderCount == totalRenderCount, frameData.profile);
GLRStep vrStep = step;
PreprocessStepVR(&vrStep);
PerformRenderPass(vrStep, renderCount == 1, renderCount == totalRenderCount, profile);
} else {
PerformRenderPass(step, renderCount == 1, renderCount == totalRenderCount, frameData.profile);
PerformRenderPass(step, renderCount == 1, renderCount == totalRenderCount, profile);
}
break;
case GLRStepType::COPY:
@ -742,14 +741,11 @@ void GLQueueRunner::RunSteps(const std::vector<GLRStep *> &steps, GLFrameData &f
if (useDebugGroups_)
glPopDebugGroup();
#endif
if (frameData.profile.enabled) {
frameData.profile.passesString += StepToString(step);
}
if (!keepSteps) {
delete steps[i];
}
}
CHECK_GL_ERROR_IF_DEBUG();
}
@ -838,45 +834,19 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
bool logicEnabled = false;
#endif
bool clipDistanceEnabled[8]{};
bool lastDrawIsArray = false;
int lastBindOffset = 0;
GLRInputLayout *lastDrawLayout = nullptr;
GLuint blendEqColor = (GLuint)-1;
GLuint blendEqAlpha = (GLuint)-1;
GLenum blendSrcColor = (GLenum)-1;
GLenum blendDstColor = (GLenum)-1;
GLenum blendSrcAlpha = (GLenum)-1;
GLenum blendDstAlpha = (GLenum)-1;
GLuint stencilWriteMask = (GLuint)-1;
GLenum stencilFunc = (GLenum)-1;
GLuint stencilRef = (GLuint)-1;
GLuint stencilCompareMask = (GLuint)-1;
GLenum stencilSFail = (GLenum)-1;
GLenum stencilZFail = (GLenum)-1;
GLenum stencilPass = (GLenum)-1;
GLenum frontFace = (GLenum)-1;
GLenum cullFace = (GLenum)-1;
GLRTexture *curTex[MAX_GL_TEXTURE_SLOTS]{};
GLRViewport viewport = {
-1000000000.0f,
-1000000000.0f,
-1000000000.0f,
-1000000000.0f,
-1000000000.0f,
-1000000000.0f,
};
GLRect2D scissorRc = { -1, -1, -1, -1 };
CHECK_GL_ERROR_IF_DEBUG();
auto &commands = step.commands;
for (const auto &c : commands) {
#ifdef _DEBUG
if (profile.enabled) {
if ((size_t)c.cmd < ARRAY_SIZE(profile.commandCounts)) {
profile.commandCounts[(size_t)c.cmd]++;
}
}
#endif
switch (c.cmd) {
case GLRRenderCommand::DEPTH:
if (c.depth.enabled) {
@ -897,34 +867,23 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
depthEnabled = false;
}
break;
case GLRRenderCommand::STENCIL:
if (c.stencil.enabled) {
case GLRRenderCommand::STENCILFUNC:
if (c.stencilFunc.enabled) {
if (!stencilEnabled) {
glEnable(GL_STENCIL_TEST);
stencilEnabled = true;
}
if (c.stencil.func != stencilFunc || c.stencil.ref != stencilRef || c.stencil.compareMask != stencilCompareMask) {
glStencilFunc(c.stencil.func, c.stencil.ref, c.stencil.compareMask);
stencilFunc = c.stencil.func;
stencilRef = c.stencil.ref;
stencilCompareMask = c.stencil.compareMask;
}
if (c.stencil.sFail != stencilSFail || c.stencil.zFail != stencilZFail || c.stencil.pass != stencilPass) {
glStencilOp(c.stencil.sFail, c.stencil.zFail, c.stencil.pass);
stencilSFail = c.stencil.sFail;
stencilZFail = c.stencil.zFail;
stencilPass = c.stencil.pass;
}
if (c.stencil.writeMask != stencilWriteMask) {
glStencilMask(c.stencil.writeMask);
stencilWriteMask = c.stencil.writeMask;
}
glStencilFunc(c.stencilFunc.func, c.stencilFunc.ref, c.stencilFunc.compareMask);
} else if (/* !c.stencilFunc.enabled && */stencilEnabled) {
glDisable(GL_STENCIL_TEST);
stencilEnabled = false;
}
CHECK_GL_ERROR_IF_DEBUG();
break;
case GLRRenderCommand::STENCILOP:
glStencilOp(c.stencilOp.sFail, c.stencilOp.zFail, c.stencilOp.pass);
glStencilMask(c.stencilOp.writeMask);
break;
case GLRRenderCommand::BLEND:
if (c.blend.enabled) {
if (!blendEnabled) {
@ -936,13 +895,7 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
blendEqColor = c.blend.funcColor;
blendEqAlpha = c.blend.funcAlpha;
}
if (blendSrcColor != c.blend.srcColor || blendDstColor != c.blend.dstColor || blendSrcAlpha != c.blend.srcAlpha || blendDstAlpha != c.blend.dstAlpha) {
glBlendFuncSeparate(c.blend.srcColor, c.blend.dstColor, c.blend.srcAlpha, c.blend.dstAlpha);
blendSrcColor = c.blend.srcColor;
blendDstColor = c.blend.dstColor;
blendSrcAlpha = c.blend.srcAlpha;
blendDstAlpha = c.blend.dstAlpha;
}
glBlendFuncSeparate(c.blend.srcColor, c.blend.dstColor, c.blend.srcAlpha, c.blend.dstAlpha);
} else if (/* !c.blend.enabled && */ blendEnabled) {
glDisable(GL_BLEND);
blendEnabled = false;
@ -1020,27 +973,16 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
y = curFBHeight_ - y - c.viewport.vp.h;
// TODO: Support FP viewports through glViewportArrays
if (viewport.x != c.viewport.vp.x || viewport.y != y || viewport.w != c.viewport.vp.w || viewport.h != c.viewport.vp.h) {
glViewport((GLint)c.viewport.vp.x, (GLint)y, (GLsizei)c.viewport.vp.w, (GLsizei)c.viewport.vp.h);
viewport.x = c.viewport.vp.x;
viewport.y = y;
viewport.w = c.viewport.vp.w;
viewport.h = c.viewport.vp.h;
}
if (viewport.minZ != c.viewport.vp.minZ || viewport.maxZ != c.viewport.vp.maxZ) {
viewport.minZ = c.viewport.vp.minZ;
viewport.maxZ = c.viewport.vp.maxZ;
glViewport((GLint)c.viewport.vp.x, (GLint)y, (GLsizei)c.viewport.vp.w, (GLsizei)c.viewport.vp.h);
#if !defined(USING_GLES2)
if (gl_extensions.IsGLES) {
glDepthRangef(c.viewport.vp.minZ, c.viewport.vp.maxZ);
} else {
glDepthRange(c.viewport.vp.minZ, c.viewport.vp.maxZ);
}
#else
if (gl_extensions.IsGLES) {
glDepthRangef(c.viewport.vp.minZ, c.viewport.vp.maxZ);
#endif
} else {
glDepthRange(c.viewport.vp.minZ, c.viewport.vp.maxZ);
}
#else
glDepthRangef(c.viewport.vp.minZ, c.viewport.vp.maxZ);
#endif
CHECK_GL_ERROR_IF_DEBUG();
break;
}
@ -1049,13 +991,7 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
int y = c.scissor.rc.y;
if (!curFB_)
y = curFBHeight_ - y - c.scissor.rc.h;
if (scissorRc.x != c.scissor.rc.x || scissorRc.y != y || scissorRc.w != c.scissor.rc.w || scissorRc.h != c.scissor.rc.h) {
glScissor(c.scissor.rc.x, y, c.scissor.rc.w, c.scissor.rc.h);
scissorRc.x = c.scissor.rc.x;
scissorRc.y = y;
scissorRc.w = c.scissor.rc.w;
scissorRc.h = c.scissor.rc.h;
}
glScissor(c.scissor.rc.x, y, c.scissor.rc.w, c.scissor.rc.h);
CHECK_GL_ERROR_IF_DEBUG();
break;
}
@ -1238,36 +1174,63 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
}
case GLRRenderCommand::DRAW:
{
// TODO: Add fast path for glBindVertexBuffer
GLRInputLayout *layout = c.draw.inputLayout;
GLuint buf = c.draw.vertexBuffer->buffer_;
_dbg_assert_(!c.draw.vertexBuffer->Mapped());
GLuint buf = c.draw.buffer ? c.draw.buffer->buffer_ : 0;
_dbg_assert_(!c.draw.buffer || !c.draw.buffer->Mapped());
if (buf != curArrayBuffer) {
glBindBuffer(GL_ARRAY_BUFFER, buf);
curArrayBuffer = buf;
// Invalidate any draw offset caching.
lastDrawLayout = nullptr;
}
if (attrMask != layout->semanticsMask_) {
EnableDisableVertexArrays(attrMask, layout->semanticsMask_);
attrMask = layout->semanticsMask_;
}
for (size_t i = 0; i < layout->entries.size(); i++) {
auto &entry = layout->entries[i];
glVertexAttribPointer(entry.location, entry.count, entry.type, entry.normalized, layout->stride, (const void *)(c.draw.vertexOffset + entry.offset));
}
if (c.draw.indexBuffer) {
for (size_t i = 0; i < layout->entries.size(); i++) {
auto &entry = layout->entries[i];
glVertexAttribPointer(entry.location, entry.count, entry.type, entry.normalized, layout->stride, (const void *)(c.draw.offset + entry.offset));
}
GLuint buf = c.draw.indexBuffer->buffer_;
_dbg_assert_(!c.draw.indexBuffer->Mapped());
_dbg_assert_(!(c.draw.indexBuffer && c.draw.indexBuffer->Mapped()));
if (buf != curElemArrayBuffer) {
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buf);
curElemArrayBuffer = buf;
}
if (c.draw.instances == 1) {
glDrawElements(c.draw.mode, c.draw.count, c.draw.indexType, (void *)(intptr_t)c.draw.indexOffset);
glDrawElements(c.draw.mode, c.draw.count, c.draw.indexType, c.draw.indices);
} else {
glDrawElementsInstanced(c.draw.mode, c.draw.count, c.draw.indexType, (void *)(intptr_t)c.draw.indexOffset, c.draw.instances);
glDrawElementsInstanced(c.draw.mode, c.draw.count, c.draw.indexType, c.draw.indices, c.draw.instances);
}
lastDrawIsArray = false;
} else {
glDrawArrays(c.draw.mode, c.draw.first, c.draw.count);
// See if we can avoid calling glVertexAttribPointer.
int offset = 0;
bool rebind = true;
if (lastDrawIsArray && layout == lastDrawLayout) {
unsigned int diff = (unsigned int)c.draw.offset - (unsigned int)lastBindOffset;
if (diff % layout->stride == 0) {
// Compatible draws.
offset = diff / layout->stride;
rebind = false;
profile.drawArraysRebindsAvoided++;
}
}
if (rebind) {
// Rebind.
for (size_t i = 0; i < layout->entries.size(); i++) {
auto &entry = layout->entries[i];
glVertexAttribPointer(entry.location, entry.count, entry.type, entry.normalized, layout->stride, (const void *)(c.draw.offset + entry.offset));
}
lastBindOffset = (int)c.draw.offset;
}
glDrawArrays(c.draw.mode, c.draw.first + offset, c.draw.count);
lastDrawIsArray = true;
}
lastDrawLayout = layout;
CHECK_GL_ERROR_IF_DEBUG();
break;
}
@ -1378,14 +1341,8 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
glEnable(GL_CULL_FACE);
cullEnabled = true;
}
if (frontFace != c.raster.frontFace) {
glFrontFace(c.raster.frontFace);
frontFace = c.raster.frontFace;
}
if (cullFace != c.raster.cullFace) {
glCullFace(c.raster.cullFace);
cullFace = c.raster.cullFace;
}
glFrontFace(c.raster.frontFace);
glCullFace(c.raster.cullFace);
} else if (/* !c.raster.cullEnable && */ cullEnabled) {
glDisable(GL_CULL_FACE);
cullEnabled = false;
@ -1853,74 +1810,3 @@ GLRFramebuffer::~GLRFramebuffer() {
glDeleteRenderbuffers(1, &stencil_buffer);
CHECK_GL_ERROR_IF_DEBUG();
}
// Builds a one-line, newline-terminated human-readable description of a step,
// used to assemble the per-frame pass list shown by the GPU profiler.
std::string GLQueueRunner::StepToString(const GLRStep &step) const {
	char buffer[256];
	switch (step.stepType) {
	case GLRStepType::RENDER:
	{
		// A null framebuffer means rendering to the backbuffer, whose size is target{Width,Height}_.
		int w = step.render.framebuffer ? step.render.framebuffer->width : targetWidth_;
		int h = step.render.framebuffer ? step.render.framebuffer->height : targetHeight_;
		snprintf(buffer, sizeof(buffer), "RENDER %s %s (commands: %d, %dx%d)\n", step.tag, step.render.framebuffer ? step.render.framebuffer->Tag() : "", (int)step.commands.size(), w, h);
		break;
	}
	case GLRStepType::COPY:
		snprintf(buffer, sizeof(buffer), "COPY '%s' %s -> %s (%dx%d, %s)\n", step.tag, step.copy.src->Tag(), step.copy.dst->Tag(), step.copy.srcRect.w, step.copy.srcRect.h, GLRAspectToString((GLRAspect)step.copy.aspectMask));
		break;
	case GLRStepType::BLIT:
		snprintf(buffer, sizeof(buffer), "BLIT '%s' %s -> %s (%dx%d->%dx%d, %s)\n", step.tag, step.copy.src->Tag(), step.copy.dst->Tag(), step.blit.srcRect.w, step.blit.srcRect.h, step.blit.dstRect.w, step.blit.dstRect.h, GLRAspectToString((GLRAspect)step.blit.aspectMask));
		break;
	case GLRStepType::READBACK:
		// A null readback source means reading back from the backbuffer.
		snprintf(buffer, sizeof(buffer), "READBACK '%s' %s (%dx%d, %s)\n", step.tag, step.readback.src ? step.readback.src->Tag() : "(backbuffer)", step.readback.srcRect.w, step.readback.srcRect.h, GLRAspectToString((GLRAspect)step.readback.aspectMask));
		break;
	case GLRStepType::READBACK_IMAGE:
		snprintf(buffer, sizeof(buffer), "READBACK_IMAGE '%s' (%dx%d)\n", step.tag, step.readback_image.srcRect.w, step.readback_image.srcRect.h);
		break;
	case GLRStepType::RENDER_SKIP:
		snprintf(buffer, sizeof(buffer), "(RENDER_SKIP) %s\n", step.tag);
		break;
	default:
		// Unknown step type: produce an empty string rather than garbage.
		buffer[0] = 0;
		break;
	}
	return std::string(buffer);
}
// Debug-friendly name for a framebuffer aspect; "N/A" for anything unrecognized.
const char *GLRAspectToString(GLRAspect aspect) {
	if (aspect == GLR_ASPECT_COLOR)
		return "COLOR";
	if (aspect == GLR_ASPECT_DEPTH)
		return "DEPTH";
	if (aspect == GLR_ASPECT_STENCIL)
		return "STENCIL";
	return "N/A";
}
// Maps a render command enum value to its name, for debug/profiler output.
// Returns "N/A" for values outside the known set.
const char *RenderCommandToString(GLRRenderCommand cmd) {
	switch (cmd) {
	case GLRRenderCommand::DEPTH: return "DEPTH";
	case GLRRenderCommand::STENCIL: return "STENCIL";
	case GLRRenderCommand::BLEND: return "BLEND";
	case GLRRenderCommand::BLENDCOLOR: return "BLENDCOLOR";
	case GLRRenderCommand::LOGICOP: return "LOGICOP";
	case GLRRenderCommand::UNIFORM4I: return "UNIFORM4I";
	case GLRRenderCommand::UNIFORM4UI: return "UNIFORM4UI";
	case GLRRenderCommand::UNIFORM4F: return "UNIFORM4F";
	case GLRRenderCommand::UNIFORMMATRIX: return "UNIFORMMATRIX";
	case GLRRenderCommand::UNIFORMSTEREOMATRIX: return "UNIFORMSTEREOMATRIX";
	case GLRRenderCommand::TEXTURESAMPLER: return "TEXTURESAMPLER";
	case GLRRenderCommand::TEXTURELOD: return "TEXTURELOD";
	case GLRRenderCommand::VIEWPORT: return "VIEWPORT";
	case GLRRenderCommand::SCISSOR: return "SCISSOR";
	case GLRRenderCommand::RASTER: return "RASTER";
	case GLRRenderCommand::CLEAR: return "CLEAR";
	case GLRRenderCommand::INVALIDATE: return "INVALIDATE";
	case GLRRenderCommand::BINDPROGRAM: return "BINDPROGRAM";
	case GLRRenderCommand::BINDTEXTURE: return "BINDTEXTURE";
	case GLRRenderCommand::BIND_FB_TEXTURE: return "BIND_FB_TEXTURE";
	case GLRRenderCommand::BIND_VERTEX_BUFFER: return "BIND_VERTEX_BUFFER";
	case GLRRenderCommand::GENMIPS: return "GENMIPS";
	case GLRRenderCommand::DRAW: return "DRAW";
	case GLRRenderCommand::TEXTURE_SUBIMAGE: return "TEXTURE_SUBIMAGE";
	default: return "N/A";
	}
}

View file

@ -11,7 +11,7 @@
#include "Common/GPU/Shader.h"
#include "Common/GPU/thin3d.h"
#include "Common/Data/Collections/TinySet.h"
#include "Common/Data/Collections/FastVec.h"
struct GLRViewport {
float x, y, w, h, minZ, maxZ;
@ -40,7 +40,8 @@ class GLRInputLayout;
enum class GLRRenderCommand : uint8_t {
DEPTH,
STENCIL,
STENCILFUNC,
STENCILOP,
BLEND,
BLENDCOLOR,
LOGICOP,
@ -69,7 +70,6 @@ enum class GLRRenderCommand : uint8_t {
// type field, smashed right after each other?)
// Also, all GLenums are really only 16 bits.
struct GLRRenderData {
GLRRenderData(GLRRenderCommand _cmd) : cmd(_cmd) {}
GLRRenderCommand cmd;
union {
struct {
@ -99,21 +99,23 @@ struct GLRRenderData {
GLenum func;
uint8_t ref;
uint8_t compareMask;
} stencilFunc;
struct {
GLenum sFail;
GLenum zFail;
GLenum pass;
uint8_t writeMask;
} stencil;
} stencilOp; // also write mask
struct {
GLRInputLayout *inputLayout;
GLRBuffer *vertexBuffer;
GLRBuffer *buffer;
size_t offset;
GLRBuffer *indexBuffer;
uint32_t vertexOffset;
uint32_t indexOffset;
GLenum mode; // primitive
GLint first;
GLint count;
GLint indexType;
void *indices;
GLint instances;
} draw;
struct {
@ -290,17 +292,16 @@ enum class GLRRenderPassAction {
class GLRFramebuffer;
enum GLRAspect {
enum {
GLR_ASPECT_COLOR = 1,
GLR_ASPECT_DEPTH = 2,
GLR_ASPECT_STENCIL = 3,
};
const char *GLRAspectToString(GLRAspect aspect);
struct GLRStep {
GLRStep(GLRStepType _type) : stepType(_type) {}
GLRStepType stepType;
FastVec<GLRRenderData> commands;
std::vector<GLRRenderData> commands;
TinySet<const GLRFramebuffer *, 8> dependencies;
const char *tag;
union {
@ -309,6 +310,8 @@ struct GLRStep {
GLRRenderPassAction color;
GLRRenderPassAction depth;
GLRRenderPassAction stencil;
// Note: not accurate.
int numDraws;
} render;
struct {
GLRFramebuffer *src;
@ -352,9 +355,9 @@ public:
caps_ = caps;
}
void RunInitSteps(const FastVec<GLRInitStep> &steps, bool skipGLCalls);
void RunInitSteps(const std::vector<GLRInitStep> &steps, bool skipGLCalls);
void RunSteps(const std::vector<GLRStep *> &steps, GLFrameData &frameData, bool skipGLCalls, bool keepSteps, bool useVR);
void RunSteps(const std::vector<GLRStep *> &steps, bool skipGLCalls, bool keepSteps, bool useVR, GLQueueProfileContext &profile);
void CreateDeviceObjects();
void DestroyDeviceObjects();
@ -390,8 +393,6 @@ private:
GLenum fbo_get_fb_target(bool read, GLuint **cached);
void fbo_unbind();
std::string StepToString(const GLRStep &step) const;
GLRFramebuffer *curFB_ = nullptr;
GLuint globalVAO_ = 0;
@ -423,5 +424,3 @@ private:
ErrorCallbackFn errorCallback_ = nullptr;
void *errorCallbackUserData_ = nullptr;
};
const char *RenderCommandToString(GLRRenderCommand cmd);

View file

@ -17,7 +17,12 @@
#define VLOG(...)
#endif
std::thread::id renderThreadId;
static std::thread::id renderThreadId;
#if MAX_LOGLEVEL >= DEBUG_LEVEL
static bool OnRenderThread() {
return std::this_thread::get_id() == renderThreadId;
}
#endif
GLRTexture::GLRTexture(const Draw::DeviceCaps &caps, int width, int height, int depth, int numMips) {
if (caps.textureNPOTFullySupported) {
@ -37,7 +42,7 @@ GLRTexture::~GLRTexture() {
}
}
GLRenderManager::GLRenderManager(HistoryBuffer<FrameTimeData, FRAME_TIME_HISTORY_LENGTH> &frameTimeHistory) : frameTimeHistory_(frameTimeHistory) {
GLRenderManager::GLRenderManager() {
// size_t sz = sizeof(GLRRenderData);
// _dbg_assert_(sz == 88);
}
@ -129,25 +134,25 @@ bool GLRenderManager::ThreadFrame() {
return false;
}
GLRRenderThreadTask *task = nullptr;
GLRRenderThreadTask task;
// In case of syncs or other partial completion, we keep going until we complete a frame.
while (true) {
// Pop a task of the queue and execute it.
// NOTE: We need to actually wait for a task, we can't just bail!
{
std::unique_lock<std::mutex> lock(pushMutex_);
while (renderThreadQueue_.empty()) {
pushCondVar_.wait(lock);
}
task = std::move(renderThreadQueue_.front());
task = renderThreadQueue_.front();
renderThreadQueue_.pop();
}
// We got a task! We can now have pushMutex_ unlocked, allowing the host to
// push more work when it feels like it, and just start working.
if (task->runType == GLRRunType::EXIT) {
delete task;
if (task.runType == GLRRunType::EXIT) {
// Oh, host wanted out. Let's leave, and also let's notify the host.
// This is unlike Vulkan too which can just block on the thread existing.
std::unique_lock<std::mutex> lock(syncMutex_);
@ -157,13 +162,11 @@ bool GLRenderManager::ThreadFrame() {
}
// Render the scene.
VLOG(" PULL: Frame %d RUN (%0.3f)", task->frame, time_now_d());
if (Run(*task)) {
VLOG(" PULL: Frame %d RUN (%0.3f)", task.frame, time_now_d());
if (Run(task)) {
// Swap requested, so we just bail the loop.
delete task;
break;
}
delete task;
};
return true;
@ -176,7 +179,9 @@ void GLRenderManager::StopThread() {
run_ = false;
std::unique_lock<std::mutex> lock(pushMutex_);
renderThreadQueue_.push(new GLRRenderThreadTask(GLRRunType::EXIT));
GLRRenderThreadTask exitTask{};
exitTask.runType = GLRRunType::EXIT;
renderThreadQueue_.push(exitTask);
pushCondVar_.notify_one();
} else {
WARN_LOG(G3D, "GL submission thread was already paused.");
@ -184,11 +189,15 @@ void GLRenderManager::StopThread() {
}
std::string GLRenderManager::GetGpuProfileString() const {
int curFrame = curFrame_;
int curFrame = GetCurFrame();
const GLQueueProfileContext &profile = frameData_[curFrame].profile;
float cputime_ms = 1000.0f * (profile.cpuEndTime - profile.cpuStartTime);
return StringFromFormat("CPU time to run the list: %0.2f ms\n\n%s", cputime_ms, profilePassesString_.c_str());
return StringFromFormat(
"CPU time to run the list: %0.2f ms\n"
"Avoided DrawArrays rebinds: %d",
cputime_ms,
profile.drawArraysRebindsAvoided);
}
void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRenderPassAction color, GLRRenderPassAction depth, GLRRenderPassAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {
@ -215,11 +224,13 @@ void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRende
step->render.color = color;
step->render.depth = depth;
step->render.stencil = stencil;
step->render.numDraws = 0;
step->tag = tag;
steps_.push_back(step);
GLuint clearMask = 0;
GLRRenderData data(GLRRenderCommand::CLEAR);
GLRRenderData data;
data.cmd = GLRRenderCommand::CLEAR;
if (color == GLRRenderPassAction::CLEAR) {
clearMask |= GL_COLOR_BUFFER_BIT;
data.clear.clearColor = clearColor;
@ -350,18 +361,11 @@ void GLRenderManager::BeginFrame(bool enableProfiling) {
int curFrame = GetCurFrame();
FrameTimeData &frameTimeData = frameTimeHistory_.Add(frameIdGen_);
frameTimeData.frameBegin = time_now_d();
frameTimeData.afterFenceWait = frameTimeData.frameBegin;
GLFrameData &frameData = frameData_[curFrame];
frameData.frameId = frameIdGen_;
frameData.profile.enabled = enableProfiling;
frameIdGen_++;
{
std::unique_lock<std::mutex> lock(frameData.fenceMutex);
VLOG("PUSH: BeginFrame (curFrame = %d, readyForFence = %d, time=%0.3f)", curFrame, (int)frameData.readyForFence, time_now_d());
std::unique_lock<std::mutex> lock(frameData.fenceMutex);
while (!frameData.readyForFence) {
frameData.fenceCondVar.wait(lock);
}
@ -378,100 +382,37 @@ void GLRenderManager::BeginFrame(bool enableProfiling) {
void GLRenderManager::Finish() {
curRenderStep_ = nullptr; // EndCurRenderStep is this simple here.
int curFrame = curFrame_;
int curFrame = GetCurFrame();
GLFrameData &frameData = frameData_[curFrame];
frameTimeHistory_[frameData.frameId].firstSubmit = time_now_d();
frameData_[curFrame].deleter.Take(deleter_);
if (frameData.profile.enabled) {
profilePassesString_ = std::move(frameData.profile.passesString);
#ifdef _DEBUG
std::string cmdString;
for (int i = 0; i < ARRAY_SIZE(frameData.profile.commandCounts); i++) {
if (frameData.profile.commandCounts[i] > 0) {
cmdString += StringFromFormat("%s: %d\n", RenderCommandToString((GLRRenderCommand)i), frameData.profile.commandCounts[i]);
}
}
memset(frameData.profile.commandCounts, 0, sizeof(frameData.profile.commandCounts));
profilePassesString_ = cmdString + profilePassesString_;
#endif
frameData.profile.passesString.clear();
}
VLOG("PUSH: Finish, pushing task. curFrame = %d", curFrame);
GLRRenderThreadTask *task = new GLRRenderThreadTask(GLRRunType::SUBMIT);
task->frame = curFrame;
GLRRenderThreadTask task;
task.frame = curFrame;
task.runType = GLRRunType::PRESENT;
{
std::unique_lock<std::mutex> lock(pushMutex_);
renderThreadQueue_.push(task);
renderThreadQueue_.back()->initSteps = std::move(initSteps_);
renderThreadQueue_.back()->steps = std::move(steps_);
renderThreadQueue_.back().initSteps = std::move(initSteps_);
renderThreadQueue_.back().steps = std::move(steps_);
initSteps_.clear();
steps_.clear();
pushCondVar_.notify_one();
}
}
void GLRenderManager::Present() {
GLRRenderThreadTask *presentTask = new GLRRenderThreadTask(GLRRunType::PRESENT);
presentTask->frame = curFrame_;
{
std::unique_lock<std::mutex> lock(pushMutex_);
renderThreadQueue_.push(presentTask);
pushCondVar_.notify_one();
}
int newCurFrame = curFrame_ + 1;
if (newCurFrame >= inflightFrames_) {
newCurFrame = 0;
}
curFrame_ = newCurFrame;
curFrame_++;
if (curFrame_ >= inflightFrames_)
curFrame_ = 0;
insideFrame_ = false;
}
// Render thread. Returns true if the caller should handle a swap.
bool GLRenderManager::Run(GLRRenderThreadTask &task) {
_dbg_assert_(task.frame >= 0);
GLFrameData &frameData = frameData_[task.frame];
if (task.runType == GLRRunType::PRESENT) {
bool swapRequest = false;
if (!frameData.skipSwap) {
frameTimeHistory_[frameData.frameId].queuePresent = time_now_d();
if (swapIntervalChanged_) {
swapIntervalChanged_ = false;
if (swapIntervalFunction_) {
swapIntervalFunction_(swapInterval_);
}
}
// This is the swapchain framebuffer flip.
if (swapFunction_) {
VLOG(" PULL: SwapFunction()");
swapFunction_();
}
swapRequest = true;
} else {
frameData.skipSwap = false;
}
frameData.hasBegun = false;
VLOG(" PULL: Frame %d.readyForFence = true", task.frame);
{
std::lock_guard<std::mutex> lock(frameData.fenceMutex);
frameData.readyForFence = true;
frameData.fenceCondVar.notify_one();
// At this point, we're done with this framedata (for now).
}
return swapRequest;
}
if (!frameData.hasBegun) {
frameData.hasBegun = true;
@ -492,17 +433,18 @@ bool GLRenderManager::Run(GLRRenderThreadTask &task) {
if (frameData.profile.enabled) {
frameData.profile.cpuStartTime = time_now_d();
frameData.profile.drawArraysRebindsAvoided = 0;
}
if (IsVREnabled()) {
int passes = GetVRPassesCount();
for (int i = 0; i < passes; i++) {
PreVRFrameRender(i);
queueRunner_.RunSteps(task.steps, frameData, skipGLCalls_, i < passes - 1, true);
queueRunner_.RunSteps(task.steps, skipGLCalls_, i < passes - 1, true, frameData.profile);
PostVRFrameRender();
}
} else {
queueRunner_.RunSteps(task.steps, frameData, skipGLCalls_, false, false);
queueRunner_.RunSteps(task.steps, skipGLCalls_, false, false, frameData.profile);
}
if (frameData.profile.enabled) {
@ -515,8 +457,43 @@ bool GLRenderManager::Run(GLRRenderThreadTask &task) {
}
}
bool swapRequest = false;
switch (task.runType) {
case GLRRunType::SUBMIT:
case GLRRunType::PRESENT:
if (!frameData.skipSwap) {
if (swapIntervalChanged_) {
swapIntervalChanged_ = false;
if (swapIntervalFunction_) {
swapIntervalFunction_(swapInterval_);
}
}
// This is the swapchain framebuffer flip.
if (swapFunction_) {
VLOG(" PULL: SwapFunction()");
swapFunction_();
if (!retainControl_) {
// get out of here.
swapRequest = true;
}
} else {
VLOG(" PULL: SwapRequested");
swapRequest = true;
}
} else {
frameData.skipSwap = false;
}
frameData.hasBegun = false;
VLOG(" PULL: Frame %d.readyForFence = true", task.frame);
{
std::lock_guard<std::mutex> lock(frameData.fenceMutex);
frameData.readyForFence = true;
frameData.fenceCondVar.notify_one();
// At this point, we're done with this framedata (for now).
}
break;
case GLRRunType::SYNC:
@ -525,7 +502,7 @@ bool GLRenderManager::Run(GLRRenderThreadTask &task) {
// glFinish is not actually necessary here, and won't be unless we start using
// glBufferStorage. Then we need to use fences.
{
std::lock_guard<std::mutex> lock(syncMutex_);
std::unique_lock<std::mutex> lock(syncMutex_);
syncDone_ = true;
syncCondVar_.notify_one();
}
@ -535,20 +512,21 @@ bool GLRenderManager::Run(GLRRenderThreadTask &task) {
_assert_(false);
}
VLOG(" PULL: ::Run(): Done running tasks");
return false;
return swapRequest;
}
void GLRenderManager::FlushSync() {
{
VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame_);
GLRRenderThreadTask *task = new GLRRenderThreadTask(GLRRunType::SYNC);
task->frame = curFrame_;
GLRRenderThreadTask task;
task.frame = curFrame_;
task.runType = GLRRunType::SYNC;
std::unique_lock<std::mutex> lock(pushMutex_);
renderThreadQueue_.push(task);
renderThreadQueue_.back()->initSteps = std::move(initSteps_);
renderThreadQueue_.back()->steps = std::move(steps_);
renderThreadQueue_.back().initSteps = std::move(initSteps_);
renderThreadQueue_.back().steps = std::move(steps_);
pushCondVar_.notify_one();
steps_.clear();
}
@ -563,3 +541,272 @@ void GLRenderManager::FlushSync() {
syncDone_ = false;
}
}
// Creates the push buffer and immediately allocates its first backing buffer of 'size' bytes.
GLPushBuffer::GLPushBuffer(GLRenderManager *render, GLuint target, size_t size) : render_(render), size_(size), target_(target) {
	bool ok = AddBuffer();
	_assert_(ok);
}
GLPushBuffer::~GLPushBuffer() {
	// Per the class comment, the destructor runs on the render thread, so we can
	// delete the GL buffers directly instead of queuing them (Destroy(true)).
	Destroy(true);
}
void GLPushBuffer::Map() {
_assert_(!writePtr_);
auto &info = buffers_[buf_];
writePtr_ = info.deviceMemory ? info.deviceMemory : info.localMemory;
info.flushOffset = 0;
// Force alignment. This is needed for PushAligned() to work as expected.
while ((intptr_t)writePtr_ & 15) {
writePtr_++;
offset_++;
info.flushOffset++;
}
_assert_(writePtr_);
}
void GLPushBuffer::Unmap() {
_assert_(writePtr_);
if (!buffers_[buf_].deviceMemory) {
// Here we simply upload the data to the last buffer.
// Might be worth trying with size_ instead of offset_, so the driver can replace
// the whole buffer. At least if it's close.
render_->BufferSubdata(buffers_[buf_].buffer, 0, offset_, buffers_[buf_].localMemory, false);
} else {
buffers_[buf_].flushOffset = offset_;
}
writePtr_ = nullptr;
}
void GLPushBuffer::Flush() {
// Must be called from the render thread.
_dbg_assert_(OnRenderThread());
if (buf_ >= buffers_.size()) {
_dbg_assert_msg_(false, "buf_ somehow got out of sync: %d vs %d", (int)buf_, (int)buffers_.size());
return;
}
buffers_[buf_].flushOffset = offset_;
if (!buffers_[buf_].deviceMemory && writePtr_) {
auto &info = buffers_[buf_];
if (info.flushOffset != 0) {
_assert_(info.buffer->buffer_);
glBindBuffer(target_, info.buffer->buffer_);
glBufferSubData(target_, 0, info.flushOffset, info.localMemory);
}
// Here we will submit all the draw calls, with the already known buffer and offsets.
// Might as well reset the write pointer here and start over the current buffer.
writePtr_ = info.localMemory;
offset_ = 0;
info.flushOffset = 0;
}
// For device memory, we flush all buffers here.
if ((strategy_ & GLBufferStrategy::MASK_FLUSH) != 0) {
for (auto &info : buffers_) {
if (info.flushOffset == 0 || !info.deviceMemory)
continue;
glBindBuffer(target_, info.buffer->buffer_);
glFlushMappedBufferRange(target_, 0, info.flushOffset);
info.flushOffset = 0;
}
}
}
bool GLPushBuffer::AddBuffer() {
BufInfo info;
info.localMemory = (uint8_t *)AllocateAlignedMemory(size_, 16);
if (!info.localMemory)
return false;
info.buffer = render_->CreateBuffer(target_, size_, GL_DYNAMIC_DRAW);
info.size = size_;
buf_ = buffers_.size();
buffers_.push_back(info);
return true;
}
void GLPushBuffer::Destroy(bool onRenderThread) {
if (buf_ == -1)
return; // Already destroyed
for (BufInfo &info : buffers_) {
// This will automatically unmap device memory, if needed.
// NOTE: We immediately delete the buffer, don't go through the deleter, if we're on the render thread.
if (onRenderThread) {
delete info.buffer;
} else {
render_->DeleteBuffer(info.buffer);
}
FreeAlignedMemory(info.localMemory);
}
buffers_.clear();
buf_ = -1;
}
// Advances to the next backing buffer (growing the pool and size_ as needed
// to fit minSize), then maps it for writing from offset 0.
void GLPushBuffer::NextBuffer(size_t minSize) {
	// Flush/upload whatever was written to the current buffer first.
	Unmap();

	buf_++;
	if (buf_ >= buffers_.size() || minSize > size_) {
		// Grow size_ geometrically until the request fits.
		while (size_ < minSize) {
			size_ <<= 1;
		}
		bool ok = AddBuffer();
		_assert_(ok);
		if (!ok) {
			// Let's try not to crash at least?
			buf_ = 0;
		}
	}

	offset_ = 0;
	Map();
}
void GLPushBuffer::Defragment() {
_dbg_assert_msg_(!OnRenderThread(), "Defragment must not run on the render thread");
if (buffers_.size() <= 1) {
// Let's take this opportunity to jettison any localMemory we don't need.
for (auto &info : buffers_) {
if (info.deviceMemory) {
FreeAlignedMemory(info.localMemory);
info.localMemory = nullptr;
}
}
return;
}
// Okay, we have more than one. Destroy them all and start over with a larger one.
// When calling AddBuffer, we sometimes increase size_. So if we allocated multiple buffers in a frame,
// they won't all have the same size. Sum things up properly.
size_t newSize = 0;
for (int i = 0; i < (int)buffers_.size(); i++) {
newSize += buffers_[i].size;
}
Destroy(false);
// Set some sane but very free limits. If there's another spike, we'll just allocate more anyway.
size_ = std::min(std::max(newSize, (size_t)65536), (size_t)(512 * 1024 * 1024));
bool res = AddBuffer();
_assert_msg_(res, "AddBuffer failed");
}
// Returns the number of bytes consumed this frame: every full buffer before
// the current one, plus what has been written into the current one.
// Buffers can have different sizes (size_ may grow within a frame), so sum
// the actual per-buffer sizes.
size_t GLPushBuffer::GetTotalSize() const {
	size_t total = 0;
	for (size_t i = 0; i + 1 < buffers_.size(); i++) {
		total += buffers_[i].size;
	}
	total += offset_;
	return total;
}
// Render-thread only: maps the GL buffers persistently according to the
// chosen strategy. If a mapping appears (or fails) while we were mid-write,
// writePtr_ is re-established via Map().
void GLPushBuffer::MapDevice(GLBufferStrategy strategy) {
	_dbg_assert_msg_(OnRenderThread(), "MapDevice must run on render thread");

	strategy_ = strategy;
	if (strategy_ == GLBufferStrategy::SUBDATA) {
		// SUBDATA never maps; we keep writing to local memory and upload later.
		return;
	}

	bool remapped = false;
	for (BufInfo &b : buffers_) {
		if (!b.buffer->buffer_ || b.deviceMemory) {
			// No GL buffer created yet, or already mapped - skip.
			continue;
		}
		b.deviceMemory = (uint8_t *)b.buffer->Map(strategy_);
		if (b.deviceMemory)
			remapped = true;
		if (!b.deviceMemory && !b.localMemory) {
			// Mapping failed and there is no staging copy - allocate one so we don't crash.
			b.localMemory = (uint8_t *)AllocateAlignedMemory(b.buffer->size_, 16);
			remapped = true;
		}
		_dbg_assert_msg_(b.localMemory || b.deviceMemory, "Local or device memory must succeed");
	}

	if (writePtr_ && remapped) {
		// This can happen during a sync. Remap.
		writePtr_ = nullptr;
		Map();
	}
}
void GLPushBuffer::UnmapDevice() {
_dbg_assert_msg_(OnRenderThread(), "UnmapDevice must run on render thread");
for (auto &info : buffers_) {
if (info.deviceMemory) {
// TODO: Technically this can return false?
info.buffer->Unmap();
info.deviceMemory = nullptr;
}
}
}
// Maps the whole buffer for writing, picking the best available path:
// immutable storage (ARB/EXT_buffer_storage), glMapBufferRange (GLES3 or
// desktop GL3+), or plain glMapBuffer on old desktop GL. Returns nullptr when
// mapping is disallowed (SUBDATA strategy) or unsupported, in which case the
// caller falls back to CPU staging memory (see GLPushBuffer::MapDevice).
void *GLRBuffer::Map(GLBufferStrategy strategy) {
	_assert_(buffer_ != 0);
	// Translate the strategy's mask bits into glMapBufferRange access flags.
	GLbitfield access = GL_MAP_WRITE_BIT;
	if ((strategy & GLBufferStrategy::MASK_FLUSH) != 0) {
		// Caller will flush written ranges explicitly (see GLPushBuffer::Flush).
		access |= GL_MAP_FLUSH_EXPLICIT_BIT;
	}
	if ((strategy & GLBufferStrategy::MASK_INVALIDATE) != 0) {
		access |= GL_MAP_INVALIDATE_BUFFER_BIT;
	}
	void *p = nullptr;
	bool allowNativeBuffer = strategy != GLBufferStrategy::SUBDATA;
	if (allowNativeBuffer) {
		glBindBuffer(target_, buffer_);
		if (gl_extensions.ARB_buffer_storage || gl_extensions.EXT_buffer_storage) {
			// Immutable-storage path (not available on iOS).
#if !PPSSPP_PLATFORM(IOS)
			if (!hasStorage_) {
				// Storage flags must exclude the map-only bits.
				GLbitfield storageFlags = access & ~(GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_FLUSH_EXPLICIT_BIT);
#ifdef USING_GLES2
#ifdef GL_EXT_buffer_storage
				glBufferStorageEXT(target_, size_, nullptr, storageFlags);
#endif
#else
				glBufferStorage(target_, size_, nullptr, storageFlags);
#endif
				hasStorage_ = true;
			}
#endif
			p = glMapBufferRange(target_, 0, size_, access);
		} else if (gl_extensions.VersionGEThan(3, 0, 0)) {
			// GLES3 or desktop 3.
			p = glMapBufferRange(target_, 0, size_, access);
		} else if (!gl_extensions.IsGLES) {
			// Legacy desktop GL fallback.
#ifndef USING_GLES2
			p = glMapBuffer(target_, GL_READ_WRITE);
#endif
		}
	}
	mapped_ = p != nullptr;
	return p;
}
// Unmaps the buffer. Returns true if glUnmapBuffer reported success.
bool GLRBuffer::Unmap() {
	glBindBuffer(target_, buffer_);
	const bool ok = glUnmapBuffer(target_) == GL_TRUE;
	mapped_ = false;
	return ok;
}

View file

@ -16,7 +16,6 @@
#include "Common/GPU/OpenGL/GLQueueRunner.h"
#include "Common/GPU/OpenGL/GLFrameData.h"
#include "Common/GPU/OpenGL/GLCommon.h"
#include "Common/GPU/OpenGL/GLMemory.h"
class GLRInputLayout;
class GLPushBuffer;
@ -53,13 +52,12 @@ public:
class GLRFramebuffer {
public:
GLRFramebuffer(const Draw::DeviceCaps &caps, int _width, int _height, bool z_stencil, const char *tag)
GLRFramebuffer(const Draw::DeviceCaps &caps, int _width, int _height, bool z_stencil)
: color_texture(caps, _width, _height, 1, 1), z_stencil_texture(caps, _width, _height, 1, 1),
width(_width), height(_height), z_stencil_(z_stencil) {
}
~GLRFramebuffer();
const char *Tag() const { return tag_.c_str(); }
~GLRFramebuffer();
GLuint handle = 0;
GLRTexture color_texture;
@ -72,10 +70,8 @@ public:
int width;
int height;
GLuint colorDepth = 0;
bool z_stencil_;
private:
std::string tag_;
bool z_stencil_;
};
// We need to create some custom heap-allocated types so we can forward things that need to be created on the GL thread, before
@ -183,6 +179,179 @@ private:
std::unordered_map<std::string, UniformInfo> uniformCache_;
};
// Selects how GLPushBuffer gets data to the GPU. The MASK_* values are bit
// flags combined into the concrete strategies below; GLRBuffer::Map()
// translates them into glMapBufferRange access bits.
enum class GLBufferStrategy {
	// No mapping at all - upload with glBufferSubData from CPU staging memory.
	SUBDATA = 0,

	// Flag: explicitly flush written ranges (GL_MAP_FLUSH_EXPLICIT_BIT).
	MASK_FLUSH = 0x10,
	// Flag: invalidate the buffer when mapping (GL_MAP_INVALIDATE_BUFFER_BIT).
	MASK_INVALIDATE = 0x20,

	// Map/unmap the buffer each frame.
	FRAME_UNMAP = 1,
	// Map/unmap and also invalidate the buffer on map.
	INVALIDATE_UNMAP = MASK_INVALIDATE,
	// Map/unmap and explicitly flushed changed ranges.
	FLUSH_UNMAP = MASK_FLUSH,
	// Map/unmap, invalidate on map, and explicit flush.
	FLUSH_INVALIDATE_UNMAP = MASK_FLUSH | MASK_INVALIDATE,
};

// Bitwise AND so strategies can be tested against the MASK_* flags.
static inline int operator &(const GLBufferStrategy &lhs, const GLBufferStrategy &rhs) {
	return (int)lhs & (int)rhs;
}
// Wraps a GL buffer object. The GL name (buffer_) is created via an init step
// on the render thread; deletion happens in the destructor. Supports optional
// persistent mapping through Map()/Unmap() (see GLBufferStrategy).
class GLRBuffer {
public:
	GLRBuffer(GLuint target, size_t size) : target_(target), size_((int)size) {}
	~GLRBuffer() {
		if (buffer_) {
			glDeleteBuffers(1, &buffer_);
		}
	}

	// Maps the buffer for writing per the strategy; returns nullptr on failure.
	void *Map(GLBufferStrategy strategy);
	// Unmaps; returns true if glUnmapBuffer succeeded.
	bool Unmap();

	bool Mapped() const {
		return mapped_;
	}

	GLuint buffer_ = 0;  // GL buffer name; 0 until created on the render thread.
	GLuint target_;      // Buffer target, e.g. GL_ARRAY_BUFFER.
	int size_;           // Size in bytes.

private:
	bool mapped_ = false;
	// True once immutable storage (glBufferStorage) has been allocated for buffer_.
	bool hasStorage_ = false;
};
class GLRenderManager;
// Similar to VulkanPushBuffer but is currently less efficient - it collects all the data in
// RAM then does a big memcpy/buffer upload at the end of the frame. This is at least a lot
// faster than the hundreds of buffer uploads or memory array buffers we used before.
// On modern GL we could avoid the copy using glBufferStorage but not sure it's worth the
// trouble.
// We need to manage the lifetime of this together with the other resources so its destructor
// runs on the render thread.
class GLPushBuffer {
public:
	friend class GLRenderManager;

	// One backing buffer: the GL buffer plus either a CPU staging copy
	// (localMemory) or a persistently mapped pointer (deviceMemory).
	struct BufInfo {
		GLRBuffer *buffer = nullptr;
		uint8_t *localMemory = nullptr;   // CPU staging memory (may be freed once device-mapped).
		uint8_t *deviceMemory = nullptr;  // Persistently mapped GL memory, or null.
		size_t flushOffset = 0;           // End of the range that still needs flushing/uploading.
		size_t size;                      // Size of this buffer; buffers can differ in size.
	};

	GLPushBuffer(GLRenderManager *render, GLuint target, size_t size);
	~GLPushBuffer();

	// Rewinds the write offset without unmapping or switching buffers.
	void Reset() { offset_ = 0; }

private:
	// Needs context in case of defragment.
	// Starts a fresh frame: back to the first buffer, offset 0, remapped.
	void Begin() {
		buf_ = 0;
		offset_ = 0;
		// Note: we must defrag because some buffers may be smaller than size_.
		Defragment();
		Map();
		_dbg_assert_(writePtr_);
	}

	// Re-maps without rewinding - used when continuing after a sync.
	void BeginNoReset() {
		Map();
	}

	void End() {
		Unmap();
	}

public:
	void Map();
	void Unmap();

	// True while mapped and writable.
	bool IsReady() const {
		return writePtr_ != nullptr;
	}

	// When using the returned memory, make sure to bind the returned vkbuf.
	// This will later allow for handling overflow correctly.
	// Reserves numBytes (rounded up to a multiple of 4) and returns the offset
	// of the reservation within *vkbuf; may switch to a new backing buffer.
	size_t Allocate(size_t numBytes, GLRBuffer **vkbuf) {
		size_t out = offset_;
		if (offset_ + ((numBytes + 3) & ~3) >= size_) {
			// Doesn't fit in the current buffer - move to (or create) the next one.
			NextBuffer(numBytes);
			out = offset_;
			offset_ += (numBytes + 3) & ~3;
		} else {
			offset_ += (numBytes + 3) & ~3;  // Round up to 4 bytes.
		}
		*vkbuf = buffers_[buf_].buffer;
		return out;
	}

	// Copies 'data' into the push buffer.
	// Returns the offset that should be used when binding this buffer to get this data.
	size_t Push(const void *data, size_t size, GLRBuffer **vkbuf) {
		_dbg_assert_(writePtr_);
		size_t off = Allocate(size, vkbuf);
		memcpy(writePtr_ + off, data, size);
		return off;
	}

	// Like Push(), but first aligns the write offset to 'align'
	// (the masking math assumes align is a power of two).
	uint32_t PushAligned(const void *data, size_t size, int align, GLRBuffer **vkbuf) {
		_dbg_assert_(writePtr_);
		offset_ = (offset_ + align - 1) & ~(align - 1);
		size_t off = Allocate(size, vkbuf);
		memcpy(writePtr_ + off, data, size);
		return (uint32_t)off;
	}

	size_t GetOffset() const {
		return offset_;
	}

	// "Zero-copy" variant - you can write the data directly as you compute it.
	// Recommended.
	void *Push(size_t size, uint32_t *bindOffset, GLRBuffer **vkbuf) {
		_dbg_assert_(writePtr_);
		size_t off = Allocate(size, vkbuf);
		*bindOffset = (uint32_t)off;
		return writePtr_ + off;
	}

	// Zero-copy variant with alignment (power-of-two 'align' assumed).
	void *PushAligned(size_t size, uint32_t *bindOffset, GLRBuffer **vkbuf, int align) {
		_dbg_assert_(writePtr_);
		offset_ = (offset_ + align - 1) & ~(align - 1);
		size_t off = Allocate(size, vkbuf);
		*bindOffset = (uint32_t)off;
		return writePtr_ + off;
	}

	// Total bytes consumed so far across all backing buffers.
	size_t GetTotalSize() const;

	void Destroy(bool onRenderThread);
	void Flush();

protected:
	// Render-thread only: (un)map the GL buffers for the chosen strategy.
	void MapDevice(GLBufferStrategy strategy);
	void UnmapDevice();

private:
	bool AddBuffer();
	void NextBuffer(size_t minSize);
	void Defragment();

	GLRenderManager *render_;
	std::vector<BufInfo> buffers_;
	size_t buf_ = 0;     // Index of the buffer currently being written.
	size_t offset_ = 0;  // Write offset within the current buffer.
	size_t size_ = 0;    // Allocation size for newly created buffers (can grow).
	uint8_t *writePtr_ = nullptr;  // Current write pointer; null while unmapped.
	GLuint target_;      // GL buffer target, e.g. GL_ARRAY_BUFFER.
	GLBufferStrategy strategy_ = GLBufferStrategy::SUBDATA;
};
class GLRInputLayout {
public:
struct Entry {
@ -193,12 +362,11 @@ public:
intptr_t offset;
};
std::vector<Entry> entries;
int stride;
unsigned int stride;
int semanticsMask_ = 0;
};
enum class GLRRunType {
SUBMIT,
PRESENT,
SYNC,
EXIT,
@ -207,19 +375,14 @@ enum class GLRRunType {
class GLRenderManager;
class GLPushBuffer;
// These are enqueued from the main thread, and the render thread pops them off
// These are enqueued from the main thread,
// and the render thread pops them off
struct GLRRenderThreadTask {
GLRRenderThreadTask(GLRRunType _runType) : runType(_runType) {}
std::vector<GLRStep *> steps;
FastVec<GLRInitStep> initSteps;
std::vector<GLRInitStep> initSteps;
int frame = -1;
int frame;
GLRRunType runType;
// Avoid copying these by accident.
GLRRenderThreadTask(GLRRenderThreadTask &) = delete;
GLRRenderThreadTask& operator =(GLRRenderThreadTask &) = delete;
};
// Note: The GLRenderManager is created and destroyed on the render thread, and the latter
@ -227,12 +390,9 @@ struct GLRRenderThreadTask {
// directly in the destructor.
class GLRenderManager {
public:
GLRenderManager(HistoryBuffer<FrameTimeData, FRAME_TIME_HISTORY_LENGTH> &frameTimeHistory);
GLRenderManager();
~GLRenderManager();
GLRenderManager(GLRenderManager &) = delete;
GLRenderManager &operator=(GLRenderManager &) = delete;
void SetInvalidationCallback(InvalidationCallback callback) {
invalidationCallback_ = callback;
}
@ -254,47 +414,43 @@ public:
// Makes sure that the GPU has caught up enough that we can start writing buffers of this frame again.
void BeginFrame(bool enableProfiling);
// Can run on a different thread!
void Finish();
void Present();
void Finish();
// Creation commands. These were not needed in Vulkan since there we can do that on the main thread.
// We pass in width/height here even though it's not strictly needed until we support glTextureStorage
// and then we'll also need formats and stuff.
GLRTexture *CreateTexture(GLenum target, int width, int height, int depth, int numMips) {
_dbg_assert_(target != 0);
GLRInitStep &step = initSteps_.push_uninitialized();
step.stepType = GLRInitStepType::CREATE_TEXTURE;
GLRInitStep step { GLRInitStepType::CREATE_TEXTURE };
step.create_texture.texture = new GLRTexture(caps_, width, height, depth, numMips);
step.create_texture.texture->target = target;
initSteps_.push_back(step);
return step.create_texture.texture;
}
GLRBuffer *CreateBuffer(GLuint target, size_t size, GLuint usage) {
GLRInitStep &step = initSteps_.push_uninitialized();
step.stepType = GLRInitStepType::CREATE_BUFFER;
GLRInitStep step{ GLRInitStepType::CREATE_BUFFER };
step.create_buffer.buffer = new GLRBuffer(target, size);
step.create_buffer.size = (int)size;
step.create_buffer.usage = usage;
initSteps_.push_back(step);
return step.create_buffer.buffer;
}
GLRShader *CreateShader(GLuint stage, const std::string &code, const std::string &desc) {
GLRInitStep &step = initSteps_.push_uninitialized();
step.stepType = GLRInitStepType::CREATE_SHADER;
GLRInitStep step{ GLRInitStepType::CREATE_SHADER };
step.create_shader.shader = new GLRShader();
step.create_shader.shader->desc = desc;
step.create_shader.stage = stage;
step.create_shader.code = new char[code.size() + 1];
memcpy(step.create_shader.code, code.data(), code.size() + 1);
initSteps_.push_back(step);
return step.create_shader.shader;
}
GLRFramebuffer *CreateFramebuffer(int width, int height, bool z_stencil, const char *tag) {
_dbg_assert_(width > 0 && height > 0 && tag != nullptr);
GLRInitStep &step = initSteps_.push_uninitialized();
step.stepType = GLRInitStepType::CREATE_FRAMEBUFFER;
step.create_framebuffer.framebuffer = new GLRFramebuffer(caps_, width, height, z_stencil, tag);
GLRFramebuffer *CreateFramebuffer(int width, int height, bool z_stencil) {
GLRInitStep step{ GLRInitStepType::CREATE_FRAMEBUFFER };
step.create_framebuffer.framebuffer = new GLRFramebuffer(caps_, width, height, z_stencil);
initSteps_.push_back(step);
return step.create_framebuffer.framebuffer;
}
@ -303,8 +459,7 @@ public:
GLRProgram *CreateProgram(
std::vector<GLRShader *> shaders, std::vector<GLRProgram::Semantic> semantics, std::vector<GLRProgram::UniformLocQuery> queries,
std::vector<GLRProgram::Initializer> initializers, GLRProgramLocData *locData, const GLRProgramFlags &flags) {
GLRInitStep &step = initSteps_.push_uninitialized();
step.stepType = GLRInitStepType::CREATE_PROGRAM;
GLRInitStep step{ GLRInitStepType::CREATE_PROGRAM };
_assert_(shaders.size() <= ARRAY_SIZE(step.create_program.shaders));
step.create_program.program = new GLRProgram();
step.create_program.program->semantics_ = semantics;
@ -328,53 +483,47 @@ public:
}
#endif
step.create_program.num_shaders = (int)shaders.size();
initSteps_.push_back(step);
return step.create_program.program;
}
GLRInputLayout *CreateInputLayout(const std::vector<GLRInputLayout::Entry> &entries, int stride) {
GLRInitStep &step = initSteps_.push_uninitialized();
step.stepType = GLRInitStepType::CREATE_INPUT_LAYOUT;
GLRInitStep step{ GLRInitStepType::CREATE_INPUT_LAYOUT };
step.create_input_layout.inputLayout = new GLRInputLayout();
step.create_input_layout.inputLayout->entries = entries;
step.create_input_layout.inputLayout->stride = stride;
for (auto &iter : step.create_input_layout.inputLayout->entries) {
step.create_input_layout.inputLayout->semanticsMask_ |= 1 << iter.location;
}
initSteps_.push_back(step);
return step.create_input_layout.inputLayout;
}
GLPushBuffer *CreatePushBuffer(int frame, GLuint target, size_t size, const char *tag) {
GLPushBuffer *push = new GLPushBuffer(this, target, size, tag);
GLPushBuffer *CreatePushBuffer(int frame, GLuint target, size_t size) {
GLPushBuffer *push = new GLPushBuffer(this, target, size);
RegisterPushBuffer(frame, push);
return push;
}
void DeleteShader(GLRShader *shader) {
_dbg_assert_(shader != nullptr);
deleter_.shaders.push_back(shader);
}
void DeleteProgram(GLRProgram *program) {
_dbg_assert_(program != nullptr);
deleter_.programs.push_back(program);
}
void DeleteBuffer(GLRBuffer *buffer) {
_dbg_assert_(buffer != nullptr);
deleter_.buffers.push_back(buffer);
}
void DeleteTexture(GLRTexture *texture) {
_dbg_assert_(texture != nullptr);
deleter_.textures.push_back(texture);
}
void DeleteInputLayout(GLRInputLayout *inputLayout) {
_dbg_assert_(inputLayout != nullptr);
deleter_.inputLayouts.push_back(inputLayout);
}
void DeleteFramebuffer(GLRFramebuffer *framebuffer) {
_dbg_assert_(framebuffer != nullptr);
deleter_.framebuffers.push_back(framebuffer);
}
void DeletePushBuffer(GLPushBuffer *pushbuffer) {
_dbg_assert_(pushbuffer != nullptr);
deleter_.pushBuffers.push_back(pushbuffer);
}
@ -419,8 +568,7 @@ public:
void BufferSubdata(GLRBuffer *buffer, size_t offset, size_t size, uint8_t *data, bool deleteData = true) {
// TODO: Maybe should be a render command instead of an init command? When possible it's better as
// an init command, that's for sure.
GLRInitStep &step = initSteps_.push_uninitialized();
step.stepType = GLRInitStepType::BUFFER_SUBDATA;
GLRInitStep step{ GLRInitStepType::BUFFER_SUBDATA };
_dbg_assert_(offset >= 0);
_dbg_assert_(offset <= buffer->size_ - size);
step.buffer_subdata.buffer = buffer;
@ -428,12 +576,12 @@ public:
step.buffer_subdata.size = (int)size;
step.buffer_subdata.data = data;
step.buffer_subdata.deleteData = deleteData;
initSteps_.push_back(step);
}
// Takes ownership over the data pointer and delete[]-s it.
void TextureImage(GLRTexture *texture, int level, int width, int height, int depth, Draw::DataFormat format, uint8_t *data, GLRAllocType allocType = GLRAllocType::NEW, bool linearFilter = false) {
GLRInitStep &step = initSteps_.push_uninitialized();
step.stepType = GLRInitStepType::TEXTURE_IMAGE;
GLRInitStep step{ GLRInitStepType::TEXTURE_IMAGE };
step.texture_image.texture = texture;
step.texture_image.data = data;
step.texture_image.format = format;
@ -443,11 +591,12 @@ public:
step.texture_image.depth = depth;
step.texture_image.allocType = allocType;
step.texture_image.linearFilter = linearFilter;
initSteps_.push_back(step);
}
void TextureSubImage(int slot, GLRTexture *texture, int level, int x, int y, int width, int height, Draw::DataFormat format, uint8_t *data, GLRAllocType allocType = GLRAllocType::NEW) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData _data(GLRRenderCommand::TEXTURE_SUBIMAGE);
GLRRenderData _data{ GLRRenderCommand::TEXTURE_SUBIMAGE };
_data.texture_subimage.texture = texture;
_data.texture_subimage.data = data;
_data.texture_subimage.format = format;
@ -462,11 +611,11 @@ public:
}
void FinalizeTexture(GLRTexture *texture, int loadedLevels, bool genMips) {
GLRInitStep &step = initSteps_.push_uninitialized();
step.stepType = GLRInitStepType::TEXTURE_FINALIZE;
GLRInitStep step{ GLRInitStepType::TEXTURE_FINALIZE };
step.texture_finalize.texture = texture;
step.texture_finalize.loadedLevels = loadedLevels;
step.texture_finalize.genMips = genMips;
initSteps_.push_back(step);
}
void BindTexture(int slot, GLRTexture *tex) {
@ -477,18 +626,18 @@ public:
}
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
_dbg_assert_(slot < MAX_GL_TEXTURE_SLOTS);
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::BINDTEXTURE;
GLRRenderData data{ GLRRenderCommand::BINDTEXTURE };
data.texture.slot = slot;
data.texture.texture = tex;
curRenderStep_->commands.push_back(data);
}
void BindProgram(GLRProgram *program) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::BINDPROGRAM;
GLRRenderData data{ GLRRenderCommand::BINDPROGRAM };
_dbg_assert_(program != nullptr);
data.program.program = program;
curRenderStep_->commands.push_back(data);
#ifdef _DEBUG
curProgram_ = program;
#endif
@ -496,25 +645,25 @@ public:
void SetDepth(bool enabled, bool write, GLenum func) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::DEPTH;
GLRRenderData data{ GLRRenderCommand::DEPTH };
data.depth.enabled = enabled;
data.depth.write = write;
data.depth.func = func;
curRenderStep_->commands.push_back(data);
}
void SetViewport(const GLRViewport &vp) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::VIEWPORT;
GLRRenderData data{ GLRRenderCommand::VIEWPORT };
data.viewport.vp = vp;
curRenderStep_->commands.push_back(data);
}
void SetScissor(const GLRect2D &rc) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::SCISSOR;
GLRRenderData data{ GLRRenderCommand::SCISSOR };
data.scissor.rc = rc;
curRenderStep_->commands.push_back(data);
}
void SetUniformI(const GLint *loc, int count, const int *udata) {
@ -522,12 +671,11 @@ public:
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::UNIFORM4I;
data.uniform4.name = nullptr;
GLRRenderData data{ GLRRenderCommand::UNIFORM4I };
data.uniform4.loc = loc;
data.uniform4.count = count;
memcpy(data.uniform4.v, udata, sizeof(int) * count);
curRenderStep_->commands.push_back(data);
}
void SetUniformI1(const GLint *loc, int udata) {
@ -535,12 +683,11 @@ public:
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::UNIFORM4I;
data.uniform4.name = nullptr;
GLRRenderData data{ GLRRenderCommand::UNIFORM4I };
data.uniform4.loc = loc;
data.uniform4.count = 1;
memcpy(data.uniform4.v, &udata, sizeof(udata));
curRenderStep_->commands.push_back(data);
}
void SetUniformUI(const GLint *loc, int count, const uint32_t *udata) {
@ -548,12 +695,11 @@ public:
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::UNIFORM4UI;
data.uniform4.name = nullptr;
GLRRenderData data{ GLRRenderCommand::UNIFORM4UI };
data.uniform4.loc = loc;
data.uniform4.count = count;
memcpy(data.uniform4.v, udata, sizeof(uint32_t) * count);
curRenderStep_->commands.push_back(data);
}
void SetUniformUI1(const GLint *loc, uint32_t udata) {
@ -561,12 +707,11 @@ public:
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::UNIFORM4UI;
data.uniform4.name = nullptr;
GLRRenderData data{ GLRRenderCommand::UNIFORM4UI };
data.uniform4.loc = loc;
data.uniform4.count = 1;
memcpy(data.uniform4.v, &udata, sizeof(udata));
curRenderStep_->commands.push_back(data);
}
void SetUniformF(const GLint *loc, int count, const float *udata) {
@ -574,12 +719,11 @@ public:
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::UNIFORM4F;
data.uniform4.name = nullptr;
GLRRenderData data{ GLRRenderCommand::UNIFORM4F };
data.uniform4.loc = loc;
data.uniform4.count = count;
memcpy(data.uniform4.v, udata, sizeof(float) * count);
curRenderStep_->commands.push_back(data);
}
void SetUniformF1(const GLint *loc, const float udata) {
@ -587,12 +731,11 @@ public:
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::UNIFORM4F;
data.uniform4.name = nullptr;
GLRRenderData data{ GLRRenderCommand::UNIFORM4F };
data.uniform4.loc = loc;
data.uniform4.count = 1;
memcpy(data.uniform4.v, &udata, sizeof(float));
curRenderStep_->commands.push_back(data);
}
void SetUniformF(const char *name, int count, const float *udata) {
@ -600,12 +743,11 @@ public:
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::UNIFORM4F;
GLRRenderData data{ GLRRenderCommand::UNIFORM4F };
data.uniform4.name = name;
data.uniform4.loc = nullptr;
data.uniform4.count = count;
memcpy(data.uniform4.v, udata, sizeof(float) * count);
curRenderStep_->commands.push_back(data);
}
void SetUniformM4x4(const GLint *loc, const float *udata) {
@ -613,11 +755,10 @@ public:
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::UNIFORMMATRIX;
data.uniformMatrix4.name = nullptr;
GLRRenderData data{ GLRRenderCommand::UNIFORMMATRIX };
data.uniformMatrix4.loc = loc;
memcpy(data.uniformMatrix4.m, udata, sizeof(float) * 16);
curRenderStep_->commands.push_back(data);
}
void SetUniformM4x4Stereo(const char *name, const GLint *loc, const float *left, const float *right) {
@ -625,13 +766,13 @@ public:
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::UNIFORMSTEREOMATRIX;
GLRRenderData data{ GLRRenderCommand::UNIFORMSTEREOMATRIX };
data.uniformStereoMatrix4.name = name;
data.uniformStereoMatrix4.loc = loc;
data.uniformStereoMatrix4.mData = new float[32];
memcpy(&data.uniformStereoMatrix4.mData[0], left, sizeof(float) * 16);
memcpy(&data.uniformStereoMatrix4.mData[16], right, sizeof(float) * 16);
curRenderStep_->commands.push_back(data);
}
void SetUniformM4x4(const char *name, const float *udata) {
@ -639,19 +780,17 @@ public:
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::UNIFORMMATRIX;
GLRRenderData data{ GLRRenderCommand::UNIFORMMATRIX };
data.uniformMatrix4.name = name;
data.uniformMatrix4.loc = nullptr;
memcpy(data.uniformMatrix4.m, udata, sizeof(float) * 16);
curRenderStep_->commands.push_back(data);
}
void SetBlendAndMask(int colorMask, bool blendEnabled, GLenum srcColor, GLenum dstColor, GLenum srcAlpha, GLenum dstAlpha, GLenum funcColor, GLenum funcAlpha) {
// Make this one only a non-debug _assert_, since it often comes first.
// Lets us collect info about this potential crash through assert extra data.
_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::BLEND;
GLRRenderData data{ GLRRenderCommand::BLEND };
data.blend.mask = colorMask;
data.blend.enabled = blendEnabled;
data.blend.srcColor = srcColor;
@ -660,88 +799,96 @@ public:
data.blend.dstAlpha = dstAlpha;
data.blend.funcColor = funcColor;
data.blend.funcAlpha = funcAlpha;
curRenderStep_->commands.push_back(data);
}
void SetNoBlendAndMask(int colorMask) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::BLEND;
GLRRenderData data{ GLRRenderCommand::BLEND };
data.blend.mask = colorMask;
data.blend.enabled = false;
curRenderStep_->commands.push_back(data);
}
#ifndef USING_GLES2
void SetLogicOp(bool enabled, GLenum logicOp) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::LOGICOP;
GLRRenderData data{ GLRRenderCommand::LOGICOP };
data.logic.enabled = enabled;
data.logic.logicOp = logicOp;
curRenderStep_->commands.push_back(data);
}
#endif
void SetStencil(bool enabled, GLenum func, uint8_t refValue, uint8_t compareMask, uint8_t writeMask, GLenum sFail, GLenum zFail, GLenum pass) {
void SetStencilFunc(bool enabled, GLenum func, uint8_t refValue, uint8_t compareMask) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::STENCIL;
data.stencil.enabled = enabled;
data.stencil.func = func;
data.stencil.ref = refValue;
data.stencil.compareMask = compareMask;
data.stencil.writeMask = writeMask;
data.stencil.sFail = sFail;
data.stencil.zFail = zFail;
data.stencil.pass = pass;
GLRRenderData data{ GLRRenderCommand::STENCILFUNC };
data.stencilFunc.enabled = enabled;
data.stencilFunc.func = func;
data.stencilFunc.ref = refValue;
data.stencilFunc.compareMask = compareMask;
curRenderStep_->commands.push_back(data);
}
void SetStencilOp(uint8_t writeMask, GLenum sFail, GLenum zFail, GLenum pass) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData data{ GLRRenderCommand::STENCILOP };
data.stencilOp.writeMask = writeMask;
data.stencilOp.sFail = sFail;
data.stencilOp.zFail = zFail;
data.stencilOp.pass = pass;
curRenderStep_->commands.push_back(data);
}
void SetStencilDisabled() {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::STENCIL;
data.stencil.enabled = false;
GLRRenderData data;
data.cmd = GLRRenderCommand::STENCILFUNC;
data.stencilFunc.enabled = false;
curRenderStep_->commands.push_back(data);
}
void SetBlendFactor(const float color[4]) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::BLENDCOLOR;
GLRRenderData data{ GLRRenderCommand::BLENDCOLOR };
CopyFloat4(data.blendColor.color, color);
curRenderStep_->commands.push_back(data);
}
void SetRaster(GLboolean cullEnable, GLenum frontFace, GLenum cullFace, GLboolean ditherEnable, GLboolean depthClamp) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::RASTER;
GLRRenderData data{ GLRRenderCommand::RASTER };
data.raster.cullEnable = cullEnable;
data.raster.frontFace = frontFace;
data.raster.cullFace = cullFace;
data.raster.ditherEnable = ditherEnable;
data.raster.depthClampEnable = depthClamp;
curRenderStep_->commands.push_back(data);
}
// Modifies the current texture as per GL specs, not global state.
void SetTextureSampler(int slot, GLenum wrapS, GLenum wrapT, GLenum magFilter, GLenum minFilter, float anisotropy) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
_dbg_assert_(slot < MAX_GL_TEXTURE_SLOTS);
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::TEXTURESAMPLER;
GLRRenderData data{ GLRRenderCommand::TEXTURESAMPLER };
data.textureSampler.slot = slot;
data.textureSampler.wrapS = wrapS;
data.textureSampler.wrapT = wrapT;
data.textureSampler.magFilter = magFilter;
data.textureSampler.minFilter = minFilter;
data.textureSampler.anisotropy = anisotropy;
curRenderStep_->commands.push_back(data);
}
void SetTextureLod(int slot, float minLod, float maxLod, float lodBias) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
_dbg_assert_(slot < MAX_GL_TEXTURE_SLOTS);
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::TEXTURELOD;
GLRRenderData data{ GLRRenderCommand::TEXTURELOD};
data.textureLod.slot = slot;
data.textureLod.minLod = minLod;
data.textureLod.maxLod = maxLod;
data.textureLod.lodBias = lodBias;
curRenderStep_->commands.push_back(data);
}
// If scissorW == 0, no scissor is applied (the whole render target is cleared).
@ -749,8 +896,7 @@ public:
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
if (!clearMask)
return;
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::CLEAR;
GLRRenderData data{ GLRRenderCommand::CLEAR };
data.clear.clearMask = clearMask;
data.clear.clearColor = clearColor;
data.clear.clearZ = clearZ;
@ -760,36 +906,38 @@ public:
data.clear.scissorY = scissorY;
data.clear.scissorW = scissorW;
data.clear.scissorH = scissorH;
curRenderStep_->commands.push_back(data);
}
void Draw(GLRInputLayout *inputLayout, GLRBuffer *vertexBuffer, uint32_t vertexOffset, GLenum mode, int first, int count) {
_dbg_assert_(vertexBuffer && curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::DRAW;
void Draw(GLRInputLayout *inputLayout, GLRBuffer *buffer, size_t offset, GLenum mode, int first, int count) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData data{ GLRRenderCommand::DRAW };
data.draw.inputLayout = inputLayout;
data.draw.vertexOffset = vertexOffset;
data.draw.vertexBuffer = vertexBuffer;
data.draw.offset = offset;
data.draw.buffer = buffer;
data.draw.indexBuffer = nullptr;
data.draw.mode = mode;
data.draw.first = first;
data.draw.count = count;
data.draw.indexType = 0;
curRenderStep_->commands.push_back(data);
curRenderStep_->render.numDraws++;
}
// Would really love to have a basevertex parameter, but impossible in unextended GLES, without glDrawElementsBaseVertex, unfortunately.
void DrawIndexed(GLRInputLayout *inputLayout, GLRBuffer *vertexBuffer, uint32_t vertexOffset, GLRBuffer *indexBuffer, uint32_t indexOffset, GLenum mode, int count, GLenum indexType, int instances = 1) {
_dbg_assert_(vertexBuffer && indexBuffer && curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::DRAW;
void DrawIndexed(GLRInputLayout *inputLayout, GLRBuffer *buffer, size_t offset, GLRBuffer *indexBuffer, GLenum mode, int count, GLenum indexType, void *indices, int instances = 1) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData data{ GLRRenderCommand::DRAW };
data.draw.inputLayout = inputLayout;
data.draw.vertexOffset = vertexOffset;
data.draw.vertexBuffer = vertexBuffer;
data.draw.offset = offset;
data.draw.buffer = buffer;
data.draw.indexBuffer = indexBuffer;
data.draw.indexOffset = indexOffset;
data.draw.mode = mode;
data.draw.count = count;
data.draw.indexType = indexType;
data.draw.indices = indices;
data.draw.instances = instances;
curRenderStep_->commands.push_back(data);
curRenderStep_->render.numDraws++;
}
enum { MAX_INFLIGHT_FRAMES = 3 };
@ -820,8 +968,9 @@ public:
_dbg_assert_(foundCount == 1);
}
void SetSwapFunction(std::function<void()> swapFunction) {
void SetSwapFunction(std::function<void()> swapFunction, bool retainControl) {
swapFunction_ = swapFunction;
retainControl_ = retainControl;
}
void SetSwapIntervalFunction(std::function<void(int)> swapIntervalFunction) {
@ -871,7 +1020,7 @@ private:
GLRStep *curRenderStep_ = nullptr;
std::vector<GLRStep *> steps_;
FastVec<GLRInitStep> initSteps_;
std::vector<GLRInitStep> initSteps_;
// Execution time state
bool run_ = true;
@ -883,7 +1032,7 @@ private:
std::mutex pushMutex_;
std::condition_variable pushCondVar_;
std::queue<GLRRenderThreadTask *> renderThreadQueue_;
std::queue<GLRRenderThreadTask> renderThreadQueue_;
// For readbacks and other reasons we need to sync with the render thread.
std::mutex syncMutex_;
@ -900,6 +1049,7 @@ private:
std::function<void()> swapFunction_;
std::function<void(int)> swapIntervalFunction_;
bool retainControl_ = false;
GLBufferStrategy bufferStrategy_ = GLBufferStrategy::SUBDATA;
int inflightFrames_ = MAX_INFLIGHT_FRAMES;
@ -916,9 +1066,5 @@ private:
#endif
Draw::DeviceCaps caps_{};
std::string profilePassesString_;
InvalidationCallback invalidationCallback_;
uint64_t frameIdGen_ = FRAME_TIME_HISTORY_LENGTH;
HistoryBuffer<FrameTimeData, FRAME_TIME_HISTORY_LENGTH> &frameTimeHistory_;
};

View file

@ -180,9 +180,8 @@ public:
void Apply(GLRenderManager *render, uint8_t stencilRef, uint8_t stencilWriteMask, uint8_t stencilCompareMask) {
render->SetDepth(depthTestEnabled, depthWriteEnabled, depthComp);
render->SetStencil(
stencilEnabled, stencilCompareOp, stencilRef, stencilCompareMask,
stencilWriteMask, stencilFail, stencilZFail, stencilPass);
render->SetStencilFunc(stencilEnabled, stencilCompareOp, stencilRef, stencilCompareMask);
render->SetStencilOp(stencilWriteMask, stencilFail, stencilZFail, stencilPass);
}
};
@ -322,13 +321,16 @@ class OpenGLTexture;
class OpenGLContext : public DrawContext {
public:
OpenGLContext(bool canChangeSwapInterval);
OpenGLContext();
~OpenGLContext();
void SetTargetSize(int w, int h) override {
DrawContext::SetTargetSize(w, h);
renderManager_.Resize(w, h);
}
void SetDebugFlags(DebugFlags flags) override {
debugFlags_ = flags;
}
const DeviceCaps &GetDeviceCaps() const override {
return caps_;
@ -347,6 +349,11 @@ public:
renderManager_.SetErrorCallback(callback, userdata);
}
PresentationMode GetPresentationMode() const override {
// TODO: Fix. Not yet used.
return PresentationMode::FIFO;
}
DepthStencilState *CreateDepthStencilState(const DepthStencilStateDesc &desc) override;
BlendState *CreateBlendState(const BlendStateDesc &desc) override;
SamplerState *CreateSamplerState(const SamplerStateDesc &desc) override;
@ -359,16 +366,10 @@ public:
Buffer *CreateBuffer(size_t size, uint32_t usageFlags) override;
Framebuffer *CreateFramebuffer(const FramebufferDesc &desc) override;
void BeginFrame(DebugFlags debugFlags) override;
void BeginFrame() override;
void EndFrame() override;
void Present(PresentMode mode, int vblanks) override;
int GetFrameCount() override {
return frameCount_;
}
void UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t offset, size_t size, UpdateBufferFlags flags) override;
void UpdateTextureLevels(Texture *texture, const uint8_t **data, TextureCallback initDataCallback, int numLevels) override;
void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits, const char *tag) override;
bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter, const char *tag) override;
@ -406,11 +407,12 @@ public:
stencilWriteMask_ = writeMask;
stencilCompareMask_ = compareMask;
// Do we need to update on the fly here?
renderManager_.SetStencil(
renderManager_.SetStencilFunc(
curPipeline_->depthStencil->stencilEnabled,
curPipeline_->depthStencil->stencilCompareOp,
refValue,
compareMask,
compareMask);
renderManager_.SetStencilOp(
writeMask,
curPipeline_->depthStencil->stencilFail,
curPipeline_->depthStencil->stencilZFail,
@ -421,9 +423,12 @@ public:
void BindNativeTexture(int sampler, void *nativeTexture) override;
void BindPipeline(Pipeline *pipeline) override;
void BindVertexBuffer(Buffer *buffer, int offset) override {
curVBuffer_ = (OpenGLBuffer *)buffer;
curVBufferOffset_ = offset;
void BindVertexBuffers(int start, int count, Buffer **buffers, const int *offsets) override {
_assert_(start + count <= ARRAY_SIZE(curVBuffers_));
for (int i = 0; i < count; i++) {
curVBuffers_[i + start] = (OpenGLBuffer *)buffers[i];
curVBufferOffsets_[i + start] = offsets ? offsets[i] : 0;
}
}
void BindIndexBuffer(Buffer *indexBuffer, int offset) override {
curIBuffer_ = (OpenGLBuffer *)indexBuffer;
@ -460,7 +465,6 @@ public:
case GPUVendor::VENDOR_BROADCOM: return "VENDOR_BROADCOM";
case GPUVendor::VENDOR_VIVANTE: return "VENDOR_VIVANTE";
case GPUVendor::VENDOR_APPLE: return "VENDOR_APPLE";
case GPUVendor::VENDOR_MESA: return "VENDOR_MESA";
case GPUVendor::VENDOR_UNKNOWN:
default:
return "VENDOR_UNKNOWN";
@ -483,15 +487,10 @@ public:
renderManager_.SetInvalidationCallback(callback);
}
std::string GetGpuProfileString() const override {
return renderManager_.GetGpuProfileString();
}
private:
void ApplySamplers();
GLRenderManager renderManager_;
int frameCount_ = 0;
DeviceCaps caps_{};
@ -502,9 +501,9 @@ private:
const GLRTexture *boundTextures_[MAX_TEXTURE_SLOTS]{};
AutoRef<OpenGLPipeline> curPipeline_;
AutoRef<OpenGLBuffer> curVBuffer_;
AutoRef<OpenGLBuffer> curVBuffers_[4]{};
int curVBufferOffsets_[4]{};
AutoRef<OpenGLBuffer> curIBuffer_;
int curVBufferOffset_ = 0;
int curIBufferOffset_ = 0;
AutoRef<Framebuffer> curRenderTarget_;
@ -518,6 +517,8 @@ private:
GLPushBuffer *push;
};
FrameData frameData_[GLRenderManager::MAX_INFLIGHT_FRAMES]{};
DebugFlags debugFlags_ = DebugFlags::NONE;
};
static constexpr int MakeIntelSimpleVer(int v1, int v2, int v3) {
@ -540,7 +541,7 @@ static bool HasIntelDualSrcBug(const int versions[4]) {
}
}
OpenGLContext::OpenGLContext(bool canChangeSwapInterval) : renderManager_(frameTimeHistory_) {
OpenGLContext::OpenGLContext() {
if (gl_extensions.IsGLES) {
if (gl_extensions.OES_packed_depth_stencil || gl_extensions.OES_depth24) {
caps_.preferredDepthBufferFormat = DataFormat::D24_S8;
@ -564,7 +565,6 @@ OpenGLContext::OpenGLContext(bool canChangeSwapInterval) : renderManager_(frameT
caps_.textureDepthSupported = true;
}
caps_.setMaxFrameLatencySupported = true;
caps_.dualSourceBlend = gl_extensions.ARB_blend_func_extended || gl_extensions.EXT_blend_func_extended;
caps_.anisoSupported = gl_extensions.EXT_texture_filter_anisotropic;
caps_.framebufferCopySupported = gl_extensions.OES_copy_image || gl_extensions.NV_copy_image || gl_extensions.EXT_copy_image || gl_extensions.ARB_copy_image;
@ -613,7 +613,6 @@ OpenGLContext::OpenGLContext(bool canChangeSwapInterval) : renderManager_(frameT
case GPU_VENDOR_IMGTEC: caps_.vendor = GPUVendor::VENDOR_IMGTEC; break;
case GPU_VENDOR_VIVANTE: caps_.vendor = GPUVendor::VENDOR_VIVANTE; break;
case GPU_VENDOR_APPLE: caps_.vendor = GPUVendor::VENDOR_APPLE; break;
case GPU_VENDOR_MESA: caps_.vendor = GPUVendor::VENDOR_MESA; break;
case GPU_VENDOR_UNKNOWN:
default:
caps_.vendor = GPUVendor::VENDOR_UNKNOWN;
@ -632,7 +631,7 @@ OpenGLContext::OpenGLContext(bool canChangeSwapInterval) : renderManager_(frameT
caps_.isTilingGPU = gl_extensions.IsGLES && caps_.vendor != GPUVendor::VENDOR_NVIDIA && caps_.vendor != GPUVendor::VENDOR_INTEL;
for (int i = 0; i < GLRenderManager::MAX_INFLIGHT_FRAMES; i++) {
frameData_[i].push = renderManager_.CreatePushBuffer(i, GL_ARRAY_BUFFER, 64 * 1024, "thin3d_vbuf");
frameData_[i].push = renderManager_.CreatePushBuffer(i, GL_ARRAY_BUFFER, 64 * 1024);
}
if (!gl_extensions.VersionGEThan(3, 0, 0)) {
@ -772,16 +771,6 @@ OpenGLContext::OpenGLContext(bool canChangeSwapInterval) : renderManager_(frameT
}
}
if (canChangeSwapInterval) {
caps_.presentInstantModeChange = true;
caps_.presentMaxInterval = 4;
caps_.presentModesSupported = PresentMode::FIFO | PresentMode::IMMEDIATE;
} else {
caps_.presentInstantModeChange = false;
caps_.presentModesSupported = PresentMode::FIFO;
caps_.presentMaxInterval = 1;
}
renderManager_.SetDeviceCaps(caps_);
}
@ -793,8 +782,8 @@ OpenGLContext::~OpenGLContext() {
}
}
void OpenGLContext::BeginFrame(DebugFlags debugFlags) {
renderManager_.BeginFrame(debugFlags & DebugFlags::PROFILE_TIMESTAMPS);
void OpenGLContext::BeginFrame() {
renderManager_.BeginFrame(debugFlags_ & DebugFlags::PROFILE_TIMESTAMPS);
FrameData &frameData = frameData_[renderManager_.GetCurFrame()];
renderManager_.BeginPushBuffer(frameData.push);
}
@ -803,12 +792,8 @@ void OpenGLContext::EndFrame() {
FrameData &frameData = frameData_[renderManager_.GetCurFrame()];
renderManager_.EndPushBuffer(frameData.push); // upload the data!
renderManager_.Finish();
Invalidate(InvalidationFlags::CACHED_RENDER_STATE);
}
void OpenGLContext::Present(PresentMode presentMode, int vblanks) {
renderManager_.Present();
frameCount_++;
Invalidate(InvalidationFlags::CACHED_RENDER_STATE);
}
void OpenGLContext::Invalidate(InvalidationFlags flags) {
@ -830,7 +815,7 @@ InputLayout *OpenGLContext::CreateInputLayout(const InputLayoutDesc &desc) {
return fmt;
}
static GLuint TypeToTarget(TextureType type) {
GLuint TypeToTarget(TextureType type) {
switch (type) {
#ifndef USING_GLES2
case TextureType::LINEAR1D: return GL_TEXTURE_1D;
@ -868,33 +853,25 @@ public:
return tex_;
}
void UpdateTextureLevels(GLRenderManager *render, const uint8_t *const *data, int numLevels, TextureCallback initDataCallback);
private:
void SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data, TextureCallback initDataCallback);
void SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data, TextureCallback callback);
GLRenderManager *render_;
GLRTexture *tex_;
DataFormat format_;
TextureType type_;
int mipLevels_;
bool generateMips_; // Generate mips requested
bool generatedMips_; // Has generated mips
bool generatedMips_;
};
OpenGLTexture::OpenGLTexture(GLRenderManager *render, const TextureDesc &desc) : render_(render) {
_dbg_assert_(desc.format != Draw::DataFormat::UNDEFINED);
_dbg_assert_(desc.width > 0 && desc.height > 0 && desc.depth > 0);
_dbg_assert_(desc.type != Draw::TextureType::UNKNOWN);
generatedMips_ = false;
generateMips_ = desc.generateMips;
width_ = desc.width;
height_ = desc.height;
depth_ = desc.depth;
format_ = desc.format;
type_ = desc.type;
GLenum target = TypeToTarget(desc.type);
tex_ = render->CreateTexture(target, desc.width, desc.height, 1, desc.mipLevels);
@ -902,25 +879,21 @@ OpenGLTexture::OpenGLTexture(GLRenderManager *render, const TextureDesc &desc) :
if (desc.initData.empty())
return;
UpdateTextureLevels(render, desc.initData.data(), (int)desc.initData.size(), desc.initDataCallback);
}
void OpenGLTexture::UpdateTextureLevels(GLRenderManager *render, const uint8_t * const *data, int numLevels, TextureCallback initDataCallback) {
int level = 0;
int width = width_;
int height = height_;
int depth = depth_;
for (int i = 0; i < numLevels; i++) {
SetImageData(0, 0, 0, width, height, depth, level, 0, data[i], initDataCallback);
for (auto data : desc.initData) {
SetImageData(0, 0, 0, width, height, depth, level, 0, data, desc.initDataCallback);
width = (width + 1) / 2;
height = (height + 1) / 2;
depth = (depth + 1) / 2;
level++;
}
mipLevels_ = generateMips_ ? mipLevels_ : level;
mipLevels_ = desc.generateMips ? desc.mipLevels : level;
bool genMips = false;
if (numLevels < mipLevels_ && generateMips_) {
if ((int)desc.initData.size() < desc.mipLevels && desc.generateMips) {
// Assumes the texture is bound for editing
genMips = true;
generatedMips_ = true;
@ -931,7 +904,7 @@ void OpenGLTexture::UpdateTextureLevels(GLRenderManager *render, const uint8_t *
OpenGLTexture::~OpenGLTexture() {
if (tex_) {
render_->DeleteTexture(tex_);
tex_ = nullptr;
tex_ = 0;
generatedMips_ = false;
}
}
@ -950,7 +923,7 @@ public:
GLRFramebuffer *framebuffer_ = nullptr;
};
void OpenGLTexture::SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data, TextureCallback initDataCallback) {
void OpenGLTexture::SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data, TextureCallback callback) {
if ((width != width_ || height != height_ || depth != depth_) && level == 0) {
// When switching to texStorage we need to handle this correctly.
width_ = width;
@ -966,8 +939,8 @@ void OpenGLTexture::SetImageData(int x, int y, int z, int width, int height, int
uint8_t *texData = new uint8_t[(size_t)(width * height * depth * alignment)];
bool texDataPopulated = false;
if (initDataCallback) {
texDataPopulated = initDataCallback(texData, data, width, height, depth, width * (int)alignment, height * width * (int)alignment);
if (callback) {
texDataPopulated = callback(texData, data, width, height, depth, width * (int)alignment, height * width * (int)alignment);
}
if (texDataPopulated) {
if (format_ == DataFormat::A1R5G5B5_UNORM_PACK16) {
@ -1048,11 +1021,6 @@ Texture *OpenGLContext::CreateTexture(const TextureDesc &desc) {
return new OpenGLTexture(&renderManager_, desc);
}
void OpenGLContext::UpdateTextureLevels(Texture *texture, const uint8_t **data, TextureCallback initDataCallback, int numLevels) {
OpenGLTexture *tex = (OpenGLTexture *)texture;
tex->UpdateTextureLevels(&renderManager_, data, numLevels, initDataCallback);
}
DepthStencilState *OpenGLContext::CreateDepthStencilState(const DepthStencilStateDesc &desc) {
OpenGLDepthStencilState *ds = new OpenGLDepthStencilState();
ds->depthTestEnabled = desc.depthTestEnabled;
@ -1367,35 +1335,31 @@ void OpenGLContext::UpdateDynamicUniformBuffer(const void *ub, size_t size) {
}
void OpenGLContext::Draw(int vertexCount, int offset) {
_dbg_assert_msg_(curVBuffer_ != nullptr, "Can't call Draw without a vertex buffer");
_dbg_assert_msg_(curVBuffers_[0] != nullptr, "Can't call Draw without a vertex buffer");
ApplySamplers();
_assert_(curPipeline_->inputLayout);
renderManager_.Draw(curPipeline_->inputLayout->inputLayout_, curVBuffer_->buffer_, curVBufferOffset_, curPipeline_->prim, offset, vertexCount);
renderManager_.Draw(curPipeline_->inputLayout->inputLayout_, curVBuffers_[0]->buffer_, curVBufferOffsets_[0], curPipeline_->prim, offset, vertexCount);
}
void OpenGLContext::DrawIndexed(int vertexCount, int offset) {
_dbg_assert_msg_(curVBuffer_ != nullptr, "Can't call DrawIndexed without a vertex buffer");
_dbg_assert_msg_(curVBuffers_[0] != nullptr, "Can't call DrawIndexed without a vertex buffer");
_dbg_assert_msg_(curIBuffer_ != nullptr, "Can't call DrawIndexed without an index buffer");
ApplySamplers();
_assert_(curPipeline_->inputLayout);
renderManager_.DrawIndexed(
curPipeline_->inputLayout->inputLayout_,
curVBuffer_->buffer_, curVBufferOffset_,
curIBuffer_->buffer_, curIBufferOffset_ + offset * sizeof(uint32_t),
curPipeline_->prim, vertexCount, GL_UNSIGNED_SHORT);
curPipeline_->inputLayout->inputLayout_, curVBuffers_[0]->buffer_, curVBufferOffsets_[0], curIBuffer_->buffer_,
curPipeline_->prim, vertexCount, GL_UNSIGNED_SHORT, (void *)((intptr_t)curIBufferOffset_ + offset * sizeof(uint32_t)));
}
void OpenGLContext::DrawUP(const void *vdata, int vertexCount) {
_assert_(curPipeline_->inputLayout != nullptr);
int stride = curPipeline_->inputLayout->stride;
uint32_t dataSize = stride * vertexCount;
size_t dataSize = stride * vertexCount;
FrameData &frameData = frameData_[renderManager_.GetCurFrame()];
GLRBuffer *buf;
uint32_t offset;
uint8_t *dest = frameData.push->Allocate(dataSize, 4, &buf, &offset);
memcpy(dest, vdata, dataSize);
size_t offset = frameData.push->Push(vdata, dataSize, &buf);
ApplySamplers();
_assert_(curPipeline_->inputLayout);
@ -1418,8 +1382,8 @@ void OpenGLContext::Clear(int mask, uint32_t colorval, float depthVal, int stenc
renderManager_.Clear(colorval, depthVal, stencilVal, glMask, 0xF, 0, 0, targetWidth_, targetHeight_);
}
DrawContext *T3DCreateGLContext(bool canChangeSwapInterval) {
return new OpenGLContext(canChangeSwapInterval);
DrawContext *T3DCreateGLContext() {
return new OpenGLContext();
}
OpenGLInputLayout::~OpenGLInputLayout() {
@ -1429,7 +1393,8 @@ OpenGLInputLayout::~OpenGLInputLayout() {
void OpenGLInputLayout::Compile(const InputLayoutDesc &desc) {
// TODO: This is only accurate if there's only one stream. But whatever, for now we
// never use multiple streams anyway.
stride = desc.stride;
_dbg_assert_(desc.bindings.size() == 1);
stride = (GLsizei)desc.bindings[0].stride;
std::vector<GLRInputLayout::Entry> entries;
for (auto &attr : desc.attributes) {
@ -1478,7 +1443,7 @@ Framebuffer *OpenGLContext::CreateFramebuffer(const FramebufferDesc &desc) {
// TODO: Support multiview later. (It's our only use case for multi layers).
_dbg_assert_(desc.numLayers == 1);
GLRFramebuffer *framebuffer = renderManager_.CreateFramebuffer(desc.width, desc.height, desc.z_stencil, desc.tag);
GLRFramebuffer *framebuffer = renderManager_.CreateFramebuffer(desc.width, desc.height, desc.z_stencil);
OpenGLFramebuffer *fbo = new OpenGLFramebuffer(&renderManager_, framebuffer);
return fbo;
}

View file

@ -63,10 +63,15 @@ static EShLanguage GetShLanguageFromStage(const ShaderStage stage) {
}
void ShaderTranslationInit() {
// TODO: We have TLS issues on UWP
#if !PPSSPP_PLATFORM(UWP)
glslang::InitializeProcess();
#endif
}
void ShaderTranslationShutdown() {
#if !PPSSPP_PLATFORM(UWP)
glslang::FinalizeProcess();
#endif
}
struct Builtin {
@ -224,6 +229,11 @@ bool TranslateShader(std::string *dest, ShaderLanguage destLang, const ShaderLan
return result;
}
#if PPSSPP_PLATFORM(UWP)
*errorMessage = "No shader translation available (UWP)";
return false;
#endif
errorMessage->clear();
glslang::TProgram program;

View file

@ -57,7 +57,6 @@ std::string VulkanVendorString(uint32_t vendorId) {
case VULKAN_VENDOR_QUALCOMM: return "Qualcomm";
case VULKAN_VENDOR_IMGTEC: return "Imagination";
case VULKAN_VENDOR_APPLE: return "Apple";
case VULKAN_VENDOR_MESA: return "Mesa";
default:
return StringFromFormat("%08x", vendorId);
}
@ -550,13 +549,12 @@ void VulkanContext::ChooseDevice(int physical_device) {
vkGetPhysicalDeviceQueueFamilyProperties(physical_devices_[physical_device_], &queue_count, queueFamilyProperties_.data());
_dbg_assert_(queue_count >= 1);
// Detect preferred depth/stencil formats, in this order. All supported devices will support at least one of these.
// Detect preferred formats, in this order.
static const VkFormat depthStencilFormats[] = {
VK_FORMAT_D24_UNORM_S8_UINT,
VK_FORMAT_D32_SFLOAT_S8_UINT,
VK_FORMAT_D16_UNORM_S8_UINT,
};
deviceInfo_.preferredDepthStencilFormat = VK_FORMAT_UNDEFINED;
for (size_t i = 0; i < ARRAY_SIZE(depthStencilFormats); i++) {
VkFormatProperties props;
@ -575,8 +573,7 @@ void VulkanContext::ChooseDevice(int physical_device) {
deviceInfo_.canBlitToPreferredDepthStencilFormat = true;
}
// This is as good a place as any to do this. Though, we don't use this much anymore after we added
// support for VMA.
// This is as good a place as any to do this.
vkGetPhysicalDeviceMemoryProperties(physical_devices_[physical_device_], &memory_properties_);
INFO_LOG(G3D, "Memory Types (%d):", memory_properties_.memoryTypeCount);
for (int i = 0; i < (int)memory_properties_.memoryTypeCount; i++) {
@ -595,24 +592,31 @@ void VulkanContext::ChooseDevice(int physical_device) {
VkPhysicalDeviceFeatures2 features2{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR};
// Add to chain even if not supported, GetPhysicalDeviceFeatures is supposed to ignore unknown structs.
VkPhysicalDeviceMultiviewFeatures multiViewFeatures{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES };
VkPhysicalDevicePresentWaitFeaturesKHR presentWaitFeatures{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENT_WAIT_FEATURES_KHR };
VkPhysicalDevicePresentIdFeaturesKHR presentIdFeatures{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENT_ID_FEATURES_KHR };
features2.pNext = &multiViewFeatures;
multiViewFeatures.pNext = &presentWaitFeatures;
presentWaitFeatures.pNext = &presentIdFeatures;
presentIdFeatures.pNext = nullptr;
vkGetPhysicalDeviceFeatures2KHR(physical_devices_[physical_device_], &features2);
deviceFeatures_.available.standard = features2.features;
deviceFeatures_.available.multiview = multiViewFeatures;
deviceFeatures_.available.presentWait = presentWaitFeatures;
deviceFeatures_.available.presentId = presentIdFeatures;
} else {
vkGetPhysicalDeviceFeatures(physical_devices_[physical_device_], &deviceFeatures_.available.standard);
deviceFeatures_.available.multiview = {};
}
deviceFeatures_.enabled = {};
// Enable a few safe ones if they are available.
deviceFeatures_.enabled.standard.dualSrcBlend = deviceFeatures_.available.standard.dualSrcBlend;
deviceFeatures_.enabled.standard.logicOp = deviceFeatures_.available.standard.logicOp;
deviceFeatures_.enabled.standard.depthClamp = deviceFeatures_.available.standard.depthClamp;
deviceFeatures_.enabled.standard.depthBounds = deviceFeatures_.available.standard.depthBounds;
deviceFeatures_.enabled.standard.samplerAnisotropy = deviceFeatures_.available.standard.samplerAnisotropy;
deviceFeatures_.enabled.standard.shaderClipDistance = deviceFeatures_.available.standard.shaderClipDistance;
deviceFeatures_.enabled.standard.shaderCullDistance = deviceFeatures_.available.standard.shaderCullDistance;
deviceFeatures_.enabled.standard.geometryShader = deviceFeatures_.available.standard.geometryShader;
deviceFeatures_.enabled.standard.sampleRateShading = deviceFeatures_.available.standard.sampleRateShading;
deviceFeatures_.enabled.multiview = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES };
deviceFeatures_.enabled.multiview.multiview = deviceFeatures_.available.multiview.multiview;
// deviceFeatures_.enabled.multiview.multiviewGeometryShader = deviceFeatures_.available.multiview.multiviewGeometryShader;
GetDeviceLayerExtensionList(nullptr, device_extension_properties_);
device_extensions_enabled_.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
@ -644,8 +648,8 @@ VkResult VulkanContext::CreateDevice() {
return VK_ERROR_INITIALIZATION_FAILED;
}
VkDeviceQueueCreateInfo queue_info{ VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO };
float queue_priorities[1] = { 1.0f };
VkDeviceQueueCreateInfo queue_info{VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO};
float queue_priorities[1] = {1.0f};
queue_info.queueCount = 1;
queue_info.pQueuePriorities = queue_priorities;
bool found = false;
@ -677,41 +681,6 @@ VkResult VulkanContext::CreateDevice() {
extensionsLookup_.EXT_fragment_shader_interlock = EnableDeviceExtension(VK_EXT_FRAGMENT_SHADER_INTERLOCK_EXTENSION_NAME);
extensionsLookup_.ARM_rasterization_order_attachment_access = EnableDeviceExtension(VK_ARM_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME);
#if !PPSSPP_PLATFORM(MAC) && !PPSSPP_PLATFORM(IOS)
extensionsLookup_.GOOGLE_display_timing = EnableDeviceExtension(VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME);
#endif
if (!extensionsLookup_.GOOGLE_display_timing) {
extensionsLookup_.KHR_present_id = EnableDeviceExtension(VK_KHR_PRESENT_ID_EXTENSION_NAME);
extensionsLookup_.KHR_present_wait = EnableDeviceExtension(VK_KHR_PRESENT_WAIT_EXTENSION_NAME);
}
deviceFeatures_.enabled = {};
// Enable a few safe ones if they are available.
deviceFeatures_.enabled.standard.dualSrcBlend = deviceFeatures_.available.standard.dualSrcBlend;
deviceFeatures_.enabled.standard.logicOp = deviceFeatures_.available.standard.logicOp;
deviceFeatures_.enabled.standard.depthClamp = deviceFeatures_.available.standard.depthClamp;
deviceFeatures_.enabled.standard.depthBounds = deviceFeatures_.available.standard.depthBounds;
deviceFeatures_.enabled.standard.samplerAnisotropy = deviceFeatures_.available.standard.samplerAnisotropy;
deviceFeatures_.enabled.standard.shaderClipDistance = deviceFeatures_.available.standard.shaderClipDistance;
deviceFeatures_.enabled.standard.shaderCullDistance = deviceFeatures_.available.standard.shaderCullDistance;
deviceFeatures_.enabled.standard.geometryShader = deviceFeatures_.available.standard.geometryShader;
deviceFeatures_.enabled.standard.sampleRateShading = deviceFeatures_.available.standard.sampleRateShading;
deviceFeatures_.enabled.multiview = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES };
if (extensionsLookup_.KHR_multiview) {
deviceFeatures_.enabled.multiview.multiview = deviceFeatures_.available.multiview.multiview;
}
// Strangely, on Intel, it reports these as available even though the extension isn't in the list.
deviceFeatures_.enabled.presentId = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENT_ID_FEATURES_KHR };
if (extensionsLookup_.KHR_present_id) {
deviceFeatures_.enabled.presentId.presentId = deviceFeatures_.available.presentId.presentId;
}
deviceFeatures_.enabled.presentWait = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENT_WAIT_FEATURES_KHR };
if (extensionsLookup_.KHR_present_wait) {
deviceFeatures_.enabled.presentWait.presentWait = deviceFeatures_.available.presentWait.presentWait;
}
// deviceFeatures_.enabled.multiview.multiviewGeometryShader = deviceFeatures_.available.multiview.multiviewGeometryShader;
VkPhysicalDeviceFeatures2 features2{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2 };
VkDeviceCreateInfo device_info{ VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO };
@ -726,9 +695,6 @@ VkResult VulkanContext::CreateDevice() {
device_info.pNext = &features2;
features2.features = deviceFeatures_.enabled.standard;
features2.pNext = &deviceFeatures_.enabled.multiview;
deviceFeatures_.enabled.multiview.pNext = &deviceFeatures_.enabled.presentWait;
deviceFeatures_.enabled.presentWait.pNext = &deviceFeatures_.enabled.presentId;
deviceFeatures_.enabled.presentId.pNext = nullptr;
} else {
device_info.pEnabledFeatures = &deviceFeatures_.enabled.standard;
}
@ -1290,9 +1256,9 @@ bool VulkanContext::InitSwapchain() {
for (size_t i = 0; i < presentModeCount; i++) {
bool match = false;
match = match || ((flags_ & VULKAN_FLAG_PRESENT_MAILBOX) && presentModes[i] == VK_PRESENT_MODE_MAILBOX_KHR);
match = match || ((flags_ & VULKAN_FLAG_PRESENT_IMMEDIATE) && presentModes[i] == VK_PRESENT_MODE_IMMEDIATE_KHR);
match = match || ((flags_ & VULKAN_FLAG_PRESENT_FIFO_RELAXED) && presentModes[i] == VK_PRESENT_MODE_FIFO_RELAXED_KHR);
match = match || ((flags_ & VULKAN_FLAG_PRESENT_FIFO) && presentModes[i] == VK_PRESENT_MODE_FIFO_KHR);
match = match || ((flags_ & VULKAN_FLAG_PRESENT_IMMEDIATE) && presentModes[i] == VK_PRESENT_MODE_IMMEDIATE_KHR);
// Default to the first present mode from the list.
if (match || swapchainPresentMode == VK_PRESENT_MODE_MAX_ENUM_KHR) {
@ -1302,6 +1268,10 @@ bool VulkanContext::InitSwapchain() {
break;
}
}
#ifdef __ANDROID__
// HACK
swapchainPresentMode = VK_PRESENT_MODE_FIFO_KHR;
#endif
delete[] presentModes;
// Determine the number of VkImage's to use in the swap chain (we desire to
// own only 1 image at a time, besides the images being displayed and
@ -1666,101 +1636,80 @@ void VulkanDeleteList::Take(VulkanDeleteList &del) {
}
// Executes every queued deletion in this list, then empties all queues.
// User callbacks run first (they may still reference objects queued below),
// after which each Vulkan object category is destroyed in a fixed order.
// The total number of deletions performed is recorded in deleteCount_ for stats.
void VulkanDeleteList::PerformDeletes(VulkanContext *vulkan, VmaAllocator allocator) {
	int performed = 0;

	// Callbacks first, in queue order.
	for (auto &cb : callbacks_) {
		cb.func(vulkan, cb.userdata);
		performed++;
	}
	callbacks_.clear();

	VkDevice device = vulkan->GetDevice();

	// Destroys every element of 'queue' via 'destroyFunc', counts it, and empties the queue.
	auto flush = [&performed](auto &queue, auto &&destroyFunc) {
		for (auto &item : queue) {
			destroyFunc(item);
			performed++;
		}
		queue.clear();
	};

	// Keep this order identical to the queue declarations - some objects
	// (e.g. views) must go before the objects they were created from.
	flush(cmdPools_, [&](VkCommandPool pool) { vkDestroyCommandPool(device, pool, nullptr); });
	flush(descPools_, [&](VkDescriptorPool pool) { vkDestroyDescriptorPool(device, pool, nullptr); });
	flush(modules_, [&](VkShaderModule mod) { vkDestroyShaderModule(device, mod, nullptr); });
	flush(buffers_, [&](VkBuffer buf) { vkDestroyBuffer(device, buf, nullptr); });
	flush(buffersWithAllocs_, [&](auto &buf) { vmaDestroyBuffer(allocator, buf.buffer, buf.alloc); });
	flush(bufferViews_, [&](VkBufferView view) { vkDestroyBufferView(device, view, nullptr); });
	flush(imagesWithAllocs_, [&](auto &img) { vmaDestroyImage(allocator, img.image, img.alloc); });
	flush(imageViews_, [&](VkImageView view) { vkDestroyImageView(device, view, nullptr); });
	flush(deviceMemory_, [&](VkDeviceMemory mem) { vkFreeMemory(device, mem, nullptr); });
	flush(samplers_, [&](VkSampler sampler) { vkDestroySampler(device, sampler, nullptr); });
	flush(pipelines_, [&](VkPipeline pipeline) { vkDestroyPipeline(device, pipeline, nullptr); });
	flush(pipelineCaches_, [&](VkPipelineCache pcache) { vkDestroyPipelineCache(device, pcache, nullptr); });
	flush(renderPasses_, [&](VkRenderPass renderPass) { vkDestroyRenderPass(device, renderPass, nullptr); });
	flush(framebuffers_, [&](VkFramebuffer framebuffer) { vkDestroyFramebuffer(device, framebuffer, nullptr); });
	flush(pipelineLayouts_, [&](VkPipelineLayout layout) { vkDestroyPipelineLayout(device, layout, nullptr); });
	flush(descSetLayouts_, [&](VkDescriptorSetLayout layout) { vkDestroyDescriptorSetLayout(device, layout, nullptr); });
	flush(queryPools_, [&](VkQueryPool queryPool) { vkDestroyQueryPool(device, queryPool, nullptr); });

	deleteCount_ = performed;
}
void VulkanContext::GetImageMemoryRequirements(VkImage image, VkMemoryRequirements *mem_reqs, bool *dedicatedAllocation) {

View file

@ -36,7 +36,6 @@ enum {
VULKAN_VENDOR_QUALCOMM = 0x00005143,
VULKAN_VENDOR_IMGTEC = 0x00001010, // PowerVR
VULKAN_VENDOR_APPLE = 0x0000106b, // Apple through MoltenVK
VULKAN_VENDOR_MESA = 0x00010005, // lavapipe
};
VK_DEFINE_HANDLE(VmaAllocator);
@ -138,10 +137,6 @@ public:
void Take(VulkanDeleteList &del);
void PerformDeletes(VulkanContext *vulkan, VmaAllocator allocator);
int GetLastDeleteCount() const {
return deleteCount_;
}
private:
std::vector<VkCommandPool> cmdPools_;
std::vector<VkDescriptorPool> descPools_;
@ -161,7 +156,6 @@ private:
std::vector<VkDescriptorSetLayout> descSetLayouts_;
std::vector<VkQueryPool> queryPools_;
std::vector<Callback> callbacks_;
int deleteCount_ = 0;
};
// VulkanContext manages the device and swapchain, and deferred deletion of objects.
@ -270,8 +264,6 @@ public:
struct AllPhysicalDeviceFeatures {
VkPhysicalDeviceFeatures standard;
VkPhysicalDeviceMultiviewFeatures multiview;
VkPhysicalDevicePresentWaitFeaturesKHR presentWait;
VkPhysicalDevicePresentIdFeaturesKHR presentId;
};
const PhysicalDeviceProps &GetPhysicalDeviceProperties(int i = -1) const {
@ -297,13 +289,6 @@ public:
return device_extensions_enabled_;
}
const std::vector<VkExtensionProperties> &GetInstanceExtensionsAvailable() const {
return instance_extension_properties_;
}
const std::vector<const char *> &GetInstanceExtensionsEnabled() const {
return instance_extensions_enabled_;
}
const VkPhysicalDeviceMemoryProperties &GetMemoryProperties() const {
return memory_properties_;
}
@ -327,6 +312,7 @@ public:
for (const auto &iter : instance_layer_properties_) {
for (const auto &ext : iter.extensions) {
if (!strcmp(extensionName, ext.extensionName)) {
INFO_LOG(G3D, "%s found in layer extensions: %s", extensionName, iter.properties.layerName);
return true;
}
}
@ -397,10 +383,6 @@ public:
return availablePresentModes_;
}
int GetLastDeleteCount() const {
return frame_[curFrame_].deleteList.GetLastDeleteCount();
}
private:
bool ChooseQueue();

View file

@ -21,7 +21,6 @@
#include <mutex>
#include "Common/Log.h"
#include "Common/System/System.h"
#include "Common/GPU/Vulkan/VulkanContext.h"
#include "Common/GPU/Vulkan/VulkanDebug.h"
@ -54,14 +53,8 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback(
return false;
case 1303270965:
// Benign perf warning, image blit using GENERAL layout.
// TODO: Oops, turns out we filtered out a bit too much here!
// We really need that performance flag check to sort out the stuff that matters.
// Will enable it soon, but it'll take some fixing.
//
if (messageType & VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT)
return false;
break;
// UNASSIGNED
return false;
case 606910136:
case -392708513:
case -384083808:
@ -83,27 +76,10 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback(
// Extended validation (ARM best practices)
// Non-fifo validation not recommended
return false;
case 337425955:
// False positive
// https://github.com/KhronosGroup/Vulkan-ValidationLayers/issues/3615
return false;
default:
break;
}
/*
// Can be used to temporarily turn errors into info for easier debugging.
switch (messageCode) {
case 1544472022:
if (messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
messageSeverity = (VkDebugUtilsMessageSeverityFlagBitsEXT)((messageSeverity & ~VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) | VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT);
}
break;
default:
break;
}
*/
int count;
{
std::lock_guard<std::mutex> lock(g_errorCountMutex);
@ -140,7 +116,7 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback(
#ifdef _WIN32
OutputDebugStringA(msg.c_str());
if (messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
if (options->breakOnError && System_GetPropertyBool(SYSPROP_DEBUGGER_PRESENT)) {
if (options->breakOnError && IsDebuggerPresent()) {
DebugBreak();
}
if (options->msgBoxOnError) {
@ -148,7 +124,7 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback(
}
} else if (messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) {
// Don't break on perf warnings for now, even with a debugger. We log them at least.
if (options->breakOnWarning && System_GetPropertyBool(SYSPROP_DEBUGGER_PRESENT) && 0 == (messageType & VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT)) {
if (options->breakOnWarning && IsDebuggerPresent() && 0 == (messageType & VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT)) {
DebugBreak();
}
}

View file

@ -1,129 +0,0 @@
#include "Common/GPU/Vulkan/VulkanDescSet.h"
// Destructor only sanity-checks that Destroy()/DestroyImmediately() was called first.
// By this point we no longer have a usable VulkanContext to queue a deletion on,
// so a still-live pool here is a programming error.
VulkanDescSetPool::~VulkanDescSetPool() {
	_assert_msg_(descPool_ == VK_NULL_HANDLE, "VulkanDescSetPool %s never destroyed", tag_);
}
// Initializes the pool from a binding layout description.
// bindingTypes/bindingTypesCount describe one descriptor set's bindings;
// descriptorCount is the maximum number of sets the pool should hold.
// Asserts on failure - a pool that can't be created is unrecoverable here.
void VulkanDescSetPool::Create(VulkanContext *vulkan, const BindingType *bindingTypes, uint32_t bindingTypesCount, uint32_t descriptorCount) {
	_assert_msg_(descPool_ == VK_NULL_HANDLE, "VulkanDescSetPool::Create when already exists");

	vulkan_ = vulkan;
	info_ = { VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO };
	info_.maxSets = descriptorCount;
	_dbg_assert_(sizes_.empty());

	// Tally how many bindings of each Vulkan descriptor type the layout uses.
	uint32_t numCombinedImageSamplers = 0;
	uint32_t numDynamicUniformBuffers = 0;
	uint32_t numStorageBuffers = 0;
	uint32_t numStorageImages = 0;
	for (uint32_t i = 0; i < bindingTypesCount; i++) {
		switch (bindingTypes[i]) {
		case BindingType::COMBINED_IMAGE_SAMPLER:
			numCombinedImageSamplers++;
			break;
		case BindingType::UNIFORM_BUFFER_DYNAMIC_VERTEX:
		case BindingType::UNIFORM_BUFFER_DYNAMIC_ALL:
			numDynamicUniformBuffers++;
			break;
		case BindingType::STORAGE_BUFFER_VERTEX:
		case BindingType::STORAGE_BUFFER_COMPUTE:
			numStorageBuffers++;
			break;
		case BindingType::STORAGE_IMAGE_COMPUTE:
			numStorageImages++;
			break;
		}
	}

	// Per-type capacity = bindings-per-set * max sets. Types with zero bindings
	// are omitted from the pool sizes entirely.
	auto addPoolSize = [this, descriptorCount](VkDescriptorType type, uint32_t count) {
		if (count) {
			sizes_.push_back(VkDescriptorPoolSize{ type, count * descriptorCount });
		}
	};
	addPoolSize(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, numCombinedImageSamplers);
	addPoolSize(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, numDynamicUniformBuffers);
	addPoolSize(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, numStorageBuffers);
	addPoolSize(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, numStorageImages);

	VkResult res = Recreate(false);
	_assert_msg_(res == VK_SUCCESS, "Could not create VulkanDescSetPool %s", tag_);
}
// Allocates 'count' descriptor sets with the given layouts into descriptorSets.
// May recreate (and thereby empty) the pool if it is missing or out of space,
// so the returned sets are only valid for the current frame.
// Returns false only if allocation still fails after a forced recreate.
bool VulkanDescSetPool::Allocate(VkDescriptorSet *descriptorSets, int count, const VkDescriptorSetLayout *layouts) {
	const bool needRecreate = descPool_ == VK_NULL_HANDLE || usage_ + count >= info_.maxSets;
	if (needRecreate) {
		// Missing or out of space - recreate, growing if this pool is configured to.
		VkResult res = Recreate(grow_);
		_assert_msg_(res == VK_SUCCESS, "Could not grow VulkanDescSetPool %s on usage %d", tag_, (int)usage_);
	}

	VkDescriptorSetAllocateInfo allocInfo{ VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO };
	allocInfo.descriptorPool = descPool_;
	allocInfo.descriptorSetCount = count;
	allocInfo.pSetLayouts = layouts;

	VkResult result = vkAllocateDescriptorSets(vulkan_->GetDevice(), &allocInfo, descriptorSets);
	if (result == VK_ERROR_FRAGMENTED_POOL || result < 0) {
		WARN_LOG(G3D, "Pool %s %s - recreating", tag_, result == VK_ERROR_FRAGMENTED_POOL ? "fragmented" : "full");
		// Per spec, on fragmentation the pool should be recreated rather than reset:
		// https://www.khronos.org/registry/vulkan/specs/1.0/man/html/vkAllocateDescriptorSets.html
		// Fragmentation shouldn't really happen though, since we wipe the pool every frame.
		VkResult res = Recreate(false);
		_assert_msg_(res == VK_SUCCESS, "Ran out of descriptor space (frag?) and failed to recreate a descriptor pool. sz=%d res=%d", usage_, (int)res);
		// The pool handle changed, so re-point the allocation at the new one and retry once.
		allocInfo.descriptorPool = descPool_;
		result = vkAllocateDescriptorSets(vulkan_->GetDevice(), &allocInfo, descriptorSets);
		_assert_msg_(result == VK_SUCCESS, "Ran out of descriptor space (frag?) and failed to allocate after recreating a descriptor pool. res=%d", (int)result);
	}

	if (result != VK_SUCCESS) {
		return false;
	}
	usage_ += count;
	return true;
}
// Returns all previously allocated sets to the pool in one go (start-of-frame wipe).
// Any VkDescriptorSet handles allocated from this pool become invalid.
void VulkanDescSetPool::Reset() {
	_assert_msg_(descPool_ != VK_NULL_HANDLE, "VulkanDescSetPool::Reset without valid pool");
	vkResetDescriptorPool(vulkan_->GetDevice(), descPool_, 0);
	usage_ = 0;
}
// Queues the pool for deferred deletion (safe while the GPU may still use it).
// NOTE(review): unlike DestroyImmediately(), descPool_ is not explicitly nulled here;
// presumably QueueDeleteDescriptorPool takes the handle by reference and clears it -
// confirm against VulkanDeleteList, since ~VulkanDescSetPool asserts it is null.
void VulkanDescSetPool::Destroy() {
	if (descPool_ != VK_NULL_HANDLE) {
		vulkan_->Delete().QueueDeleteDescriptorPool(descPool_);
		usage_ = 0;
	}
	sizes_.clear();
}
// Destroys the pool right now, bypassing deferred deletion.
// Only safe when the GPU can no longer be using sets from this pool
// (e.g. during shutdown after a device wait).
void VulkanDescSetPool::DestroyImmediately() {
	if (descPool_ != VK_NULL_HANDLE) {
		vkDestroyDescriptorPool(vulkan_->GetDevice(), descPool_, nullptr);
		descPool_ = VK_NULL_HANDLE;
		usage_ = 0;
	}
	sizes_.clear();
}
// Destroys any existing pool (deferred) and creates a fresh one from info_/sizes_.
// If 'grow' is set, doubles maxSets and every per-type descriptor count first.
// Returns the result of vkCreateDescriptorPool; the caller decides whether to assert.
VkResult VulkanDescSetPool::Recreate(bool grow) {
	_assert_msg_(vulkan_ != nullptr, "VulkanDescSetPool::Recreate without VulkanContext");

	uint32_t prevSize = info_.maxSets;
	if (grow) {
		info_.maxSets *= 2;
		for (auto &size : sizes_)
			size.descriptorCount *= 2;
	}

	// Delete the pool if it already exists.
	if (descPool_ != VK_NULL_HANDLE) {
		INFO_LOG(G3D, "Reallocating %s desc pool from %d to %d", tag_, prevSize, info_.maxSets);
		vulkan_->Delete().QueueDeleteDescriptorPool(descPool_);
		usage_ = 0;
	}

	// Use data() instead of &sizes_[0]: indexing an empty vector is undefined behavior,
	// while data() is well-defined even when sizes_ is empty (poolSizeCount is 0 then too).
	info_.pPoolSizes = sizes_.data();
	info_.poolSizeCount = (uint32_t)sizes_.size();

	VkResult result = vkCreateDescriptorPool(vulkan_->GetDevice(), &info_, nullptr, &descPool_);
	if (result == VK_SUCCESS) {
		vulkan_->SetDebugName(descPool_, VK_OBJECT_TYPE_DESCRIPTOR_POOL, tag_);
	}
	return result;
}

View file

@ -1,48 +0,0 @@
#pragma once
#include "Common/Data/Collections/FastVec.h"
#include "Common/GPU/Vulkan/VulkanContext.h"
#include <vector>
// Abstract binding kinds used to describe a descriptor set layout.
// VulkanDescSetPool::Create() maps these onto Vulkan descriptor types:
enum class BindingType {
	COMBINED_IMAGE_SAMPLER,         // VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER
	UNIFORM_BUFFER_DYNAMIC_VERTEX,  // VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC (vertex stage)
	UNIFORM_BUFFER_DYNAMIC_ALL,     // VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC (all stages)
	STORAGE_BUFFER_VERTEX,          // VK_DESCRIPTOR_TYPE_STORAGE_BUFFER (vertex stage)
	STORAGE_BUFFER_COMPUTE,         // VK_DESCRIPTOR_TYPE_STORAGE_BUFFER (compute stage)
	STORAGE_IMAGE_COMPUTE,          // VK_DESCRIPTOR_TYPE_STORAGE_IMAGE (compute stage)
};
// Only appropriate for use in a per-frame pool.
// Wraps a VkDescriptorPool that is expected to be Reset() (or recreated) every
// frame; allocated sets are therefore short-lived. Not thread-safe.
class VulkanDescSetPool {
public:
	// 'tag' is used for debug names and log/assert messages; it must outlive the pool.
	// 'grow' controls whether the pool doubles in size when it runs out of space.
	VulkanDescSetPool(const char *tag, bool grow = true) : tag_(tag), grow_(grow) {}
	~VulkanDescSetPool();

	// Builds the pool sizing from one set's binding layout; 'descriptorCount' is the max set count.
	void Create(VulkanContext *vulkan, const BindingType *bindingTypes, uint32_t bindingTypesCount, uint32_t descriptorCount);
	// Allocate a new set, which may resize and empty the current sets.
	// Use only for the current frame.
	bool Allocate(VkDescriptorSet *descriptorSets, int count, const VkDescriptorSetLayout *layouts);
	// Returns all allocated sets to the pool at once (invalidates their handles).
	void Reset();
	// This queues up destruction.
	void Destroy();
	// This actually destroys immediately.
	void DestroyImmediately();

	bool IsDestroyed() const {
		return !descPool_;
	}

private:
	// (Re)creates the underlying VkDescriptorPool, optionally doubling its capacity.
	VkResult Recreate(bool grow);

	const char *tag_;                            // debug label, not owned
	VulkanContext *vulkan_ = nullptr;            // set by Create(), not owned
	VkDescriptorPool descPool_ = VK_NULL_HANDLE; // the current pool, VK_NULL_HANDLE when destroyed
	VkDescriptorPoolCreateInfo info_{};          // cached create info, reused on Recreate()
	std::vector<VkDescriptorPoolSize> sizes_;    // per-descriptor-type capacities
	uint32_t usage_ = 0;                         // sets allocated since the last reset/recreate
	bool grow_;                                  // whether Allocate() may grow the pool
};

View file

@ -4,12 +4,6 @@
#include "Common/Log.h"
#include "Common/StringUtils.h"
#if 0 // def _DEBUG
#define VLOG(...) NOTICE_LOG(G3D, __VA_ARGS__)
#else
#define VLOG(...)
#endif
void CachedReadback::Destroy(VulkanContext *vulkan) {
if (buffer) {
vulkan->Delete().QueueDeleteBufferAllocation(buffer, allocation);
@ -90,10 +84,6 @@ void FrameData::AcquireNextImage(VulkanContext *vulkan, FrameDataShared &shared)
WARN_LOG(G3D, "%s returned from AcquireNextImage - processing the frame, but not presenting", VulkanResultToString(res));
skipSwap = true;
break;
case VK_ERROR_SURFACE_LOST_KHR:
ERROR_LOG(G3D, "%s returned from AcquireNextImage - ignoring, but this better be during shutdown", VulkanResultToString(res));
skipSwap = true;
break;
default:
// Weird, shouldn't get any other values. Maybe lost device?
_assert_msg_(false, "vkAcquireNextImageKHR failed! result=%s", VulkanResultToString(res));
@ -114,25 +104,6 @@ VkResult FrameData::QueuePresent(VulkanContext *vulkan, FrameDataShared &shared)
present.pWaitSemaphores = &shared.renderingCompleteSemaphore;
present.waitSemaphoreCount = 1;
// Can't move these into the if.
VkPresentIdKHR presentID{ VK_STRUCTURE_TYPE_PRESENT_ID_KHR };
VkPresentTimesInfoGOOGLE presentGOOGLE{ VK_STRUCTURE_TYPE_PRESENT_TIMES_INFO_GOOGLE };
uint64_t frameId = this->frameId;
VkPresentTimeGOOGLE presentTimeGOOGLE{ (uint32_t)frameId, 0 }; // it's ok to truncate this. it'll wrap around and work (if we ever reach 4 billion frames..)
if (shared.measurePresentTime) {
if (vulkan->Extensions().KHR_present_id && vulkan->GetDeviceFeatures().enabled.presentId.presentId) {
presentID.pPresentIds = &frameId;
presentID.swapchainCount = 1;
present.pNext = &presentID;
} else if (vulkan->Extensions().GOOGLE_display_timing) {
presentGOOGLE.pTimes = &presentTimeGOOGLE;
presentGOOGLE.swapchainCount = 1;
present.pNext = &presentGOOGLE;
}
}
return vkQueuePresentKHR(vulkan->GetGraphicsQueue(), &present);
}
@ -150,7 +121,7 @@ VkCommandBuffer FrameData::GetInitCmd(VulkanContext *vulkan) {
}
// Good spot to reset the query pool.
if (profile.enabled) {
if (profilingEnabled_) {
vkCmdResetQueryPool(initCmd, profile.queryPool, 0, MAX_TIMESTAMP_QUERIES);
vkCmdWriteTimestamp(initCmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, profile.queryPool, 0);
}
@ -167,7 +138,7 @@ void FrameData::SubmitPending(VulkanContext *vulkan, FrameSubmitType type, Frame
VkFence fenceToTrigger = VK_NULL_HANDLE;
if (hasInitCommands) {
if (profile.enabled) {
if (profilingEnabled_) {
// Pre-allocated query ID 1 - end of init cmdbuf.
vkCmdWriteTimestamp(initCmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, profile.queryPool, 1);
}
@ -225,16 +196,12 @@ void FrameData::SubmitPending(VulkanContext *vulkan, FrameSubmitType type, Frame
VkResult res;
if (fenceToTrigger == fence) {
VLOG("Doing queue submit, fencing frame %d", this->index);
// The fence is waited on by the main thread, they are not allowed to access it simultaneously.
res = vkQueueSubmit(vulkan->GetGraphicsQueue(), 1, &submit_info, fenceToTrigger);
if (sharedData.useMultiThreading) {
std::lock_guard<std::mutex> lock(fenceMutex);
readyForFence = true;
fenceCondVar.notify_one();
}
std::lock_guard<std::mutex> lock(fenceMutex);
readyForFence = true;
fenceCondVar.notify_one();
} else {
VLOG("Doing queue submit, fencing something (%p)", fenceToTrigger);
res = vkQueueSubmit(vulkan->GetGraphicsQueue(), 1, &submit_info, fenceToTrigger);
}
@ -252,7 +219,7 @@ void FrameData::SubmitPending(VulkanContext *vulkan, FrameSubmitType type, Frame
}
}
void FrameDataShared::Init(VulkanContext *vulkan, bool useMultiThreading, bool measurePresentTime) {
void FrameDataShared::Init(VulkanContext *vulkan) {
VkSemaphoreCreateInfo semaphoreCreateInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };
semaphoreCreateInfo.flags = 0;
VkResult res = vkCreateSemaphore(vulkan->GetDevice(), &semaphoreCreateInfo, nullptr, &acquireSemaphore);
@ -263,9 +230,6 @@ void FrameDataShared::Init(VulkanContext *vulkan, bool useMultiThreading, bool m
// This fence is used for synchronizing readbacks. Does not need preinitialization.
readbackFence = vulkan->CreateFence(false);
vulkan->SetDebugName(readbackFence, VK_OBJECT_TYPE_FENCE, "readbackFence");
this->useMultiThreading = useMultiThreading;
this->measurePresentTime = measurePresentTime;
}
void FrameDataShared::Destroy(VulkanContext *vulkan) {

View file

@ -13,22 +13,17 @@ enum {
};
enum class VKRRunType {
SUBMIT,
PRESENT,
SYNC,
EXIT,
};
struct QueueProfileContext {
bool enabled = false;
bool timestampsEnabled = false;
VkQueryPool queryPool;
std::vector<std::string> timestampDescriptions;
std::string profileSummary;
double cpuStartTime;
double cpuEndTime;
double descWriteTime;
int descriptorsWritten;
};
class VKRFramebuffer;
@ -55,10 +50,8 @@ struct FrameDataShared {
// For synchronous readbacks.
VkFence readbackFence = VK_NULL_HANDLE;
bool useMultiThreading;
bool measurePresentTime;
void Init(VulkanContext *vulkan, bool useMultiThreading, bool measurePresentTime);
void Init(VulkanContext *vulkan);
void Destroy(VulkanContext *vulkan);
};
@ -98,15 +91,12 @@ struct FrameData {
// Swapchain.
uint32_t curSwapchainImage = -1;
// Frames need unique IDs to wait for present on, let's keep them here.
// Also used for indexing into the frame timing history buffer.
uint64_t frameId = 0;
// Profiling.
QueueProfileContext profile{};
QueueProfileContext profile;
bool profilingEnabled_ = false;
// Async readback cache.
DenseHashMap<ReadbackKey, CachedReadback *> readbacks_;
DenseHashMap<ReadbackKey, CachedReadback*, nullptr> readbacks_;
FrameData() : readbacks_(8) {}

View file

@ -2,27 +2,6 @@
#include "Common/GPU/Vulkan/VulkanFramebuffer.h"
#include "Common/GPU/Vulkan/VulkanQueueRunner.h"
// Debug names indexed by the numeric value of RenderPassType.
// NOTE(review): the order must match the RenderPassType enum exactly -
// confirm against VulkanFramebuffer.h whenever the enum's bit layout changes.
static const char *rpTypeDebugNames[] = {
	"RENDER",
	"RENDER_DEPTH",
	"MV_RENDER",
	"MV_RENDER_DEPTH",
	"MS_RENDER",
	"MS_RENDER_DEPTH",
	"MS_MV_RENDER",
	"MS_MV_RENDER_DEPTH",
	"BACKBUF",
};
// Returns a human-readable debug name for a render pass type,
// or "N/A" if the value is outside the known table.
const char *GetRPTypeName(RenderPassType rpType) {
	const uint32_t index = (uint32_t)rpType;
	return index < ARRAY_SIZE(rpTypeDebugNames) ? rpTypeDebugNames[index] : "N/A";
}
VkSampleCountFlagBits MultiSampleLevelToFlagBits(int count) {
// TODO: Check hardware support here, or elsewhere?
// Some hardware only supports 4x.
@ -286,17 +265,12 @@ static VkAttachmentStoreOp ConvertStoreAction(VKRRenderPassStoreAction action) {
// Also see https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies
VkRenderPass CreateRenderPass(VulkanContext *vulkan, const RPKey &key, RenderPassType rpType, VkSampleCountFlagBits sampleCount) {
bool selfDependency = RenderPassTypeHasInput(rpType);
bool isBackbuffer = rpType == RenderPassType::BACKBUFFER;
bool hasDepth = RenderPassTypeHasDepth(rpType);
bool multiview = RenderPassTypeHasMultiView(rpType);
bool multisample = RenderPassTypeHasMultisample(rpType);
_dbg_assert_(!(isBackbuffer && multisample));
if (isBackbuffer) {
_dbg_assert_(key.depthLoadAction == VKRRenderPassLoadAction::CLEAR);
}
if (multiview) {
// TODO: Assert that the device has multiview support enabled.
}
@ -323,7 +297,7 @@ VkRenderPass CreateRenderPass(VulkanContext *vulkan, const RPKey &key, RenderPas
attachments[attachmentCount].storeOp = ConvertStoreAction(key.depthStoreAction);
attachments[attachmentCount].stencilLoadOp = multisample ? VK_ATTACHMENT_LOAD_OP_DONT_CARE : ConvertLoadAction(key.stencilLoadAction);
attachments[attachmentCount].stencilStoreOp = ConvertStoreAction(key.stencilStoreAction);
attachments[attachmentCount].initialLayout = isBackbuffer ? VK_IMAGE_LAYOUT_UNDEFINED : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachments[attachmentCount].initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachments[attachmentCount].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachmentCount++;
}
@ -356,7 +330,7 @@ VkRenderPass CreateRenderPass(VulkanContext *vulkan, const RPKey &key, RenderPas
VkAttachmentReference colorReference{};
colorReference.attachment = colorAttachmentIndex;
colorReference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
colorReference.layout = selfDependency ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
VkAttachmentReference depthReference{};
depthReference.attachment = depthAttachmentIndex;
@ -365,15 +339,20 @@ VkRenderPass CreateRenderPass(VulkanContext *vulkan, const RPKey &key, RenderPas
VkSubpassDescription subpass{};
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass.flags = 0;
subpass.inputAttachmentCount = 0;
subpass.pInputAttachments = nullptr;
if (selfDependency) {
subpass.inputAttachmentCount = 1;
subpass.pInputAttachments = &colorReference;
} else {
subpass.inputAttachmentCount = 0;
subpass.pInputAttachments = nullptr;
}
subpass.colorAttachmentCount = 1;
subpass.pColorAttachments = &colorReference;
VkAttachmentReference colorResolveReference;
if (multisample) {
colorResolveReference.attachment = 0; // the non-msaa color buffer.
colorResolveReference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
colorResolveReference.layout = selfDependency ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
subpass.pResolveAttachments = &colorResolveReference;
} else {
subpass.pResolveAttachments = nullptr;
@ -408,25 +387,23 @@ VkRenderPass CreateRenderPass(VulkanContext *vulkan, const RPKey &key, RenderPas
}
if (isBackbuffer) {
// We don't specify any explicit transitions for these, so let's use subpass dependencies.
// This makes sure that writes to the depth image are done before we try to write to it again.
// From Sascha's examples.
deps[numDeps].srcSubpass = VK_SUBPASS_EXTERNAL;
deps[numDeps].dstSubpass = 0;
deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
deps[numDeps].srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
deps[numDeps].dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
deps[numDeps].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
numDeps++;
// Dependencies for the color image.
deps[numDeps].srcSubpass = VK_SUBPASS_EXTERNAL;
deps[numDeps].dstSubpass = 0;
deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
deps[numDeps].srcAccessMask = VK_ACCESS_MEMORY_READ_BIT;
deps[numDeps].srcAccessMask = 0;
deps[numDeps].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
numDeps++;
}
if (selfDependency) {
deps[numDeps].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
deps[numDeps].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
deps[numDeps].dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
deps[numDeps].srcSubpass = 0;
deps[numDeps].dstSubpass = 0;
numDeps++;
}
@ -486,6 +463,10 @@ VkRenderPass CreateRenderPass(VulkanContext *vulkan, const RPKey &key, RenderPas
VkSubpassDescription2KHR subpass2{ VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR };
subpass2.colorAttachmentCount = subpass.colorAttachmentCount;
subpass2.flags = subpass.flags;
if (selfDependency) {
subpass2.inputAttachmentCount = subpass.inputAttachmentCount;
subpass2.pInputAttachments = &colorReference2;
}
subpass2.pColorAttachments = &colorReference2;
if (hasDepth) {
subpass2.pDepthStencilAttachment = &depthReference2;
@ -495,7 +476,7 @@ VkRenderPass CreateRenderPass(VulkanContext *vulkan, const RPKey &key, RenderPas
if (multisample) {
colorResolveReference2.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
colorResolveReference2.attachment = colorResolveReference.attachment; // the non-msaa color buffer.
colorResolveReference2.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
colorResolveReference2.layout = selfDependency ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
subpass2.pResolveAttachments = &colorResolveReference2;
} else {
subpass2.pResolveAttachments = nullptr;
@ -528,10 +509,6 @@ VkRenderPass CreateRenderPass(VulkanContext *vulkan, const RPKey &key, RenderPas
res = vkCreateRenderPass(vulkan->GetDevice(), &rp, nullptr, &pass);
}
if (pass) {
vulkan->SetDebugName(pass, VK_OBJECT_TYPE_RENDER_PASS, GetRPTypeName(rpType));
}
_assert_(res == VK_SUCCESS);
_assert_(pass != VK_NULL_HANDLE);
return pass;

View file

@ -13,14 +13,15 @@ enum class RenderPassType {
// These eight are organized so that bit 0 is DEPTH and bit 1 is INPUT and bit 2 is MULTIVIEW, so
// they can be OR-ed together in MergeRPTypes.
HAS_DEPTH = 1,
MULTIVIEW = 2,
MULTISAMPLE = 4,
COLOR_INPUT = 2, // input attachment
MULTIVIEW = 4,
MULTISAMPLE = 8,
// This is the odd one out, and gets special handling in MergeRPTypes.
// If this flag is set, none of the other flags can be set.
// For the backbuffer we can always use CLEAR/DONT_CARE, so bandwidth cost for a depth channel is negligible
// so we don't bother with a non-depth version.
BACKBUFFER = 8,
BACKBUFFER = 16,
TYPE_COUNT = BACKBUFFER + 1,
};
@ -106,6 +107,10 @@ inline bool RenderPassTypeHasDepth(RenderPassType type) {
return (type & RenderPassType::HAS_DEPTH) || type == RenderPassType::BACKBUFFER;
}
// True if this render pass type reads its own color attachment as an input attachment.
inline bool RenderPassTypeHasInput(RenderPassType type) {
	return (type & RenderPassType::COLOR_INPUT) != 0;
}
// True if this render pass type renders to multiple views (stereo/VR multiview).
inline bool RenderPassTypeHasMultiView(RenderPassType type) {
	return (type & RenderPassType::MULTIVIEW) != 0;
}
@ -157,5 +162,3 @@ private:
VkSampleCountFlagBits sampleCounts[(size_t)RenderPassType::TYPE_COUNT];
RPKey key_;
};
const char *GetRPTypeName(RenderPassType rpType);

View file

@ -130,12 +130,9 @@ bool VulkanTexture::CreateDirect(VkCommandBuffer cmd, int w, int h, int depth, i
res = vkCreateImageView(vulkan_->GetDevice(), &view_info, NULL, &view_);
if (res != VK_SUCCESS) {
ERROR_LOG(G3D, "vkCreateImageView failed: %s. Destroying image.", VulkanResultToString(res));
ERROR_LOG(G3D, "vkCreateImageView failed: %s", VulkanResultToString(res));
// This leaks the image.
_assert_(res == VK_ERROR_OUT_OF_HOST_MEMORY || res == VK_ERROR_OUT_OF_DEVICE_MEMORY || res == VK_ERROR_TOO_MANY_OBJECTS);
vmaDestroyImage(vulkan_->Allocator(), image_, allocation_);
view_ = VK_NULL_HANDLE;
image_ = VK_NULL_HANDLE;
allocation_ = VK_NULL_HANDLE;
return false;
}
vulkan_->SetDebugName(view_, VK_OBJECT_TYPE_IMAGE_VIEW, tag_);
@ -144,7 +141,6 @@ bool VulkanTexture::CreateDirect(VkCommandBuffer cmd, int w, int h, int depth, i
if (view_info.viewType == VK_IMAGE_VIEW_TYPE_2D) {
view_info.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY;
res = vkCreateImageView(vulkan_->GetDevice(), &view_info, NULL, &arrayView_);
// Assume that if the above view creation succeeded, so will this.
_assert_(res == VK_SUCCESS);
vulkan_->SetDebugName(arrayView_, VK_OBJECT_TYPE_IMAGE_VIEW, tag_);
}
@ -262,22 +258,6 @@ void VulkanTexture::EndCreate(VkCommandBuffer cmd, bool vertexTexture, VkPipelin
prevStage == VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT ? VK_ACCESS_SHADER_WRITE_BIT : VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
}
void VulkanTexture::PrepareForTransferDst(VkCommandBuffer cmd, int levels) {
TransitionImageLayout2(cmd, image_, 0, levels, 1,
VK_IMAGE_ASPECT_COLOR_BIT,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_SHADER_READ_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
}
void VulkanTexture::RestoreAfterTransferDst(VkCommandBuffer cmd, int levels) {
TransitionImageLayout2(cmd, image_, 0, levels, 1,
VK_IMAGE_ASPECT_COLOR_BIT,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
}
VkImageView VulkanTexture::CreateViewForMip(int mip) {
VkImageViewCreateInfo view_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
view_info.image = image_;

View file

@ -37,10 +37,6 @@ public:
void GenerateMips(VkCommandBuffer cmd, int firstMipToGenerate, bool fromCompute);
void EndCreate(VkCommandBuffer cmd, bool vertexTexture, VkPipelineStageFlags prevStage, VkImageLayout layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
// For updating levels after creation. Careful with the timelines!
void PrepareForTransferDst(VkCommandBuffer cmd, int levels);
void RestoreAfterTransferDst(VkCommandBuffer cmd, int levels);
// When loading mips from compute shaders, you need to pass VK_IMAGE_LAYOUT_GENERAL to the above function.
// In addition, ignore UploadMip and GenerateMip, and instead use GetViewForMip. Make sure to delete the returned views when used.
VkImageView CreateViewForMip(int mip);

View file

@ -223,16 +223,11 @@ PFN_vkCmdInsertDebugUtilsLabelEXT vkCmdInsertDebugUtilsLabelEXT;
PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectNameEXT;
PFN_vkSetDebugUtilsObjectTagEXT vkSetDebugUtilsObjectTagEXT;
// Assorted other extensions.
PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR;
PFN_vkGetImageMemoryRequirements2KHR vkGetImageMemoryRequirements2KHR;
PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR;
PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR;
PFN_vkCreateRenderPass2KHR vkCreateRenderPass2KHR;
PFN_vkWaitForPresentKHR vkWaitForPresentKHR;
PFN_vkGetPastPresentationTimingGOOGLE vkGetPastPresentationTimingGOOGLE;
PFN_vkGetRefreshCycleDurationGOOGLE vkGetRefreshCycleDurationGOOGLE;
} // namespace PPSSPP_VK
using namespace PPSSPP_VK;
@ -314,7 +309,7 @@ static void VulkanFreeLibrary(VulkanLibraryHandle &h) {
}
void VulkanSetAvailable(bool available) {
INFO_LOG(G3D, "Setting Vulkan availability to true");
INFO_LOG(G3D, "Forcing Vulkan availability to true");
g_vulkanAvailabilityChecked = true;
g_vulkanMayBeAvailable = available;
}
@ -731,13 +726,6 @@ void VulkanLoadDeviceFunctions(VkDevice device, const VulkanExtensions &enabledE
LOAD_DEVICE_FUNC(device, vkCmdEndRenderPass);
LOAD_DEVICE_FUNC(device, vkCmdExecuteCommands);
if (enabledExtensions.KHR_present_wait) {
LOAD_DEVICE_FUNC(device, vkWaitForPresentKHR);
}
if (enabledExtensions.GOOGLE_display_timing) {
LOAD_DEVICE_FUNC(device, vkGetPastPresentationTimingGOOGLE);
LOAD_DEVICE_FUNC(device, vkGetRefreshCycleDurationGOOGLE);
}
if (enabledExtensions.KHR_dedicated_allocation) {
LOAD_DEVICE_FUNC(device, vkGetBufferMemoryRequirements2KHR);
LOAD_DEVICE_FUNC(device, vkGetImageMemoryRequirements2KHR);

View file

@ -235,9 +235,6 @@ extern PFN_vkGetMemoryHostPointerPropertiesEXT vkGetMemoryHostPointerPropertiesE
extern PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR;
extern PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR;
extern PFN_vkCreateRenderPass2KHR vkCreateRenderPass2KHR;
extern PFN_vkWaitForPresentKHR vkWaitForPresentKHR;
extern PFN_vkGetPastPresentationTimingGOOGLE vkGetPastPresentationTimingGOOGLE;
extern PFN_vkGetRefreshCycleDurationGOOGLE vkGetRefreshCycleDurationGOOGLE;
} // namespace PPSSPP_VK
// For fast extension-enabled checks.
@ -256,9 +253,6 @@ struct VulkanExtensions {
bool EXT_swapchain_colorspace;
bool ARM_rasterization_order_attachment_access;
bool EXT_fragment_shader_interlock;
bool KHR_present_id; // Should probably check the feature flags instead.
bool KHR_present_wait; // Same
bool GOOGLE_display_timing;
// bool EXT_depth_range_unrestricted; // Allows depth outside [0.0, 1.0] in 32-bit float depth buffers.
};

View file

@ -35,9 +35,252 @@ using namespace PPSSPP_VK;
// Always keep around push buffers at least this long (seconds).
static const double PUSH_GARBAGE_COLLECTION_DELAY = 10.0;
// Global push buffer tracker for vulkan memory profiling.
// Don't want to manually dig up all the active push buffers.
static std::mutex g_pushBufferListMutex;
static std::set<VulkanMemoryManager *> g_pushBuffers;
// Snapshots the registry of live memory managers under the registry lock.
// Returns a copy so the caller can iterate without holding g_pushBufferListMutex.
std::vector<VulkanMemoryManager *> GetActiveVulkanMemoryManagers() {
	std::lock_guard<std::mutex> registryGuard(g_pushBufferListMutex);
	return std::vector<VulkanMemoryManager *>(g_pushBuffers.begin(), g_pushBuffers.end());
}
// Registers this buffer for memory profiling, then creates the first VkBuffer.
// size: initial capacity in bytes; usage: VkBufferUsageFlags for all buffers in the chain.
VulkanPushBuffer::VulkanPushBuffer(VulkanContext *vulkan, const char *name, size_t size, VkBufferUsageFlags usage)
	: vulkan_(vulkan), name_(name), size_(size), usage_(usage) {
	{
		// Make this instance visible to GetActiveVulkanMemoryManagers().
		std::lock_guard<std::mutex> registryGuard(g_pushBufferListMutex);
		g_pushBuffers.insert(this);
	}
	const bool created = AddBuffer();
	_assert_(created);
}
// Unregisters from profiling. The owner must already have called End() (unmap)
// and Destroy() (queue buffer deletion) before this runs.
VulkanPushBuffer::~VulkanPushBuffer() {
	{
		std::lock_guard<std::mutex> registryGuard(g_pushBufferListMutex);
		g_pushBuffers.erase(this);
	}
	_dbg_assert_(!writePtr_);
	_assert_(buffers_.empty());
}
// Appends a new host-visible VkBuffer of size_ bytes to buffers_ and makes it
// the active write target (buf_ points at it).
// Returns false on allocation failure (also asserts, since this is fatal in practice).
bool VulkanPushBuffer::AddBuffer() {
	BufInfo info;

	VkBufferCreateInfo b{ VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
	b.size = size_;
	b.flags = 0;
	b.usage = usage_;
	b.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
	b.queueFamilyIndexCount = 0;
	b.pQueueFamilyIndices = nullptr;

	// CPU_TO_GPU so the buffer can be mapped and written from the CPU.
	VmaAllocationCreateInfo allocCreateInfo{};
	allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
	VmaAllocationInfo allocInfo{};

	VkResult res = vmaCreateBuffer(vulkan_->Allocator(), &b, &allocCreateInfo, &info.buffer, &info.allocation, &allocInfo);
	if (VK_SUCCESS != res) {
		// Fixed: the message used to say "vkCreateBuffer" but the failing call is vmaCreateBuffer.
		_assert_msg_(false, "vmaCreateBuffer failed! result=%d", (int)res);
		return false;
	}

	vulkan_->SetDebugName(info.buffer, VK_OBJECT_TYPE_BUFFER, name_);

	buffers_.push_back(info);
	buf_ = buffers_.size() - 1;  // The new buffer becomes the current write target.
	return true;
}
// Queues every buffer in the chain for deferred deletion and empties the list.
// Must not be called while the current buffer is still mapped.
void VulkanPushBuffer::Destroy(VulkanContext *vulkan) {
	_dbg_assert_(!writePtr_);
	for (auto &entry : buffers_) {
		vulkan->Delete().QueueDeleteBufferAllocation(entry.buffer, entry.allocation);
	}
	buffers_.clear();
}
// Advances to the next buffer in the chain, growing the chain (and possibly
// size_) when needed, then maps the new buffer for writing.
// minSize: the allocation size that didn't fit; size_ is doubled until it fits.
void VulkanPushBuffer::NextBuffer(size_t minSize) {
// First, unmap the current memory.
Unmap();
buf_++;
if (buf_ >= buffers_.size() || minSize > size_) {
// Before creating the buffer, adjust to the new size_ if necessary.
while (size_ < minSize) {
size_ <<= 1;
}
// Note: AddBuffer() itself repositions buf_ to the newly created buffer.
bool res = AddBuffer();
_assert_(res);
if (!res) {
// Let's try not to crash at least?
buf_ = 0;
}
}
// Now, move to the next buffer and map it.
offset_ = 0;
Map();
}
// Collapses a multi-buffer chain into a single buffer whose size equals the
// combined capacity of the old chain. No-op when there's at most one buffer.
void VulkanPushBuffer::Defragment(VulkanContext *vulkan) {
	if (buffers_.size() <= 1) {
		return;
	}

	// More than one buffer: replace the whole chain with one larger buffer.
	const size_t combinedSize = size_ * buffers_.size();
	Destroy(vulkan);
	size_ = combinedSize;

	const bool recreated = AddBuffer();
	_assert_(recreated);
}
// Total bytes pushed this cycle: all completely-filled buffers in the chain
// plus the write offset in the current one.
size_t VulkanPushBuffer::GetTotalSize() const {
	const size_t filled = buffers_.size() > 1 ? size_ * (buffers_.size() - 1) : 0;
	return filled + offset_;
}
// Formats a "used / capacity" line for the memory profiling overlay.
void VulkanPushBuffer::GetDebugString(char *buffer, size_t bufSize) const {
	size_t used = offset_;
	if (buffers_.size() > 1) {
		used += size_ * (buffers_.size() - 1);
	}
	const size_t capacity = size_ * buffers_.size();
	snprintf(buffer, bufSize, "Push %s: %s / %s", name_, NiceSizeFormat(used).c_str(), NiceSizeFormat(capacity).c_str());
}
void VulkanPushBuffer::Map() {
_dbg_assert_(!writePtr_);
VkResult res = vmaMapMemory(vulkan_->Allocator(), buffers_[buf_].allocation, (void **)(&writePtr_));
_dbg_assert_(writePtr_);
_assert_(VK_SUCCESS == res);
}
// Unmaps the current buffer. A missing prior Map() is a bug (asserted in debug
// builds) but is tolerated here to avoid crashing in release.
void VulkanPushBuffer::Unmap() {
	_dbg_assert_msg_(writePtr_ != nullptr, "VulkanPushBuffer::Unmap: writePtr_ null here means we have a bug (map/unmap mismatch)");
	if (!writePtr_) {
		return;
	}
	vmaUnmapMemory(vulkan_->Allocator(), buffers_[buf_].allocation);
	writePtr_ = nullptr;
}
// The pool must be torn down explicitly via Destroy() before destruction,
// since queued deletion requires a live VulkanContext; asserts if that was skipped.
VulkanDescSetPool::~VulkanDescSetPool() {
_assert_msg_(descPool_ == VK_NULL_HANDLE, "VulkanDescSetPool %s never destroyed", tag_);
}
// One-time setup: records the pool parameters and creates the initial pool.
// info: template for vkCreateDescriptorPool (pPoolSizes is filled in by Recreate);
// sizes: per-descriptor-type counts.
void VulkanDescSetPool::Create(VulkanContext *vulkan, const VkDescriptorPoolCreateInfo &info, const std::vector<VkDescriptorPoolSize> &sizes) {
	_assert_msg_(descPool_ == VK_NULL_HANDLE, "VulkanDescSetPool::Create when already exists");

	vulkan_ = vulkan;
	info_ = info;
	sizes_ = sizes;

	const VkResult createResult = Recreate(false);
	_assert_msg_(createResult == VK_SUCCESS, "Could not create VulkanDescSetPool %s", tag_);
}
// Allocates n descriptor sets from the pool, growing (if grow_) or recreating
// the pool when it's missing or out of space. Returns VK_NULL_HANDLE only if
// allocation still fails after a recreate (asserts first).
// NOTE(review): usage_ is checked but not incremented here - presumably updated
// elsewhere; verify against callers.
VkDescriptorSet VulkanDescSetPool::Allocate(int n, const VkDescriptorSetLayout *layouts, const char *tag) {
if (descPool_ == VK_NULL_HANDLE || usage_ + n >= info_.maxSets) {
// Missing or out of space, need to recreate.
VkResult res = Recreate(grow_);
_assert_msg_(res == VK_SUCCESS, "Could not grow VulkanDescSetPool %s on usage %d", tag_, (int)usage_);
}
VkDescriptorSet desc;
VkDescriptorSetAllocateInfo descAlloc{ VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO };
descAlloc.descriptorPool = descPool_;
descAlloc.descriptorSetCount = n;
descAlloc.pSetLayouts = layouts;
VkResult result = vkAllocateDescriptorSets(vulkan_->GetDevice(), &descAlloc, &desc);
if (result == VK_ERROR_FRAGMENTED_POOL || result < 0) {
// There seems to have been a spec revision. Here we should apparently recreate the descriptor pool,
// so let's do that. See https://www.khronos.org/registry/vulkan/specs/1.0/man/html/vkAllocateDescriptorSets.html
// Fragmentation shouldn't really happen though since we wipe the pool every frame.
VkResult res = Recreate(false);
_assert_msg_(res == VK_SUCCESS, "Ran out of descriptor space (frag?) and failed to recreate a descriptor pool. sz=%d res=%d", usage_, (int)res);
// Need to update this pointer since we have allocated a new one.
descAlloc.descriptorPool = descPool_;
result = vkAllocateDescriptorSets(vulkan_->GetDevice(), &descAlloc, &desc);
_assert_msg_(result == VK_SUCCESS, "Ran out of descriptor space (frag?) and failed to allocate after recreating a descriptor pool. res=%d", (int)result);
}
// Reached in release builds if the asserts above are compiled out.
if (result != VK_SUCCESS) {
return VK_NULL_HANDLE;
}
vulkan_->SetDebugName(desc, VK_OBJECT_TYPE_DESCRIPTOR_SET, tag);
return desc;
}
// Returns all sets to the pool and runs the clear_ callback so any cached
// VkDescriptorSet handles handed out by Allocate() are dropped.
void VulkanDescSetPool::Reset() {
	_assert_msg_(descPool_ != VK_NULL_HANDLE, "VulkanDescSetPool::Reset without valid pool");

	vkResetDescriptorPool(vulkan_->GetDevice(), descPool_, 0);
	clear_();
	usage_ = 0;
}
// Queues the pool for deferred deletion and clears cached handles.
// Safe to call when the pool was never created (or already destroyed).
void VulkanDescSetPool::Destroy() {
	_assert_msg_(vulkan_ != nullptr, "VulkanDescSetPool::Destroy without VulkanContext");

	if (descPool_ == VK_NULL_HANDLE) {
		return;  // Nothing to destroy.
	}

	vulkan_->Delete().QueueDeleteDescriptorPool(descPool_);
	clear_();
	usage_ = 0;
}
// (Re)creates the descriptor pool. If grow is set, doubles maxSets and every
// per-type descriptor count first. Any existing pool is queued for deferred
// deletion and cached handles are cleared via clear_().
VkResult VulkanDescSetPool::Recreate(bool grow) {
	_assert_msg_(vulkan_ != nullptr, "VulkanDescSetPool::Recreate without VulkanContext");

	const uint32_t prevSize = info_.maxSets;
	if (grow) {
		info_.maxSets *= 2;
		for (auto &poolSize : sizes_) {
			poolSize.descriptorCount *= 2;
		}
	}

	// Delete the pool if it already exists.
	if (descPool_ != VK_NULL_HANDLE) {
		DEBUG_LOG(G3D, "Reallocating %s desc pool from %d to %d", tag_, prevSize, info_.maxSets);
		vulkan_->Delete().QueueDeleteDescriptorPool(descPool_);
		clear_();
		usage_ = 0;
	}

	info_.pPoolSizes = sizes_.data();
	info_.poolSizeCount = (uint32_t)sizes_.size();

	VkResult result = vkCreateDescriptorPool(vulkan_->GetDevice(), &info_, nullptr, &descPool_);
	if (result == VK_SUCCESS) {
		vulkan_->SetDebugName(descPool_, VK_OBJECT_TYPE_DESCRIPTOR_POOL, tag_);
	}
	return result;
}
VulkanPushPool::VulkanPushPool(VulkanContext *vulkan, const char *name, size_t originalBlockSize, VkBufferUsageFlags usage)
: vulkan_(vulkan), name_(name), originalBlockSize_(originalBlockSize), usage_(usage) {
RegisterGPUMemoryManager(this);
{
std::lock_guard<std::mutex> guard(g_pushBufferListMutex);
g_pushBuffers.insert(this);
}
for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) {
blocks_.push_back(CreateBlock(originalBlockSize));
blocks_.back().original = true;
@ -46,7 +289,11 @@ VulkanPushPool::VulkanPushPool(VulkanContext *vulkan, const char *name, size_t o
}
VulkanPushPool::~VulkanPushPool() {
UnregisterGPUMemoryManager(this);
{
std::lock_guard<std::mutex> guard(g_pushBufferListMutex);
g_pushBuffers.erase(this);
}
_dbg_assert_(blocks_.empty());
}
@ -74,7 +321,7 @@ VulkanPushPool::Block VulkanPushPool::CreateBlock(size_t size) {
_assert_(result == VK_SUCCESS);
result = vmaMapMemory(vulkan_->Allocator(), block.allocation, (void **)(&block.writePtr));
_assert_msg_(result == VK_SUCCESS, "VulkanPushPool: Failed to map memory (result = %s)", VulkanResultToString(result));
_assert_(result == VK_SUCCESS);
_assert_msg_(block.writePtr != nullptr, "VulkanPushPool: Failed to map memory on block of size %d", (int)block.size);
return block;

View file

@ -5,9 +5,7 @@
#include <functional>
#include <vector>
#include "Common/Data/Collections/FastVec.h"
#include "Common/GPU/Vulkan/VulkanContext.h"
#include "Common/GPU/GPUBackendCommon.h"
// Forward declaration
VK_DEFINE_HANDLE(VmaAllocation);
@ -16,10 +14,101 @@ VK_DEFINE_HANDLE(VmaAllocation);
//
// Vulkan memory management utils.
// Just an abstract thing to get debug information.
// Abstract interface implemented by the push-buffer classes below so the
// memory profiler can enumerate them (see GetActiveVulkanMemoryManagers()).
class VulkanMemoryManager {
public:
virtual ~VulkanMemoryManager() {}
// Writes a human-readable usage summary into buffer (at most bufSize bytes).
virtual void GetDebugString(char *buffer, size_t bufSize) const = 0;
virtual const char *Name() const = 0; // for sorting
};
// VulkanPushBuffer
// Simple incrementing allocator.
// Use these to push vertex, index and uniform data. Generally you'll have two or three of these
// and alternate on each frame. Make sure not to reset until the fence from the last time you used it
// has completed.
// NOTE: This has now been replaced with VulkanPushPool for all uses except the vertex cache.
class VulkanPushBuffer : public VulkanMemoryManager {
// One link in the buffer chain: a VkBuffer plus its VMA allocation.
struct BufInfo {
VkBuffer buffer;
VmaAllocation allocation;
};
public:
// NOTE: If you create a push buffer with PushBufferType::GPU_ONLY,
// then you can't use any of the push functions as pointers will not be reachable from the CPU.
// You must in this case use Allocate() only, and pass the returned offset and the VkBuffer to Vulkan APIs.
VulkanPushBuffer(VulkanContext *vulkan, const char *name, size_t size, VkBufferUsageFlags usage);
~VulkanPushBuffer();
// Queues all buffers for deferred deletion. Must be called before the destructor runs.
void Destroy(VulkanContext *vulkan);
void Reset() { offset_ = 0; }
void GetDebugString(char *buffer, size_t bufSize) const override;
const char *Name() const override {
return name_;
}
// Needs context in case of defragment.
void Begin(VulkanContext *vulkan) {
buf_ = 0;
offset_ = 0;
// Note: we must defrag because some buffers may be smaller than size_.
Defragment(vulkan);
Map();
}
void BeginNoReset() { Map(); }
void End() { Unmap(); }
void Map();
void Unmap();
// When using the returned memory, make sure to bind the returned vkbuf.
// alignment must be a power of two (rounding is done with a bit mask).
// May switch to a new (larger) buffer if numBytes doesn't fit in the current one.
uint8_t *Allocate(VkDeviceSize numBytes, VkDeviceSize alignment, VkBuffer *vkbuf, uint32_t *bindOffset) {
size_t offset = (offset_ + alignment - 1) & ~(alignment - 1);
if (offset + numBytes > size_) {
NextBuffer(numBytes);
offset = offset_;
}
offset_ = offset + numBytes;
*bindOffset = (uint32_t)offset;
*vkbuf = buffers_[buf_].buffer;
return writePtr_ + offset;
}
// Convenience: Allocate() plus memcpy of data. Returns the bind offset.
VkDeviceSize Push(const void *data, VkDeviceSize numBytes, int alignment, VkBuffer *vkbuf) {
uint32_t bindOffset;
uint8_t *ptr = Allocate(numBytes, alignment, vkbuf, &bindOffset);
memcpy(ptr, data, numBytes);
return bindOffset;
}
size_t GetOffset() const { return offset_; }
size_t GetTotalSize() const;
private:
bool AddBuffer();
void NextBuffer(size_t minSize);
void Defragment(VulkanContext *vulkan);
VulkanContext *vulkan_;
std::vector<BufInfo> buffers_;
size_t buf_ = 0;      // index of the current write buffer in buffers_
size_t offset_ = 0;   // write offset within the current buffer
size_t size_ = 0;     // size of each newly created buffer
uint8_t *writePtr_ = nullptr;  // mapped pointer of the current buffer, null when unmapped
VkBufferUsageFlags usage_;
const char *name_;
};
// Simple memory pushbuffer pool that can share blocks between the "frames", to reduce the impact of push memory spikes -
// a later frame can gobble up redundant buffers from an earlier frame even if they don't share frame index.
// NOT thread safe! Can only be used from one thread (our main thread).
class VulkanPushPool : public GPUMemoryManager {
class VulkanPushPool : public VulkanMemoryManager {
public:
VulkanPushPool(VulkanContext *vulkan, const char *name, size_t originalBlockSize, VkBufferUsageFlags usage);
~VulkanPushPool();
@ -54,8 +143,6 @@ public:
return blocks_[curBlockIndex_].writePtr;
}
// NOTE: If you can avoid this by writing the data directly into memory returned from Allocate,
// do so. Savings from avoiding memcpy can be significant.
VkDeviceSize Push(const void *data, VkDeviceSize numBytes, int alignment, VkBuffer *vkbuf) {
uint32_t bindOffset;
uint8_t *ptr = Allocate(numBytes, alignment, vkbuf, &bindOffset);
@ -94,3 +181,36 @@ private:
int curBlockIndex_ = -1;
const char *name_;
};
// Only appropriate for use in a per-frame pool.
// Wraps a VkDescriptorPool that is recreated (and optionally grown) on demand.
class VulkanDescSetPool {
public:
// tag: debug name; grow: double the pool size when it runs out of space.
VulkanDescSetPool(const char *tag, bool grow) : tag_(tag), grow_(grow) {}
~VulkanDescSetPool();
// Must call this before use: defines how to clear cache of ANY returned values from Allocate().
void Setup(const std::function<void()> &clear) {
clear_ = clear;
}
void Create(VulkanContext *vulkan, const VkDescriptorPoolCreateInfo &info, const std::vector<VkDescriptorPoolSize> &sizes);
// Allocate a new set, which may resize and empty the current sets.
// Use only for the current frame, unless in a cache cleared by clear_.
VkDescriptorSet Allocate(int n, const VkDescriptorSetLayout *layouts, const char *tag);
void Reset();
void Destroy();
private:
// (Re)creates the pool; doubles the sizes first when grow is set.
VkResult Recreate(bool grow);
const char *tag_;
VulkanContext *vulkan_ = nullptr;
VkDescriptorPool descPool_ = VK_NULL_HANDLE;
VkDescriptorPoolCreateInfo info_{};  // pPoolSizes/poolSizeCount filled in by Recreate()
std::vector<VkDescriptorPoolSize> sizes_;
std::function<void()> clear_;  // drops caches of sets handed out by Allocate()
uint32_t usage_ = 0;           // sets handed out since the last Reset/Recreate
bool grow_;
};
std::vector<VulkanMemoryManager *> GetActiveVulkanMemoryManagers();

View file

@ -129,6 +129,7 @@ bool VulkanQueueRunner::CreateSwapchain(VkCommandBuffer cmdInit) {
return true;
}
bool VulkanQueueRunner::InitBackbufferFramebuffers(int width, int height) {
VkResult res;
// We share the same depth buffer but have multiple color buffers, see the loop below.
@ -172,7 +173,7 @@ bool VulkanQueueRunner::InitDepthStencilBuffer(VkCommandBuffer cmd) {
image_info.queueFamilyIndexCount = 0;
image_info.pQueueFamilyIndices = nullptr;
image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
image_info.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT;
image_info.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
image_info.flags = 0;
depth_.format = depth_format;
@ -250,8 +251,8 @@ void VulkanQueueRunner::DestroyBackBuffers() {
// Self-dependency: https://github.com/gpuweb/gpuweb/issues/442#issuecomment-547604827
// Also see https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies
VKRRenderPass *VulkanQueueRunner::GetRenderPass(const RPKey &key) {
VKRRenderPass *foundPass;
if (renderPasses_.Get(key, &foundPass)) {
auto foundPass = renderPasses_.Get(key);
if (foundPass) {
return foundPass;
}
@ -260,6 +261,31 @@ VKRRenderPass *VulkanQueueRunner::GetRenderPass(const RPKey &key) {
return pass;
}
// Must match the subpass self-dependency declared above.
// Records a color-attachment-write -> input-attachment-read barrier within
// VK_IMAGE_LAYOUT_GENERAL for all layers of mip 0. Depth is not yet supported.
void VulkanQueueRunner::SelfDependencyBarrier(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier) {
	if (!(aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
		_assert_msg_(false, "Depth self-dependencies not yet supported");
		return;
	}
	recordBarrier->TransitionImage(
		img.image,
		0,
		1,
		img.numLayers,
		aspect,
		VK_IMAGE_LAYOUT_GENERAL,
		VK_IMAGE_LAYOUT_GENERAL,
		VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
		VK_ACCESS_INPUT_ATTACHMENT_READ_BIT,
		VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
		VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT
	);
}
void VulkanQueueRunner::PreprocessSteps(std::vector<VKRStep *> &steps) {
// Optimizes renderpasses, then sequences them.
// Planned optimizations:
@ -337,8 +363,8 @@ void VulkanQueueRunner::PreprocessSteps(std::vector<VKRStep *> &steps) {
}
}
void VulkanQueueRunner::RunSteps(std::vector<VKRStep *> &steps, int curFrame, FrameData &frameData, FrameDataShared &frameDataShared, bool keepSteps) {
QueueProfileContext *profile = frameData.profile.enabled ? &frameData.profile : nullptr;
void VulkanQueueRunner::RunSteps(std::vector<VKRStep *> &steps, FrameData &frameData, FrameDataShared &frameDataShared, bool keepSteps) {
QueueProfileContext *profile = frameData.profilingEnabled_ ? &frameData.profile : nullptr;
if (profile)
profile->cpuStartTime = time_now_d();
@ -393,7 +419,7 @@ void VulkanQueueRunner::RunSteps(std::vector<VKRStep *> &steps, int curFrame, Fr
vkCmdBeginDebugUtilsLabelEXT(cmd, &labelInfo);
}
}
PerformRenderPass(step, cmd, curFrame);
PerformRenderPass(step, cmd);
break;
case VKRStepType::COPY:
PerformCopy(step, cmd);
@ -411,9 +437,9 @@ void VulkanQueueRunner::RunSteps(std::vector<VKRStep *> &steps, int curFrame, Fr
break;
}
if (profile && profile->timestampsEnabled && profile->timestampDescriptions.size() + 1 < MAX_TIMESTAMP_QUERIES) {
if (profile && profile->timestampDescriptions.size() + 1 < MAX_TIMESTAMP_QUERIES) {
vkCmdWriteTimestamp(cmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, profile->queryPool, (uint32_t)profile->timestampDescriptions.size());
profile->timestampDescriptions.push_back(StepToString(vulkan_, step));
profile->timestampDescriptions.push_back(StepToString(step));
}
if (emitLabels) {
@ -455,7 +481,7 @@ void VulkanQueueRunner::ApplyMGSHack(std::vector<VKRStep *> &steps) {
last = j - 1;
// should really also check descriptor sets...
if (steps[j]->commands.size()) {
const VkRenderData &cmd = steps[j]->commands.back();
VkRenderData &cmd = steps[j]->commands.back();
if (cmd.cmd == VKRRenderCommand::DRAW_INDEXED && cmd.draw.count != 6)
last = j - 1;
}
@ -673,16 +699,36 @@ const char *AspectToString(VkImageAspectFlags aspect) {
}
}
std::string VulkanQueueRunner::StepToString(VulkanContext *vulkan, const VKRStep &step) {
// Debug names indexed by (size_t)RenderPassType (see StepToString below);
// the order must match the RenderPassType enum exactly.
static const char *rpTypeDebugNames[] = {
"RENDER",
"RENDER_DEPTH",
"RENDER_INPUT",
"RENDER_DEPTH_INPUT",
"MV_RENDER",
"MV_RENDER_DEPTH",
"MV_RENDER_INPUT",
"MV_RENDER_DEPTH_INPUT",
"MS_RENDER",
"MS_RENDER_DEPTH",
"MS_RENDER_INPUT",
"MS_RENDER_DEPTH_INPUT",
"MS_MV_RENDER",
"MS_MV_RENDER_DEPTH",
"MS_MV_RENDER_INPUT",
"MS_MV_RENDER_DEPTH_INPUT",
"BACKBUF",
};
std::string VulkanQueueRunner::StepToString(const VKRStep &step) const {
char buffer[256];
switch (step.stepType) {
case VKRStepType::RENDER:
{
int w = step.render.framebuffer ? step.render.framebuffer->width : vulkan->GetBackbufferWidth();
int h = step.render.framebuffer ? step.render.framebuffer->height : vulkan->GetBackbufferHeight();
int w = step.render.framebuffer ? step.render.framebuffer->width : vulkan_->GetBackbufferWidth();
int h = step.render.framebuffer ? step.render.framebuffer->height : vulkan_->GetBackbufferHeight();
int actual_w = step.render.renderArea.extent.width;
int actual_h = step.render.renderArea.extent.height;
const char *renderCmd = GetRPTypeName(step.render.renderPassType);
const char *renderCmd = rpTypeDebugNames[(size_t)step.render.renderPassType];
snprintf(buffer, sizeof(buffer), "%s %s %s (draws: %d, %dx%d/%dx%d)", renderCmd, step.tag, step.render.framebuffer ? step.render.framebuffer->Tag() : "", step.render.numDraws, actual_w, actual_h, w, h);
break;
}
@ -872,9 +918,15 @@ void VulkanQueueRunner::LogRenderPass(const VKRStep &pass, bool verbose) {
case VKRRenderCommand::REMOVED:
INFO_LOG(G3D, " (Removed)");
break;
case VKRRenderCommand::SELF_DEPENDENCY_BARRIER:
INFO_LOG(G3D, " SelfBarrier()");
break;
case VKRRenderCommand::BIND_GRAPHICS_PIPELINE:
INFO_LOG(G3D, " BindGraphicsPipeline(%x)", (int)(intptr_t)cmd.graphics_pipeline.pipeline);
break;
case VKRRenderCommand::BIND_COMPUTE_PIPELINE:
INFO_LOG(G3D, " BindComputePipeline(%x)", (int)(intptr_t)cmd.compute_pipeline.pipeline);
break;
case VKRRenderCommand::BLEND:
INFO_LOG(G3D, " BlendColor(%08x)", cmd.blendColor.color);
break;
@ -914,19 +966,19 @@ void VulkanQueueRunner::LogRenderPass(const VKRStep &pass, bool verbose) {
}
void VulkanQueueRunner::LogCopy(const VKRStep &step) {
INFO_LOG(G3D, "%s", StepToString(vulkan_, step).c_str());
INFO_LOG(G3D, "%s", StepToString(step).c_str());
}
void VulkanQueueRunner::LogBlit(const VKRStep &step) {
INFO_LOG(G3D, "%s", StepToString(vulkan_, step).c_str());
INFO_LOG(G3D, "%s", StepToString(step).c_str());
}
void VulkanQueueRunner::LogReadback(const VKRStep &step) {
INFO_LOG(G3D, "%s", StepToString(vulkan_, step).c_str());
INFO_LOG(G3D, "%s", StepToString(step).c_str());
}
void VulkanQueueRunner::LogReadbackImage(const VKRStep &step) {
INFO_LOG(G3D, "%s", StepToString(vulkan_, step).c_str());
INFO_LOG(G3D, "%s", StepToString(step).c_str());
}
void TransitionToOptimal(VkCommandBuffer cmd, VkImage colorImage, VkImageLayout colorLayout, VkImage depthStencilImage, VkImageLayout depthStencilLayout, int numLayers, VulkanBarrier *recordBarrier) {
@ -1099,7 +1151,7 @@ void TransitionFromOptimal(VkCommandBuffer cmd, VkImage colorImage, VkImageLayou
}
}
void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer cmd, int curFrame) {
void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer cmd) {
for (size_t i = 0; i < step.preTransitions.size(); i++) {
const TransitionRequest &iter = step.preTransitions[i];
if (iter.aspect == VK_IMAGE_ASPECT_COLOR_BIT && iter.fb->color.layout != iter.targetLayout) {
@ -1189,13 +1241,12 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
VKRGraphicsPipeline *lastGraphicsPipeline = nullptr;
VKRComputePipeline *lastComputePipeline = nullptr;
const auto &commands = step.commands;
auto &commands = step.commands;
// We can do a little bit of state tracking here to eliminate some calls into the driver.
// The stencil ones are very commonly mostly redundant so let's eliminate them where possible.
// Might also want to consider scissor and viewport.
VkPipeline lastPipeline = VK_NULL_HANDLE;
FastVec<PendingDescSet> *descSets = nullptr;
VkPipelineLayout pipelineLayout = VK_NULL_HANDLE;
bool pipelineOK = false;
@ -1238,9 +1289,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
if (pipeline != VK_NULL_HANDLE) {
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
descSets = &c.graphics_pipeline.pipelineLayout->frameData[curFrame].descSets_;
pipelineLayout = c.graphics_pipeline.pipelineLayout->pipelineLayout;
_dbg_assert_(pipelineLayout != VK_NULL_HANDLE);
pipelineLayout = c.pipeline.pipelineLayout;
lastGraphicsPipeline = graphicsPipeline;
pipelineOK = true;
} else {
@ -1255,6 +1304,20 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
break;
}
case VKRRenderCommand::BIND_COMPUTE_PIPELINE:
{
VKRComputePipeline *computePipeline = c.compute_pipeline.pipeline;
if (computePipeline != lastComputePipeline) {
VkPipeline pipeline = computePipeline->pipeline->BlockUntilReady();
if (pipeline != VK_NULL_HANDLE) {
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
pipelineLayout = c.pipeline.pipelineLayout;
lastComputePipeline = computePipeline;
}
}
break;
}
case VKRRenderCommand::VIEWPORT:
if (fb != nullptr) {
vkCmdSetViewport(cmd, 0, 1, &c.viewport.vp);
@ -1298,6 +1361,21 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
break;
}
case VKRRenderCommand::SELF_DEPENDENCY_BARRIER:
{
_assert_(step.render.pipelineFlags & PipelineFlags::USES_INPUT_ATTACHMENT);
_assert_(fb);
VulkanBarrier barrier;
if (fb->sampleCount != VK_SAMPLE_COUNT_1_BIT) {
// Rendering is happening to the multisample buffer, not the color buffer.
SelfDependencyBarrier(fb->msaaColor, VK_IMAGE_ASPECT_COLOR_BIT, &barrier);
} else {
SelfDependencyBarrier(fb->color, VK_IMAGE_ASPECT_COLOR_BIT, &barrier);
}
barrier.Flush(cmd);
break;
}
case VKRRenderCommand::PUSH_CONSTANTS:
if (pipelineOK) {
vkCmdPushConstants(cmd, pipelineLayout, c.push.stages, c.push.offset, c.push.size, c.push.data);
@ -1321,10 +1399,8 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
case VKRRenderCommand::DRAW_INDEXED:
if (pipelineOK) {
VkDescriptorSet set = (*descSets)[c.drawIndexed.descSetIndex].set;
_dbg_assert_(set != VK_NULL_HANDLE);
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &set, c.drawIndexed.numUboOffsets, c.drawIndexed.uboOffsets);
vkCmdBindIndexBuffer(cmd, c.drawIndexed.ibuffer, c.drawIndexed.ioffset, VK_INDEX_TYPE_UINT16);
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &c.drawIndexed.ds, c.drawIndexed.numUboOffsets, c.drawIndexed.uboOffsets);
vkCmdBindIndexBuffer(cmd, c.drawIndexed.ibuffer, c.drawIndexed.ioffset, (VkIndexType)c.drawIndexed.indexType);
VkDeviceSize voffset = c.drawIndexed.voffset;
vkCmdBindVertexBuffers(cmd, 0, 1, &c.drawIndexed.vbuffer, &voffset);
vkCmdDrawIndexed(cmd, c.drawIndexed.count, c.drawIndexed.instances, 0, 0, 0);
@ -1333,9 +1409,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
case VKRRenderCommand::DRAW:
if (pipelineOK) {
VkDescriptorSet set = (*descSets)[c.drawIndexed.descSetIndex].set;
_dbg_assert_(set != VK_NULL_HANDLE);
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &set, c.draw.numUboOffsets, c.draw.uboOffsets);
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &c.draw.ds, c.draw.numUboOffsets, c.draw.uboOffsets);
if (c.draw.vbuffer) {
vkCmdBindVertexBuffers(cmd, 0, 1, &c.draw.vbuffer, &c.draw.voffset);
}
@ -1953,7 +2027,8 @@ void VulkanQueueRunner::PerformReadback(const VKRStep &step, VkCommandBuffer cmd
key.height = step.readback.srcRect.extent.height;
// See if there's already a buffer we can reuse
if (!frameData.readbacks_.Get(key, &cached)) {
cached = frameData.readbacks_.Get(key);
if (!cached) {
cached = new CachedReadback();
cached->bufferSize = 0;
frameData.readbacks_.Insert(key, cached);
@ -2033,8 +2108,8 @@ bool VulkanQueueRunner::CopyReadbackBuffer(FrameData &frameData, VKRFramebuffer
key.framebuf = src;
key.width = width;
key.height = height;
CachedReadback *cached;
if (frameData.readbacks_.Get(key, &cached)) {
CachedReadback *cached = frameData.readbacks_.Get(key);
if (cached) {
readback = cached;
} else {
// Didn't have a cached image ready yet

View file

@ -6,7 +6,6 @@
#include "Common/Thread/Promise.h"
#include "Common/Data/Collections/Hashmaps.h"
#include "Common/Data/Collections/FastVec.h"
#include "Common/GPU/Vulkan/VulkanContext.h"
#include "Common/GPU/Vulkan/VulkanBarrier.h"
#include "Common/GPU/Vulkan/VulkanFrameData.h"
@ -19,7 +18,6 @@ class VKRFramebuffer;
struct VKRGraphicsPipeline;
struct VKRComputePipeline;
struct VKRImage;
struct VKRPipelineLayout;
struct FrameData;
enum {
@ -31,6 +29,7 @@ enum {
enum class VKRRenderCommand : uint8_t {
REMOVED,
BIND_GRAPHICS_PIPELINE, // async
BIND_COMPUTE_PIPELINE, // async
STENCIL,
BLEND,
VIEWPORT,
@ -39,6 +38,7 @@ enum class VKRRenderCommand : uint8_t {
DRAW,
DRAW_INDEXED,
PUSH_CONSTANTS,
SELF_DEPENDENCY_BARRIER,
DEBUG_ANNOTATION,
NUM_RENDER_COMMANDS,
};
@ -47,21 +47,30 @@ enum class PipelineFlags : u8 {
NONE = 0,
USES_BLEND_CONSTANT = (1 << 1),
USES_DEPTH_STENCIL = (1 << 2), // Reads or writes the depth or stencil buffers.
USES_GEOMETRY_SHADER = (1 << 3),
USES_MULTIVIEW = (1 << 4), // Inherited from the render pass it was created with.
USES_DISCARD = (1 << 5),
USES_INPUT_ATTACHMENT = (1 << 3),
USES_GEOMETRY_SHADER = (1 << 4),
USES_MULTIVIEW = (1 << 5), // Inherited from the render pass it was created with.
USES_DISCARD = (1 << 6),
};
ENUM_CLASS_BITOPS(PipelineFlags);
struct VkRenderData {
VKRRenderCommand cmd;
union {
struct {
VkPipeline pipeline;
VkPipelineLayout pipelineLayout;
} pipeline;
struct {
VKRGraphicsPipeline *pipeline;
VKRPipelineLayout *pipelineLayout;
VkPipelineLayout pipelineLayout;
} graphics_pipeline;
struct {
uint32_t descSetIndex;
VKRComputePipeline *pipeline;
VkPipelineLayout pipelineLayout;
} compute_pipeline;
struct {
VkDescriptorSet ds;
int numUboOffsets;
uint32_t uboOffsets[3];
VkBuffer vbuffer;
@ -70,15 +79,16 @@ struct VkRenderData {
uint32_t offset;
} draw;
struct {
uint32_t descSetIndex;
VkDescriptorSet ds;
int numUboOffsets;
uint32_t uboOffsets[3];
uint16_t numUboOffsets;
uint16_t instances;
VkBuffer vbuffer;
VkBuffer ibuffer;
uint32_t voffset;
uint32_t ioffset;
uint32_t count;
int16_t instances;
int16_t indexType;
} drawIndexed;
struct {
uint32_t clearColor;
@ -110,7 +120,9 @@ struct VkRenderData {
const char *annotation;
} debugAnnotation;
struct {
int setIndex;
int setNumber;
VkDescriptorSet set;
VkPipelineLayout pipelineLayout;
} bindDescSet;
};
};
@ -141,7 +153,7 @@ struct VKRStep {
~VKRStep() {}
VKRStepType stepType;
FastVec<VkRenderData> commands;
std::vector<VkRenderData> commands;
TinySet<TransitionRequest, 4> preTransitions;
TinySet<VKRFramebuffer *, 8> dependencies;
const char *tag;
@ -200,14 +212,9 @@ struct VKRStep {
// These are enqueued from the main thread,
// and the render thread pops them off
struct VKRRenderThreadTask {
VKRRenderThreadTask(VKRRunType _runType) : runType(_runType) {}
std::vector<VKRStep *> steps;
int frame = -1;
int frame;
VKRRunType runType;
// Avoid copying these by accident.
VKRRenderThreadTask(VKRRenderThreadTask &) = delete;
VKRRenderThreadTask &operator =(VKRRenderThreadTask &) = delete;
};
class VulkanQueueRunner {
@ -220,10 +227,10 @@ public:
}
void PreprocessSteps(std::vector<VKRStep *> &steps);
void RunSteps(std::vector<VKRStep *> &steps, int curFrame, FrameData &frameData, FrameDataShared &frameDataShared, bool keepSteps = false);
void RunSteps(std::vector<VKRStep *> &steps, FrameData &frameData, FrameDataShared &frameDataShared, bool keepSteps = false);
void LogSteps(const std::vector<VKRStep *> &steps, bool verbose);
static std::string StepToString(VulkanContext *vulkan, const VKRStep &step);
std::string StepToString(const VKRStep &step) const;
void CreateDeviceObjects();
void DestroyDeviceObjects();
@ -280,7 +287,7 @@ private:
bool InitDepthStencilBuffer(VkCommandBuffer cmd); // Used for non-buffered rendering.
VKRRenderPass *PerformBindFramebufferAsRenderTarget(const VKRStep &pass, VkCommandBuffer cmd);
void PerformRenderPass(const VKRStep &pass, VkCommandBuffer cmd, int curFrame);
void PerformRenderPass(const VKRStep &pass, VkCommandBuffer cmd);
void PerformCopy(const VKRStep &pass, VkCommandBuffer cmd);
void PerformBlit(const VKRStep &pass, VkCommandBuffer cmd);
void PerformReadback(const VKRStep &pass, VkCommandBuffer cmd, FrameData &frameData);
@ -302,6 +309,8 @@ private:
static void SetupTransitionToTransferDst(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);
static void SetupTransferDstWriteAfterWrite(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);
static void SelfDependencyBarrier(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);
VulkanContext *vulkan_;
VkFramebuffer backbuffer_ = VK_NULL_HANDLE;
@ -312,7 +321,7 @@ private:
// Renderpasses, all combinations of preserving or clearing or dont-care-ing fb contents.
// Each VKRRenderPass contains all compatibility classes (which attachments they have, etc).
DenseHashMap<RPKey, VKRRenderPass *> renderPasses_;
DenseHashMap<RPKey, VKRRenderPass *, nullptr> renderPasses_;
// Readback buffer. Currently we only support synchronous readback, so we only really need one.
// We size it generously.

View file

@ -12,7 +12,6 @@
#include "Common/GPU/Vulkan/VulkanContext.h"
#include "Common/GPU/Vulkan/VulkanRenderManager.h"
#include "Common/LogReporting.h"
#include "Common/Thread/ThreadUtil.h"
#include "Common/VR/PPSSPPVR.h"
@ -30,10 +29,6 @@ using namespace PPSSPP_VK;
// renderPass is an example of the "compatibility class" or RenderPassType type.
bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleRenderPass, RenderPassType rpType, VkSampleCountFlagBits sampleCount, double scheduleTime, int countToCompile) {
// Good torture test to test the shutdown-while-precompiling-shaders issue on PC where it's normally
// hard to catch because shaders compile so fast.
// sleep_ms(200);
bool multisample = RenderPassTypeHasMultisample(rpType);
if (multisample) {
if (sampleCount_ != VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM) {
@ -116,7 +111,7 @@ bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleR
pipe.pDynamicState = &desc->ds;
pipe.pInputAssemblyState = &inputAssembly;
pipe.pMultisampleState = &ms;
pipe.layout = desc->pipelineLayout->pipelineLayout;
pipe.layout = desc->pipelineLayout;
pipe.basePipelineHandle = VK_NULL_HANDLE;
pipe.basePipelineIndex = 0;
pipe.subpass = 0;
@ -192,7 +187,7 @@ void VKRGraphicsPipeline::DestroyVariantsInstant(VkDevice device) {
VKRGraphicsPipeline::~VKRGraphicsPipeline() {
// This is called from the callbacked queued in QueueForDeletion.
// When we reach here, we should already be empty, so let's assert on that.
// Here we are free to directly delete stuff, don't need to queue.
for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {
_assert_(!pipeline[i]);
}
@ -200,14 +195,6 @@ VKRGraphicsPipeline::~VKRGraphicsPipeline() {
desc->Release();
}
void VKRGraphicsPipeline::BlockUntilCompiled() {
for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {
if (pipeline[i]) {
pipeline[i]->BlockUntilReady();
}
}
}
void VKRGraphicsPipeline::QueueForDeletion(VulkanContext *vulkan) {
// Can't destroy variants here, the pipeline still lives for a while.
vulkan->Delete().QueueCallback([](VulkanContext *vulkan, void *p) {
@ -260,21 +247,15 @@ bool VKRComputePipeline::CreateAsync(VulkanContext *vulkan) {
return true;
}
VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan, bool useThread, HistoryBuffer<FrameTimeData, FRAME_TIME_HISTORY_LENGTH> &frameTimeHistory)
VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan)
: vulkan_(vulkan), queueRunner_(vulkan),
initTimeMs_("initTimeMs"),
totalGPUTimeMs_("totalGPUTimeMs"),
renderCPUTimeMs_("renderCPUTimeMs"),
descUpdateTimeMs_("descUpdateCPUTimeMs"),
useRenderThread_(useThread),
frameTimeHistory_(frameTimeHistory)
renderCPUTimeMs_("renderCPUTimeMs")
{
inflightFramesAtStart_ = vulkan_->GetInflightFrames();
// For present timing experiments. Disabled for now.
measurePresentTime_ = false;
frameDataShared_.Init(vulkan, useThread, measurePresentTime_);
frameDataShared_.Init(vulkan);
for (int i = 0; i < inflightFramesAtStart_; i++) {
frameData_[i].Init(vulkan, i);
@ -289,6 +270,7 @@ bool VulkanRenderManager::CreateBackbuffers() {
return false;
}
VkCommandBuffer cmdInit = GetInitCmd();
if (!queueRunner_.CreateSwapchain(cmdInit)) {
@ -310,53 +292,35 @@ bool VulkanRenderManager::CreateBackbuffers() {
outOfDateFrames_ = 0;
for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
auto &frameData = frameData_[i];
frameData.readyForFence = true; // Just in case.
}
// Start the thread(s).
// Start the thread.
if (HasBackbuffers()) {
run_ = true; // For controlling the compiler thread's exit
if (useRenderThread_) {
INFO_LOG(G3D, "Starting Vulkan submission thread");
thread_ = std::thread(&VulkanRenderManager::ThreadFunc, this);
}
INFO_LOG(G3D, "Starting Vulkan submission thread");
thread_ = std::thread(&VulkanRenderManager::ThreadFunc, this);
INFO_LOG(G3D, "Starting Vulkan compiler thread");
compileThread_ = std::thread(&VulkanRenderManager::CompileThreadFunc, this);
if (measurePresentTime_ && vulkan_->Extensions().KHR_present_wait && vulkan_->GetPresentMode() == VK_PRESENT_MODE_FIFO_KHR) {
INFO_LOG(G3D, "Starting Vulkan present wait thread");
presentWaitThread_ = std::thread(&VulkanRenderManager::PresentWaitThreadFunc, this);
}
}
return true;
}
// Called from main thread.
void VulkanRenderManager::StopThread() {
if (useRenderThread_) {
_dbg_assert_(thread_.joinable());
{
// Tell the render thread to quit when it's done.
VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::EXIT);
task->frame = vulkan_->GetCurFrame();
VKRRenderThreadTask task;
task.frame = vulkan_->GetCurFrame();
task.runType = VKRRunType::EXIT;
std::unique_lock<std::mutex> lock(pushMutex_);
renderThreadQueue_.push(task);
pushCondVar_.notify_one();
}
// Compiler and present thread still relies on this.
// Compiler thread still relies on this.
run_ = false;
if (presentWaitThread_.joinable()) {
presentWaitThread_.join();
}
// Stop the thread.
if (useRenderThread_) {
thread_.join();
}
thread_.join();
for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
auto &frameData = frameData_[i];
@ -413,8 +377,6 @@ VulkanRenderManager::~VulkanRenderManager() {
vulkan_->WaitUntilQueueIdle();
_dbg_assert_(pipelineLayouts_.empty());
VkDevice device = vulkan_->GetDevice();
frameDataShared_.Destroy(vulkan_);
for (int i = 0; i < inflightFramesAtStart_; i++) {
@ -515,32 +477,24 @@ void VulkanRenderManager::CompileThreadFunc() {
Task *task = new CreateMultiPipelinesTask(vulkan_, entries);
g_threadManager.EnqueueTask(task);
}
queueRunner_.NotifyCompileDone();
}
}
void VulkanRenderManager::DrainAndBlockCompileQueue() {
void VulkanRenderManager::DrainCompileQueue() {
std::unique_lock<std::mutex> lock(compileMutex_);
compileBlocked_ = true;
compileCond_.notify_all();
while (!compileQueue_.empty()) {
queueRunner_.WaitForCompileNotification();
}
}
void VulkanRenderManager::ReleaseCompileQueue() {
std::unique_lock<std::mutex> lock(compileMutex_);
compileBlocked_ = false;
}
void VulkanRenderManager::ThreadFunc() {
SetCurrentThreadName("RenderMan");
while (true) {
_dbg_assert_(useRenderThread_);
// Pop a task of the queue and execute it.
VKRRenderThreadTask *task = nullptr;
VKRRenderThreadTask task;
{
std::unique_lock<std::mutex> lock(pushMutex_);
while (renderThreadQueue_.empty()) {
@ -552,15 +506,12 @@ void VulkanRenderManager::ThreadFunc() {
// Oh, we got a task! We can now have pushMutex_ unlocked, allowing the host to
// push more work when it feels like it, and just start working.
if (task->runType == VKRRunType::EXIT) {
if (task.runType == VKRRunType::EXIT) {
// Oh, host wanted out. Let's leave.
delete task;
// In this case, there should be no more tasks.
break;
}
Run(*task);
delete task;
Run(task);
}
// Wait for the device to be done with everything, before tearing stuff down.
@ -570,66 +521,18 @@ void VulkanRenderManager::ThreadFunc() {
VLOG("PULL: Quitting");
}
void VulkanRenderManager::PresentWaitThreadFunc() {
SetCurrentThreadName("PresentWait");
_dbg_assert_(vkWaitForPresentKHR != nullptr);
uint64_t waitedId = frameIdGen_;
while (run_) {
const uint64_t timeout = 1000000000ULL; // 1 sec
if (VK_SUCCESS == vkWaitForPresentKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), waitedId, timeout)) {
frameTimeHistory_[waitedId].actualPresent = time_now_d();
frameTimeHistory_[waitedId].waitCount++;
waitedId++;
} else {
// We caught up somehow, which is a bad sign (we should have blocked, right?). Maybe we should break out of the loop?
sleep_ms(1);
frameTimeHistory_[waitedId].waitCount++;
}
_dbg_assert_(waitedId <= frameIdGen_);
}
INFO_LOG(G3D, "Leaving PresentWaitThreadFunc()");
}
void VulkanRenderManager::PollPresentTiming() {
// For VK_GOOGLE_display_timing, we need to poll.
// Poll for information about completed frames.
// NOTE: We seem to get the information pretty late! Like after 6 frames, which is quite weird.
// Tested on POCO F4.
if (vulkan_->Extensions().GOOGLE_display_timing) {
uint32_t count = 0;
vkGetPastPresentationTimingGOOGLE(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &count, nullptr);
if (count > 0) {
VkPastPresentationTimingGOOGLE *timings = new VkPastPresentationTimingGOOGLE[count];
vkGetPastPresentationTimingGOOGLE(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &count, timings);
for (uint32_t i = 0; i < count; i++) {
uint64_t presentId = timings[i].presentID;
frameTimeHistory_[presentId].actualPresent = from_time_raw(timings[i].actualPresentTime);
frameTimeHistory_[presentId].desiredPresentTime = from_time_raw(timings[i].desiredPresentTime);
frameTimeHistory_[presentId].earliestPresentTime = from_time_raw(timings[i].earliestPresentTime);
double presentMargin = from_time_raw_relative(timings[i].presentMargin);
frameTimeHistory_[presentId].presentMargin = presentMargin;
}
delete[] timings;
}
}
}
void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfiler) {
double frameBeginTime = time_now_d()
VLOG("BeginFrame");
VkDevice device = vulkan_->GetDevice();
int curFrame = vulkan_->GetCurFrame();
FrameData &frameData = frameData_[curFrame];
VLOG("PUSH: Fencing %d", curFrame);
// Makes sure the submission from the previous time around has happened. Otherwise
// we are not allowed to wait from another thread here..
if (useRenderThread_) {
{
std::unique_lock<std::mutex> lock(frameData.fenceMutex);
while (!frameData.readyForFence) {
frameData.fenceCondVar.wait(lock);
@ -644,29 +547,16 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile
}
vkResetFences(device, 1, &frameData.fence);
uint64_t frameId = frameIdGen_++;
PollPresentTiming();
ResetDescriptorLists(curFrame);
int validBits = vulkan_->GetQueueFamilyProperties(vulkan_->GetGraphicsQueueFamilyIndex()).timestampValidBits;
FrameTimeData &frameTimeData = frameTimeHistory_.Add(frameId);
frameTimeData.frameId = frameId;
frameTimeData.frameBegin = frameBeginTime;
frameTimeData.afterFenceWait = time_now_d();
// Can't set this until after the fence.
frameData.profile.enabled = enableProfiling;
frameData.profile.timestampsEnabled = enableProfiling && validBits > 0;
frameData.frameId = frameId;
frameData.profilingEnabled_ = enableProfiling && validBits > 0;
uint64_t queryResults[MAX_TIMESTAMP_QUERIES];
if (enableProfiling) {
if (frameData.profilingEnabled_) {
// Pull the profiling results from last time and produce a summary!
if (!frameData.profile.timestampDescriptions.empty() && frameData.profile.timestampsEnabled) {
if (!frameData.profile.timestampDescriptions.empty()) {
int numQueries = (int)frameData.profile.timestampDescriptions.size();
VkResult res = vkGetQueryPoolResults(
vulkan_->GetDevice(),
@ -684,13 +574,6 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile
renderCPUTimeMs_.Update((frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0);
renderCPUTimeMs_.Format(line, sizeof(line));
str << line;
descUpdateTimeMs_.Update(frameData.profile.descWriteTime * 1000.0);
descUpdateTimeMs_.Format(line, sizeof(line));
str << line;
snprintf(line, sizeof(line), "Descriptors written: %d\n", frameData.profile.descriptorsWritten);
str << line;
snprintf(line, sizeof(line), "Resource deletions: %d\n", vulkan_->GetLastDeleteCount());
str << line;
for (int i = 0; i < numQueries - 1; i++) {
uint64_t diff = (queryResults[i + 1] - queryResults[i]) & timestampDiffMask;
double milliseconds = (double)diff * timestampConversionFactor;
@ -711,22 +594,10 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile
frameData.profile.profileSummary = "(error getting GPU profile - not ready?)";
}
} else {
std::stringstream str;
char line[256];
renderCPUTimeMs_.Update((frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0);
renderCPUTimeMs_.Format(line, sizeof(line));
str << line;
descUpdateTimeMs_.Update(frameData.profile.descWriteTime * 1000.0);
descUpdateTimeMs_.Format(line, sizeof(line));
str << line;
snprintf(line, sizeof(line), "Descriptors written: %d\n", frameData.profile.descriptorsWritten);
str << line;
frameData.profile.profileSummary = str.str();
frameData.profile.profileSummary = "(no GPU profile data collected)";
}
}
frameData.profile.descriptorsWritten = 0;
// Must be after the fence - this performs deletes.
VLOG("PUSH: BeginFrame %d", curFrame);
@ -734,7 +605,7 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile
vulkan_->BeginFrame(enableLogProfiler ? GetInitCmd() : VK_NULL_HANDLE);
frameData.profile.timestampDescriptions.clear();
if (frameData.profile.timestampsEnabled) {
if (frameData.profilingEnabled_) {
// For various reasons, we need to always use an init cmd buffer in this case to perform the vkCmdResetQueryPool,
// unless we want to limit ourselves to only measure the main cmd buffer.
// Later versions of Vulkan have support for clearing queries on the CPU timeline, but we don't want to rely on that.
@ -750,28 +621,14 @@ VkCommandBuffer VulkanRenderManager::GetInitCmd() {
return frameData_[curFrame].GetInitCmd(vulkan_);
}
void VulkanRenderManager::ReportBadStateForDraw() {
const char *cause1 = "";
char cause2[256];
cause2[0] = '\0';
if (!curRenderStep_) {
cause1 = "No current render step";
}
if (curRenderStep_ && curRenderStep_->stepType != VKRStepType::RENDER) {
cause1 = "Not a render step: ";
std::string str = VulkanQueueRunner::StepToString(vulkan_, *curRenderStep_);
truncate_cpy(cause2, str.c_str());
}
ERROR_LOG_REPORT_ONCE(baddraw, G3D, "Can't draw: %s%s. Step count: %d", cause1, cause2, (int)steps_.size());
}
VKRGraphicsPipeline *VulkanRenderManager::CreateGraphicsPipeline(VKRGraphicsPipelineDesc *desc, PipelineFlags pipelineFlags, uint32_t variantBitmask, VkSampleCountFlagBits sampleCount, bool cacheLoad, const char *tag) {
VKRGraphicsPipeline *pipeline = new VKRGraphicsPipeline(pipelineFlags, tag);
if (!desc->vertexShader || !desc->fragmentShader) {
ERROR_LOG(G3D, "Can't create graphics pipeline with missing vs/ps: %p %p", desc->vertexShader, desc->fragmentShader);
return nullptr;
}
VKRGraphicsPipeline *pipeline = new VKRGraphicsPipeline(pipelineFlags, tag);
pipeline->desc = desc;
pipeline->desc->AddRef();
if (curRenderStep_ && !cacheLoad) {
@ -788,11 +645,7 @@ VKRGraphicsPipeline *VulkanRenderManager::CreateGraphicsPipeline(VKRGraphicsPipe
VKRRenderPassStoreAction::STORE, VKRRenderPassStoreAction::DONT_CARE, VKRRenderPassStoreAction::DONT_CARE,
};
VKRRenderPass *compatibleRenderPass = queueRunner_.GetRenderPass(key);
std::lock_guard<std::mutex> lock(compileMutex_);
if (compileBlocked_) {
delete pipeline;
return nullptr;
}
compileMutex_.lock();
bool needsCompile = false;
for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {
if (!(variantBitmask & (1 << i)))
@ -804,6 +657,10 @@ VKRGraphicsPipeline *VulkanRenderManager::CreateGraphicsPipeline(VKRGraphicsPipe
WARN_LOG(G3D, "Not compiling pipeline that requires depth, for non depth renderpass type");
continue;
}
if ((pipelineFlags & PipelineFlags::USES_INPUT_ATTACHMENT) && !RenderPassTypeHasInput(rpType)) {
WARN_LOG(G3D, "Not compiling pipeline that requires input attachment, for non input renderpass type");
continue;
}
// Shouldn't hit this, these should have been filtered elsewhere. However, still a good check to do.
if (sampleCount == VK_SAMPLE_COUNT_1_BIT && RenderPassTypeHasMultisample(rpType)) {
WARN_LOG(G3D, "Not compiling single sample pipeline for a multisampled render pass type");
@ -816,19 +673,18 @@ VKRGraphicsPipeline *VulkanRenderManager::CreateGraphicsPipeline(VKRGraphicsPipe
}
if (needsCompile)
compileCond_.notify_one();
compileMutex_.unlock();
}
return pipeline;
}
VKRComputePipeline *VulkanRenderManager::CreateComputePipeline(VKRComputePipelineDesc *desc) {
std::lock_guard<std::mutex> lock(compileMutex_);
if (compileBlocked_) {
return nullptr;
}
VKRComputePipeline *pipeline = new VKRComputePipeline();
pipeline->desc = desc;
compileMutex_.lock();
compileQueue_.push_back(CompileQueueEntry(pipeline));
compileCond_.notify_one();
compileMutex_.unlock();
return pipeline;
}
@ -854,6 +710,10 @@ void VulkanRenderManager::EndCurRenderStep() {
if (!curRenderStep_->render.framebuffer) {
rpType = RenderPassType::BACKBUFFER;
} else {
if (curPipelineFlags_ & PipelineFlags::USES_INPUT_ATTACHMENT) {
// Not allowed on backbuffers.
rpType = depthStencil ? (RenderPassType::HAS_DEPTH | RenderPassType::COLOR_INPUT) : RenderPassType::COLOR_INPUT;
}
// Framebuffers can be stereo, and if so, will control the render pass type to match.
// Pipelines can be mono and render fine to stereo etc, so not checking them here.
// Note that we don't support rendering to just one layer of a multilayer framebuffer!
@ -874,7 +734,7 @@ void VulkanRenderManager::EndCurRenderStep() {
compileMutex_.lock();
bool needsCompile = false;
for (VKRGraphicsPipeline *pipeline : pipelinesToCheck_) {
if (!pipeline || compileBlocked_) {
if (!pipeline) {
// Not good, but let's try not to crash.
continue;
}
@ -904,6 +764,11 @@ void VulkanRenderManager::EndCurRenderStep() {
curPipelineFlags_ = (PipelineFlags)0;
}
void VulkanRenderManager::BindCurrentFramebufferAsInputAttachment0(VkImageAspectFlags aspectBits) {
_dbg_assert_(curRenderStep_);
curRenderStep_->commands.push_back(VkRenderData{ VKRRenderCommand::SELF_DEPENDENCY_BARRIER });
}
void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRRenderPassLoadAction color, VKRRenderPassLoadAction depth, VKRRenderPassLoadAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {
_dbg_assert_(insideFrame_);
// Eliminate dupes (bind of the framebuffer we already are rendering to), instantly convert to a clear if possible.
@ -1134,7 +999,7 @@ void VulkanRenderManager::CopyImageToMemorySync(VkImage image, int mipLevel, int
queueRunner_.CopyReadbackBuffer(frameData_[vulkan_->GetCurFrame()], nullptr, w, h, destFormat, destFormat, pixelStride, pixels);
}
static void RemoveDrawCommands(FastVec<VkRenderData> *cmds) {
static void RemoveDrawCommands(std::vector<VkRenderData> *cmds) {
// Here we remove any DRAW type commands when we hit a CLEAR.
for (auto &c : *cmds) {
if (c.cmd == VKRRenderCommand::DRAW || c.cmd == VKRRenderCommand::DRAW_INDEXED) {
@ -1143,7 +1008,7 @@ static void RemoveDrawCommands(FastVec<VkRenderData> *cmds) {
}
}
static void CleanupRenderCommands(FastVec<VkRenderData> *cmds) {
static void CleanupRenderCommands(std::vector<VkRenderData> *cmds) {
size_t lastCommand[(int)VKRRenderCommand::NUM_RENDER_COMMANDS];
memset(lastCommand, -1, sizeof(lastCommand));
@ -1401,38 +1266,17 @@ void VulkanRenderManager::Finish() {
FrameData &frameData = frameData_[curFrame];
VLOG("PUSH: Frame[%d]", curFrame);
VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::SUBMIT);
task->frame = curFrame;
if (useRenderThread_) {
VKRRenderThreadTask task;
task.frame = curFrame;
task.runType = VKRRunType::PRESENT;
{
std::unique_lock<std::mutex> lock(pushMutex_);
renderThreadQueue_.push(task);
renderThreadQueue_.back()->steps = std::move(steps_);
renderThreadQueue_.back().steps = std::move(steps_);
pushCondVar_.notify_one();
} else {
// Just do it!
task->steps = std::move(steps_);
Run(*task);
delete task;
}
steps_.clear();
}
void VulkanRenderManager::Present() {
int curFrame = vulkan_->GetCurFrame();
VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::PRESENT);
task->frame = curFrame;
if (useRenderThread_) {
std::unique_lock<std::mutex> lock(pushMutex_);
renderThreadQueue_.push(task);
pushCondVar_.notify_one();
} else {
// Just do it!
Run(*task);
delete task;
}
vulkan_->EndFrame();
insideFrame_ = false;
}
@ -1450,42 +1294,9 @@ void VulkanRenderManager::Wipe() {
void VulkanRenderManager::Run(VKRRenderThreadTask &task) {
FrameData &frameData = frameData_[task.frame];
if (task.runType == VKRRunType::PRESENT) {
if (!frameData.skipSwap) {
VkResult res = frameData.QueuePresent(vulkan_, frameDataShared_);
frameTimeHistory_[frameData.frameId].queuePresent = time_now_d();
if (res == VK_ERROR_OUT_OF_DATE_KHR) {
// We clearly didn't get this in vkAcquireNextImageKHR because of the skipSwap check above.
// Do the increment.
outOfDateFrames_++;
} else if (res == VK_SUBOPTIMAL_KHR) {
outOfDateFrames_++;
} else if (res != VK_SUCCESS) {
_assert_msg_(false, "vkQueuePresentKHR failed! result=%s", VulkanResultToString(res));
} else {
// Success
outOfDateFrames_ = 0;
}
} else {
// We only get here if vkAcquireNextImage returned VK_ERROR_OUT_OF_DATE.
outOfDateFrames_++;
frameData.skipSwap = false;
}
return;
}
_dbg_assert_(!frameData.hasPresentCommands);
if (!frameTimeHistory_[frameData.frameId].firstSubmit) {
frameTimeHistory_[frameData.frameId].firstSubmit = time_now_d();
}
frameData.SubmitPending(vulkan_, FrameSubmitType::Pending, frameDataShared_);
// Flush descriptors.
double descStart = time_now_d();
FlushDescriptors(task.frame);
frameData.profile.descWriteTime = time_now_d() - descStart;
if (!frameData.hasMainCommands) {
// Effectively resets both main and present command buffers, since they both live in this pool.
// We always record main commands first, so we don't need to reset the present command buffer separately.
@ -1507,23 +1318,43 @@ void VulkanRenderManager::Run(VKRRenderThreadTask &task) {
int passes = GetVRPassesCount();
for (int i = 0; i < passes; i++) {
PreVRFrameRender(i);
queueRunner_.RunSteps(task.steps, task.frame, frameData, frameDataShared_, i < passes - 1);
queueRunner_.RunSteps(task.steps, frameData, frameDataShared_, i < passes - 1);
PostVRFrameRender();
}
} else {
queueRunner_.RunSteps(task.steps, task.frame, frameData, frameDataShared_);
queueRunner_.RunSteps(task.steps, frameData, frameDataShared_);
}
switch (task.runType) {
case VKRRunType::SUBMIT:
case VKRRunType::PRESENT:
frameData.SubmitPending(vulkan_, FrameSubmitType::Present, frameDataShared_);
if (!frameData.skipSwap) {
VkResult res = frameData.QueuePresent(vulkan_, frameDataShared_);
if (res == VK_ERROR_OUT_OF_DATE_KHR) {
// We clearly didn't get this in vkAcquireNextImageKHR because of the skipSwap check above.
// Do the increment.
outOfDateFrames_++;
} else if (res == VK_SUBOPTIMAL_KHR) {
outOfDateFrames_++;
} else if (res != VK_SUCCESS) {
_assert_msg_(false, "vkQueuePresentKHR failed! result=%s", VulkanResultToString(res));
} else {
// Success
outOfDateFrames_ = 0;
}
} else {
// We only get here if vkAcquireNextImage returned VK_ERROR_OUT_OF_DATE.
outOfDateFrames_++;
frameData.skipSwap = false;
}
break;
case VKRRunType::SYNC:
// The submit will trigger the readbackFence, and also do the wait for it.
frameData.SubmitPending(vulkan_, FrameSubmitType::Sync, frameDataShared_);
if (useRenderThread_) {
{
std::unique_lock<std::mutex> lock(syncMutex_);
syncCondVar_.notify_one();
}
@ -1542,8 +1373,6 @@ void VulkanRenderManager::Run(VKRRenderThreadTask &task) {
// Called from main thread.
void VulkanRenderManager::FlushSync() {
_dbg_assert_(!curRenderStep_);
if (invalidationCallback_) {
invalidationCallback_(InvalidationCallbackFlags::COMMAND_BUFFER_STATE);
}
@ -1551,34 +1380,25 @@ void VulkanRenderManager::FlushSync() {
int curFrame = vulkan_->GetCurFrame();
FrameData &frameData = frameData_[curFrame];
if (useRenderThread_) {
{
VLOG("PUSH: Frame[%d]", curFrame);
VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::SYNC);
task->frame = curFrame;
std::unique_lock<std::mutex> lock(pushMutex_);
renderThreadQueue_.push(task);
renderThreadQueue_.back()->steps = std::move(steps_);
pushCondVar_.notify_one();
steps_.clear();
}
{
VLOG("PUSH: Frame[%d]", curFrame);
VKRRenderThreadTask task;
task.frame = curFrame;
task.runType = VKRRunType::SYNC;
std::unique_lock<std::mutex> lock(pushMutex_);
renderThreadQueue_.push(task);
renderThreadQueue_.back().steps = std::move(steps_);
pushCondVar_.notify_one();
}
{
std::unique_lock<std::mutex> lock(syncMutex_);
// Wait for the flush to be hit, since we're syncing.
while (!frameData.syncDone) {
VLOG("PUSH: Waiting for frame[%d].syncDone = 1 (sync)", curFrame);
syncCondVar_.wait(lock);
}
frameData.syncDone = false;
{
std::unique_lock<std::mutex> lock(syncMutex_);
// Wait for the flush to be hit, since we're syncing.
while (!frameData.syncDone) {
VLOG("PUSH: Waiting for frame[%d].syncDone = 1 (sync)", curFrame);
syncCondVar_.wait(lock);
}
} else {
VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::SYNC);
task->frame = curFrame;
task->steps = std::move(steps_);
Run(*task);
delete task;
steps_.clear();
frameData.syncDone = false;
}
}
@ -1587,238 +1407,3 @@ void VulkanRenderManager::ResetStats() {
totalGPUTimeMs_.Reset();
renderCPUTimeMs_.Reset();
}
VKRPipelineLayout *VulkanRenderManager::CreatePipelineLayout(BindingType *bindingTypes, size_t bindingTypesCount, bool geoShadersEnabled, const char *tag) {
VKRPipelineLayout *layout = new VKRPipelineLayout();
layout->tag = tag;
layout->bindingTypesCount = (uint32_t)bindingTypesCount;
_dbg_assert_(bindingTypesCount <= ARRAY_SIZE(layout->bindingTypes));
memcpy(layout->bindingTypes, bindingTypes, sizeof(BindingType) * bindingTypesCount);
VkDescriptorSetLayoutBinding bindings[VKRPipelineLayout::MAX_DESC_SET_BINDINGS];
for (int i = 0; i < bindingTypesCount; i++) {
bindings[i].binding = i;
bindings[i].descriptorCount = 1;
bindings[i].pImmutableSamplers = nullptr;
switch (bindingTypes[i]) {
case BindingType::COMBINED_IMAGE_SAMPLER:
bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[i].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
break;
case BindingType::UNIFORM_BUFFER_DYNAMIC_VERTEX:
bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
bindings[i].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
break;
case BindingType::UNIFORM_BUFFER_DYNAMIC_ALL:
bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
bindings[i].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
if (geoShadersEnabled) {
bindings[i].stageFlags |= VK_SHADER_STAGE_GEOMETRY_BIT;
}
break;
case BindingType::STORAGE_BUFFER_VERTEX:
bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[i].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
break;
case BindingType::STORAGE_BUFFER_COMPUTE:
bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[i].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
break;
case BindingType::STORAGE_IMAGE_COMPUTE:
bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
bindings[i].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
break;
default:
_dbg_assert_(false);
break;
}
}
VkDescriptorSetLayoutCreateInfo dsl = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO };
dsl.bindingCount = (uint32_t)bindingTypesCount;
dsl.pBindings = bindings;
VkResult res = vkCreateDescriptorSetLayout(vulkan_->GetDevice(), &dsl, nullptr, &layout->descriptorSetLayout);
_assert_(VK_SUCCESS == res && layout->descriptorSetLayout);
VkPipelineLayoutCreateInfo pl = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO };
VkDescriptorSetLayout setLayouts[1] = { layout->descriptorSetLayout };
pl.setLayoutCount = ARRAY_SIZE(setLayouts);
pl.pSetLayouts = setLayouts;
res = vkCreatePipelineLayout(vulkan_->GetDevice(), &pl, nullptr, &layout->pipelineLayout);
_assert_(VK_SUCCESS == res && layout->pipelineLayout);
vulkan_->SetDebugName(layout->descriptorSetLayout, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT, tag);
vulkan_->SetDebugName(layout->pipelineLayout, VK_OBJECT_TYPE_PIPELINE_LAYOUT, tag);
for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) {
// Some games go beyond 1024 and end up having to resize like GTA, but most stay below so we start there.
layout->frameData[i].pool.Create(vulkan_, bindingTypes, (uint32_t)bindingTypesCount, 1024);
}
pipelineLayouts_.push_back(layout);
return layout;
}
void VulkanRenderManager::DestroyPipelineLayout(VKRPipelineLayout *layout) {
	// Unregister the layout from the live list (if it's registered), then hand it to the
	// deletion queue so the Vulkan objects are only destroyed once the GPU is done with them.
	for (size_t i = 0; i < pipelineLayouts_.size(); i++) {
		if (pipelineLayouts_[i] == layout) {
			pipelineLayouts_.erase(pipelineLayouts_.begin() + i);
			break;
		}
	}
	vulkan_->Delete().QueueCallback([](VulkanContext *vulkan, void *userdata) {
		VKRPipelineLayout *doomed = (VKRPipelineLayout *)userdata;
		// Tear down the per-frame descriptor pools before the layout objects they came from.
		for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) {
			doomed->frameData[i].pool.DestroyImmediately();
		}
		vkDestroyPipelineLayout(vulkan->GetDevice(), doomed->pipelineLayout, nullptr);
		vkDestroyDescriptorSetLayout(vulkan->GetDevice(), doomed->descriptorSetLayout, nullptr);
		delete doomed;
	}, layout);
}
void VulkanRenderManager::FlushDescriptors(int frame) {
for (auto iter : pipelineLayouts_) {
iter->FlushDescSets(vulkan_, frame, &frameData_[frame].profile);
}
}
void VulkanRenderManager::ResetDescriptorLists(int frame) {
for (auto iter : pipelineLayouts_) {
VKRPipelineLayout::FrameData &data = iter->frameData[frame];
data.flushedDescriptors_ = 0;
data.descSets_.clear();
data.descData_.clear();
}
}
VKRPipelineLayout::~VKRPipelineLayout() {
	// Deletion must go through DestroyPipelineLayout's deferred callback, which calls
	// DestroyImmediately() on every frame's pool before deleting. Checking frame 0 is
	// sufficient since all frames' pools are destroyed together there.
	_assert_(frameData[0].pool.IsDestroyed());
}
// Allocates real VkDescriptorSets for all PendingDescSets queued since the previous
// flush this frame, and writes their packed descriptor contents. Consecutive identical
// sets are deduplicated by reusing the previously written set.
void VKRPipelineLayout::FlushDescSets(VulkanContext *vulkan, int frame, QueueProfileContext *profile) {
	_dbg_assert_(frame < VulkanContext::MAX_INFLIGHT_FRAMES);

	FrameData &data = frameData[frame];

	VulkanDescSetPool &pool = data.pool;
	FastVec<PackedDescriptor> &descData = data.descData_;
	FastVec<PendingDescSet> &descSets = data.descSets_;

	pool.Reset();

	// Small cache of freshly allocated descriptor sets, so we don't hit the pool for
	// every single set.
	VkDescriptorSet setCache[8];
	VkDescriptorSetLayout layoutsForAlloc[ARRAY_SIZE(setCache)];
	for (int i = 0; i < ARRAY_SIZE(setCache); i++) {
		layoutsForAlloc[i] = descriptorSetLayout;
	}
	int setsUsed = ARRAY_SIZE(setCache);  // Marks the cache as exhausted, to allocate immediately.

	// This will write all descriptors.
	// Initially, we just do a simple look-back comparing to the previous descriptor to avoid sequential dupes.

	// Initially, let's do naive single desc set writes.
	VkWriteDescriptorSet writes[MAX_DESC_SET_BINDINGS];
	VkDescriptorImageInfo imageInfo[MAX_DESC_SET_BINDINGS];  // just picked a practical number
	VkDescriptorBufferInfo bufferInfo[MAX_DESC_SET_BINDINGS];

	// Only descriptors queued since the last mid-frame flush need to be processed.
	size_t start = data.flushedDescriptors_;
	int writeCount = 0;

	for (size_t index = start; index < descSets.size(); index++) {
		auto &d = descSets[index];

		// This is where we look up to see if we already have an identical descriptor previously in the array.
		// We could do a simple custom hash map here that doesn't handle collisions, since those won't matter.
		// Instead, for now we just check history one item backwards. Good enough, it seems.
		// NOTE(review): with `index > start + 1`, the entry at start+1 is never compared against the one
		// at start, so one dedup opportunity per flush is skipped — confirm whether `index > start` was intended.
		if (index > start + 1) {
			if (descSets[index - 1].count == d.count) {
				if (!memcmp(descData.data() + d.offset, descData.data() + descSets[index - 1].offset, d.count * sizeof(PackedDescriptor))) {
					// Identical to the previous set - just reuse it, no Vulkan write needed.
					d.set = descSets[index - 1].set;
					continue;
				}
			}
		}

		if (setsUsed < ARRAY_SIZE(setCache)) {
			d.set = setCache[setsUsed++];
		} else {
			// Allocate in small batches.
			bool success = pool.Allocate(setCache, ARRAY_SIZE(setCache), layoutsForAlloc);
			_dbg_assert_(success);
			d.set = setCache[0];
			setsUsed = 1;
		}

		// TODO: Build up bigger batches of writes.
		// (Named `descriptors` rather than `data` to avoid shadowing the FrameData reference above.)
		const PackedDescriptor *descriptors = descData.begin() + d.offset;
		int numWrites = 0;
		int numBuffers = 0;
		int numImages = 0;
		for (int i = 0; i < d.count; i++) {
			if (!descriptors[i].image.view) { // This automatically also checks for a null buffer due to the union.
				continue;
			}
			switch (this->bindingTypes[i]) {
			case BindingType::COMBINED_IMAGE_SAMPLER:
				_dbg_assert_(descriptors[i].image.sampler != VK_NULL_HANDLE);
				imageInfo[numImages].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
				imageInfo[numImages].imageView = descriptors[i].image.view;
				imageInfo[numImages].sampler = descriptors[i].image.sampler;
				writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
				writes[numWrites].pImageInfo = &imageInfo[numImages];
				writes[numWrites].pBufferInfo = nullptr;
				numImages++;
				break;
			case BindingType::STORAGE_IMAGE_COMPUTE:
				imageInfo[numImages].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
				imageInfo[numImages].imageView = descriptors[i].image.view;
				imageInfo[numImages].sampler = VK_NULL_HANDLE;
				writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
				writes[numWrites].pImageInfo = &imageInfo[numImages];
				writes[numWrites].pBufferInfo = nullptr;
				numImages++;
				break;
			case BindingType::STORAGE_BUFFER_VERTEX:
			case BindingType::STORAGE_BUFFER_COMPUTE:
				bufferInfo[numBuffers].buffer = descriptors[i].buffer.buffer;
				bufferInfo[numBuffers].offset = descriptors[i].buffer.offset;
				bufferInfo[numBuffers].range = descriptors[i].buffer.range;
				writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
				writes[numWrites].pBufferInfo = &bufferInfo[numBuffers];
				writes[numWrites].pImageInfo = nullptr;
				numBuffers++;
				break;
			case BindingType::UNIFORM_BUFFER_DYNAMIC_ALL:
			case BindingType::UNIFORM_BUFFER_DYNAMIC_VERTEX:
				bufferInfo[numBuffers].buffer = descriptors[i].buffer.buffer;
				// Dynamic UBOs are written with offset 0 here; the actual offset is supplied
				// as a dynamic offset at bind time.
				bufferInfo[numBuffers].offset = 0;
				bufferInfo[numBuffers].range = descriptors[i].buffer.range;
				writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
				writes[numWrites].pBufferInfo = &bufferInfo[numBuffers];
				writes[numWrites].pImageInfo = nullptr;
				numBuffers++;
				break;
			}
			// Common fields for all descriptor types.
			writes[numWrites].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
			writes[numWrites].pNext = nullptr;
			writes[numWrites].descriptorCount = 1;
			writes[numWrites].dstArrayElement = 0;
			writes[numWrites].dstBinding = i;
			writes[numWrites].dstSet = d.set;
			writes[numWrites].pTexelBufferView = nullptr;
			numWrites++;
		}

		vkUpdateDescriptorSets(vulkan->GetDevice(), numWrites, writes, 0, nullptr);
		writeCount++;
	}

	data.flushedDescriptors_ = (int)descSets.size();
	profile->descriptorsWritten += writeCount;
}

View file

@ -17,13 +17,11 @@
#include "Common/System/Display.h"
#include "Common/GPU/Vulkan/VulkanContext.h"
#include "Common/Data/Convert/SmallDataConvert.h"
#include "Common/Data/Collections/FastVec.h"
#include "Common/Math/math_util.h"
#include "Common/GPU/DataFormat.h"
#include "Common/GPU/MiscTypes.h"
#include "Common/GPU/Vulkan/VulkanQueueRunner.h"
#include "Common/GPU/Vulkan/VulkanFramebuffer.h"
#include "Common/GPU/Vulkan/VulkanDescSet.h"
#include "Common/GPU/thin3d.h"
// Forward declaration
@ -78,10 +76,7 @@ struct BoundingRect {
// All the data needed to create a graphics pipeline.
// TODO: Compress this down greatly.
class VKRGraphicsPipelineDesc : public Draw::RefCountedObject {
public:
VKRGraphicsPipelineDesc() : Draw::RefCountedObject("VKRGraphicsPipelineDesc") {}
struct VKRGraphicsPipelineDesc : Draw::RefCountedObject {
VkPipelineCache pipelineCache = VK_NULL_HANDLE;
VkPipelineColorBlendStateCreateInfo cbs{ VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO };
VkPipelineColorBlendAttachmentState blend0{};
@ -107,7 +102,7 @@ public:
VkPipelineVertexInputStateCreateInfo vis{ VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO };
VkPipelineViewportStateCreateInfo views{ VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO };
VKRPipelineLayout *pipelineLayout = nullptr;
VkPipelineLayout pipelineLayout = VK_NULL_HANDLE;
// Does not include the render pass type, it's passed in separately since the
// desc is persistent.
@ -132,10 +127,6 @@ struct VKRGraphicsPipeline {
// This deletes the whole VKRGraphicsPipeline, you must remove your last pointer to it when doing this.
void QueueForDeletion(VulkanContext *vulkan);
// This blocks until any background compiles are finished.
// Used during game shutdown before we clear out shaders that these compiles depend on.
void BlockUntilCompiled();
u32 GetVariantsBitmask() const;
void LogCreationFailure() const;
@ -185,66 +176,15 @@ struct CompileQueueEntry {
VkSampleCountFlagBits sampleCount;
};
// Pending descriptor sets.
// TODO: Sort these by VKRPipelineLayout to avoid storing it for each element.
struct PendingDescSet {
int offset; // probably enough with a u16.
u8 count;
VkDescriptorSet set;
};
struct PackedDescriptor {
union {
struct {
VkImageView view;
VkSampler sampler;
} image;
struct {
VkBuffer buffer;
uint32_t offset;
uint32_t range;
} buffer;
};
};
// Note that we only support a single descriptor set due to compatibility with some ancient devices.
// We should probably eventually give that up.
struct VKRPipelineLayout {
~VKRPipelineLayout();
enum { MAX_DESC_SET_BINDINGS = 10 };
BindingType bindingTypes[MAX_DESC_SET_BINDINGS];
uint32_t bindingTypesCount = 0;
VkPipelineLayout pipelineLayout = VK_NULL_HANDLE;
VkDescriptorSetLayout descriptorSetLayout = VK_NULL_HANDLE; // only support 1 for now.
int pushConstSize = 0;
const char *tag = nullptr;
struct FrameData {
FrameData() : pool("GameDescPool", true) {}
VulkanDescSetPool pool;
FastVec<PackedDescriptor> descData_;
FastVec<PendingDescSet> descSets_;
// TODO: We should be able to get away with a single descData_/descSets_ and then send it along,
// but it's easier to just segregate by frame id.
int flushedDescriptors_ = 0;
};
FrameData frameData[VulkanContext::MAX_INFLIGHT_FRAMES];
void FlushDescSets(VulkanContext *vulkan, int frame, QueueProfileContext *profile);
};
class VulkanRenderManager {
public:
VulkanRenderManager(VulkanContext *vulkan, bool useThread, HistoryBuffer<FrameTimeData, FRAME_TIME_HISTORY_LENGTH> &frameTimeHistory);
VulkanRenderManager(VulkanContext *vulkan);
~VulkanRenderManager();
// Makes sure that the GPU has caught up enough that we can start writing buffers of this frame again.
void BeginFrame(bool enableProfiling, bool enableLogProfiler);
// These can run on a different thread!
// Can run on a different thread!
void Finish();
void Present();
// Zaps queued up commands. Use if you know there's a risk you've queued up stuff that has already been deleted. Can happen during in-game shutdown.
void Wipe();
@ -275,6 +215,8 @@ public:
// get an array texture view.
VkImageView BindFramebufferAsTexture(VKRFramebuffer *fb, int binding, VkImageAspectFlags aspectBits, int layer);
void BindCurrentFramebufferAsInputAttachment0(VkImageAspectFlags aspectBits);
bool CopyFramebufferToMemory(VKRFramebuffer *src, VkImageAspectFlags aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, Draw::ReadbackMode mode, const char *tag);
void CopyImageToMemorySync(VkImage image, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag);
@ -289,27 +231,16 @@ public:
VKRGraphicsPipeline *CreateGraphicsPipeline(VKRGraphicsPipelineDesc *desc, PipelineFlags pipelineFlags, uint32_t variantBitmask, VkSampleCountFlagBits sampleCount, bool cacheLoad, const char *tag);
VKRComputePipeline *CreateComputePipeline(VKRComputePipelineDesc *desc);
VKRPipelineLayout *CreatePipelineLayout(BindingType *bindingTypes, size_t bindingCount, bool geoShadersEnabled, const char *tag);
void DestroyPipelineLayout(VKRPipelineLayout *pipelineLayout);
void ReportBadStateForDraw();
void NudgeCompilerThread() {
compileMutex_.lock();
compileCond_.notify_one();
compileMutex_.unlock();
}
// This is the first call in a draw operation. Instead of asserting like we used to, you can now check the
// return value and skip the draw if we're in a bad state. In that case, call ReportBadState.
// The old assert wasn't very helpful in figuring out what caused it anyway...
bool BindPipeline(VKRGraphicsPipeline *pipeline, PipelineFlags flags, VKRPipelineLayout *pipelineLayout) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER && pipeline != nullptr);
if (!curRenderStep_ || curRenderStep_->stepType != VKRStepType::RENDER) {
return false;
}
VkRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = VKRRenderCommand::BIND_GRAPHICS_PIPELINE;
void BindPipeline(VKRGraphicsPipeline *pipeline, PipelineFlags flags, VkPipelineLayout pipelineLayout) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);
_dbg_assert_(pipeline != nullptr);
VkRenderData data{ VKRRenderCommand::BIND_GRAPHICS_PIPELINE };
pipelinesToCheck_.push_back(pipeline);
data.graphics_pipeline.pipeline = pipeline;
data.graphics_pipeline.pipelineLayout = pipelineLayout;
@ -318,16 +249,24 @@ public:
// DebugBreak();
// }
curPipelineFlags_ |= flags;
curPipelineLayout_ = pipelineLayout;
return true;
curRenderStep_->commands.push_back(data);
}
void BindPipeline(VKRComputePipeline *pipeline, PipelineFlags flags, VkPipelineLayout pipelineLayout) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);
_dbg_assert_(pipeline != nullptr);
VkRenderData data{ VKRRenderCommand::BIND_COMPUTE_PIPELINE };
data.compute_pipeline.pipeline = pipeline;
data.compute_pipeline.pipelineLayout = pipelineLayout;
curPipelineFlags_ |= flags;
curRenderStep_->commands.push_back(data);
}
void SetViewport(const VkViewport &vp) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);
_dbg_assert_((int)vp.width >= 0);
_dbg_assert_((int)vp.height >= 0);
VkRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = VKRRenderCommand::VIEWPORT;
VkRenderData data{ VKRRenderCommand::VIEWPORT };
data.viewport.vp.x = vp.x;
data.viewport.vp.y = vp.y;
data.viewport.vp.width = vp.width;
@ -337,6 +276,7 @@ public:
// TODO: This should be fixed at the source.
data.viewport.vp.minDepth = clamp_value(vp.minDepth, 0.0f, 1.0f);
data.viewport.vp.maxDepth = clamp_value(vp.maxDepth, 0.0f, 1.0f);
curRenderStep_->commands.push_back(data);
curStepHasViewport_ = true;
}
@ -378,37 +318,37 @@ public:
curRenderArea_.Apply(rc);
VkRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = VKRRenderCommand::SCISSOR;
VkRenderData data{ VKRRenderCommand::SCISSOR };
data.scissor.scissor = rc;
curRenderStep_->commands.push_back(data);
curStepHasScissor_ = true;
}
void SetStencilParams(uint8_t writeMask, uint8_t compareMask, uint8_t refValue) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);
VkRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = VKRRenderCommand::STENCIL;
VkRenderData data{ VKRRenderCommand::STENCIL };
data.stencil.stencilWriteMask = writeMask;
data.stencil.stencilCompareMask = compareMask;
data.stencil.stencilRef = refValue;
curRenderStep_->commands.push_back(data);
}
void SetBlendFactor(uint32_t color) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);
VkRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = VKRRenderCommand::BLEND;
VkRenderData data{ VKRRenderCommand::BLEND };
data.blendColor.color = color;
curRenderStep_->commands.push_back(data);
}
void PushConstants(VkPipelineLayout pipelineLayout, VkShaderStageFlags stages, int offset, int size, void *constants) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);
_dbg_assert_(size + offset < 40);
VkRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = VKRRenderCommand::PUSH_CONSTANTS;
VkRenderData data{ VKRRenderCommand::PUSH_CONSTANTS };
data.push.stages = stages;
data.push.offset = offset;
data.push.size = size;
memcpy(data.push.data, constants, size);
curRenderStep_->commands.push_back(data);
}
void Clear(uint32_t clearColor, float clearZ, int clearStencil, int clearMask);
@ -438,52 +378,28 @@ public:
curRenderStep_->render.stencilStore = VKRRenderPassStoreAction::DONT_CARE;
}
// Descriptors will match the current pipeline layout, set by the last call to BindPipeline.
// Count is the count of void*s. Two are needed for COMBINED_IMAGE_SAMPLER, everything else is a single one.
// The goal is to keep this function very small and fast, and do the expensive work on the render thread or
// another thread.
PackedDescriptor *PushDescriptorSet(int count, int *descSetIndex) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);
int curFrame = vulkan_->GetCurFrame();
VKRPipelineLayout::FrameData &data = curPipelineLayout_->frameData[curFrame];
size_t offset = data.descData_.size();
PackedDescriptor *retval = data.descData_.extend_uninitialized(count);
int setIndex = (int)data.descSets_.size();
PendingDescSet &descSet = data.descSets_.push_uninitialized();
descSet.offset = (uint32_t)offset;
descSet.count = count;
// descSet.set = VK_NULL_HANDLE; // to be filled in
*descSetIndex = setIndex;
return retval;
}
void Draw(int descSetIndex, int numUboOffsets, const uint32_t *uboOffsets, VkBuffer vbuffer, int voffset, int count, int offset = 0) {
void Draw(VkDescriptorSet descSet, int numUboOffsets, const uint32_t *uboOffsets, VkBuffer vbuffer, int voffset, int count, int offset = 0) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER && curStepHasViewport_ && curStepHasScissor_);
VkRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = VKRRenderCommand::DRAW;
VkRenderData data{ VKRRenderCommand::DRAW };
data.draw.count = count;
data.draw.offset = offset;
data.draw.descSetIndex = descSetIndex;
data.draw.ds = descSet;
data.draw.vbuffer = vbuffer;
data.draw.voffset = voffset;
data.draw.numUboOffsets = numUboOffsets;
_dbg_assert_(numUboOffsets <= ARRAY_SIZE(data.draw.uboOffsets));
for (int i = 0; i < numUboOffsets; i++)
data.draw.uboOffsets[i] = uboOffsets[i];
curRenderStep_->commands.push_back(data);
curRenderStep_->render.numDraws++;
}
void DrawIndexed(int descSetIndex, int numUboOffsets, const uint32_t *uboOffsets, VkBuffer vbuffer, int voffset, VkBuffer ibuffer, int ioffset, int count, int numInstances) {
void DrawIndexed(VkDescriptorSet descSet, int numUboOffsets, const uint32_t *uboOffsets, VkBuffer vbuffer, int voffset, VkBuffer ibuffer, int ioffset, int count, int numInstances, VkIndexType indexType) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER && curStepHasViewport_ && curStepHasScissor_);
VkRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = VKRRenderCommand::DRAW_INDEXED;
VkRenderData data{ VKRRenderCommand::DRAW_INDEXED };
data.drawIndexed.count = count;
data.drawIndexed.instances = numInstances;
data.drawIndexed.descSetIndex = descSetIndex;
data.drawIndexed.ds = descSet;
data.drawIndexed.vbuffer = vbuffer;
data.drawIndexed.voffset = voffset;
data.drawIndexed.ibuffer = ibuffer;
@ -492,6 +408,8 @@ public:
_dbg_assert_(numUboOffsets <= ARRAY_SIZE(data.drawIndexed.uboOffsets));
for (int i = 0; i < numUboOffsets; i++)
data.drawIndexed.uboOffsets[i] = uboOffsets[i];
data.drawIndexed.indexType = indexType;
curRenderStep_->commands.push_back(data);
curRenderStep_->render.numDraws++;
}
@ -499,9 +417,9 @@ public:
// in the debugger.
void DebugAnnotate(const char *annotation) {
_dbg_assert_(curRenderStep_);
VkRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = VKRRenderCommand::DEBUG_ANNOTATION;
VkRenderData data{ VKRRenderCommand::DEBUG_ANNOTATION };
data.debugAnnotation.annotation = annotation;
curRenderStep_->commands.push_back(data);
}
VkCommandBuffer GetInitCmd();
@ -535,9 +453,10 @@ public:
return outOfDateFrames_ > VulkanContext::MAX_INFLIGHT_FRAMES;
}
void Invalidate(InvalidationFlags flags);
void ResetStats();
void DrainAndBlockCompileQueue();
void ReleaseCompileQueue();
void DrainCompileQueue();
private:
void EndCurRenderStep();
@ -551,12 +470,6 @@ private:
void FlushSync();
void StopThread();
void PresentWaitThreadFunc();
void PollPresentTiming();
void ResetDescriptorLists(int frame);
void FlushDescriptors(int frame);
FrameDataShared frameDataShared_;
FrameData frameData_[VulkanContext::MAX_INFLIGHT_FRAMES];
@ -578,9 +491,6 @@ private:
bool insideFrame_ = false;
bool run_ = false;
bool useRenderThread_ = true;
bool measurePresentTime_ = false;
// This is the offset within this frame, in case of a mid-frame sync.
VKRStep *curRenderStep_ = nullptr;
bool curStepHasViewport_ = false;
@ -599,7 +509,7 @@ private:
std::mutex pushMutex_;
std::condition_variable pushCondVar_;
std::queue<VKRRenderThreadTask *> renderThreadQueue_;
std::queue<VKRRenderThreadTask> renderThreadQueue_;
// For readbacks and other reasons we need to sync with the render thread.
std::mutex syncMutex_;
@ -612,10 +522,6 @@ private:
std::condition_variable compileCond_;
std::mutex compileMutex_;
std::vector<CompileQueueEntry> compileQueue_;
bool compileBlocked_ = false;
// Thread for measuring presentation delay.
std::thread presentWaitThread_;
// pipelines to check and possibly create at the end of the current render pass.
std::vector<VKRGraphicsPipeline *> pipelinesToCheck_;
@ -624,13 +530,6 @@ private:
SimpleStat initTimeMs_;
SimpleStat totalGPUTimeMs_;
SimpleStat renderCPUTimeMs_;
SimpleStat descUpdateTimeMs_;
std::function<void(InvalidationCallbackFlags)> invalidationCallback_;
uint64_t frameIdGen_ = FRAME_TIME_HISTORY_LENGTH;
HistoryBuffer<FrameTimeData, FRAME_TIME_HISTORY_LENGTH> &frameTimeHistory_;
VKRPipelineLayout *curPipelineLayout_ = nullptr;
std::vector<VKRPipelineLayout *> pipelineLayouts_;
};

View file

@ -20,20 +20,27 @@
#include <string>
#include <map>
#include "Common/Log.h"
#include "Common/StringUtils.h"
#include "Common/System/Display.h"
#include "Common/Math/lin/matrix4x4.h"
#include "Common/Data/Convert/SmallDataConvert.h"
#include "Common/GPU/thin3d.h"
#include "Common/GPU/Vulkan/VulkanRenderManager.h"
#include "Common/Log.h"
#include "Common/StringUtils.h"
#include "Common/GPU/Vulkan/VulkanContext.h"
#include "Common/GPU/Vulkan/VulkanImage.h"
#include "Common/GPU/Vulkan/VulkanMemory.h"
#include "Common/GPU/Vulkan/VulkanLoader.h"
#include "Common/Thread/Promise.h"
// For descriptor set 0 (the only one), we use a simple descriptor set for all thin3d rendering: 1 UBO binding point, 3 combined texture/samples.
#include "Common/GPU/Vulkan/VulkanLoader.h"
// We support a frame-global descriptor set, which can be optionally used by other code,
// but is not directly used by thin3d. It has to be defined here though, be in set 0
// and specified in every pipeline layout, otherwise it can't sit undisturbed when other
// descriptor sets are bound on top.
// For descriptor set 1, we use a simple descriptor set for all thin3d rendering: 1 UBO binding point, 3 combined texture/samples.
//
// binding 0 - uniform buffer
// binding 1 - texture/sampler
@ -247,7 +254,7 @@ bool VKShaderModule::Compile(VulkanContext *vulkan, ShaderLanguage language, con
class VKInputLayout : public InputLayout {
public:
VkVertexInputBindingDescription binding;
std::vector<VkVertexInputBindingDescription> bindings;
std::vector<VkVertexInputAttributeDescription> attributes;
VkPipelineVertexInputStateCreateInfo visc;
};
@ -290,7 +297,7 @@ public:
std::vector<VKShaderModule *> deps;
int stride = 0;
int stride[4]{};
int dynamicUniformSize = 0;
bool usesStencil = false;
@ -328,11 +335,8 @@ struct DescriptorSetKey {
class VKTexture : public Texture {
public:
VKTexture(VulkanContext *vulkan, VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const TextureDesc &desc)
: vulkan_(vulkan), mipLevels_(desc.mipLevels) {
format_ = desc.format;
}
: vulkan_(vulkan), mipLevels_(desc.mipLevels), format_(desc.format) {}
bool Create(VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const TextureDesc &desc);
void Update(VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const uint8_t *const *data, TextureCallback callback, int numLevels);
~VKTexture() {
Destroy();
@ -352,13 +356,7 @@ public:
return VK_NULL_HANDLE; // This would be bad.
}
int NumLevels() const {
return mipLevels_;
}
private:
void UpdateInternal(VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const uint8_t *const *data, TextureCallback callback, int numLevels);
void Destroy() {
if (vkTex_) {
vkTex_->Destroy();
@ -371,16 +369,19 @@ private:
VulkanTexture *vkTex_ = nullptr;
int mipLevels_ = 0;
DataFormat format_ = DataFormat::UNDEFINED;
};
class VKFramebuffer;
class VKContext : public DrawContext {
public:
VKContext(VulkanContext *vulkan, bool useRenderThread);
VKContext(VulkanContext *vulkan);
~VKContext();
void DebugAnnotate(const char *annotation) override;
void SetDebugFlags(DebugFlags flags) override;
const DeviceCaps &GetDeviceCaps() const override {
return caps_;
@ -397,13 +398,13 @@ public:
}
uint32_t GetDataFormatSupport(DataFormat fmt) const override;
PresentMode GetPresentMode() const {
PresentationMode GetPresentationMode() const override {
switch (vulkan_->GetPresentMode()) {
case VK_PRESENT_MODE_FIFO_KHR: return PresentMode::FIFO;
case VK_PRESENT_MODE_FIFO_RELAXED_KHR: return PresentMode::FIFO; // We treat it as FIFO for now (and won't ever enable it anyway...)
case VK_PRESENT_MODE_IMMEDIATE_KHR: return PresentMode::IMMEDIATE;
case VK_PRESENT_MODE_MAILBOX_KHR: return PresentMode::MAILBOX;
default: return PresentMode::FIFO;
case VK_PRESENT_MODE_FIFO_KHR: return PresentationMode::FIFO;
case VK_PRESENT_MODE_FIFO_RELAXED_KHR: return PresentationMode::FIFO_RELAXED;
case VK_PRESENT_MODE_IMMEDIATE_KHR: return PresentationMode::IMMEDIATE;
case VK_PRESENT_MODE_MAILBOX_KHR: return PresentationMode::MAILBOX;
default: return PresentationMode::FIFO;
}
}
@ -420,7 +421,6 @@ public:
Framebuffer *CreateFramebuffer(const FramebufferDesc &desc) override;
void UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t offset, size_t size, UpdateBufferFlags flags) override;
void UpdateTextureLevels(Texture *texture, const uint8_t **data, TextureCallback initDataCallback, int numLevels) override;
void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits, const char *tag) override;
bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter, const char *tag) override;
@ -430,6 +430,7 @@ public:
// These functions should be self explanatory.
void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) override;
void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int layer) override;
void BindCurrentFramebufferForColorInput() override;
void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) override;
@ -446,9 +447,13 @@ public:
curPipeline_ = (VKPipeline *)pipeline;
}
void BindVertexBuffer(Buffer *vertexBuffer, int offset) override {
curVBuffer_ = (VKBuffer *)vertexBuffer;
curVBufferOffset_ = offset;
// TODO: Make VKBuffers proper buffers, and do a proper binding model. This is just silly.
void BindVertexBuffers(int start, int count, Buffer **buffers, const int *offsets) override {
_assert_(start + count <= ARRAY_SIZE(curVBuffers_));
for (int i = 0; i < count; i++) {
curVBuffers_[i + start] = (VKBuffer *)buffers[i];
curVBufferOffsets_[i + start] = offsets ? offsets[i] : 0;
}
}
void BindIndexBuffer(Buffer *indexBuffer, int offset) override {
curIBuffer_ = (VKBuffer *)indexBuffer;
@ -467,16 +472,10 @@ public:
void Clear(int mask, uint32_t colorval, float depthVal, int stencilVal) override;
void BeginFrame(DebugFlags debugFlags) override;
void BeginFrame() override;
void EndFrame() override;
void Present(PresentMode presentMode, int vblanks) override;
void WipeQueue() override;
int GetFrameCount() override {
return frameCount_;
}
void FlushState() override {}
void ResetStats() override {
@ -500,10 +499,10 @@ public:
}
}
void BindDescriptors(VkBuffer buffer, PackedDescriptor descriptors[4]);
VkDescriptorSet GetOrCreateDescriptorSet(VkBuffer buffer);
std::vector<std::string> GetFeatureList() const override;
std::vector<std::string> GetExtensionList(bool device, bool enabledOnly) const override;
std::vector<std::string> GetExtensionList() const override;
uint64_t GetNativeObject(NativeObject obj, void *srcObject) override;
@ -517,30 +516,27 @@ public:
renderManager_.SetInvalidationCallback(callback);
}
std::string GetGpuProfileString() const override {
return renderManager_.GetGpuProfileString();
}
private:
VulkanTexture *GetNullTexture();
VulkanContext *vulkan_ = nullptr;
int frameCount_ = 0;
VulkanRenderManager renderManager_;
VulkanTexture *nullTexture_ = nullptr;
AutoRef<VKPipeline> curPipeline_;
AutoRef<VKBuffer> curVBuffer_;
int curVBufferOffset_ = 0;
AutoRef<VKBuffer> curVBuffers_[4];
int curVBufferOffsets_[4]{};
AutoRef<VKBuffer> curIBuffer_;
int curIBufferOffset_ = 0;
VKRPipelineLayout *pipelineLayout_ = nullptr;
VkDescriptorSetLayout descriptorSetLayout_ = VK_NULL_HANDLE;
VkPipelineLayout pipelineLayout_ = VK_NULL_HANDLE;
VkPipelineCache pipelineCache_ = VK_NULL_HANDLE;
AutoRef<VKFramebuffer> curFramebuffer_;
VkDevice device_;
DebugFlags debugFlags_ = DebugFlags::NONE;
enum {
MAX_FRAME_COMMAND_BUFFERS = 256,
@ -548,10 +544,23 @@ private:
AutoRef<VKTexture> boundTextures_[MAX_BOUND_TEXTURES];
AutoRef<VKSamplerState> boundSamplers_[MAX_BOUND_TEXTURES];
VkImageView boundImageView_[MAX_BOUND_TEXTURES]{};
TextureBindFlags boundTextureFlags_[MAX_BOUND_TEXTURES]{};
TextureBindFlags boundTextureFlags_[MAX_BOUND_TEXTURES];
VulkanPushPool *push_ = nullptr;
struct FrameData {
FrameData() : descriptorPool("VKContext", false) {
descriptorPool.Setup([this] { descSets_.clear(); });
}
// Per-frame descriptor set cache. As it's per frame and reset every frame, we don't need to
// worry about invalidating descriptors pointing to deleted textures.
// However! ARM is not a fan of doing it this way.
std::map<DescriptorSetKey, VkDescriptorSet> descSets_;
VulkanDescSetPool descriptorPool;
};
FrameData frame_[VulkanContext::MAX_INFLIGHT_FRAMES];
DeviceCaps caps_{};
uint8_t stencilRef_ = 0;
@ -739,14 +748,14 @@ enum class TextureState {
PENDING_DESTRUCTION,
};
bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const TextureDesc &desc) {
bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushPool *push, const TextureDesc &desc) {
// Zero-sized textures not allowed.
_assert_(desc.width * desc.height * desc.depth > 0); // remember to set depth to 1!
if (desc.width * desc.height * desc.depth <= 0) {
ERROR_LOG(G3D, "Bad texture dimensions %dx%dx%d", desc.width, desc.height, desc.depth);
return false;
}
_dbg_assert_(pushBuffer);
_assert_(push);
format_ = desc.format;
mipLevels_ = desc.mipLevels;
width_ = desc.width;
@ -754,6 +763,8 @@ bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const Te
depth_ = desc.depth;
vkTex_ = new VulkanTexture(vulkan_, desc.tag);
VkFormat vulkanFormat = DataFormatToVulkan(format_);
int bpp = GetBpp(vulkanFormat);
int bytesPerPixel = bpp / 8;
int usageBits = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
if (mipLevels_ > (int)desc.initData.size()) {
// Gonna have to generate some, which requires TRANSFER_SRC
@ -768,10 +779,33 @@ bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const Te
}
VkImageLayout layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
if (desc.initData.size()) {
UpdateInternal(cmd, pushBuffer, desc.initData.data(), desc.initDataCallback, (int)desc.initData.size());
int w = width_;
int h = height_;
int d = depth_;
int i;
for (i = 0; i < (int)desc.initData.size(); i++) {
uint32_t offset;
VkBuffer buf;
size_t size = w * h * d * bytesPerPixel;
if (desc.initDataCallback) {
uint8_t *dest = (uint8_t *)push->Allocate(size, 16, &buf, &offset);
_assert_(dest != nullptr);
if (!desc.initDataCallback(dest, desc.initData[i], w, h, d, w * bytesPerPixel, h * w * bytesPerPixel)) {
memcpy(dest, desc.initData[i], size);
}
} else {
offset = push->Push((const void *)desc.initData[i], size, 16, &buf);
}
TextureCopyBatch batch;
vkTex_->CopyBufferToMipLevel(cmd, &batch, i, w, h, 0, buf, offset, w);
vkTex_->FinishCopyBatch(cmd, &batch);
w = (w + 1) / 2;
h = (h + 1) / 2;
d = (d + 1) / 2;
}
// Generate the rest of the mips automatically.
if (desc.initData.size() < mipLevels_) {
vkTex_->GenerateMips(cmd, (int)desc.initData.size(), false);
if (i < mipLevels_) {
vkTex_->GenerateMips(cmd, i, false);
layout = VK_IMAGE_LAYOUT_GENERAL;
}
}
@ -779,45 +813,6 @@ bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const Te
return true;
}
void VKTexture::Update(VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const uint8_t * const *data, TextureCallback initDataCallback, int numLevels) {
// Before we can use UpdateInternal, we need to transition the image to the same state as after CreateDirect,
// making it ready for writing.
vkTex_->PrepareForTransferDst(cmd, numLevels);
UpdateInternal(cmd, pushBuffer, data, initDataCallback, numLevels);
vkTex_->RestoreAfterTransferDst(cmd, numLevels);
}
void VKTexture::UpdateInternal(VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const uint8_t * const *data, TextureCallback initDataCallback, int numLevels) {
int w = width_;
int h = height_;
int d = depth_;
int i;
VkFormat vulkanFormat = DataFormatToVulkan(format_);
int bpp = GetBpp(vulkanFormat);
int bytesPerPixel = bpp / 8;
TextureCopyBatch batch;
batch.reserve(numLevels);
for (i = 0; i < numLevels; i++) {
uint32_t offset;
VkBuffer buf;
size_t size = w * h * d * bytesPerPixel;
uint8_t *dest = (uint8_t *)pushBuffer->Allocate(size, 16, &buf, &offset);
if (initDataCallback) {
_assert_(dest != nullptr);
if (!initDataCallback(dest, data[i], w, h, d, w * bytesPerPixel, h * w * bytesPerPixel)) {
memcpy(dest, data[i], size);
}
} else {
memcpy(dest, data[i], size);
}
vkTex_->CopyBufferToMipLevel(cmd, &batch, i, w, h, 0, buf, offset, w);
w = (w + 1) / 2;
h = (h + 1) / 2;
d = (d + 1) / 2;
}
vkTex_->FinishCopyBatch(cmd, &batch);
}
static DataFormat DataFormatFromVulkanDepth(VkFormat fmt) {
switch (fmt) {
case VK_FORMAT_D24_UNORM_S8_UINT:
@ -837,23 +832,19 @@ static DataFormat DataFormatFromVulkanDepth(VkFormat fmt) {
return DataFormat::UNDEFINED;
}
VKContext::VKContext(VulkanContext *vulkan, bool useRenderThread)
: vulkan_(vulkan), renderManager_(vulkan, useRenderThread, frameTimeHistory_) {
VKContext::VKContext(VulkanContext *vulkan)
: vulkan_(vulkan), renderManager_(vulkan) {
shaderLanguageDesc_.Init(GLSL_VULKAN);
VkFormat depthStencilFormat = vulkan->GetDeviceInfo().preferredDepthStencilFormat;
caps_.setMaxFrameLatencySupported = true;
caps_.anisoSupported = vulkan->GetDeviceFeatures().enabled.standard.samplerAnisotropy != 0;
caps_.geometryShaderSupported = vulkan->GetDeviceFeatures().enabled.standard.geometryShader != 0;
caps_.tesselationShaderSupported = vulkan->GetDeviceFeatures().enabled.standard.tessellationShader != 0;
caps_.dualSourceBlend = vulkan->GetDeviceFeatures().enabled.standard.dualSrcBlend != 0;
caps_.depthClampSupported = vulkan->GetDeviceFeatures().enabled.standard.depthClamp != 0;
// Comment out these two to test geometry shader culling on any geometry shader-supporting hardware.
caps_.clipDistanceSupported = vulkan->GetDeviceFeatures().enabled.standard.shaderClipDistance != 0;
caps_.cullDistanceSupported = vulkan->GetDeviceFeatures().enabled.standard.shaderCullDistance != 0;
caps_.framebufferBlitSupported = true;
caps_.framebufferCopySupported = true;
caps_.framebufferDepthBlitSupported = vulkan->GetDeviceInfo().canBlitToPreferredDepthStencilFormat;
@ -872,20 +863,6 @@ VKContext::VKContext(VulkanContext *vulkan, bool useRenderThread)
caps_.logicOpSupported = vulkan->GetDeviceFeatures().enabled.standard.logicOp != 0;
caps_.multiViewSupported = vulkan->GetDeviceFeatures().enabled.multiview.multiview != 0;
caps_.sampleRateShadingSupported = vulkan->GetDeviceFeatures().enabled.standard.sampleRateShading != 0;
caps_.textureSwizzleSupported = true;
// Present mode stuff
caps_.presentMaxInterval = 1;
caps_.presentInstantModeChange = false; // TODO: Fix this with some work in VulkanContext
caps_.presentModesSupported = (PresentMode)0;
for (auto mode : vulkan->GetAvailablePresentModes()) {
switch (mode) {
case VK_PRESENT_MODE_FIFO_KHR: caps_.presentModesSupported |= PresentMode::FIFO; break;
case VK_PRESENT_MODE_IMMEDIATE_KHR: caps_.presentModesSupported |= PresentMode::IMMEDIATE; break;
case VK_PRESENT_MODE_MAILBOX_KHR: caps_.presentModesSupported |= PresentMode::MAILBOX; break;
default: break; // Ignore any other modes.
}
}
const auto &limits = vulkan->GetPhysicalDeviceProperties().properties.limits;
@ -899,7 +876,6 @@ VKContext::VKContext(VulkanContext *vulkan, bool useRenderThread)
case VULKAN_VENDOR_QUALCOMM: caps_.vendor = GPUVendor::VENDOR_QUALCOMM; break;
case VULKAN_VENDOR_INTEL: caps_.vendor = GPUVendor::VENDOR_INTEL; break;
case VULKAN_VENDOR_APPLE: caps_.vendor = GPUVendor::VENDOR_APPLE; break;
case VULKAN_VENDOR_MESA: caps_.vendor = GPUVendor::VENDOR_MESA; break;
default:
WARN_LOG(G3D, "Unknown vendor ID %08x", deviceProps.vendorID);
caps_.vendor = GPUVendor::VENDOR_UNKNOWN;
@ -1009,29 +985,71 @@ VKContext::VKContext(VulkanContext *vulkan, bool useRenderThread)
}
}
// Vulkan can support this through input attachments and various extensions, but not worth
// the trouble.
caps_.framebufferFetchSupported = false;
// Limited, through input attachments and self-dependencies.
// We turn it off here already if buggy.
caps_.framebufferFetchSupported = !bugs_.Has(Bugs::SUBPASS_FEEDBACK_BROKEN);
caps_.deviceID = deviceProps.deviceID;
device_ = vulkan->GetDevice();
std::vector<VkDescriptorPoolSize> dpTypes;
dpTypes.resize(2);
dpTypes[0].descriptorCount = 200;
dpTypes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
dpTypes[1].descriptorCount = 200 * MAX_BOUND_TEXTURES;
dpTypes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
VkDescriptorPoolCreateInfo dp{ VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO };
// Don't want to mess around with individually freeing these, let's go dynamic each frame.
dp.flags = 0;
// 200 textures per frame was not enough for the UI.
dp.maxSets = 4096;
VkBufferUsageFlags usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
push_ = new VulkanPushPool(vulkan_, "pushBuffer", 4 * 1024 * 1024, usage);
for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) {
frame_[i].descriptorPool.Create(vulkan_, dp, dpTypes);
}
// binding 0 - uniform data
// binding 1 - combined sampler/image 0
// binding 2 - combined sampler/image 1
// ...etc
BindingType bindings[MAX_BOUND_TEXTURES + 1];
bindings[0] = BindingType::UNIFORM_BUFFER_DYNAMIC_ALL;
VkDescriptorSetLayoutBinding bindings[MAX_BOUND_TEXTURES + 1];
bindings[0].descriptorCount = 1;
bindings[0].pImmutableSamplers = nullptr;
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[0].binding = 0;
for (int i = 0; i < MAX_BOUND_TEXTURES; ++i) {
bindings[1 + i] = BindingType::COMBINED_IMAGE_SAMPLER;
bindings[i + 1].descriptorCount = 1;
bindings[i + 1].pImmutableSamplers = nullptr;
bindings[i + 1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[i + 1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[i + 1].binding = i + 1;
}
pipelineLayout_ = renderManager_.CreatePipelineLayout(bindings, ARRAY_SIZE(bindings), caps_.geometryShaderSupported, "thin3d_layout");
VkDescriptorSetLayoutCreateInfo dsl = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO };
dsl.bindingCount = ARRAY_SIZE(bindings);
dsl.pBindings = bindings;
VkResult res = vkCreateDescriptorSetLayout(device_, &dsl, nullptr, &descriptorSetLayout_);
_assert_(VK_SUCCESS == res);
vulkan_->SetDebugName(descriptorSetLayout_, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT, "thin3d_d_layout");
VkPipelineLayoutCreateInfo pl = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO };
pl.pPushConstantRanges = nullptr;
pl.pushConstantRangeCount = 0;
VkDescriptorSetLayout setLayouts[1] = { descriptorSetLayout_ };
pl.setLayoutCount = ARRAY_SIZE(setLayouts);
pl.pSetLayouts = setLayouts;
res = vkCreatePipelineLayout(device_, &pl, nullptr, &pipelineLayout_);
_assert_(VK_SUCCESS == res);
vulkan_->SetDebugName(pipelineLayout_, VK_OBJECT_TYPE_PIPELINE_LAYOUT, "thin3d_p_layout");
VkPipelineCacheCreateInfo pc{ VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO };
VkResult res = vkCreatePipelineCache(vulkan_->GetDevice(), &pc, nullptr, &pipelineCache_);
res = vkCreatePipelineCache(vulkan_->GetDevice(), &pc, nullptr, &pipelineCache_);
_assert_(VK_SUCCESS == res);
}
@ -1039,32 +1057,35 @@ VKContext::~VKContext() {
DestroyPresets();
delete nullTexture_;
// This also destroys all descriptor sets.
for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) {
frame_[i].descriptorPool.Destroy();
}
push_->Destroy();
delete push_;
renderManager_.DestroyPipelineLayout(pipelineLayout_);
vulkan_->Delete().QueueDeleteDescriptorSetLayout(descriptorSetLayout_);
vulkan_->Delete().QueueDeletePipelineLayout(pipelineLayout_);
vulkan_->Delete().QueueDeletePipelineCache(pipelineCache_);
}
void VKContext::BeginFrame(DebugFlags debugFlags) {
renderManager_.BeginFrame(debugFlags & DebugFlags::PROFILE_TIMESTAMPS, debugFlags & DebugFlags::PROFILE_SCOPES);
void VKContext::BeginFrame() {
// TODO: Bad dependency on g_Config here!
renderManager_.BeginFrame(debugFlags_ & DebugFlags::PROFILE_TIMESTAMPS, debugFlags_ & DebugFlags::PROFILE_SCOPES);
FrameData &frame = frame_[vulkan_->GetCurFrame()];
push_->BeginFrame();
frame.descriptorPool.Reset();
}
void VKContext::EndFrame() {
// Do all the work to submit the command buffers etc.
renderManager_.Finish();
// Unbind stuff, to avoid accidentally relying on it across frames (and provide some protection against forgotten unbinds of deleted things).
Invalidate(InvalidationFlags::CACHED_RENDER_STATE);
}
void VKContext::Present(PresentMode presentMode, int vblanks) {
if (presentMode == PresentMode::FIFO) {
_dbg_assert_(vblanks == 0 || vblanks == 1);
}
renderManager_.Present();
frameCount_++;
}
void VKContext::Invalidate(InvalidationFlags flags) {
if (flags & InvalidationFlags::CACHED_RENDER_STATE) {
curPipeline_ = nullptr;
@ -1085,30 +1106,81 @@ void VKContext::WipeQueue() {
renderManager_.Wipe();
}
void VKContext::BindDescriptors(VkBuffer buf, PackedDescriptor descriptors[4]) {
descriptors[0].buffer.buffer = buf;
descriptors[0].buffer.offset = 0; // dynamic
descriptors[0].buffer.range = curPipeline_->GetUBOSize();
VkDescriptorSet VKContext::GetOrCreateDescriptorSet(VkBuffer buf) {
DescriptorSetKey key{};
FrameData *frame = &frame_[vulkan_->GetCurFrame()];
int numDescs = 1;
for (int i = 0; i < MAX_BOUND_TEXTURES; ++i) {
VkImageView view;
VkSampler sampler;
if (boundTextures_[i]) {
view = (boundTextureFlags_[i] & TextureBindFlags::VULKAN_BIND_ARRAY) ? boundTextures_[i]->GetImageArrayView() : boundTextures_[i]->GetImageView();
key.imageViews_[i] = (boundTextureFlags_[i] & TextureBindFlags::VULKAN_BIND_ARRAY) ? boundTextures_[i]->GetImageArrayView() : boundTextures_[i]->GetImageView();
} else {
view = boundImageView_[i];
key.imageViews_[i] = boundImageView_[i];
}
sampler = boundSamplers_[i] ? boundSamplers_[i]->GetSampler() : VK_NULL_HANDLE;
key.samplers_[i] = boundSamplers_[i];
}
key.buffer_ = buf;
if (view && sampler) {
descriptors[i + 1].image.view = view;
descriptors[i + 1].image.sampler = sampler;
} else {
descriptors[i + 1].image.view = VK_NULL_HANDLE;
descriptors[i + 1].image.sampler = VK_NULL_HANDLE;
auto iter = frame->descSets_.find(key);
if (iter != frame->descSets_.end()) {
return iter->second;
}
VkDescriptorSet descSet = frame->descriptorPool.Allocate(1, &descriptorSetLayout_, "thin3d_descset");
if (descSet == VK_NULL_HANDLE) {
ERROR_LOG(G3D, "GetOrCreateDescriptorSet failed");
return VK_NULL_HANDLE;
}
vulkan_->SetDebugName(descSet, VK_OBJECT_TYPE_DESCRIPTOR_SET, "(thin3d desc set)");
VkDescriptorBufferInfo bufferDesc;
bufferDesc.buffer = buf;
bufferDesc.offset = 0;
bufferDesc.range = curPipeline_->GetUBOSize();
VkDescriptorImageInfo imageDesc[MAX_BOUND_TEXTURES]{};
VkWriteDescriptorSet writes[1 + MAX_BOUND_TEXTURES]{};
// If handles are NULL for whatever buggy reason, it's best to leave the descriptors
// unwritten instead of trying to write a zero, which is not legal.
int numWrites = 0;
if (buf) {
writes[numWrites].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writes[numWrites].dstSet = descSet;
writes[numWrites].dstArrayElement = 0;
writes[numWrites].dstBinding = 0;
writes[numWrites].pBufferInfo = &bufferDesc;
writes[numWrites].pImageInfo = nullptr;
writes[numWrites].pTexelBufferView = nullptr;
writes[numWrites].descriptorCount = 1;
writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
numWrites++;
}
for (int i = 0; i < MAX_BOUND_TEXTURES; ++i) {
if (key.imageViews_[i] && key.samplers_[i] && key.samplers_[i]->GetSampler()) {
imageDesc[i].imageView = key.imageViews_[i];
imageDesc[i].sampler = key.samplers_[i]->GetSampler();
imageDesc[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
writes[numWrites].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writes[numWrites].dstSet = descSet;
writes[numWrites].dstArrayElement = 0;
writes[numWrites].dstBinding = i + 1;
writes[numWrites].pBufferInfo = nullptr;
writes[numWrites].pImageInfo = &imageDesc[i];
writes[numWrites].pTexelBufferView = nullptr;
writes[numWrites].descriptorCount = 1;
writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
numWrites++;
}
}
vkUpdateDescriptorSets(device_, numWrites, writes, 0, nullptr);
frame->descSets_[key] = descSet;
return descSet;
}
Pipeline *VKContext::CreateGraphicsPipeline(const PipelineDesc &desc, const char *tag) {
@ -1145,11 +1217,13 @@ Pipeline *VKContext::CreateGraphicsPipeline(const PipelineDesc &desc, const char
}
}
_dbg_assert_(input);
_dbg_assert_(input && input->bindings.size() == 1);
_dbg_assert_((int)input->attributes.size() == (int)input->visc.vertexAttributeDescriptionCount);
pipeline->stride = input->binding.stride;
gDesc.ibd = input->binding;
for (int i = 0; i < (int)input->bindings.size(); i++) {
pipeline->stride[i] = input->bindings[i].stride;
}
gDesc.ibd = input->bindings[0];
for (size_t i = 0; i < input->attributes.size(); i++) {
gDesc.attrs[i] = input->attributes[i];
}
@ -1231,20 +1305,23 @@ InputLayout *VKContext::CreateInputLayout(const InputLayoutDesc &desc) {
VKInputLayout *vl = new VKInputLayout();
vl->visc = { VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO };
vl->visc.flags = 0;
vl->visc.vertexBindingDescriptionCount = 1;
vl->visc.vertexAttributeDescriptionCount = (uint32_t)desc.attributes.size();
vl->visc.vertexBindingDescriptionCount = (uint32_t)desc.bindings.size();
vl->bindings.resize(vl->visc.vertexBindingDescriptionCount);
vl->attributes.resize(vl->visc.vertexAttributeDescriptionCount);
vl->visc.pVertexBindingDescriptions = &vl->binding;
vl->visc.pVertexBindingDescriptions = vl->bindings.data();
vl->visc.pVertexAttributeDescriptions = vl->attributes.data();
for (size_t i = 0; i < desc.attributes.size(); i++) {
vl->attributes[i].binding = 0;
vl->attributes[i].binding = (uint32_t)desc.attributes[i].binding;
vl->attributes[i].format = DataFormatToVulkan(desc.attributes[i].format);
vl->attributes[i].location = desc.attributes[i].location;
vl->attributes[i].offset = desc.attributes[i].offset;
}
vl->binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
vl->binding.binding = 0;
vl->binding.stride = desc.stride;
for (size_t i = 0; i < desc.bindings.size(); i++) {
vl->bindings[i].inputRate = desc.bindings[i].instanceRate ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
vl->bindings[i].binding = (uint32_t)i;
vl->bindings[i].stride = desc.bindings[i].stride;
}
return vl;
}
@ -1265,20 +1342,6 @@ Texture *VKContext::CreateTexture(const TextureDesc &desc) {
}
}
void VKContext::UpdateTextureLevels(Texture *texture, const uint8_t **data, TextureCallback initDataCallback, int numLevels) {
VkCommandBuffer initCmd = renderManager_.GetInitCmd();
if (!push_ || !initCmd) {
// Too early! Fail.
ERROR_LOG(G3D, "Can't create textures before the first frame has started.");
return;
}
VKTexture *tex = (VKTexture *)texture;
_dbg_assert_(numLevels <= tex->NumLevels());
tex->Update(initCmd, push_, data, initDataCallback, numLevels);
}
static inline void CopySide(VkStencilOpState &dest, const StencilSetup &src) {
dest.compareOp = compToVK[(int)src.compareOp];
dest.failOp = stencilOpToVK[(int)src.failOp];
@ -1353,7 +1416,7 @@ void VKContext::BindTextures(int start, int count, Texture **textures, TextureBi
boundTextures_[i] = static_cast<VKTexture *>(textures[i - start]);
boundTextureFlags_[i] = flags;
if (boundTextures_[i]) {
// If a texture is bound, we set these up in BindDescriptors too.
// If a texture is bound, we set these up in GetOrCreateDescriptorSet too.
// But we might need to set the view here anyway so it can be queried using GetNativeObject.
if (flags & TextureBindFlags::VULKAN_BIND_ARRAY) {
boundImageView_[i] = boundTextures_[i]->GetImageArrayView();
@ -1399,36 +1462,42 @@ void VKContext::ApplyDynamicState() {
}
void VKContext::Draw(int vertexCount, int offset) {
VKBuffer *vbuf = curVBuffer_;
VKBuffer *vbuf = curVBuffers_[0];
VkBuffer vulkanVbuf;
VkBuffer vulkanUBObuf;
uint32_t ubo_offset = (uint32_t)curPipeline_->PushUBO(push_, vulkan_, &vulkanUBObuf);
size_t vbBindOffset = push_->Push(vbuf->GetData(), vbuf->GetSize(), 4, &vulkanVbuf);
VkDescriptorSet descSet = GetOrCreateDescriptorSet(vulkanUBObuf);
if (descSet == VK_NULL_HANDLE) {
ERROR_LOG(G3D, "GetOrCreateDescriptorSet failed, skipping %s", __FUNCTION__);
return;
}
BindCurrentPipeline();
ApplyDynamicState();
int descSetIndex;
PackedDescriptor *descriptors = renderManager_.PushDescriptorSet(4, &descSetIndex);
BindDescriptors(vulkanUBObuf, descriptors);
renderManager_.Draw(descSetIndex, 1, &ubo_offset, vulkanVbuf, (int)vbBindOffset + curVBufferOffset_, vertexCount, offset);
renderManager_.Draw(descSet, 1, &ubo_offset, vulkanVbuf, (int)vbBindOffset + curVBufferOffsets_[0], vertexCount, offset);
}
void VKContext::DrawIndexed(int vertexCount, int offset) {
VKBuffer *ibuf = curIBuffer_;
VKBuffer *vbuf = curVBuffer_;
VKBuffer *vbuf = curVBuffers_[0];
VkBuffer vulkanVbuf, vulkanIbuf, vulkanUBObuf;
uint32_t ubo_offset = (uint32_t)curPipeline_->PushUBO(push_, vulkan_, &vulkanUBObuf);
size_t vbBindOffset = push_->Push(vbuf->GetData(), vbuf->GetSize(), 4, &vulkanVbuf);
size_t ibBindOffset = push_->Push(ibuf->GetData(), ibuf->GetSize(), 4, &vulkanIbuf);
VkDescriptorSet descSet = GetOrCreateDescriptorSet(vulkanUBObuf);
if (descSet == VK_NULL_HANDLE) {
ERROR_LOG(G3D, "GetOrCreateDescriptorSet failed, skipping %s", __FUNCTION__);
return;
}
BindCurrentPipeline();
ApplyDynamicState();
int descSetIndex;
PackedDescriptor *descriptors = renderManager_.PushDescriptorSet(4, &descSetIndex);
BindDescriptors(vulkanUBObuf, descriptors);
renderManager_.DrawIndexed(descSetIndex, 1, &ubo_offset, vulkanVbuf, (int)vbBindOffset + curVBufferOffset_, vulkanIbuf, (int)ibBindOffset + offset * sizeof(uint32_t), vertexCount, 1);
renderManager_.DrawIndexed(descSet, 1, &ubo_offset, vulkanVbuf, (int)vbBindOffset + curVBufferOffsets_[0], vulkanIbuf, (int)ibBindOffset + offset * sizeof(uint32_t), vertexCount, 1, VK_INDEX_TYPE_UINT16);
}
void VKContext::DrawUP(const void *vdata, int vertexCount) {
@ -1438,7 +1507,7 @@ void VKContext::DrawUP(const void *vdata, int vertexCount) {
}
VkBuffer vulkanVbuf, vulkanUBObuf;
size_t dataSize = vertexCount * curPipeline_->stride;
size_t dataSize = vertexCount * curPipeline_->stride[0];
uint32_t vbBindOffset;
uint8_t *dataPtr = push_->Allocate(dataSize, 4, &vulkanVbuf, &vbBindOffset);
_assert_(dataPtr != nullptr);
@ -1446,12 +1515,15 @@ void VKContext::DrawUP(const void *vdata, int vertexCount) {
uint32_t ubo_offset = (uint32_t)curPipeline_->PushUBO(push_, vulkan_, &vulkanUBObuf);
VkDescriptorSet descSet = GetOrCreateDescriptorSet(vulkanUBObuf);
if (descSet == VK_NULL_HANDLE) {
ERROR_LOG(G3D, "GetOrCreateDescriptorSet failed, skipping %s", __FUNCTION__);
return;
}
BindCurrentPipeline();
ApplyDynamicState();
int descSetIndex;
PackedDescriptor *descriptors = renderManager_.PushDescriptorSet(4, &descSetIndex);
BindDescriptors(vulkanUBObuf, descriptors);
renderManager_.Draw(descSetIndex, 1, &ubo_offset, vulkanVbuf, (int)vbBindOffset + curVBufferOffset_, vertexCount);
renderManager_.Draw(descSet, 1, &ubo_offset, vulkanVbuf, (int)vbBindOffset + curVBufferOffsets_[0], vertexCount);
}
void VKContext::BindCurrentPipeline() {
@ -1469,8 +1541,8 @@ void VKContext::Clear(int clearMask, uint32_t colorval, float depthVal, int sten
renderManager_.Clear(colorval, depthVal, stencilVal, mask);
}
DrawContext *T3DCreateVulkanContext(VulkanContext *vulkan, bool useRenderThread) {
return new VKContext(vulkan, useRenderThread);
DrawContext *T3DCreateVulkanContext(VulkanContext *vulkan) {
return new VKContext(vulkan);
}
void AddFeature(std::vector<std::string> &features, const char *name, VkBool32 available, VkBool32 enabled) {
@ -1501,24 +1573,16 @@ std::vector<std::string> VKContext::GetFeatureList() const {
AddFeature(features, "multiview", vulkan_->GetDeviceFeatures().available.multiview.multiview, vulkan_->GetDeviceFeatures().enabled.multiview.multiview);
AddFeature(features, "multiviewGeometryShader", vulkan_->GetDeviceFeatures().available.multiview.multiviewGeometryShader, vulkan_->GetDeviceFeatures().enabled.multiview.multiviewGeometryShader);
AddFeature(features, "presentId", vulkan_->GetDeviceFeatures().available.presentId.presentId, vulkan_->GetDeviceFeatures().enabled.presentId.presentId);
AddFeature(features, "presentWait", vulkan_->GetDeviceFeatures().available.presentWait.presentWait, vulkan_->GetDeviceFeatures().enabled.presentWait.presentWait);
features.emplace_back(std::string("Preferred depth buffer format: ") + VulkanFormatToString(vulkan_->GetDeviceInfo().preferredDepthStencilFormat));
return features;
}
std::vector<std::string> VKContext::GetExtensionList(bool device, bool enabledOnly) const {
std::vector<std::string> VKContext::GetExtensionList() const {
std::vector<std::string> extensions;
if (enabledOnly) {
for (auto &iter : (device ? vulkan_->GetDeviceExtensionsEnabled() : vulkan_->GetInstanceExtensionsEnabled())) {
extensions.push_back(iter);
}
} else {
for (auto &iter : (device ? vulkan_->GetDeviceExtensionsAvailable() : vulkan_->GetInstanceExtensionsAvailable())) {
extensions.push_back(iter.extensionName);
}
for (auto &iter : vulkan_->GetDeviceExtensionsAvailable()) {
extensions.push_back(iter.extensionName);
}
return extensions;
}
@ -1671,6 +1735,10 @@ void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChanne
boundImageView_[binding] = renderManager_.BindFramebufferAsTexture(fb->GetFB(), binding, aspect, layer);
}
void VKContext::BindCurrentFramebufferForColorInput() {
renderManager_.BindCurrentFramebufferAsInputAttachment0(VK_IMAGE_ASPECT_COLOR_BIT);
}
void VKContext::GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) {
VKFramebuffer *fb = (VKFramebuffer *)fbo;
if (fb) {
@ -1747,4 +1815,8 @@ void VKContext::DebugAnnotate(const char *annotation) {
renderManager_.DebugAnnotate(annotation);
}
void VKContext::SetDebugFlags(DebugFlags flags) {
debugFlags_ = flags;
}
} // namespace Draw

View file

@ -119,8 +119,7 @@ bool DataFormatIsBlockCompressed(DataFormat fmt, int *blockSize) {
}
RefCountedObject::~RefCountedObject() {
const int rc = refcount_.load();
_dbg_assert_msg_(rc == 0xDEDEDE, "Unexpected refcount %d in object of type '%s'", rc, name_);
_dbg_assert_(refcount_ == 0xDEDEDE);
}
bool RefCountedObject::Release() {
@ -132,7 +131,6 @@ bool RefCountedObject::Release() {
return true;
}
} else {
// No point in printing the name here if the object has already been free-d, it'll be corrupt and dangerous to print.
_dbg_assert_msg_(false, "Refcount (%d) invalid for object %p - corrupt?", refcount_.load(), this);
}
return false;
@ -140,10 +138,11 @@ bool RefCountedObject::Release() {
bool RefCountedObject::ReleaseAssertLast() {
bool released = Release();
_dbg_assert_msg_(released, "RefCountedObject: Expected to be the last reference, but isn't! (%s)", name_);
_dbg_assert_msg_(released, "RefCountedObject: Expected to be the last reference, but isn't!");
return released;
}
// ================================== PIXEL/FRAGMENT SHADERS
// The Vulkan ones can be re-used with modern GL later if desired, as they're just GLSL.
@ -768,20 +767,4 @@ const char *Bugs::GetBugName(uint32_t bug) {
}
}
const char *PresentModeToString(PresentMode presentMode) {
// All 8 possible cases, with three flags, for simplicity.
switch ((int)presentMode) {
case 0: return "NONE";
case (int)PresentMode::FIFO: return "FIFO";
case (int)PresentMode::IMMEDIATE: return "IMMEDIATE";
case (int)PresentMode::MAILBOX: return "MAILBOX";
case ((int)PresentMode::FIFO | (int)PresentMode::MAILBOX) : return "FIFO|MAILBOX";
case ((int)PresentMode::FIFO | (int)PresentMode::IMMEDIATE) : return "FIFO|IMMEDIATE";
case ((int)PresentMode::MAILBOX | (int)PresentMode::IMMEDIATE) : return "MAILBOX|IMMEDIATE"; // Not gonna happen
case ((int)PresentMode::FIFO | (int)PresentMode::MAILBOX | (int)PresentMode::IMMEDIATE) : return "FIFO|MAILBOX|IMMEDIATE";
default:
return "INVALID";
}
}
} // namespace Draw

View file

@ -18,7 +18,6 @@
#include "Common/GPU/Shader.h"
#include "Common/GPU/MiscTypes.h"
#include "Common/Data/Collections/Slice.h"
#include "Common/Data/Collections/FastVec.h"
namespace Lin {
class Matrix4x4;
@ -231,7 +230,6 @@ enum class GPUVendor {
VENDOR_BROADCOM, // Raspberry
VENDOR_VIVANTE,
VENDOR_APPLE,
VENDOR_MESA,
};
enum class NativeObject {
@ -361,7 +359,7 @@ protected:
class RefCountedObject {
public:
explicit RefCountedObject(const char *name) : name_(name) {
RefCountedObject() {
refcount_ = 1;
}
RefCountedObject(const RefCountedObject &other) = delete;
@ -374,7 +372,6 @@ public:
private:
std::atomic<int> refcount_;
const char * const name_;
};
template <typename T>
@ -431,22 +428,18 @@ struct AutoRef {
class BlendState : public RefCountedObject {
public:
BlendState() : RefCountedObject("BlendState") {}
};
class SamplerState : public RefCountedObject {
public:
SamplerState() : RefCountedObject("SamplerState") {}
};
class DepthStencilState : public RefCountedObject {
public:
DepthStencilState() : RefCountedObject("DepthStencilState") {}
};
class Framebuffer : public RefCountedObject {
public:
Framebuffer() : RefCountedObject("Framebuffer") {}
int Width() { return width_; }
int Height() { return height_; }
int Layers() { return layers_; }
@ -459,55 +452,46 @@ protected:
class Buffer : public RefCountedObject {
public:
Buffer() : RefCountedObject("Buffer") {}
};
class Texture : public RefCountedObject {
public:
Texture() : RefCountedObject("Texture") {}
int Width() const { return width_; }
int Height() const { return height_; }
int Depth() const { return depth_; }
DataFormat Format() const { return format_; }
int Width() { return width_; }
int Height() { return height_; }
int Depth() { return depth_; }
protected:
int width_ = -1, height_ = -1, depth_ = -1;
DataFormat format_ = DataFormat::UNDEFINED;
};
struct BindingDesc {
int stride;
bool instanceRate;
};
struct AttributeDesc {
int binding;
int location; // corresponds to semantic
DataFormat format;
int offset;
};
struct InputLayoutDesc {
int stride;
std::vector<BindingDesc> bindings;
std::vector<AttributeDesc> attributes;
};
class InputLayout : public RefCountedObject {
public:
InputLayout() : RefCountedObject("InputLayout") {}
};
class InputLayout : public RefCountedObject { };
// Uniform types have moved to Shader.h.
class ShaderModule : public RefCountedObject {
public:
ShaderModule() : RefCountedObject("ShaderModule") {}
virtual ShaderStage GetStage() const = 0;
};
class Pipeline : public RefCountedObject {
public:
Pipeline() : RefCountedObject("Pipeline") {}
};
class Pipeline : public RefCountedObject { };
class RasterState : public RefCountedObject {
public:
RasterState() : RefCountedObject("RasterState") {}
};
class RasterState : public RefCountedObject {};
struct StencilSetup {
StencilOp failOp;
@ -566,13 +550,6 @@ struct PipelineDesc {
const Slice<SamplerDef> samplers;
};
enum class PresentMode {
FIFO = 1,
IMMEDIATE = 2,
MAILBOX = 4,
};
ENUM_CLASS_BITOPS(PresentMode);
struct DeviceCaps {
GPUVendor vendor;
uint32_t deviceID; // use caution!
@ -606,8 +583,6 @@ struct DeviceCaps {
bool multiViewSupported;
bool isTilingGPU; // This means that it benefits from correct store-ops, msaa without backing memory, etc.
bool sampleRateShadingSupported;
bool setMaxFrameLatencySupported;
bool textureSwizzleSupported;
bool verySlowShaderCompiler;
@ -617,11 +592,6 @@ struct DeviceCaps {
// Old style, for older GL or Direct3D 9.
u32 clipPlanesSupported;
// Presentation caps
int presentMaxInterval; // 1 on many backends
bool presentInstantModeChange;
PresentMode presentModesSupported;
u32 multiSampleLevelsMask; // Bit n is set if (1 << n) is a valid multisample level. Bit 0 is always set.
std::string deviceName; // The device name to use when creating the thin3d context, to get the same one.
};
@ -683,6 +653,13 @@ enum class DebugFlags {
};
ENUM_CLASS_BITOPS(DebugFlags);
enum class PresentationMode {
FIFO,
FIFO_RELAXED,
IMMEDIATE,
MAILBOX,
};
class DrawContext {
public:
virtual ~DrawContext();
@ -694,9 +671,11 @@ public:
virtual const DeviceCaps &GetDeviceCaps() const = 0;
virtual uint32_t GetDataFormatSupport(DataFormat fmt) const = 0;
virtual std::vector<std::string> GetFeatureList() const { return std::vector<std::string>(); }
virtual std::vector<std::string> GetExtensionList(bool device, bool enabledOnly) const { return std::vector<std::string>(); }
virtual std::vector<std::string> GetExtensionList() const { return std::vector<std::string>(); }
virtual std::vector<std::string> GetDeviceList() const { return std::vector<std::string>(); }
virtual PresentationMode GetPresentationMode() const = 0;
// Describes the primary shader language that this implementation prefers.
const ShaderLanguageDesc &GetShaderLanguageDesc() {
return shaderLanguageDesc_;
@ -707,6 +686,7 @@ public:
virtual void SetErrorCallback(ErrorCallbackFn callback, void *userdata) {}
virtual void DebugAnnotate(const char *annotation) {}
virtual void SetDebugFlags(DebugFlags flags) {}
// Partial pipeline state, used to create pipelines. (in practice, in d3d11 they'll use the native state objects directly).
// TODO: Possibly ditch these and just put the descs directly in PipelineDesc since only D3D11 benefits.
@ -733,11 +713,6 @@ public:
// Copies data from the CPU over into the buffer, at a specific offset. This does not change the size of the buffer and cannot write outside it.
virtual void UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t offset, size_t size, UpdateBufferFlags flags) = 0;
// Used to optimize DrawPixels by re-using previously allocated temp textures.
// Do not try to update a texture that might be used by an in-flight command buffer! In OpenGL and D3D, this will cause stalls
// while in Vulkan this might cause various strangeness like image corruption.
virtual void UpdateTextureLevels(Texture *texture, const uint8_t **data, TextureCallback initDataCallback, int numLevels) = 0;
virtual void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits, const char *tag) = 0;
virtual bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter, const char *tag) = 0;
@ -781,7 +756,7 @@ public:
virtual void BindSamplerStates(int start, int count, SamplerState **state) = 0;
virtual void BindTextures(int start, int count, Texture **textures, TextureBindFlags flags = TextureBindFlags::NONE) = 0;
virtual void BindVertexBuffer(Buffer *vertexBuffer, int offset) = 0;
virtual void BindVertexBuffers(int start, int count, Buffer **buffers, const int *offsets) = 0;
virtual void BindIndexBuffer(Buffer *indexBuffer, int offset) = 0;
// Sometimes it's necessary to bind a texture not created by thin3d, and use with a thin3d pipeline.
@ -812,13 +787,8 @@ public:
virtual void DrawUP(const void *vdata, int vertexCount) = 0;
// Frame management (for the purposes of sync and resource management, necessary with modern APIs). Default implementations here.
virtual void BeginFrame(DebugFlags debugFlags) = 0;
virtual void BeginFrame() {}
virtual void EndFrame() = 0;
// vblanks is only relevant in FIFO present mode.
// NOTE: Not all backends support vblanks > 1. Some backends also can't change presentation mode immediately.
virtual void Present(PresentMode presentMode, int vblanks) = 0;
virtual void WipeQueue() {}
// This should be avoided as much as possible, in favor of clearing when binding a render target, which is native
@ -846,20 +816,7 @@ public:
// Not very elegant, but more elegant than the old passId hack.
virtual void SetInvalidationCallback(InvalidationCallback callback) = 0;
// Total amount of frames rendered. Unaffected by game pause, so more robust than gpuStats.numFlips
virtual int GetFrameCount() = 0;
virtual std::string GetGpuProfileString() const {
return "";
}
const HistoryBuffer<FrameTimeData, FRAME_TIME_HISTORY_LENGTH> &FrameTimeHistory() const {
return frameTimeHistory_;
}
protected:
HistoryBuffer<FrameTimeData, FRAME_TIME_HISTORY_LENGTH> frameTimeHistory_;
ShaderModule *vsPresets_[VS_MAX_PRESET];
ShaderModule *fsPresets_[FS_MAX_PRESET];
@ -899,6 +856,4 @@ struct ShaderSource {
ShaderModule *CreateShader(DrawContext *draw, ShaderStage stage, const std::vector<ShaderSource> &sources);
const char *PresentModeToString(PresentMode presentMode);
} // namespace Draw

View file

@ -17,20 +17,20 @@ struct ID3D11Device;
struct ID3D11DeviceContext;
struct ID3D11Device1;
struct ID3D11DeviceContext1;
struct IDXGISwapChain;
#endif
class VulkanContext;
namespace Draw {
DrawContext *T3DCreateGLContext(bool canChangeSwapInterval);
DrawContext *T3DCreateGLContext();
#ifdef _WIN32
DrawContext *T3DCreateDX9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, IDirect3DDevice9 *device, IDirect3DDevice9Ex *deviceEx);
DrawContext *T3DCreateD3D11Context(ID3D11Device *device, ID3D11DeviceContext *context, ID3D11Device1 *device1, ID3D11DeviceContext1 *context1, IDXGISwapChain *swapChain, D3D_FEATURE_LEVEL featureLevel, HWND hWnd, std::vector<std::string> adapterNames, int maxInflightFrames);
DrawContext *T3DCreateD3D11Context(ID3D11Device *device, ID3D11DeviceContext *context, ID3D11Device1 *device1, ID3D11DeviceContext1 *context1, D3D_FEATURE_LEVEL featureLevel, HWND hWnd, std::vector<std::string> adapterNames);
#endif
DrawContext *T3DCreateVulkanContext(VulkanContext *context, bool useRenderThread);
DrawContext *T3DCreateVulkanContext(VulkanContext *context);
} // namespace Draw

Some files were not shown because too many files have changed in this diff Show more