Address feedback (except the mailbox refcount)

This commit is contained in:
Henrik Rydgård 2021-06-13 10:16:53 +02:00
parent 1d59560409
commit 81f0c3a8e4
9 changed files with 37 additions and 49 deletions

View file

@ -26,19 +26,14 @@ WaitableCounter *ParallelRangeLoopWaitable(ThreadManager *threadMan, const std::
} }
int numTasks = threadMan->GetNumLooperThreads(); int numTasks = threadMan->GetNumLooperThreads();
int range = upper - lower; int range = upper - lower;
if (range <= 0) { if (range <= 0) {
// Bad range. A finished counter is allocated. // Nothing to do. A finished counter is allocated to keep the API.
return new WaitableCounter(0); return new WaitableCounter(0);
} } else if (range <= minSize) {
// Single background task.
if (range <= numTasks) { WaitableCounter *waitableCounter = new WaitableCounter(1);
// Just assign one task per thread, as many as we have. threadMan->EnqueueTaskOnThread(0, new LoopRangeTask(waitableCounter, loop, lower, upper), TaskType::CPU_COMPUTE);
WaitableCounter *waitableCounter = new WaitableCounter(range);
for (int i = 0; i < range; i++) {
threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(waitableCounter, loop, i, i + 1), TaskType::CPU_COMPUTE);
}
return waitableCounter; return waitableCounter;
} else { } else {
// Split the range between threads. Allow for some fractional bits. // Split the range between threads. Allow for some fractional bits.
@ -68,7 +63,7 @@ WaitableCounter *ParallelRangeLoopWaitable(ThreadManager *threadMan, const std::
} }
threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(waitableCounter, loop, start, end), TaskType::CPU_COMPUTE); threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(waitableCounter, loop, start, end), TaskType::CPU_COMPUTE);
counter += delta; counter += delta;
if ((counter >> fractionalBits) > upper) { if ((counter >> fractionalBits) >= upper) {
break; break;
} }
} }
@ -78,7 +73,6 @@ WaitableCounter *ParallelRangeLoopWaitable(ThreadManager *threadMan, const std::
int stragglerStart = (int)(counter >> fractionalBits); int stragglerStart = (int)(counter >> fractionalBits);
int stragglerEnd = upper; int stragglerEnd = upper;
if (stragglerStart < stragglerEnd) { if (stragglerStart < stragglerEnd) {
// printf("doing stragglers: %d-%d\n", start, upper);
loop(stragglerStart, stragglerEnd); loop(stragglerStart, stragglerEnd);
} }
return waitableCounter; return waitableCounter;
@ -114,14 +108,13 @@ void ParallelMemcpy(ThreadManager *threadMan, void *dst, const void *src, size_t
return; return;
} }
// 128 is the largest cacheline size on common CPUs. // unknown's testing showed that 128kB is an appropriate minimum size.
// Still I suspect that the optimal minSize is a lot higher.
char *d = (char *)dst; char *d = (char *)dst;
char *s = (char *)src; const char *s = (const char *)src;
ParallelRangeLoop(threadMan, [&](int l, int h) { ParallelRangeLoop(threadMan, [&](int l, int h) {
memmove(d + l, s + l, h - l); memmove(d + l, s + l, h - l);
}, 0, (int)bytes, 128); }, 0, (int)bytes, 128 * 1024);
} }
// NOTE: Supports a max of 2GB. // NOTE: Supports a max of 2GB.
@ -132,11 +125,10 @@ void ParallelMemset(ThreadManager *threadMan, void *dst, uint8_t value, size_t b
return; return;
} }
// 128 is the largest cacheline size on common CPUs. // unknown's testing showed that 128kB is an appropriate minimum size.
// Still I suspect that the optimal minSize is a lot higher.
char *d = (char *)dst; char *d = (char *)dst;
ParallelRangeLoop(threadMan, [&](int l, int h) { ParallelRangeLoop(threadMan, [&](int l, int h) {
memset(d + l, value, h - l); memset(d + l, value, h - l);
}, 0, (int)bytes, 128); }, 0, (int)bytes, 128 * 1024);
} }

View file

@ -6,7 +6,7 @@
#include "Common/Thread/ThreadManager.h" #include "Common/Thread/ThreadManager.h"
// Same as the latch from C++20, just counting upwards for no particular reason. // Same as the latch from C++20.
struct WaitableCounter : public Waitable { struct WaitableCounter : public Waitable {
public: public:
WaitableCounter(int count) : count_(count) {} WaitableCounter(int count) : count_(count) {}
@ -25,10 +25,9 @@ public:
void Wait() override { void Wait() override {
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
if (count_ == 0) { while (count_ != 0) {
return; cond_.wait(lock);
} }
cond_.wait(lock);
} }
int count_; int count_;
@ -44,6 +43,6 @@ void ParallelRangeLoop(ThreadManager *threadMan, const std::function<void(int, i
// Common utilities for large (!) memory copies. // Common utilities for large (!) memory copies.
// Will only fall back to threads if it seems to make sense. // Will only fall back to threads if it seems to make sense.
// NOTE: These support a max of 2GB.
void ParallelMemcpy(ThreadManager *threadMan, void *dst, const void *src, size_t bytes); void ParallelMemcpy(ThreadManager *threadMan, void *dst, const void *src, size_t bytes);
void ParallelMemset(ThreadManager *threadMan, void *dst, uint8_t value, size_t bytes); void ParallelMemset(ThreadManager *threadMan, void *dst, uint8_t value, size_t bytes);

View file

@ -28,7 +28,7 @@ struct GlobalThreadContext {
std::deque<Task *> queue; std::deque<Task *> queue;
std::vector<ThreadContext *> threads_; std::vector<ThreadContext *> threads_;
int roundRobin; int roundRobin = 0;
}; };
struct ThreadContext { struct ThreadContext {

View file

@ -17,7 +17,6 @@ public:
virtual void Run() = 0; virtual void Run() = 0;
virtual bool Cancellable() { return false; } virtual bool Cancellable() { return false; }
virtual void Cancel() {} virtual void Cancel() {}
virtual float Priority() { return 1.0f; }
virtual uint64_t id() { return 0; } virtual uint64_t id() { return 0; }
}; };
@ -53,8 +52,8 @@ public:
// something meaningful yourself. // something meaningful yourself.
void TryCancelTask(uint64_t id); void TryCancelTask(uint64_t id);
// Parallel loops get to use half the threads, // Parallel loops (assumed compute-limited) get one thread per logical core. We have a few extra threads too
// so we still have some worker threads for other tasks. // for I/O-bound tasks, which can run concurrently with those.
int GetNumLooperThreads() const; int GetNumLooperThreads() const;
private: private:

View file

@ -16,16 +16,17 @@
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <algorithm> #include <algorithm>
#include "Common/GPU/OpenGL/GLCommon.h"
#include "GPU/Common/TextureScalerCommon.h"
#include "GPU/GLES/TextureScalerGLES.h"
#include "Common/Data/Convert/ColorConv.h" #include "Common/Data/Convert/ColorConv.h"
#include "Common/Log.h" #include "Common/Log.h"
#include "Common/Thread/ParallelLoop.h" #include "Common/Thread/ParallelLoop.h"
#include "Core/ThreadPools.h" #include "Common/GPU/OpenGL/GLCommon.h"
#include "Common/GPU/DataFormat.h" #include "Common/GPU/DataFormat.h"
#include "Core/ThreadPools.h"
#include "GPU/Common/TextureScalerCommon.h"
#include "GPU/GLES/TextureScalerGLES.h"
int TextureScalerGLES::BytesPerPixel(u32 format) { int TextureScalerGLES::BytesPerPixel(u32 format) {
return ((Draw::DataFormat)format == Draw::DataFormat::R8G8B8A8_UNORM) ? 4 : 2; return ((Draw::DataFormat)format == Draw::DataFormat::R8G8B8A8_UNORM) ? 4 : 2;
} }
@ -42,15 +43,15 @@ void TextureScalerGLES::ConvertTo8888(u32 format, u32* source, u32* &dest, int w
break; break;
case Draw::DataFormat::R4G4B4A4_UNORM_PACK16: case Draw::DataFormat::R4G4B4A4_UNORM_PACK16:
ParallelRangeLoop(&g_threadManager, std::bind(&convert4444_gl, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, 1); ParallelRangeLoop(&g_threadManager, std::bind(&convert4444_gl, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_TEXSCALE_LINES_PER_THREAD);
break; break;
case Draw::DataFormat::R5G6B5_UNORM_PACK16: case Draw::DataFormat::R5G6B5_UNORM_PACK16:
ParallelRangeLoop(&g_threadManager, std::bind(&convert565_gl, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, 1); ParallelRangeLoop(&g_threadManager, std::bind(&convert565_gl, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_TEXSCALE_LINES_PER_THREAD);
break; break;
case Draw::DataFormat::R5G5B5A1_UNORM_PACK16: case Draw::DataFormat::R5G5B5A1_UNORM_PACK16:
ParallelRangeLoop(&g_threadManager, std::bind(&convert5551_gl, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, 1); ParallelRangeLoop(&g_threadManager, std::bind(&convert5551_gl, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_TEXSCALE_LINES_PER_THREAD);
break; break;
default: default:

View file

@ -42,23 +42,21 @@ u32 TextureScalerVulkan::Get8888Format() {
} }
void TextureScalerVulkan::ConvertTo8888(u32 format, u32* source, u32* &dest, int width, int height) { void TextureScalerVulkan::ConvertTo8888(u32 format, u32* source, u32* &dest, int width, int height) {
const int MIN_LINES_PER_THREAD = 4;
switch (format) { switch (format) {
case VULKAN_8888_FORMAT: case VULKAN_8888_FORMAT:
dest = source; // already fine dest = source; // already fine
break; break;
case VULKAN_4444_FORMAT: case VULKAN_4444_FORMAT:
ParallelRangeLoop(&g_threadManager, std::bind(&convert4444_dx9, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_LINES_PER_THREAD); ParallelRangeLoop(&g_threadManager, std::bind(&convert4444_dx9, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_TEXSCALE_LINES_PER_THREAD);
break; break;
case VULKAN_565_FORMAT: case VULKAN_565_FORMAT:
ParallelRangeLoop(&g_threadManager, std::bind(&convert565_dx9, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_LINES_PER_THREAD); ParallelRangeLoop(&g_threadManager, std::bind(&convert565_dx9, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_TEXSCALE_LINES_PER_THREAD);
break; break;
case VULKAN_1555_FORMAT: case VULKAN_1555_FORMAT:
ParallelRangeLoop(&g_threadManager, std::bind(&convert5551_dx9, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_LINES_PER_THREAD); ParallelRangeLoop(&g_threadManager, std::bind(&convert5551_dx9, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_TEXSCALE_LINES_PER_THREAD);
break; break;
default: default:

View file

@ -630,15 +630,6 @@ handleELF:
// INFO_LOG(SYSTEM, "Completed writing info for %s", info_->GetTitle().c_str()); // INFO_LOG(SYSTEM, "Completed writing info for %s", info_->GetTitle().c_str());
} }
float Priority() override {
auto fl = info_->GetFileLoader();
if (fl && fl->IsRemote()) {
// Increase the value so remote info loads after non-remote.
return info_->lastAccessedTime + 1000.0f;
}
return info_->lastAccessedTime;
}
private: private:
Path gamePath_; Path gamePath_;
std::shared_ptr<GameInfo> info_; std::shared_ptr<GameInfo> info_;

View file

@ -14,11 +14,13 @@
#include "Common/System/NativeApp.h" #include "Common/System/NativeApp.h"
#include "Common/System/System.h" #include "Common/System/System.h"
#include "Common/CPUDetect.h"
#include "Common/File/VFS/VFS.h" #include "Common/File/VFS/VFS.h"
#include "Common/File/VFS/AssetReader.h" #include "Common/File/VFS/AssetReader.h"
#include "Common/File/FileUtil.h" #include "Common/File/FileUtil.h"
#include "Common/GraphicsContext.h" #include "Common/GraphicsContext.h"
#include "Common/TimeUtil.h" #include "Common/TimeUtil.h"
#include "Common/Thread/ThreadManager.h"
#include "Core/Config.h" #include "Core/Config.h"
#include "Core/ConfigValues.h" #include "Core/ConfigValues.h"
#include "Core/Core.h" #include "Core/Core.h"
@ -330,6 +332,8 @@ int main(int argc, const char* argv[])
if (testFilenames.empty()) if (testFilenames.empty())
return printUsage(argv[0], argc <= 1 ? NULL : "No executables specified"); return printUsage(argv[0], argc <= 1 ? NULL : "No executables specified");
g_threadManager.Init(cpu_info.num_cores, cpu_info.logical_cpu_count);
LogManager::Init(&g_Config.bEnableLogging); LogManager::Init(&g_Config.bEnableLogging);
LogManager *logman = LogManager::GetInstance(); LogManager *logman = LogManager::GetInstance();

View file

@ -49,6 +49,10 @@ bool TestParallelLoop(ThreadManager *threadMan) {
// Try a loop with a relatively large minimum size. // Try a loop with a relatively large minimum size.
printf("blocking test #2 [0-100)\n"); printf("blocking test #2 [0-100)\n");
ParallelRangeLoop(threadMan, rangeFunc, 0, 100, 40); ParallelRangeLoop(threadMan, rangeFunc, 0, 100, 40);
// Try a loop with minimum size larger than range.
printf("waitable test [10-30)\n");
WaitableCounter *waitable2 = ParallelRangeLoopWaitable(threadMan, rangeFunc, 10, 30, 40);
waitable2->WaitAndRelease();
return true; return true;
} }