Address feedback (except the mailbox refcount)
This commit is contained in:
parent
1d59560409
commit
81f0c3a8e4
9 changed files with 37 additions and 49 deletions
|
@ -26,19 +26,14 @@ WaitableCounter *ParallelRangeLoopWaitable(ThreadManager *threadMan, const std::
|
||||||
}
|
}
|
||||||
|
|
||||||
int numTasks = threadMan->GetNumLooperThreads();
|
int numTasks = threadMan->GetNumLooperThreads();
|
||||||
|
|
||||||
int range = upper - lower;
|
int range = upper - lower;
|
||||||
if (range <= 0) {
|
if (range <= 0) {
|
||||||
// Bad range. A finished counter allocated.
|
// Nothing to do. A finished counter is allocated to keep the API consistent.
|
||||||
return new WaitableCounter(0);
|
return new WaitableCounter(0);
|
||||||
}
|
} else if (range <= minSize) {
|
||||||
|
// Single background task.
|
||||||
if (range <= numTasks) {
|
WaitableCounter *waitableCounter = new WaitableCounter(1);
|
||||||
// Just assign one task per thread, as many as we have.
|
threadMan->EnqueueTaskOnThread(0, new LoopRangeTask(waitableCounter, loop, lower, upper), TaskType::CPU_COMPUTE);
|
||||||
WaitableCounter *waitableCounter = new WaitableCounter(range);
|
|
||||||
for (int i = 0; i < range; i++) {
|
|
||||||
threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(waitableCounter, loop, i, i + 1), TaskType::CPU_COMPUTE);
|
|
||||||
}
|
|
||||||
return waitableCounter;
|
return waitableCounter;
|
||||||
} else {
|
} else {
|
||||||
// Split the range between threads. Allow for some fractional bits.
|
// Split the range between threads. Allow for some fractional bits.
|
||||||
|
@ -68,7 +63,7 @@ WaitableCounter *ParallelRangeLoopWaitable(ThreadManager *threadMan, const std::
|
||||||
}
|
}
|
||||||
threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(waitableCounter, loop, start, end), TaskType::CPU_COMPUTE);
|
threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(waitableCounter, loop, start, end), TaskType::CPU_COMPUTE);
|
||||||
counter += delta;
|
counter += delta;
|
||||||
if ((counter >> fractionalBits) > upper) {
|
if ((counter >> fractionalBits) >= upper) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -78,7 +73,6 @@ WaitableCounter *ParallelRangeLoopWaitable(ThreadManager *threadMan, const std::
|
||||||
int stragglerStart = (int)(counter >> fractionalBits);
|
int stragglerStart = (int)(counter >> fractionalBits);
|
||||||
int stragglerEnd = upper;
|
int stragglerEnd = upper;
|
||||||
if (stragglerStart < stragglerEnd) {
|
if (stragglerStart < stragglerEnd) {
|
||||||
// printf("doing stragglers: %d-%d\n", start, upper);
|
|
||||||
loop(stragglerStart, stragglerEnd);
|
loop(stragglerStart, stragglerEnd);
|
||||||
}
|
}
|
||||||
return waitableCounter;
|
return waitableCounter;
|
||||||
|
@ -114,14 +108,13 @@ void ParallelMemcpy(ThreadManager *threadMan, void *dst, const void *src, size_t
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 128 is the largest cacheline size on common CPUs.
|
// unknown's testing showed that 128kB is an appropriate minimum size.
|
||||||
// Still I suspect that the optimal minSize is a lot higher.
|
|
||||||
|
|
||||||
char *d = (char *)dst;
|
char *d = (char *)dst;
|
||||||
char *s = (char *)src;
|
const char *s = (const char *)src;
|
||||||
ParallelRangeLoop(threadMan, [&](int l, int h) {
|
ParallelRangeLoop(threadMan, [&](int l, int h) {
|
||||||
memmove(d + l, s + l, h - l);
|
memmove(d + l, s + l, h - l);
|
||||||
}, 0, (int)bytes, 128);
|
}, 0, (int)bytes, 128 * 1024);
|
||||||
}
|
}
|
||||||
|
|
||||||
// NOTE: Supports a max of 2GB.
|
// NOTE: Supports a max of 2GB.
|
||||||
|
@ -132,11 +125,10 @@ void ParallelMemset(ThreadManager *threadMan, void *dst, uint8_t value, size_t b
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 128 is the largest cacheline size on common CPUs.
|
// unknown's testing showed that 128kB is an appropriate minimum size.
|
||||||
// Still I suspect that the optimal minSize is a lot higher.
|
|
||||||
|
|
||||||
char *d = (char *)dst;
|
char *d = (char *)dst;
|
||||||
ParallelRangeLoop(threadMan, [&](int l, int h) {
|
ParallelRangeLoop(threadMan, [&](int l, int h) {
|
||||||
memset(d + l, value, h - l);
|
memset(d + l, value, h - l);
|
||||||
}, 0, (int)bytes, 128);
|
}, 0, (int)bytes, 128 * 1024);
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
|
|
||||||
#include "Common/Thread/ThreadManager.h"
|
#include "Common/Thread/ThreadManager.h"
|
||||||
|
|
||||||
// Same as the latch from C++21, just counting upwards for no particular reason.
|
// Same as the latch (std::latch) from C++20.
|
||||||
struct WaitableCounter : public Waitable {
|
struct WaitableCounter : public Waitable {
|
||||||
public:
|
public:
|
||||||
WaitableCounter(int count) : count_(count) {}
|
WaitableCounter(int count) : count_(count) {}
|
||||||
|
@ -25,10 +25,9 @@ public:
|
||||||
|
|
||||||
void Wait() override {
|
void Wait() override {
|
||||||
std::unique_lock<std::mutex> lock(mutex_);
|
std::unique_lock<std::mutex> lock(mutex_);
|
||||||
if (count_ == 0) {
|
while (count_ != 0) {
|
||||||
return;
|
cond_.wait(lock);
|
||||||
}
|
}
|
||||||
cond_.wait(lock);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int count_;
|
int count_;
|
||||||
|
@ -44,6 +43,6 @@ void ParallelRangeLoop(ThreadManager *threadMan, const std::function<void(int, i
|
||||||
|
|
||||||
// Common utilities for large (!) memory copies.
|
// Common utilities for large (!) memory copies.
|
||||||
// Will only fall back to threads if it seems to make sense.
|
// Will only fall back to threads if it seems to make sense.
|
||||||
|
// NOTE: These support a max of 2GB.
|
||||||
void ParallelMemcpy(ThreadManager *threadMan, void *dst, const void *src, size_t bytes);
|
void ParallelMemcpy(ThreadManager *threadMan, void *dst, const void *src, size_t bytes);
|
||||||
void ParallelMemset(ThreadManager *threadMan, void *dst, uint8_t value, size_t bytes);
|
void ParallelMemset(ThreadManager *threadMan, void *dst, uint8_t value, size_t bytes);
|
||||||
|
|
|
@ -28,7 +28,7 @@ struct GlobalThreadContext {
|
||||||
std::deque<Task *> queue;
|
std::deque<Task *> queue;
|
||||||
std::vector<ThreadContext *> threads_;
|
std::vector<ThreadContext *> threads_;
|
||||||
|
|
||||||
int roundRobin;
|
int roundRobin = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ThreadContext {
|
struct ThreadContext {
|
||||||
|
|
|
@ -17,7 +17,6 @@ public:
|
||||||
virtual void Run() = 0;
|
virtual void Run() = 0;
|
||||||
virtual bool Cancellable() { return false; }
|
virtual bool Cancellable() { return false; }
|
||||||
virtual void Cancel() {}
|
virtual void Cancel() {}
|
||||||
virtual float Priority() { return 1.0f; }
|
|
||||||
virtual uint64_t id() { return 0; }
|
virtual uint64_t id() { return 0; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -53,8 +52,8 @@ public:
|
||||||
// something meaningful yourself.
|
// something meaningful yourself.
|
||||||
void TryCancelTask(uint64_t id);
|
void TryCancelTask(uint64_t id);
|
||||||
|
|
||||||
// Parallel loops get to use half the threads,
|
// Parallel loops (assumed compute-limited) get one thread per logical core. We have a few extra threads too
|
||||||
// so we still have some worker threads for other tasks.
|
// for I/O-bound tasks, which can run concurrently with those.
|
||||||
int GetNumLooperThreads() const;
|
int GetNumLooperThreads() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -16,16 +16,17 @@
|
||||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include "Common/GPU/OpenGL/GLCommon.h"
|
|
||||||
|
|
||||||
#include "GPU/Common/TextureScalerCommon.h"
|
|
||||||
#include "GPU/GLES/TextureScalerGLES.h"
|
|
||||||
#include "Common/Data/Convert/ColorConv.h"
|
#include "Common/Data/Convert/ColorConv.h"
|
||||||
#include "Common/Log.h"
|
#include "Common/Log.h"
|
||||||
#include "Common/Thread/ParallelLoop.h"
|
#include "Common/Thread/ParallelLoop.h"
|
||||||
#include "Core/ThreadPools.h"
|
#include "Common/GPU/OpenGL/GLCommon.h"
|
||||||
#include "Common/GPU/DataFormat.h"
|
#include "Common/GPU/DataFormat.h"
|
||||||
|
|
||||||
|
#include "Core/ThreadPools.h"
|
||||||
|
#include "GPU/Common/TextureScalerCommon.h"
|
||||||
|
#include "GPU/GLES/TextureScalerGLES.h"
|
||||||
|
|
||||||
int TextureScalerGLES::BytesPerPixel(u32 format) {
|
int TextureScalerGLES::BytesPerPixel(u32 format) {
|
||||||
return ((Draw::DataFormat)format == Draw::DataFormat::R8G8B8A8_UNORM) ? 4 : 2;
|
return ((Draw::DataFormat)format == Draw::DataFormat::R8G8B8A8_UNORM) ? 4 : 2;
|
||||||
}
|
}
|
||||||
|
@ -42,15 +43,15 @@ void TextureScalerGLES::ConvertTo8888(u32 format, u32* source, u32* &dest, int w
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Draw::DataFormat::R4G4B4A4_UNORM_PACK16:
|
case Draw::DataFormat::R4G4B4A4_UNORM_PACK16:
|
||||||
ParallelRangeLoop(&g_threadManager, std::bind(&convert4444_gl, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, 1);
|
ParallelRangeLoop(&g_threadManager, std::bind(&convert4444_gl, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_TEXSCALE_LINES_PER_THREAD);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Draw::DataFormat::R5G6B5_UNORM_PACK16:
|
case Draw::DataFormat::R5G6B5_UNORM_PACK16:
|
||||||
ParallelRangeLoop(&g_threadManager, std::bind(&convert565_gl, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, 1);
|
ParallelRangeLoop(&g_threadManager, std::bind(&convert565_gl, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_TEXSCALE_LINES_PER_THREAD);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Draw::DataFormat::R5G5B5A1_UNORM_PACK16:
|
case Draw::DataFormat::R5G5B5A1_UNORM_PACK16:
|
||||||
ParallelRangeLoop(&g_threadManager, std::bind(&convert5551_gl, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, 1);
|
ParallelRangeLoop(&g_threadManager, std::bind(&convert5551_gl, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_TEXSCALE_LINES_PER_THREAD);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -42,23 +42,21 @@ u32 TextureScalerVulkan::Get8888Format() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextureScalerVulkan::ConvertTo8888(u32 format, u32* source, u32* &dest, int width, int height) {
|
void TextureScalerVulkan::ConvertTo8888(u32 format, u32* source, u32* &dest, int width, int height) {
|
||||||
const int MIN_LINES_PER_THREAD = 4;
|
|
||||||
|
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case VULKAN_8888_FORMAT:
|
case VULKAN_8888_FORMAT:
|
||||||
dest = source; // already fine
|
dest = source; // already fine
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case VULKAN_4444_FORMAT:
|
case VULKAN_4444_FORMAT:
|
||||||
ParallelRangeLoop(&g_threadManager, std::bind(&convert4444_dx9, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_LINES_PER_THREAD);
|
ParallelRangeLoop(&g_threadManager, std::bind(&convert4444_dx9, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_TEXSCALE_LINES_PER_THREAD);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case VULKAN_565_FORMAT:
|
case VULKAN_565_FORMAT:
|
||||||
ParallelRangeLoop(&g_threadManager, std::bind(&convert565_dx9, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_LINES_PER_THREAD);
|
ParallelRangeLoop(&g_threadManager, std::bind(&convert565_dx9, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_TEXSCALE_LINES_PER_THREAD);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case VULKAN_1555_FORMAT:
|
case VULKAN_1555_FORMAT:
|
||||||
ParallelRangeLoop(&g_threadManager, std::bind(&convert5551_dx9, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_LINES_PER_THREAD);
|
ParallelRangeLoop(&g_threadManager, std::bind(&convert5551_dx9, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_TEXSCALE_LINES_PER_THREAD);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -630,15 +630,6 @@ handleELF:
|
||||||
// INFO_LOG(SYSTEM, "Completed writing info for %s", info_->GetTitle().c_str());
|
// INFO_LOG(SYSTEM, "Completed writing info for %s", info_->GetTitle().c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
float Priority() override {
|
|
||||||
auto fl = info_->GetFileLoader();
|
|
||||||
if (fl && fl->IsRemote()) {
|
|
||||||
// Increase the value so remote info loads after non-remote.
|
|
||||||
return info_->lastAccessedTime + 1000.0f;
|
|
||||||
}
|
|
||||||
return info_->lastAccessedTime;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Path gamePath_;
|
Path gamePath_;
|
||||||
std::shared_ptr<GameInfo> info_;
|
std::shared_ptr<GameInfo> info_;
|
||||||
|
|
|
@ -14,11 +14,13 @@
|
||||||
#include "Common/System/NativeApp.h"
|
#include "Common/System/NativeApp.h"
|
||||||
#include "Common/System/System.h"
|
#include "Common/System/System.h"
|
||||||
|
|
||||||
|
#include "Common/CPUDetect.h"
|
||||||
#include "Common/File/VFS/VFS.h"
|
#include "Common/File/VFS/VFS.h"
|
||||||
#include "Common/File/VFS/AssetReader.h"
|
#include "Common/File/VFS/AssetReader.h"
|
||||||
#include "Common/File/FileUtil.h"
|
#include "Common/File/FileUtil.h"
|
||||||
#include "Common/GraphicsContext.h"
|
#include "Common/GraphicsContext.h"
|
||||||
#include "Common/TimeUtil.h"
|
#include "Common/TimeUtil.h"
|
||||||
|
#include "Common/Thread/ThreadManager.h"
|
||||||
#include "Core/Config.h"
|
#include "Core/Config.h"
|
||||||
#include "Core/ConfigValues.h"
|
#include "Core/ConfigValues.h"
|
||||||
#include "Core/Core.h"
|
#include "Core/Core.h"
|
||||||
|
@ -330,6 +332,8 @@ int main(int argc, const char* argv[])
|
||||||
if (testFilenames.empty())
|
if (testFilenames.empty())
|
||||||
return printUsage(argv[0], argc <= 1 ? NULL : "No executables specified");
|
return printUsage(argv[0], argc <= 1 ? NULL : "No executables specified");
|
||||||
|
|
||||||
|
g_threadManager.Init(cpu_info.num_cores, cpu_info.logical_cpu_count);
|
||||||
|
|
||||||
LogManager::Init(&g_Config.bEnableLogging);
|
LogManager::Init(&g_Config.bEnableLogging);
|
||||||
LogManager *logman = LogManager::GetInstance();
|
LogManager *logman = LogManager::GetInstance();
|
||||||
|
|
||||||
|
|
|
@ -49,6 +49,10 @@ bool TestParallelLoop(ThreadManager *threadMan) {
|
||||||
// Try a loop with a relatively large minimum size.
|
// Try a loop with a relatively large minimum size.
|
||||||
printf("blocking test #2 [0-100)\n");
|
printf("blocking test #2 [0-100)\n");
|
||||||
ParallelRangeLoop(threadMan, rangeFunc, 0, 100, 40);
|
ParallelRangeLoop(threadMan, rangeFunc, 0, 100, 40);
|
||||||
|
// Try a loop with minimum size larger than range.
|
||||||
|
printf("waitable test [10-30)\n");
|
||||||
|
WaitableCounter *waitable2 = ParallelRangeLoopWaitable(threadMan, rangeFunc, 10, 30, 40);
|
||||||
|
waitable2->WaitAndRelease();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue