[host] improved latency of multimemcpy with hybrid locking and preempt

This commit is contained in:
Geoffrey McRae 2017-12-29 07:00:27 +11:00
parent f6f4c8070a
commit 2f2813037b
2 changed files with 31 additions and 10 deletions

View File

@ -25,9 +25,9 @@ MultiMemcpy::MultiMemcpy()
{
for (int i = 0; i < MULTIMEMCPY_THREADS; ++i)
{
m_workers[i].start = CreateSemaphore(NULL, 0, 1, NULL);
m_workers[i].stop = CreateSemaphore(NULL, 0, 1, NULL);
m_semaphores[i] = m_workers[i].stop;
m_workers[i].id = (1 << i);
m_workers[i].running = &m_running;
m_workers[i].start = CreateSemaphore(NULL, 0, 1, NULL);
m_workers[i].thread = CreateThread(0, 0, WorkerFunction, &m_workers[i], 0, NULL);
}
@ -39,22 +39,26 @@ MultiMemcpy::~MultiMemcpy()
{
TerminateThread(m_workers[i].thread, 0);
CloseHandle(m_workers[i].start);
CloseHandle(m_workers[i].stop );
}
}
// Copies `size` bytes from `src` to `dst` by splitting the range into
// MULTIMEMCPY_THREADS equally sized blocks, one per worker, and returns only
// once m_running reads as zero.
// NOTE(review): this listing is a rendered diff without +/- markers, so the
// semaphore based hand-off (ReleaseSemaphore / WaitForMultipleObjects) and the
// m_running bit-mask hand-off appear together - confirm against the real file
// which of the two is live.
void MultiMemcpy::Copy(void * dst, void * src, size_t size)
{
// release the workers now unless Wake() has already done so for this copy
if (!m_awake)
Wake();
// bytes per worker; integer division, so the trailing
// size % MULTIMEMCPY_THREADS bytes are not assigned to any worker here
// NOTE(review): confirm callers only pass sizes that divide evenly
const size_t block = size / MULTIMEMCPY_THREADS;
for (int i = 0; i < MULTIMEMCPY_THREADS; ++i)
{
// worker i handles the half-open byte range [i * block, (i + 1) * block)
m_workers[i].dst = (uint8_t *)dst + i * block;
m_workers[i].src = (uint8_t *)src + i * block;
// algebraically equal to `block`
m_workers[i].size = (i + 1) * block - i * block;
ReleaseSemaphore(m_workers[i].start, 1, NULL);
}
// block until every handle in m_semaphores is signalled (bWaitAll = TRUE)
WaitForMultipleObjects(MULTIMEMCPY_THREADS, m_semaphores, TRUE, INFINITE);
// set the low MULTIMEMCPY_THREADS bits of m_running, one bit per worker id;
// each worker spins until its own bit is set and clears it after its memcpySSE
// (INTERLOCKED_OR8 is defined elsewhere - presumably an atomic 8-bit OR)
INTERLOCKED_OR8(&m_running, (1 << MULTIMEMCPY_THREADS) - 1);
// busy-wait (no sleep or yield) until every worker has cleared its bit
while(m_running) {}
// the next Copy() or Wake() has to release the start semaphores again
m_awake = false;
}
DWORD WINAPI MultiMemcpy::WorkerFunction(LPVOID param)
@ -64,7 +68,9 @@ DWORD WINAPI MultiMemcpy::WorkerFunction(LPVOID param)
for(;;)
{
WaitForSingleObject(w->start, INFINITE);
while(!(*w->running & w->id)) {}
memcpySSE(w->dst, w->src, w->size);
ReleaseSemaphore(w->stop, 1, NULL);
INTERLOCKED_AND8(w->running, ~w->id);
}
}

View File

@ -29,19 +29,34 @@ public:
MultiMemcpy();
~MultiMemcpy();
// Preempt the copy and wake up the threads early: releases every worker's
// `start` semaphore once and then sets m_awake.
// Returns immediately if m_awake is already set, so repeated calls before the
// next Copy() release the semaphores only once; Copy() clears m_awake again
// when it finishes.
inline void Wake()
{
if (m_awake)
return;
for (int i = 0; i < MULTIMEMCPY_THREADS; ++i)
ReleaseSemaphore(m_workers[i].start, 1, NULL);
m_awake = true;
}
void Copy(void * dst, void * src, size_t size);
private:
// Per-thread state handed to WorkerFunction through CreateThread's parameter.
// NOTE(review): this listing is a rendered diff without +/- markers, so fields
// from before and after the commit (e.g. `stop` vs. `id`/`running`) are shown
// side by side - confirm against the real header.
struct Worker
{
// this worker's bit in the shared running mask; the constructor sets it to (1 << i)
unsigned int id;
// points at the owner's m_running; the worker spins until its `id` bit is
// set and clears that bit after its copy
volatile char *running;
// semaphore the worker thread waits on before each copy
HANDLE start;
// semaphore the worker releases after its copy
HANDLE stop;
// handle returned by CreateThread for this worker
HANDLE thread;
// destination pointer passed to memcpySSE
void * dst;
// source pointer passed to memcpySSE
void * src;
// number of bytes passed to memcpySSE
size_t size;
};
// each worker's `stop` handle, collected so Copy() can wait on all of them at once
HANDLE m_semaphores[MULTIMEMCPY_THREADS];
// true between a Wake() and the end of the following Copy()
bool m_awake;
// bit mask with one bit per worker (Worker::id); Copy() sets the bits and
// each worker clears its own bit when its block has been copied
// NOTE(review): volatile alone gives no atomicity or ordering guarantees in
// standard C++ - this relies on the INTERLOCKED_* macros for the updates
volatile char m_running;
// per-thread state, one entry per worker thread
struct Worker m_workers[MULTIMEMCPY_THREADS];
// thread entry point; `param` is the address of the worker's entry in m_workers
static DWORD WINAPI WorkerFunction(LPVOID param);
};