[client] reworked polling logic and added GPU magic

SDL_RenderPresent calls SwapBuffers which, even with vsync enabled, doesn't
block until the driver/GPU has buffered several frames. This is no good
for our use case, where low latency is of ultimate importance. This patch
measures the swap interval, ensures the time is consumed and then calls
glFinish to ensure that there is no buffered data. The results on NVIDIA
hardware are exceptional, but will need verification on AMD hardware.

Polling logic has also been improved: the client will hunt for the best
delay rather than resorting to waiting on an event from the guest, avoiding
scheduler latency caused by waiting on the irq event.
This commit is contained in:
Geoffrey McRae 2017-12-07 02:37:46 +11:00
parent 01d4d52355
commit 7349334811

View File

@ -162,6 +162,25 @@ inline uint64_t microtime()
return ((uint64_t)time.tv_sec * 1000000) + time.tv_usec; return ((uint64_t)time.tv_sec * 1000000) + time.tv_usec;
} }
/**
 * Measure how long one SDL_RenderPresent call takes on average.
 *
 * A batch of warm-up presents is issued first, then a second batch of the
 * same size is timed with microtime() and averaged. glFinish() is called
 * afterwards to ensure all buffers are flushed before returning.
 *
 * @return the average duration of a single present, in microseconds
 */
uint64_t detectPresentTime(void)
{
  // number of presents used for both the warm-up and the timed batch
  enum { PRESENT_SAMPLES = 10 };

  // warm up first as the GPU driver may have multiple buffers
  for(int i = 0; i < PRESENT_SAMPLES; ++i)
    SDL_RenderPresent(state.renderer);

  // time a batch of presents and compute the average
  const uint64_t start = microtime();
  for(int i = 0; i < PRESENT_SAMPLES; ++i)
    SDL_RenderPresent(state.renderer);
  const uint64_t t = (microtime() - start) / PRESENT_SAMPLES;

  // ensure all buffers are flushed
  glFinish();

  // the average can round down to zero if presents return immediately;
  // report 0 Hz in that case instead of dividing by zero
  const double hz = (t > 0) ? 1000000.0 / (double)t : 0.0;

  // uint64_t is not guaranteed to be unsigned long, so widen explicitly to
  // match the conversion specifier (assumes DEBUG_INFO is printf-like)
  DEBUG_INFO("detected: %llu (%f Hz)", (unsigned long long)t, hz);
  return t;
}
int renderThread(void * unused) int renderThread(void * unused)
{ {
bool error = false; bool error = false;
@ -172,49 +191,62 @@ int renderThread(void * unused)
SDL_Texture * textTexture = NULL; SDL_Texture * textTexture = NULL;
SDL_Rect textRect; SDL_Rect textRect;
uint64_t waitTime = 0; const uint64_t presentTime = detectPresentTime();
uint64_t presentTime = 0;
uint64_t pollDelay = 0;
uint64_t pollStep = 0;
uint64_t drawStart = 0;
uint64_t drawTime = 0;
uint64_t fpsStart = 0;
uint64_t fpsTime = 0; uint64_t fpsTime = 0;
while(state.running) while(state.running)
{ {
glFlush(); DEBUG_INFO("delay: %lu", pollDelay);
usleep(pollDelay);
if (waitTime < 30000 && waitTime > 2000) if(header.dataPos == state.shm->dataPos)
usleep(waitTime - 2000);
// poll for a new frame
while(state.running && header.dataPos == state.shm->dataPos)
{ {
// if the frame is overdue pollStep = 0;
if (microtime() - presentTime > waitTime) do
{ {
enum IVSHMEMWaitResult result = ivshmem_wait_irq(0, (1000/30)); ++pollStep;
if (result == IVSHMEM_WAIT_RESULT_OK) if (pollDelay + pollStep < 30000)
continue; pollDelay += pollStep;
usleep(1);
}
while(header.dataPos == state.shm->dataPos && state.running);
if (result == IVSHMEM_WAIT_RESULT_TIMEOUT) if (!state.running)
break;
if (result == IVSHMEM_WAIT_RESULT_ERROR)
{
DEBUG_ERROR("error during wait for host");
state.running = false;
break; break;
} }
} else
{
// we were late, step back a chunk
pollStep += 100;
if (pollDelay > pollStep)
pollDelay -= pollStep;
} }
if (!state.running) if (!state.running)
break; break;
// normally you would never put this into an OpenGL application but because // sleep for the remainder of the presentation time
// of our hybrid sleep/poll logic above the GPU has had time to finish up if (frameCount > 0)
// anyway. Having this here ensures that the GPU doesn't buffer up {
// additional frames if the guest starts to outpace us. drawTime = microtime() - drawStart;
glFinish(); if (drawTime < presentTime)
{
uint64_t delta = presentTime - drawTime;
if (delta > 1000)
usleep(delta - 1000);
}
waitTime = microtime() - presentTime; // ensure buffers are flushed
glFinish();
}
drawStart = microtime();
// we must take a copy of the header, both to let the guest advance and to // we must take a copy of the header, both to let the guest advance and to
// prevent the contained arguments being abused to overflow buffers // prevent the contained arguments being abused to overflow buffers
@ -340,13 +372,8 @@ int renderThread(void * unused)
break; break;
} }
if (!params.showFPS) if (params.showFPS)
{ {
SDL_RenderPresent(state.renderer);
presentTime = microtime();
continue;
}
// for now render the frame counter here, we really should // for now render the frame counter here, we really should
// move this into the renderers though. // move this into the renderers though.
if (fpsTime > 1000000) if (fpsTime > 1000000)
@ -407,13 +434,15 @@ int renderThread(void * unused)
glDisable(GL_BLEND); glDisable(GL_BLEND);
SDL_GL_UnbindTexture(textTexture); SDL_GL_UnbindTexture(textTexture);
} }
}
SDL_RenderPresent(state.renderer); SDL_RenderPresent(state.renderer);
++frameCount; ++frameCount;
uint64_t t = microtime(); uint64_t t = microtime();
fpsTime += t - presentTime; fpsTime += t - fpsStart;
presentTime = t; fpsStart = t;
} }
if (lgr) if (lgr)