[client] reworked polling logic and added GPU magic

SDL_RenderPresent calls SwapBuffers, which doesn't block even with vsync enabled until the driver/GPU has buffered several frames. This is no good for our use case, where low latency is of ultimate importance. This patch measures the swap interval, ensures the time is consumed, and then calls glFinish to ensure that there is no buffered data. The results on NVidia hardware are exceptional, but will need verification on AMD hardware. Polling logic has also been improved: the client will hunt for the best delay rather than resorting to waiting on an event from the guest, avoiding scheduler latency caused by waiting on the irq event.
2026-03-07 00:59:55 +00:00 · 2017-12-07 02:37:46 +11:00
parent 01d4d52355
commit 7349334811
1 changed files with 123 additions and 94 deletions
--- a/client/main.c
+++ b/client/main.c
@@ -162,6 +162,25 @@ inline uint64_t microtime()
  return ((uint64_t)time.tv_sec * 1000000) + time.tv_usec;
 }
 /**
  * Measure how long a single SDL_RenderPresent call takes on average.
  *
  * Presents a batch of frames untimed, then times a second batch of the
  * same size and averages the result. glFinish is called afterwards so no
  * work is left queued when the function returns.
  *
  * @return average duration of one present, in microtime() units
  *         (microseconds)
  */
 uint64_t detectPresentTime()
 {
  // number of presents used for both the warm up and the timed batch
  enum { PRESENT_SAMPLES = 10 };

  // warm up first as the GPU driver may have multiple buffers
  for(int i = 0; i < PRESENT_SAMPLES; ++i)
    SDL_RenderPresent(state.renderer);

  // time the batch and compute the average
  const uint64_t start = microtime();
  for(int i = 0; i < PRESENT_SAMPLES; ++i)
    SDL_RenderPresent(state.renderer);
  const uint64_t t = (microtime() - start) / PRESENT_SAMPLES;

  // ensure all buffers are flushed
  glFinish();

  // uint64_t is not guaranteed to be unsigned long, so cast explicitly to a
  // type with a matching printf conversion instead of relying on %lu
  DEBUG_INFO("detected: %llu (%f Hz)", (unsigned long long)t, 1000000.0f / t);
  return t;
 }
 int renderThread(void * unused)
 {
  bool                error = false;
@@ -172,49 +191,62 @@ int renderThread(void * unused)
  SDL_Texture       * textTexture = NULL;
  SDL_Rect            textRect;
-  uint64_t            waitTime    = 0;
+  const uint64_t presentTime = detectPresentTime();
-  uint64_t            presentTime = 0;
+
  uint64_t pollDelay = 0;
  uint64_t pollStep  = 0;
  uint64_t drawStart = 0;
  uint64_t drawTime  = 0;
  uint64_t fpsStart = 0;
  uint64_t fpsTime  = 0;
  while(state.running)
  {
-    glFlush();
+    DEBUG_INFO("delay: %lu", pollDelay);
-
+    usleep(pollDelay);
-    if (waitTime < 30000 && waitTime > 2000)
+    if(header.dataPos == state.shm->dataPos)
      usleep(waitTime - 2000);
    // poll for a new frame
    while(state.running && header.dataPos == state.shm->dataPos)
    {
-      // if the frame is overdue
+      pollStep = 0;
-      if (microtime() - presentTime > waitTime)
+      do
      {
-        enum IVSHMEMWaitResult result = ivshmem_wait_irq(0, (1000/30));
+        ++pollStep;
-        if (result == IVSHMEM_WAIT_RESULT_OK)
+        if (pollDelay + pollStep < 30000)
-          continue;
+          pollDelay += pollStep;
        usleep(1);
      }
      while(header.dataPos == state.shm->dataPos && state.running);
-        if (result == IVSHMEM_WAIT_RESULT_TIMEOUT)
+      if (!state.running)
          break;
        if (result == IVSHMEM_WAIT_RESULT_ERROR)
        {
          DEBUG_ERROR("error during wait for host");
          state.running = false;
        break;
    }
-      }
+    else
    {
      // we were late, step back a chunk
      pollStep += 100;
      if (pollDelay > pollStep)
        pollDelay -= pollStep;
    }
    if (!state.running)
      break;
-    // normally you would never put this into an OpenGL application but because
+    // sleep for the remainder of the presentation time
-    // of our hybrid sleep/poll logic above the GPU has had time to finish up
+    if (frameCount > 0)
-    // anyway. Having this here ensures that the GPU doesn't buffer up
+    {
-    // additional frames if the guest starts to outpace us.
+      drawTime = microtime() - drawStart;
-    glFinish();
+      if (drawTime < presentTime)
      {
        uint64_t delta = presentTime - drawTime;
        if (delta > 1000)
          usleep(delta - 1000);
      }
-    waitTime = microtime() - presentTime;
+      // ensure buffers are flushed
      glFinish();
    }
    drawStart = microtime();
    // we must take a copy of the header, both to let the guest advance and to
    // prevent the contained arguments being abused to overflow buffers
@@ -340,13 +372,8 @@ int renderThread(void * unused)
      break;
    }
-    if (!params.showFPS)
+    if (params.showFPS)
    {
      SDL_RenderPresent(state.renderer);
      presentTime = microtime();
      continue;
    }
      // for now render the frame counter here, we really should
      // move this into the renderers though.
      if (fpsTime > 1000000)
@@ -407,13 +434,15 @@ int renderThread(void * unused)
        glDisable(GL_BLEND);
        SDL_GL_UnbindTexture(textTexture);
      }
    }
    SDL_RenderPresent(state.renderer);
    ++frameCount;
    uint64_t t = microtime();
-    fpsTime    += t - presentTime;
+    fpsTime  += t - fpsStart;
-    presentTime = t;
+    fpsStart  = t;
  }
  if (lgr)