From 0b290d83d3de79a558b563e8d7024185145e9141 Mon Sep 17 00:00:00 2001 From: Geoffrey McRae Date: Tue, 5 Dec 2017 20:39:54 +1100 Subject: [PATCH] [host] Numerous performance improvements and minor bug fixes --- host/Capture/DXGI.cpp | 91 ++++++++++++++++-------------------------- host/Capture/NvFBC.cpp | 32 +++++++++++---- host/Capture/NvFBC.h | 2 + host/CaptureFactory.h | 2 +- host/ICapture.h | 3 ++ host/Service.cpp | 21 +++++++--- host/Util.h | 73 +++++++++++++++++++++++++++++++++ host/main.cpp | 4 +- 8 files changed, 156 insertions(+), 72 deletions(-) diff --git a/host/Capture/DXGI.cpp b/host/Capture/DXGI.cpp index 03c95613..1eb523a7 100644 --- a/host/Capture/DXGI.cpp +++ b/host/Capture/DXGI.cpp @@ -19,6 +19,7 @@ Place, Suite 330, Boston, MA 02111-1307 USA #include "DXGI.h" using namespace Capture; +#include "Util.h" #include "common\debug.h" #include "common\memcpySSE.h" @@ -360,68 +361,46 @@ bool DXGI::GrabFrame(FrameInfo & frame) { m_pointerVisible = frameInfo.PointerPosition.Visible; m_pointerPos = frameInfo.PointerPosition.Position; - } - // if the pointer is to be drawn - if (m_pointerVisible) - { - const int maxHeight = min(m_shapeInfo.Height, desc.Height - m_pointerPos.y); - const int maxWidth = min(m_shapeInfo.Width , desc.Width - m_pointerPos.x); - - switch (m_shapeInfo.Type) - { - case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_COLOR: - { - for(int y = abs(min(0, m_pointerPos.y)); y < maxHeight; ++y) - for (int x = abs(min(0, m_pointerPos.x)); x < maxWidth; ++x) - { - BYTE *src = (BYTE *)m_pointer + (m_shapeInfo.Pitch * y) + (x * 4); - BYTE *dst = (BYTE *)rect.pBits + (rect.Pitch * (y + m_pointerPos.y)) + ((x + m_pointerPos.x) * 4); - - const unsigned int alpha = src[3] + 1; - const unsigned int inv = 256 - alpha; - dst[0] = (BYTE)((alpha * src[0] + inv * dst[0]) >> 8); - dst[1] = (BYTE)((alpha * src[1] + inv * dst[1]) >> 8); - dst[2] = (BYTE)((alpha * src[2] + inv * dst[2]) >> 8); - } - break; - } - - case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MASKED_COLOR: - { - for (int y = abs(min(0, m_pointerPos.y)); y < maxHeight; ++y) - for (int x = abs(min(0, m_pointerPos.x)); x < maxWidth; ++x) - { - UINT32 *src = (UINT32 *)m_pointer + ((m_shapeInfo.Pitch/4) * y) + x; - UINT32 *dst = (UINT32 *)rect.pBits + (frame.stride * (y + m_pointerPos.y)) + (x + m_pointerPos.x); - if (*src & 0xff000000) - *dst = 0xff000000 | (*dst ^ *src); - else *dst = 0xff000000 | *src; - } - break; - } - - case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MONOCHROME: - { - for (int y = abs(min(0, m_pointerPos.y)); y < maxHeight / 2; ++y) - for (int x = abs(min(0, m_pointerPos.x)); x < maxWidth; ++x) - { - UINT8 *srcAnd = (UINT8 *)m_pointer + (m_shapeInfo.Pitch * y) + (x/8); - UINT8 *srcXor = srcAnd + m_shapeInfo.Pitch * (m_shapeInfo.Height / 2); - UINT32 *dst = (UINT32 *)rect.pBits + (frame.stride * (y + m_pointerPos.y)) + (x + m_pointerPos.x); - const BYTE mask = 0x80 >> (x % 8); - const UINT32 andMask = (*srcAnd & mask) ? 0xFFFFFFFF : 0xFF000000; - const UINT32 xorMask = (*srcXor & mask) ? 0x00FFFFFF : 0x00000000; - *dst = (*dst & andMask) ^ xorMask; - } - break; - } - } + frame.hasMousePos = true; + frame.mouseX = m_pointerPos.x; + frame.mouseY = m_pointerPos.y; } memcpySSE(frame.buffer, rect.pBits, frame.outSize); status = surface->Unmap(); + // if the pointer is to be drawn + if (m_pointerVisible) + { + enum CursorType type; + switch (m_shapeInfo.Type) + { + case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_COLOR : type = CURSOR_TYPE_COLOR ; break; + case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MASKED_COLOR: type = CURSOR_TYPE_MASKED_COLOR; break; + case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MONOCHROME : type = CURSOR_TYPE_MONOCHROME ; break; + default: + DEBUG_ERROR("Invalid cursor type"); + return false; + } + + POINT cursorPos; + POINT cursorRect; + cursorPos.x = m_pointerPos.x - m_shapeInfo.HotSpot.x; + cursorPos.y = m_pointerPos.y - m_shapeInfo.HotSpot.y; + cursorRect.x = m_shapeInfo.Width; + cursorRect.y = m_shapeInfo.Height; + + Util::DrawCursor( + type, + m_pointer, + cursorRect, + m_shapeInfo.Pitch, + cursorPos, + frame + ); + } + if (FAILED(status)) { DEBUG_ERROR("Failed to unmap surface: %08x", status); diff --git a/host/Capture/NvFBC.cpp b/host/Capture/NvFBC.cpp index 560d4c64..bd532bb2 100644 --- a/host/Capture/NvFBC.cpp +++ b/host/Capture/NvFBC.cpp @@ -34,6 +34,7 @@ using namespace Capture; NvFBC::NvFBC() : m_options(NULL), m_optNoCrop(false), + m_optNoWait(false), m_initialized(false), m_hDLL(NULL), m_nvFBC(NULL) @@ -51,9 +52,10 @@ bool NvFBC::Initialize(CaptureOptions * options) m_options = options; m_optNoCrop = false; - for (CaptureOptions::const_iterator it = options->begin(); it != options->end(); ++it) + for (CaptureOptions::const_iterator it = options->cbegin(); it != options->cend(); ++it) { if (_strcmpi(*it, "nocrop") == 0) { m_optNoCrop = true; continue; } + if (_strcmpi(*it, "nowait") == 0) { m_optNoWait = true; continue; } } std::string nvfbc = Util::GetSystemRoot() + "\\" + NVFBC_LIBRARY_NAME; @@ -137,9 +139,9 @@ bool NvFBC::Initialize(CaptureOptions * options) setupParams.dwVersion = NVFBC_TOSYS_SETUP_PARAMS_VER; setupParams.eMode = NVFBC_TOSYS_ARGB; setupParams.bWithHWCursor = TRUE; - setupParams.bDiffMap = FALSE; + setupParams.bDiffMap = TRUE; setupParams.ppBuffer = (void **)&m_frameBuffer; - setupParams.ppDiffMap = NULL; + setupParams.ppDiffMap = (void **)&m_diffMap; if (m_nvFBC->NvFBCToSysSetUp(&setupParams) != NVFBC_SUCCESS) { @@ -154,8 +156,8 @@ bool NvFBC::Initialize(CaptureOptions * options) ZeroMemory(&m_grabFrameParams, sizeof(NVFBC_TOSYS_GRAB_FRAME_PARAMS)); ZeroMemory(&m_grabInfo, sizeof(NvFBCFrameGrabInfo)); m_grabFrameParams.dwVersion = NVFBC_TOSYS_GRAB_FRAME_PARAMS_VER; - m_grabFrameParams.dwFlags = NVFBC_TOSYS_NOWAIT; - m_grabFrameParams.dwWaitTime = 100; + m_grabFrameParams.dwFlags = m_optNoWait ? NVFBC_TOSYS_NOWAIT : NVFBC_TOSYS_WAIT_WITH_TIMEOUT; + m_grabFrameParams.dwWaitTime = 1000; m_grabFrameParams.eGMode = NVFBC_TOSYS_SOURCEMODE_FULL; m_grabFrameParams.dwStartX = 0; m_grabFrameParams.dwStartY = 0; @@ -163,7 +165,6 @@ bool NvFBC::Initialize(CaptureOptions * options) m_grabFrameParams.dwTargetHeight = 0; m_grabFrameParams.pNvFBCFrameGrabInfo = &m_grabInfo; - m_initialized = true; return true; } @@ -222,8 +223,23 @@ bool NvFBC::GrabFrame(struct FrameInfo & frame) for(int i = 0; i < 2; ++i) { NVFBCRESULT status = m_nvFBC->NvFBCToSysGrabFrame(&m_grabFrameParams); + if (status == NVFBC_SUCCESS) { + bool hasDiff = false; + for (int r = (m_grabInfo.dwWidth * m_grabInfo.dwHeight) / (128 * 128); r >= 0; --r) + if (*((uint8_t*)m_diffMap + r)) + { + hasDiff = true; + break; + } + + if (!hasDiff) + { + i = 0; + continue; + } + unsigned int dataWidth; unsigned int dataOffset; @@ -232,8 +248,8 @@ bool NvFBC::GrabFrame(struct FrameInfo & frame) dataWidth = m_grabInfo.dwWidth * 4; dataOffset = 0; - frame.width = m_grabInfo.dwWidth; - frame.height = m_grabInfo.dwHeight; + frame.width = m_grabInfo.dwWidth; + frame.height = m_grabInfo.dwHeight; } else { diff --git a/host/Capture/NvFBC.h b/host/Capture/NvFBC.h index ac0a085b..96815742 100644 --- a/host/Capture/NvFBC.h +++ b/host/Capture/NvFBC.h @@ -45,6 +45,7 @@ namespace Capture private: CaptureOptions * m_options; bool m_optNoCrop; + bool m_optNoWait; bool m_initialized; HMODULE m_hDLL; @@ -57,6 +58,7 @@ namespace Capture DWORD m_maxCaptureWidth, m_maxCaptureHeight; NvFBCToSys * m_nvFBC; void * m_frameBuffer; + void * m_diffMap; NvFBCFrameGrabInfo m_grabInfo; NVFBC_TOSYS_GRAB_FRAME_PARAMS m_grabFrameParams; }; diff --git a/host/CaptureFactory.h b/host/CaptureFactory.h index f421811a..0e17af6a 100644 --- a/host/CaptureFactory.h +++ b/host/CaptureFactory.h @@ -71,7 +71,7 @@ public: static ICapture * DetectDevice(CaptureOptions * options) { DeviceList devices = GetDevices(); - for (DeviceList::const_iterator it = devices.begin(); it != devices.end(); ++it) + for (DeviceList::const_iterator it = devices.cbegin(); it != devices.cend(); ++it) { ICapture * device = *it; diff --git a/host/ICapture.h b/host/ICapture.h index 8659c295..c5fbfa58 100644 --- a/host/ICapture.h +++ b/host/ICapture.h @@ -29,6 +29,9 @@ struct FrameInfo void * buffer; size_t bufferSize; size_t outSize; + + bool hasMousePos; + int mouseX, mouseY; }; typedef std::vector CaptureOptions; diff --git a/host/Service.cpp b/host/Service.cpp index 2608f3cb..f0436109 100644 --- a/host/Service.cpp +++ b/host/Service.cpp @@ -133,8 +133,9 @@ bool Service::Process() return false; FrameInfo frame; - frame.buffer = m_frame[m_frameIndex]; - frame.bufferSize = m_frameSize; + frame.buffer = m_frame[m_frameIndex]; + frame.bufferSize = m_frameSize; + frame.hasMousePos = false; // wait for the host to notify that is it is ready to proceed bool eventDone = false; @@ -181,10 +182,18 @@ bool Service::Process() m_header->dataLen = frame.outSize; // tell the host where the cursor is - POINT cursorPos; - GetCursorPos(&cursorPos); - m_header->mouseX = cursorPos.x; - m_header->mouseY = cursorPos.y; + if (frame.hasMousePos) + { + m_header->mouseX = frame.mouseX; + m_header->mouseY = frame.mouseY; + } + else + { + POINT cursorPos; + GetCursorPos(&cursorPos); + m_header->mouseX = cursorPos.x; + m_header->mouseY = cursorPos.y; + } if (!m_ivshmem->RingDoorbell(m_header->hostID, 0)) { diff --git a/host/Util.h b/host/Util.h index d9938800..55bf65cb 100644 --- a/host/Util.h +++ b/host/Util.h @@ -24,6 +24,15 @@ Place, Suite 330, Boston, MA 02111-1307 USA #include "common\debug.h" +enum CursorType +{ + CURSOR_TYPE_COLOR, + CURSOR_TYPE_MONOCHROME, + CURSOR_TYPE_MASKED_COLOR, + CURSOR_TYPE_PACKED_MONOCHROME, + CURSOR_TYPE_PACKED_MASKED_COLOR +}; + class Util { public: @@ -103,4 +112,68 @@ public: _mm_stream_si128((__m128i *)&dest[32], v2); } } + + static void DrawCursor( + const enum CursorType type, + const uint8_t * cursorData, + const POINT cursorRect, + const unsigned int cursorPitch, + const POINT cursorPos, + FrameInfo & frame + ) + { + const int maxHeight = min(cursorRect.y, (int)frame.height - cursorPos.y); + const int maxWidth = min(cursorRect.x, (int)frame.width - cursorPos.x); + + switch (type) + { + case CURSOR_TYPE_COLOR: + { + const unsigned int destPitch = frame.stride * 4; + for (int y = abs(min(0, cursorPos.y)); y < maxHeight; ++y) + for (int x = abs(min(0, cursorPos.x)); x < maxWidth; ++x) + { + uint8_t *src = (uint8_t *)cursorData + (cursorPitch * y) + (x * 4); + uint8_t *dst = (uint8_t *)frame.buffer + (destPitch * (y + cursorPos.y)) + ((x + cursorPos.x) * 4); + + const unsigned int alpha = src[3] + 1; + const unsigned int inv = 256 - alpha; + dst[0] = (uint8_t)((alpha * src[0] + inv * dst[0]) >> 8); + dst[1] = (uint8_t)((alpha * src[1] + inv * dst[1]) >> 8); + dst[2] = (uint8_t)((alpha * src[2] + inv * dst[2]) >> 8); + } + break; + } + + case CURSOR_TYPE_MASKED_COLOR: + { + for (int y = abs(min(0, cursorPos.y)); y < maxHeight; ++y) + for (int x = abs(min(0, cursorPos.x)); x < maxWidth; ++x) + { + uint32_t *src = (uint32_t *)cursorData + ((cursorPitch / 4) * y) + x; + uint32_t *dst = (uint32_t *)frame.buffer + (frame.stride * (y + cursorPos.y)) + (x + cursorPos.x); + if (*src & 0xff000000) + *dst = 0xff000000 | (*dst ^ *src); + else *dst = 0xff000000 | *src; + } + break; + } + + case CURSOR_TYPE_MONOCHROME: + { + for (int y = abs(min(0, cursorPos.y)); y < maxHeight / 2; ++y) + for (int x = abs(min(0, cursorPos.x)); x < maxWidth; ++x) + { + uint8_t *srcAnd = (uint8_t *)cursorData + (cursorPitch * y) + (x / 8); + uint8_t *srcXor = srcAnd + cursorPitch * (cursorRect.y / 2); + uint32_t *dst = (uint32_t *)frame.buffer + (frame.stride * (y + cursorPos.y)) + (x + cursorPos.x); + const uint8_t mask = 0x80 >> (x % 8); + const uint32_t andMask = (*srcAnd & mask) ? 0xFFFFFFFF : 0xFF000000; + const uint32_t xorMask = (*srcXor & mask) ? 0x00FFFFFF : 0x00000000; + *dst = (*dst & andMask) ^ xorMask; + } + break; + } + } + } }; \ No newline at end of file diff --git a/host/main.cpp b/host/main.cpp index 84b69580..23cd673b 100644 --- a/host/main.cpp +++ b/host/main.cpp @@ -51,7 +51,9 @@ int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, PSTR szCmdParam CrashHandler::Initialize(); struct StartupArgs args; - ZeroMemory(&args, sizeof(struct StartupArgs)); + args.foreground = false; + args.captureDevice = NULL; + int ret = parseArgs(args); if (ret == 0) {