[host] Numerous performance improvements and minor bug fixes

This commit is contained in:
Geoffrey McRae 2017-12-05 20:39:54 +11:00
parent c1a82e853d
commit 0b290d83d3
8 changed files with 156 additions and 72 deletions

View File

@ -19,6 +19,7 @@ Place, Suite 330, Boston, MA 02111-1307 USA
#include "DXGI.h" #include "DXGI.h"
using namespace Capture; using namespace Capture;
#include "Util.h"
#include "common\debug.h" #include "common\debug.h"
#include "common\memcpySSE.h" #include "common\memcpySSE.h"
@ -360,68 +361,46 @@ bool DXGI::GrabFrame(FrameInfo & frame)
{ {
m_pointerVisible = frameInfo.PointerPosition.Visible; m_pointerVisible = frameInfo.PointerPosition.Visible;
m_pointerPos = frameInfo.PointerPosition.Position; m_pointerPos = frameInfo.PointerPosition.Position;
}
// if the pointer is to be drawn frame.hasMousePos = true;
if (m_pointerVisible) frame.mouseX = m_pointerPos.x;
{ frame.mouseY = m_pointerPos.y;
const int maxHeight = min(m_shapeInfo.Height, desc.Height - m_pointerPos.y);
const int maxWidth = min(m_shapeInfo.Width , desc.Width - m_pointerPos.x);
switch (m_shapeInfo.Type)
{
case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_COLOR:
{
for(int y = abs(min(0, m_pointerPos.y)); y < maxHeight; ++y)
for (int x = abs(min(0, m_pointerPos.x)); x < maxWidth; ++x)
{
BYTE *src = (BYTE *)m_pointer + (m_shapeInfo.Pitch * y) + (x * 4);
BYTE *dst = (BYTE *)rect.pBits + (rect.Pitch * (y + m_pointerPos.y)) + ((x + m_pointerPos.x) * 4);
const unsigned int alpha = src[3] + 1;
const unsigned int inv = 256 - alpha;
dst[0] = (BYTE)((alpha * src[0] + inv * dst[0]) >> 8);
dst[1] = (BYTE)((alpha * src[1] + inv * dst[1]) >> 8);
dst[2] = (BYTE)((alpha * src[2] + inv * dst[2]) >> 8);
}
break;
}
case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MASKED_COLOR:
{
for (int y = abs(min(0, m_pointerPos.y)); y < maxHeight; ++y)
for (int x = abs(min(0, m_pointerPos.x)); x < maxWidth; ++x)
{
UINT32 *src = (UINT32 *)m_pointer + ((m_shapeInfo.Pitch/4) * y) + x;
UINT32 *dst = (UINT32 *)rect.pBits + (frame.stride * (y + m_pointerPos.y)) + (x + m_pointerPos.x);
if (*src & 0xff000000)
*dst = 0xff000000 | (*dst ^ *src);
else *dst = 0xff000000 | *src;
}
break;
}
case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MONOCHROME:
{
for (int y = abs(min(0, m_pointerPos.y)); y < maxHeight / 2; ++y)
for (int x = abs(min(0, m_pointerPos.x)); x < maxWidth; ++x)
{
UINT8 *srcAnd = (UINT8 *)m_pointer + (m_shapeInfo.Pitch * y) + (x/8);
UINT8 *srcXor = srcAnd + m_shapeInfo.Pitch * (m_shapeInfo.Height / 2);
UINT32 *dst = (UINT32 *)rect.pBits + (frame.stride * (y + m_pointerPos.y)) + (x + m_pointerPos.x);
const BYTE mask = 0x80 >> (x % 8);
const UINT32 andMask = (*srcAnd & mask) ? 0xFFFFFFFF : 0xFF000000;
const UINT32 xorMask = (*srcXor & mask) ? 0x00FFFFFF : 0x00000000;
*dst = (*dst & andMask) ^ xorMask;
}
break;
}
}
} }
memcpySSE(frame.buffer, rect.pBits, frame.outSize); memcpySSE(frame.buffer, rect.pBits, frame.outSize);
status = surface->Unmap(); status = surface->Unmap();
// if the pointer is to be drawn
if (m_pointerVisible)
{
enum CursorType type;
switch (m_shapeInfo.Type)
{
case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_COLOR : type = CURSOR_TYPE_COLOR ; break;
case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MASKED_COLOR: type = CURSOR_TYPE_MASKED_COLOR; break;
case DXGI_OUTDUPL_POINTER_SHAPE_TYPE_MONOCHROME : type = CURSOR_TYPE_MONOCHROME ; break;
default:
DEBUG_ERROR("Invalid cursor type");
return false;
}
POINT cursorPos;
POINT cursorRect;
cursorPos.x = m_pointerPos.x - m_shapeInfo.HotSpot.x;
cursorPos.y = m_pointerPos.y - m_shapeInfo.HotSpot.y;
cursorRect.x = m_shapeInfo.Width;
cursorRect.y = m_shapeInfo.Height;
Util::DrawCursor(
type,
m_pointer,
cursorRect,
m_shapeInfo.Pitch,
cursorPos,
frame
);
}
if (FAILED(status)) if (FAILED(status))
{ {
DEBUG_ERROR("Failed to unmap surface: %08x", status); DEBUG_ERROR("Failed to unmap surface: %08x", status);

View File

@ -34,6 +34,7 @@ using namespace Capture;
NvFBC::NvFBC() : NvFBC::NvFBC() :
m_options(NULL), m_options(NULL),
m_optNoCrop(false), m_optNoCrop(false),
m_optNoWait(false),
m_initialized(false), m_initialized(false),
m_hDLL(NULL), m_hDLL(NULL),
m_nvFBC(NULL) m_nvFBC(NULL)
@ -51,9 +52,10 @@ bool NvFBC::Initialize(CaptureOptions * options)
m_options = options; m_options = options;
m_optNoCrop = false; m_optNoCrop = false;
for (CaptureOptions::const_iterator it = options->begin(); it != options->end(); ++it) for (CaptureOptions::const_iterator it = options->cbegin(); it != options->cend(); ++it)
{ {
if (_strcmpi(*it, "nocrop") == 0) { m_optNoCrop = true; continue; } if (_strcmpi(*it, "nocrop") == 0) { m_optNoCrop = true; continue; }
if (_strcmpi(*it, "nowait") == 0) { m_optNoWait = true; continue; }
} }
std::string nvfbc = Util::GetSystemRoot() + "\\" + NVFBC_LIBRARY_NAME; std::string nvfbc = Util::GetSystemRoot() + "\\" + NVFBC_LIBRARY_NAME;
@ -137,9 +139,9 @@ bool NvFBC::Initialize(CaptureOptions * options)
setupParams.dwVersion = NVFBC_TOSYS_SETUP_PARAMS_VER; setupParams.dwVersion = NVFBC_TOSYS_SETUP_PARAMS_VER;
setupParams.eMode = NVFBC_TOSYS_ARGB; setupParams.eMode = NVFBC_TOSYS_ARGB;
setupParams.bWithHWCursor = TRUE; setupParams.bWithHWCursor = TRUE;
setupParams.bDiffMap = FALSE; setupParams.bDiffMap = TRUE;
setupParams.ppBuffer = (void **)&m_frameBuffer; setupParams.ppBuffer = (void **)&m_frameBuffer;
setupParams.ppDiffMap = NULL; setupParams.ppDiffMap = (void **)&m_diffMap;
if (m_nvFBC->NvFBCToSysSetUp(&setupParams) != NVFBC_SUCCESS) if (m_nvFBC->NvFBCToSysSetUp(&setupParams) != NVFBC_SUCCESS)
{ {
@ -154,8 +156,8 @@ bool NvFBC::Initialize(CaptureOptions * options)
ZeroMemory(&m_grabFrameParams, sizeof(NVFBC_TOSYS_GRAB_FRAME_PARAMS)); ZeroMemory(&m_grabFrameParams, sizeof(NVFBC_TOSYS_GRAB_FRAME_PARAMS));
ZeroMemory(&m_grabInfo, sizeof(NvFBCFrameGrabInfo)); ZeroMemory(&m_grabInfo, sizeof(NvFBCFrameGrabInfo));
m_grabFrameParams.dwVersion = NVFBC_TOSYS_GRAB_FRAME_PARAMS_VER; m_grabFrameParams.dwVersion = NVFBC_TOSYS_GRAB_FRAME_PARAMS_VER;
m_grabFrameParams.dwFlags = NVFBC_TOSYS_NOWAIT; m_grabFrameParams.dwFlags = m_optNoWait ? NVFBC_TOSYS_NOWAIT : NVFBC_TOSYS_WAIT_WITH_TIMEOUT;
m_grabFrameParams.dwWaitTime = 100; m_grabFrameParams.dwWaitTime = 1000;
m_grabFrameParams.eGMode = NVFBC_TOSYS_SOURCEMODE_FULL; m_grabFrameParams.eGMode = NVFBC_TOSYS_SOURCEMODE_FULL;
m_grabFrameParams.dwStartX = 0; m_grabFrameParams.dwStartX = 0;
m_grabFrameParams.dwStartY = 0; m_grabFrameParams.dwStartY = 0;
@ -163,7 +165,6 @@ bool NvFBC::Initialize(CaptureOptions * options)
m_grabFrameParams.dwTargetHeight = 0; m_grabFrameParams.dwTargetHeight = 0;
m_grabFrameParams.pNvFBCFrameGrabInfo = &m_grabInfo; m_grabFrameParams.pNvFBCFrameGrabInfo = &m_grabInfo;
m_initialized = true; m_initialized = true;
return true; return true;
} }
@ -222,8 +223,23 @@ bool NvFBC::GrabFrame(struct FrameInfo & frame)
for(int i = 0; i < 2; ++i) for(int i = 0; i < 2; ++i)
{ {
NVFBCRESULT status = m_nvFBC->NvFBCToSysGrabFrame(&m_grabFrameParams); NVFBCRESULT status = m_nvFBC->NvFBCToSysGrabFrame(&m_grabFrameParams);
if (status == NVFBC_SUCCESS) if (status == NVFBC_SUCCESS)
{ {
bool hasDiff = false;
for (int r = (m_grabInfo.dwWidth * m_grabInfo.dwHeight) / (128 * 128); r >= 0; --r)
if (*((uint8_t*)m_diffMap + r))
{
hasDiff = true;
break;
}
if (!hasDiff)
{
i = 0;
continue;
}
unsigned int dataWidth; unsigned int dataWidth;
unsigned int dataOffset; unsigned int dataOffset;
@ -232,8 +248,8 @@ bool NvFBC::GrabFrame(struct FrameInfo & frame)
dataWidth = m_grabInfo.dwWidth * 4; dataWidth = m_grabInfo.dwWidth * 4;
dataOffset = 0; dataOffset = 0;
frame.width = m_grabInfo.dwWidth; frame.width = m_grabInfo.dwWidth;
frame.height = m_grabInfo.dwHeight; frame.height = m_grabInfo.dwHeight;
} }
else else
{ {

View File

@ -45,6 +45,7 @@ namespace Capture
private: private:
CaptureOptions * m_options; CaptureOptions * m_options;
bool m_optNoCrop; bool m_optNoCrop;
bool m_optNoWait;
bool m_initialized; bool m_initialized;
HMODULE m_hDLL; HMODULE m_hDLL;
@ -57,6 +58,7 @@ namespace Capture
DWORD m_maxCaptureWidth, m_maxCaptureHeight; DWORD m_maxCaptureWidth, m_maxCaptureHeight;
NvFBCToSys * m_nvFBC; NvFBCToSys * m_nvFBC;
void * m_frameBuffer; void * m_frameBuffer;
void * m_diffMap;
NvFBCFrameGrabInfo m_grabInfo; NvFBCFrameGrabInfo m_grabInfo;
NVFBC_TOSYS_GRAB_FRAME_PARAMS m_grabFrameParams; NVFBC_TOSYS_GRAB_FRAME_PARAMS m_grabFrameParams;
}; };

View File

@ -71,7 +71,7 @@ public:
static ICapture * DetectDevice(CaptureOptions * options) static ICapture * DetectDevice(CaptureOptions * options)
{ {
DeviceList devices = GetDevices(); DeviceList devices = GetDevices();
for (DeviceList::const_iterator it = devices.begin(); it != devices.end(); ++it) for (DeviceList::const_iterator it = devices.cbegin(); it != devices.cend(); ++it)
{ {
ICapture * device = *it; ICapture * device = *it;

View File

@ -29,6 +29,9 @@ struct FrameInfo
void * buffer; void * buffer;
size_t bufferSize; size_t bufferSize;
size_t outSize; size_t outSize;
bool hasMousePos;
int mouseX, mouseY;
}; };
typedef std::vector<const char *> CaptureOptions; typedef std::vector<const char *> CaptureOptions;

View File

@ -133,8 +133,9 @@ bool Service::Process()
return false; return false;
FrameInfo frame; FrameInfo frame;
frame.buffer = m_frame[m_frameIndex]; frame.buffer = m_frame[m_frameIndex];
frame.bufferSize = m_frameSize; frame.bufferSize = m_frameSize;
frame.hasMousePos = false;
// wait for the host to notify that is it is ready to proceed // wait for the host to notify that is it is ready to proceed
bool eventDone = false; bool eventDone = false;
@ -181,10 +182,18 @@ bool Service::Process()
m_header->dataLen = frame.outSize; m_header->dataLen = frame.outSize;
// tell the host where the cursor is // tell the host where the cursor is
POINT cursorPos; if (frame.hasMousePos)
GetCursorPos(&cursorPos); {
m_header->mouseX = cursorPos.x; m_header->mouseX = frame.mouseX;
m_header->mouseY = cursorPos.y; m_header->mouseY = frame.mouseY;
}
else
{
POINT cursorPos;
GetCursorPos(&cursorPos);
m_header->mouseX = cursorPos.x;
m_header->mouseY = cursorPos.y;
}
if (!m_ivshmem->RingDoorbell(m_header->hostID, 0)) if (!m_ivshmem->RingDoorbell(m_header->hostID, 0))
{ {

View File

@ -24,6 +24,15 @@ Place, Suite 330, Boston, MA 02111-1307 USA
#include "common\debug.h" #include "common\debug.h"
// Pixel formats a cursor shape can be supplied in.
// Util::DrawCursor switches on this value to decide how the shape is
// composited onto the captured frame.
enum CursorType
{
  // 4 bytes per pixel; DrawCursor blends channels 0-2 using byte 3 as alpha
  CURSOR_TYPE_COLOR               = 0,

  // 1 bit per pixel; DrawCursor reads an AND mask followed by an XOR mask
  // located half way down the shape
  CURSOR_TYPE_MONOCHROME          = 1,

  // 32 bits per pixel; DrawCursor XORs the pixel into the frame when the top
  // byte is non-zero and otherwise overwrites the frame pixel
  CURSOR_TYPE_MASKED_COLOR        = 2,

  // NOTE(review): the packed variants are not handled by the DrawCursor
  // implementation visible in this file - presumably reserved for another
  // capture backend; confirm against callers.
  CURSOR_TYPE_PACKED_MONOCHROME   = 3,
  CURSOR_TYPE_PACKED_MASKED_COLOR = 4
};
class Util class Util
{ {
public: public:
@ -103,4 +112,68 @@ public:
_mm_stream_si128((__m128i *)&dest[32], v2); _mm_stream_si128((__m128i *)&dest[32], v2);
} }
} }
/**
 * Composite a cursor shape onto the frame stored in frame.buffer.
 *
 * The shape is clipped against the frame on all four sides, so a cursor that
 * is partially (or completely) outside of the frame is safe to draw.
 *
 * @param type        pixel format of cursorData
 * @param cursorData  the raw shape bitmap
 * @param cursorRect  size of the shape bitmap, x = width and y = height in
 *                    rows. For CURSOR_TYPE_MONOCHROME the bitmap holds the
 *                    AND mask followed by the XOR mask, so only y / 2 rows
 *                    are actually drawn.
 * @param cursorPitch number of bytes per row of cursorData
 * @param cursorPos   frame coordinate of the top left of the shape, may be
 *                    negative
 * @param frame       destination frame; frame.buffer is modified in place.
 *                    frame.stride is used as the row length in 32-bit pixels.
 *
 * Cursor types without a case below are silently ignored.
 */
static void DrawCursor(
  const enum CursorType type,
  const uint8_t * cursorData,
  const POINT cursorRect,
  const unsigned int cursorPitch,
  const POINT cursorPos,
  FrameInfo & frame
)
{
  // The XOR mask of a monochrome shape starts half way down the bitmap, so
  // the drawable shape is only half as tall as the bitmap. The halving must
  // happen BEFORE clipping against the frame; halving the clipped value
  // truncates the cursor whenever it is clipped by the bottom frame edge.
  const auto shapeHeight = (type == CURSOR_TYPE_MONOCHROME) ? cursorRect.y / 2 : cursorRect.y;

  // exclusive end of the visible shape area, clipped to the bottom/right of the frame
  const int maxHeight = min(shapeHeight , (int)frame.height - cursorPos.y);
  const int maxWidth  = min(cursorRect.x, (int)frame.width  - cursorPos.x);

  // first visible row/column when the shape hangs over the top/left of the frame
  const int startY = abs(min(0, cursorPos.y));
  const int startX = abs(min(0, cursorPos.x));

  switch (type)
  {
    case CURSOR_TYPE_COLOR:
    {
      // the frame stride is in pixels, this case addresses the frame in bytes
      const unsigned int destPitch = frame.stride * 4;
      for (int y = startY; y < maxHeight; ++y)
        for (int x = startX; x < maxWidth; ++x)
        {
          const uint8_t *src = cursorData + (cursorPitch * y) + (x * 4);
          uint8_t       *dst = (uint8_t *)frame.buffer + (destPitch * (y + cursorPos.y)) + ((x + cursorPos.x) * 4);

          // alpha is biased by one so that the blend can use >> 8 instead of / 255
          const unsigned int alpha = src[3] + 1;
          const unsigned int inv   = 256 - alpha;
          dst[0] = (uint8_t)((alpha * src[0] + inv * dst[0]) >> 8);
          dst[1] = (uint8_t)((alpha * src[1] + inv * dst[1]) >> 8);
          dst[2] = (uint8_t)((alpha * src[2] + inv * dst[2]) >> 8);
        }
      break;
    }

    case CURSOR_TYPE_MASKED_COLOR:
    {
      for (int y = startY; y < maxHeight; ++y)
        for (int x = startX; x < maxWidth; ++x)
        {
          const uint32_t *src = (const uint32_t *)cursorData + ((cursorPitch / 4) * y) + x;
          uint32_t       *dst = (uint32_t *)frame.buffer + (frame.stride * (y + cursorPos.y)) + (x + cursorPos.x);

          // a non-zero top byte selects XOR with the frame, zero replaces the pixel
          if (*src & 0xff000000)
            *dst = 0xff000000 | (*dst ^ *src);
          else
            *dst = 0xff000000 | *src;
        }
      break;
    }

    case CURSOR_TYPE_MONOCHROME:
    {
      // maxHeight was already computed from the halved (visible) shape height
      for (int y = startY; y < maxHeight; ++y)
        for (int x = startX; x < maxWidth; ++x)
        {
          // 1 bit per pixel, most significant bit first
          const uint8_t *srcAnd = cursorData + (cursorPitch * y) + (x / 8);
          const uint8_t *srcXor = srcAnd + cursorPitch * (cursorRect.y / 2);
          uint32_t      *dst    = (uint32_t *)frame.buffer + (frame.stride * (y + cursorPos.y)) + (x + cursorPos.x);

          const uint8_t  mask    = 0x80 >> (x % 8);
          const uint32_t andMask = (*srcAnd & mask) ? 0xFFFFFFFF : 0xFF000000;
          const uint32_t xorMask = (*srcXor & mask) ? 0x00FFFFFF : 0x00000000;
          *dst = (*dst & andMask) ^ xorMask;
        }
      break;
    }

    default:
      // remaining cursor types are not drawn, matching the previous behaviour
      break;
  }
}
}; };

View File

@ -51,7 +51,9 @@ int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, PSTR szCmdParam
CrashHandler::Initialize(); CrashHandler::Initialize();
struct StartupArgs args; struct StartupArgs args;
ZeroMemory(&args, sizeof(struct StartupArgs)); args.foreground = false;
args.captureDevice = NULL;
int ret = parseArgs(args); int ret = parseArgs(args);
if (ret == 0) if (ret == 0)
{ {