[host] added multi-threaded memcopy for high resolutions

This commit is contained in:
Geoffrey McRae 2017-12-21 13:48:57 +11:00
parent a08aad8009
commit 43b096a5e7
8 changed files with 140 additions and 76 deletions

View File

@ -21,7 +21,6 @@ Place, Suite 330, Boston, MA 02111-1307 USA
using namespace Capture; using namespace Capture;
#include "common/debug.h" #include "common/debug.h"
#include "common/memcpySSE.h"
DXGI::DXGI() : DXGI::DXGI() :
m_options(NULL), m_options(NULL),
@ -33,11 +32,11 @@ DXGI::DXGI() :
m_texture(), m_texture(),
m_pointer(NULL) m_pointer(NULL)
{ {
} }
DXGI::~DXGI() DXGI::~DXGI()
{ {
} }
bool DXGI::Initialize(CaptureOptions * options) bool DXGI::Initialize(CaptureOptions * options)
@ -310,7 +309,7 @@ GrabStatus DXGI::GrabFrame(FrameInfo & frame)
frame.stride = m_mapping.RowPitch / 4; frame.stride = m_mapping.RowPitch / 4;
unsigned int size = m_height * m_mapping.RowPitch; unsigned int size = m_height * m_mapping.RowPitch;
memcpySSE(frame.buffer, m_mapping.pData, size < frame.bufferSize ? size : frame.bufferSize); m_memcpy.Copy(frame.buffer, m_mapping.pData, size < frame.bufferSize ? size : frame.bufferSize);
return GRAB_STATUS_OK; return GRAB_STATUS_OK;
} }
@ -448,7 +447,7 @@ GrabStatus DXGI::GrabFrame(FrameInfo & frame)
frame.stride = m_mapping.RowPitch / 4; frame.stride = m_mapping.RowPitch / 4;
unsigned int size = m_height * m_mapping.RowPitch; unsigned int size = m_height * m_mapping.RowPitch;
memcpySSE(frame.buffer, m_mapping.pData, size < frame.bufferSize ? size : frame.bufferSize); m_memcpy.Copy(frame.buffer, m_mapping.pData, size < frame.bufferSize ? size : frame.bufferSize);
return GRAB_STATUS_OK; return GRAB_STATUS_OK;
} }

View File

@ -20,6 +20,7 @@ Place, Suite 330, Boston, MA 02111-1307 USA
#pragma once #pragma once
#include "ICapture.h" #include "ICapture.h"
#include "MultiMemcpy.h"
#define W32_LEAN_AND_MEAN #define W32_LEAN_AND_MEAN
#include <windows.h> #include <windows.h>
@ -75,6 +76,7 @@ namespace Capture
unsigned int m_width; unsigned int m_width;
unsigned int m_height; unsigned int m_height;
MultiMemcpy m_memcpy;
IDXGIFactory1Ptr m_dxgiFactory; IDXGIFactory1Ptr m_dxgiFactory;
ID3D11DevicePtr m_device; ID3D11DevicePtr m_device;
D3D_FEATURE_LEVEL m_featureLevel; D3D_FEATURE_LEVEL m_featureLevel;

70
host/MultiMemcpy.cpp Normal file
View File

@ -0,0 +1,70 @@
/*
Looking Glass - KVM FrameRelay (KVMFR) Client
Copyright (C) 2017 Geoffrey McRae <geoff@hostfission.com>
https://looking-glass.hostfission.com
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "MultiMemcpy.h"
#include "Util.h"
#include "common/memcpySSE.h"
/**
 * Sets up every worker slot: a "start" semaphore the worker waits on, a
 * "stop" semaphore the worker releases when its slice is done, and the
 * thread itself. Both semaphores are created with an initial count of 0 and
 * a maximum count of 1.
 */
MultiMemcpy::MultiMemcpy()
{
  int slot = 0;
  while (slot < MULTIMEMCPY_THREADS)
  {
    struct Worker & worker = m_workers[slot];

    worker.start = CreateSemaphore(NULL, 0, 1, NULL);
    worker.stop  = CreateSemaphore(NULL, 0, 1, NULL);

    // Copy() waits on this flat array of "stop" handles in a single call
    m_semaphores[slot] = worker.stop;

    // the thread is created last as it immediately waits on worker.start
    worker.thread = CreateThread(0, 0, WorkerFunction, &worker, 0, NULL);

    ++slot;
  }
}
/**
 * Stops every worker thread and releases all handles owned by this object.
 *
 * The workers loop forever, so they are forcibly terminated. TerminateThread
 * only requests termination, so the thread handle is waited on before the
 * semaphores the worker may still be blocked on are closed. The thread
 * handle itself is then closed so the thread object is not leaked.
 */
MultiMemcpy::~MultiMemcpy()
{
  for (int i = 0; i < MULTIMEMCPY_THREADS; ++i)
  {
    TerminateThread(m_workers[i].thread, 0);

    // returns once the thread has actually gone away; fails immediately
    // (and harmlessly) if the handle was never valid
    WaitForSingleObject(m_workers[i].thread, INFINITE);

    CloseHandle(m_workers[i].thread);
    CloseHandle(m_workers[i].start);
    CloseHandle(m_workers[i].stop );
  }
}
/**
 * Copies `size` bytes from `src` to `dst` using all worker threads.
 *
 * The range is cut into MULTIMEMCPY_THREADS consecutive slices of
 * `size / MULTIMEMCPY_THREADS` bytes; the final slice additionally takes the
 * division remainder so that every byte of the range is copied. The call
 * blocks until every worker has signalled completion.
 *
 * @param dst  destination buffer, at least `size` bytes
 * @param src  source buffer, at least `size` bytes
 * @param size number of bytes to copy
 */
void MultiMemcpy::Copy(void * dst, void * src, size_t size)
{
  const size_t block = size / MULTIMEMCPY_THREADS;
  const int    last  = MULTIMEMCPY_THREADS - 1;

  // NOTE(review): memcpySSE's alignment requirements are not visible here;
  // confirm it tolerates slice start addresses that are not 16-byte aligned.
  for (int i = 0; i < MULTIMEMCPY_THREADS; ++i)
  {
    const size_t offset = i * block;

    m_workers[i].dst  = static_cast<uint8_t *>(dst) + offset;
    m_workers[i].src  = static_cast<uint8_t *>(src) + offset;

    // the last worker also copies the bytes lost to the integer division
    m_workers[i].size = (i == last) ? (size - offset) : block;

    // wake the worker; it picks up dst/src/size written above
    ReleaseSemaphore(m_workers[i].start, 1, NULL);
  }

  // wait for every worker's "stop" semaphore before returning
  WaitForMultipleObjects(MULTIMEMCPY_THREADS, m_semaphores, TRUE, INFINITE);
}
/**
 * Thread entry point for one worker.
 *
 * Loops forever: blocks on the worker's "start" semaphore, copies the slice
 * described by the worker's dst/src/size fields with memcpySSE, then
 * releases the "stop" semaphore to report completion. The loop never exits
 * on its own.
 *
 * @param param pointer to the Worker slot this thread services
 */
DWORD WINAPI MultiMemcpy::WorkerFunction(LPVOID param)
{
  struct Worker * const worker = static_cast<struct Worker *>(param);

  while (true)
  {
    // sleep until Copy() hands over a slice
    WaitForSingleObject(worker->start, INFINITE);

    memcpySSE(worker->dst, worker->src, worker->size);

    // tell Copy() this slice is finished
    ReleaseSemaphore(worker->stop, 1, NULL);
  }
}

48
host/MultiMemcpy.h Normal file
View File

@ -0,0 +1,48 @@
/*
Looking Glass - KVM FrameRelay (KVMFR) Client
Copyright (C) 2017 Geoffrey McRae <geoff@hostfission.com>
https://looking-glass.hostfission.com
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*/
#define MULTIMEMCPY_THREADS 4
#include <Windows.h>
#include <stdint.h>
#pragma once
/**
 * Performs a memory copy split across MULTIMEMCPY_THREADS worker threads.
 *
 * The constructor creates the threads and their semaphores, the destructor
 * terminates the threads and closes the semaphores. Each thread is handed a
 * pointer to its own entry in m_workers, so instances must stay at a fixed
 * address and must not be copied.
 */
class MultiMemcpy
{
public:
  MultiMemcpy();
  ~MultiMemcpy();

  /**
   * Copies `size` bytes from `src` to `dst` using the worker threads and
   * returns once all of them have signalled completion.
   */
  void Copy(void * dst, void * src, size_t size);

private:
  // Copying is disabled (declared, intentionally never defined): a copy
  // would share the same HANDLE values and close them twice, while the
  // threads would keep pointing at the original object's worker slots.
  MultiMemcpy(const MultiMemcpy &);
  MultiMemcpy & operator=(const MultiMemcpy &);

  // Per-thread state shared between Copy() and WorkerFunction()
  struct Worker
  {
    HANDLE start;   // released by Copy() to start a slice
    HANDLE stop;    // released by the worker when the slice is done
    HANDLE thread;  // the worker thread itself
    void * dst;     // destination of the current slice
    void * src;     // source of the current slice
    size_t size;    // length of the current slice in bytes
  };

  // the "stop" handle of every worker, laid out for WaitForMultipleObjects
  HANDLE m_semaphores[MULTIMEMCPY_THREADS];
  struct Worker m_workers[MULTIMEMCPY_THREADS];

  static DWORD WINAPI WorkerFunction(LPVOID param);
};

View File

@ -23,16 +23,9 @@ Place, Suite 330, Boston, MA 02111-1307 USA
#include "common/debug.h" #include "common/debug.h"
#include "common/KVMFR.h" #include "common/KVMFR.h"
#include "Util.h"
#include "CaptureFactory.h" #include "CaptureFactory.h"
// Atomic 8-bit AND/OR wrappers: GCC __sync builtins when building with
// MinGW, the Win32 Interlocked functions otherwise.
// NOTE(review): the __sync_*_and_fetch builtins return the updated value
// while InterlockedAnd8/InterlockedOr8 return the original value, so the
// result of these macros is not portable between the two builds - confirm
// that no caller relies on it.
#if __MINGW32__
#define INTERLOCKED_AND8 __sync_and_and_fetch
#define INTERLOCKED_OR8 __sync_or_and_fetch
#else
#define INTERLOCKED_OR8 InterlockedOr8
#define INTERLOCKED_AND8 InterlockedAnd8
#endif
Service * Service::m_instance = NULL; Service * Service::m_instance = NULL;
Service::Service() : Service::Service() :

View File

@ -29,6 +29,14 @@ Place, Suite 330, Boston, MA 02111-1307 USA
#define min(a, b) ((a) < (b) ? (a) : (b)) #define min(a, b) ((a) < (b) ? (a) : (b))
#endif #endif
// Atomic 8-bit AND/OR wrappers: GCC __sync builtins when building with
// MinGW, the Win32 Interlocked functions otherwise.
// NOTE(review): the __sync_*_and_fetch builtins return the updated value
// while InterlockedAnd8/InterlockedOr8 return the original value, so the
// result of these macros is not portable between the two builds - confirm
// that no caller relies on it.
#if __MINGW32__
#define INTERLOCKED_AND8 __sync_and_and_fetch
#define INTERLOCKED_OR8 __sync_or_and_fetch
#else
#define INTERLOCKED_OR8 InterlockedOr8
#define INTERLOCKED_AND8 InterlockedAnd8
#endif
class Util class Util
{ {
public: public:
@ -120,68 +128,4 @@ public:
_mm_stream_si128((__m128i *)&dest[32], v2); _mm_stream_si128((__m128i *)&dest[32], v2);
} }
} }
/**
 * Draws a cursor image directly into the frame's pixel buffer.
 *
 * The cursor is clipped against the frame: rows and columns that lie above
 * or left of the frame (negative cursorPos) are skipped, and drawing stops
 * at the frame's right and bottom edges.
 *
 * @param type        selects how cursorData is interpreted
 * @param cursorData  source cursor pixels (colour types) or bit masks (monochrome)
 * @param cursorRect  cursor size; x = width, y = number of source rows. For
 *                    monochrome the rows hold the AND mask followed by the
 *                    XOR mask, so the visible height is y / 2
 * @param cursorPitch bytes per row of cursorData
 * @param cursorPos   frame position of the cursor's top-left pixel, may be negative
 * @param frame       destination; width, height and stride are read and the
 *                    pixels behind frame.buffer are modified (stride is in
 *                    32-bit pixels)
 */
static void DrawCursor(
  const enum CursorType type,
  const uint8_t * cursorData,
  const POINT cursorRect,
  const unsigned int cursorPitch,
  const POINT cursorPos,
  FrameInfo & frame
)
{
  const int maxHeight = min(cursorRect.y, (int)frame.height - cursorPos.y);
  const int maxWidth  = min(cursorRect.x, (int)frame.width  - cursorPos.x);

  switch (type)
  {
    case CURSOR_TYPE_COLOR:
    {
      // alpha blend each cursor pixel over the frame; dst[3] is left untouched
      const unsigned int destPitch = frame.stride * 4;
      for (int y = abs(min(0, cursorPos.y)); y < maxHeight; ++y)
        for (int x = abs(min(0, cursorPos.x)); x < maxWidth; ++x)
        {
          uint8_t *src = (uint8_t *)cursorData + (cursorPitch * y) + (x * 4);
          uint8_t *dst = (uint8_t *)frame.buffer + (destPitch * (y + cursorPos.y)) + ((x + cursorPos.x) * 4);

          // alpha is biased by one so the blend can use a shift by 8
          const unsigned int alpha = src[3] + 1;
          const unsigned int inv   = 256 - alpha;
          dst[0] = (uint8_t)((alpha * src[0] + inv * dst[0]) >> 8);
          dst[1] = (uint8_t)((alpha * src[1] + inv * dst[1]) >> 8);
          dst[2] = (uint8_t)((alpha * src[2] + inv * dst[2]) >> 8);
        }
      break;
    }

    case CURSOR_TYPE_MASKED_COLOR:
    {
      // a non-zero top byte selects XOR with the frame, zero selects replace;
      // the written pixel always has its top byte forced to 0xff
      for (int y = abs(min(0, cursorPos.y)); y < maxHeight; ++y)
        for (int x = abs(min(0, cursorPos.x)); x < maxWidth; ++x)
        {
          uint32_t *src = (uint32_t *)cursorData + ((cursorPitch / 4) * y) + x;
          uint32_t *dst = (uint32_t *)frame.buffer + (frame.stride * (y + cursorPos.y)) + (x + cursorPos.x);
          if (*src & 0xff000000)
            *dst = 0xff000000 | (*dst ^ *src);
          else *dst = 0xff000000 | *src;
        }
      break;
    }

    case CURSOR_TYPE_MONOCHROME:
    {
      // Only the first half of the source rows are visible rows (the second
      // half is the XOR mask), so the visible height is clipped against the
      // frame on its own. Halving the already clipped maxHeight would drop
      // rows whenever the cursor is near the bottom edge of the frame.
      const int monoHeight = min(cursorRect.y / 2, (int)frame.height - cursorPos.y);

      for (int y = abs(min(0, cursorPos.y)); y < monoHeight; ++y)
        for (int x = abs(min(0, cursorPos.x)); x < maxWidth; ++x)
        {
          // one bit per pixel, most significant bit first
          uint8_t  *srcAnd = (uint8_t *)cursorData + (cursorPitch * y) + (x / 8);
          uint8_t  *srcXor = srcAnd + cursorPitch * (cursorRect.y / 2);
          uint32_t *dst    = (uint32_t *)frame.buffer + (frame.stride * (y + cursorPos.y)) + (x + cursorPos.x);
          const uint8_t  mask    = 0x80 >> (x % 8);
          const uint32_t andMask = (*srcAnd & mask) ? 0xFFFFFFFF : 0xFF000000;
          const uint32_t xorMask = (*srcXor & mask) ? 0x00FFFFFF : 0x00000000;
          *dst = (*dst & andMask) ^ xorMask;
        }
      break;
    }
  }
}
}; };

View File

@ -335,6 +335,7 @@
<ClCompile Include="CrashHandler.cpp" /> <ClCompile Include="CrashHandler.cpp" />
<ClCompile Include="ivshmem.cpp" /> <ClCompile Include="ivshmem.cpp" />
<ClCompile Include="main.cpp" /> <ClCompile Include="main.cpp" />
<ClCompile Include="MultiMemcpy.cpp" />
<ClCompile Include="Service.cpp" /> <ClCompile Include="Service.cpp" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
@ -344,6 +345,7 @@
<ClInclude Include="CrashHandler.h" /> <ClInclude Include="CrashHandler.h" />
<ClInclude Include="ICapture.h" /> <ClInclude Include="ICapture.h" />
<ClInclude Include="ivshmem.h" /> <ClInclude Include="ivshmem.h" />
<ClInclude Include="MultiMemcpy.h" />
<ClInclude Include="Service.h" /> <ClInclude Include="Service.h" />
<ClInclude Include="Util.h" /> <ClInclude Include="Util.h" />
</ItemGroup> </ItemGroup>

View File

@ -42,6 +42,9 @@
<ClCompile Include="CrashHandler.cpp"> <ClCompile Include="CrashHandler.cpp">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="MultiMemcpy.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="ivshmem.h"> <ClInclude Include="ivshmem.h">
@ -68,5 +71,8 @@
<ClInclude Include="CrashHandler.h"> <ClInclude Include="CrashHandler.h">
<Filter>Header Files</Filter> <Filter>Header Files</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="MultiMemcpy.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
</Project> </Project>