[host] dxgi: d3d12 now writes directly into ivshmem

This is still incomplete, as d3d12 doesn't have any proper sync with the
captured frame, and as such it is still not suggested for general usage. This
change, though, is monumental for this project, as it removes a full memory
copy, reducing bandwidth consumption enormously.
This commit is contained in:
Geoffrey McRae
2024-01-27 17:55:44 +11:00
parent adaf40e2bf
commit 34e8a2255e
10 changed files with 18754 additions and 8144 deletions

View File

@@ -14,7 +14,7 @@ add_library(capture_DXGI STATIC
src/pp/rgb24.c
)
add_definitions("-DCOBJMACROS -DINITGUID")
add_definitions("-DCOBJMACROS -DINITGUID -DWIDL_C_INLINE_WRAPPERS")
target_link_libraries(capture_DXGI
lg_common

View File

@@ -36,7 +36,11 @@ struct DXGICopyBackend
const char * code;
// create the copy backend
bool (*create)(unsigned textures);
bool (*create)(
void * ivshmemBase,
unsigned * alignSize,
unsigned frameBuffers,
unsigned textures);
// configure the copy backend with the specified format
bool (*configure)(
@@ -50,7 +54,10 @@ struct DXGICopyBackend
void (*free)(void);
// called just before the copy starts
bool (*preCopy)(ID3D11Texture2D * src, unsigned textureIndex);
bool (*preCopy)(ID3D11Texture2D * src,
unsigned textureIndex,
unsigned frameBufferIndex,
FrameBuffer * frameBuffer);
// called to copy the full frame
bool (*copyFull)(ID3D11Texture2D * src, unsigned textureIndex);
@@ -65,6 +72,9 @@ struct DXGICopyBackend
// maps the copied frame into memory
CaptureResult (*mapTexture)(unsigned textureIndex, void ** map);
// [optional] backend specific write into the FrameBuffer
CaptureResult (*writeFrame)(int textureIndex, FrameBuffer * frame);
// unmaps the copied frame from memory
void (*unmapTexture)(unsigned textureIndex);

View File

@@ -43,7 +43,11 @@ struct D3D11Backend
static struct D3D11Backend * this = NULL;
static bool d3d11_create(unsigned textures)
static bool d3d11_create(
void * ivshmemBase,
unsigned * alignSize,
unsigned frameBuffers,
unsigned textures)
{
DEBUG_ASSERT(!this);
this = calloc(1,
@@ -128,7 +132,11 @@ static void d3d11_free(void)
this = NULL;
}
static bool d3d11_preCopy(ID3D11Texture2D * src, unsigned textureIndex)
static bool d3d11_preCopy(
ID3D11Texture2D * src,
unsigned textureIndex,
unsigned frameBufferIndex,
FrameBuffer * frameBuffer)
{
dxgi_contextLock();
this->texture[textureIndex].copyTime = microtime();

View File

@@ -35,7 +35,6 @@
struct D3D12Texture
{
ID3D12Resource ** tex;
ID3D12CommandAllocator ** commandAllocator;
ID3D12CommandList ** commandList;
ID3D12GraphicsCommandList ** graphicsCommandList;
@@ -58,6 +57,11 @@ struct D3D12Backend
float copySleep;
ID3D12Device ** device;
ID3D12Device3 ** device3;
void * ivshmemBase;
ID3D12Heap ** heap;
ID3D12Resource *** frameBuffers;
ID3D12Resource ** dstResource;
ID3D12CommandQueue ** commandQueue;
UINT64 fenceValue;
ID3D12Fence ** fence;
@@ -88,7 +92,11 @@ typedef HRESULT (*D3D12GetDebugInterface_t)(
static void d3d12_free(void);
static bool d3d12_create(unsigned textures)
static bool d3d12_create(
void * ivshmemBase,
unsigned * alignSize,
unsigned frameBuffers,
unsigned textures)
{
DEBUG_ASSERT(!this);
comRef_scopePush();
@@ -109,6 +117,13 @@ static bool d3d12_create(unsigned textures)
if (!this->d3d12)
goto exit;
this->frameBuffers = calloc(frameBuffers, sizeof(*this->frameBuffers));
if (!this->frameBuffers)
{
DEBUG_ERROR("Failed to allocate the ID3D12Resource frame buffer array");
goto exit;
}
if (dxgi_debug())
{
D3D12GetDebugInterface_t D3D12GetDebugInterface = (D3D12GetDebugInterface_t)
@@ -145,6 +160,33 @@ static bool d3d12_create(unsigned textures)
goto exit;
}
comRef_defineLocal(ID3D12Device3, device3);
status = ID3D12Device_QueryInterface(
*device, &IID_ID3D12Device3, (void **)device3);
if (FAILED(status))
{
DEBUG_WINERROR("Failed to obtain the ID3D12Device3 interface", status);
goto exit;
}
comRef_defineLocal(ID3D12Heap, heap);
status = ID3D12Device3_OpenExistingHeapFromAddress(
*device3,
ivshmemBase,
&IID_ID3D12Heap,
(void **)heap);
if (FAILED(status))
{
DEBUG_WINERROR("Failed to open the framebuffer as a heap", status);
return false;
}
D3D12_HEAP_DESC heapDesc = ID3D12Heap_GetDesc(*heap);
DEBUG_INFO("ID3D12Heap : Size:%I64u Alignment:%I64u",
heapDesc.SizeInBytes, heapDesc.Alignment);
*alignSize = heapDesc.Alignment;
D3D12_COMMAND_QUEUE_DESC queueDesc =
{
.Type = D3D12_COMMAND_LIST_TYPE_COPY,
@@ -186,7 +228,11 @@ static bool d3d12_create(unsigned textures)
goto exit;
}
this->ivshmemBase = ivshmemBase;
comRef_toGlobal(this->device , device );
comRef_toGlobal(this->device3 , device3 );
comRef_toGlobal(this->heap , heap );
comRef_toGlobal(this->commandQueue, commandQueue);
comRef_toGlobal(this->fence , fence );
@@ -213,48 +259,12 @@ static bool d3d12_configure(
this->format = format;
this->pitch = ALIGN_TO(width * bpp, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
D3D12_HEAP_PROPERTIES readbackHeapProperties =
{
.Type = D3D12_HEAP_TYPE_READBACK,
};
D3D12_RESOURCE_DESC texDesc =
{
.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
.Width = this->pitch * height,
.Height = 1,
.DepthOrArraySize = 1,
.MipLevels = 1,
.Format = DXGI_FORMAT_UNKNOWN,
.SampleDesc.Count = 1,
.SampleDesc.Quality = 0,
.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
.Flags = D3D12_RESOURCE_FLAG_NONE
};
comRef_defineLocal(ID3D12Resource , texture );
comRef_defineLocal(ID3D12Fence , fence );
comRef_defineLocal(ID3D12CommandAllocator , commandAllocator );
comRef_defineLocal(ID3D12GraphicsCommandList, graphicsCommandList);
comRef_defineLocal(ID3D12CommandList , commandList );
for (int i = 0; i < this->textures; ++i)
{
status = ID3D12Device_CreateCommittedResource(*this->device,
&readbackHeapProperties,
D3D12_HEAP_FLAG_NONE,
&texDesc,
D3D12_RESOURCE_STATE_COPY_DEST,
NULL,
&IID_ID3D12Resource,
(void **)texture);
if (FAILED(status))
{
DEBUG_WINERROR("Failed to create texture", status);
goto exit;
}
this->texture[i].event = CreateEvent(NULL, FALSE, FALSE, NULL);
if (!this->texture[i].event)
{
@@ -307,7 +317,6 @@ static bool d3d12_configure(
goto exit;
}
comRef_toGlobal(this->texture[i].tex , texture );
comRef_toGlobal(this->texture[i].fence , fence );
comRef_toGlobal(this->texture[i].commandAllocator , commandAllocator );
comRef_toGlobal(this->texture[i].graphicsCommandList, graphicsCommandList);
@@ -320,6 +329,7 @@ static bool d3d12_configure(
exit:
comRef_scopePop();
return result;
}
@@ -328,13 +338,15 @@ static void d3d12_free(void)
if (!this)
return;
for (int i = 0; i < this->textures; ++i)
for(int i = 0; i < this->textures; ++i)
if (this->texture[i].event)
CloseHandle(this->texture[i].event);
if (this->event)
CloseHandle(this->event);
free(this->frameBuffers);
if (this->d3d12)
FreeLibrary(this->d3d12);
@@ -342,7 +354,11 @@ static void d3d12_free(void)
this = NULL;
}
static bool d3d12_preCopy(ID3D11Texture2D * src, unsigned textureIndex)
static bool d3d12_preCopy(
ID3D11Texture2D * src,
unsigned textureIndex,
unsigned frameBufferIndex,
FrameBuffer * frameBuffer)
{
comRef_scopePush();
bool result = false;
@@ -352,9 +368,6 @@ static bool d3d12_preCopy(ID3D11Texture2D * src, unsigned textureIndex)
ID3D11DeviceContext_Flush(dxgi_getContext());
dxgi_contextUnlock();
if (this->copySleep > 0)
nsleep((uint64_t)(this->copySleep * 1000000));
this->d12src = NULL;
if (this->sharedCacheCount > -1)
{
@@ -363,8 +376,7 @@ static bool d3d12_preCopy(ID3D11Texture2D * src, unsigned textureIndex)
if (this->sharedCache[i].tex == src)
{
this->d12src = *this->sharedCache[i].d12src;
result = true;
goto exit;
goto framebuffer;
}
}
@@ -419,8 +431,52 @@ static bool d3d12_preCopy(ID3D11Texture2D * src, unsigned textureIndex)
this->sharedCacheCount = -1;
}
framebuffer:
if (this->frameBuffers[frameBufferIndex])
goto done;
// prepare a IVSHMEM backed texture for the provided framebuffer
D3D12_RESOURCE_DESC desc =
{
.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
.Width = this->height * this->pitch,
.Height = 1,
.DepthOrArraySize = 1,
.MipLevels = 1,
.Format = DXGI_FORMAT_UNKNOWN,
.SampleDesc.Count = 1,
.SampleDesc.Quality = 0,
.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
.Flags = D3D12_RESOURCE_FLAG_ALLOW_CROSS_ADAPTER
};
comRef_defineLocal(ID3D12Resource, resource);
status = ID3D12Device3_CreatePlacedResource(
*this->device3,
*this->heap,
(uintptr_t)framebuffer_get_data(frameBuffer) - (uintptr_t)this->ivshmemBase,
&desc,
D3D12_RESOURCE_STATE_COPY_DEST,
NULL,
&IID_ID3D12Resource,
(void**)resource);
if (FAILED(status))
{
DEBUG_WINERROR("Failed to create D3D12 resource from heap", status);
goto exit;
}
comRef_toGlobal(this->frameBuffers[frameBufferIndex], resource);
done:
this->dstResource = this->frameBuffers[frameBufferIndex];
result = true;
if (this->copySleep > 0)
nsleep((uint64_t)(this->copySleep * 1000000));
exit:
if (!result)
comRef_scopePop();
@@ -441,7 +497,7 @@ static bool d3d12_copyFull(ID3D11Texture2D * src, unsigned textureIndex)
D3D12_TEXTURE_COPY_LOCATION destLoc =
{
.pResource = *tex->tex,
.pResource = *this->dstResource,
.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
.PlacedFootprint =
{
@@ -463,7 +519,6 @@ static bool d3d12_copyFull(ID3D11Texture2D * src, unsigned textureIndex)
return true;
}
static bool d3d12_copyRect(ID3D11Texture2D * src, unsigned textureIndex,
FrameDamageRect * rect)
{
@@ -478,7 +533,7 @@ static bool d3d12_copyRect(ID3D11Texture2D * src, unsigned textureIndex,
D3D12_TEXTURE_COPY_LOCATION destLoc =
{
.pResource = *tex->tex,
.pResource = *this->dstResource,
.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
.PlacedFootprint =
{
@@ -561,7 +616,8 @@ static bool d3d12_postCopy(ID3D11Texture2D * src, unsigned textureIndex)
result = true;
exit:
comRef_scopePop(); //push is in pre-copy
//push is in preCopy
comRef_scopePop();
return result;
}
@@ -587,32 +643,17 @@ static CaptureResult d3d12_mapTexture(unsigned textureIndex, void ** map)
return CAPTURE_RESULT_ERROR;
}
D3D12_RANGE range =
{
.Begin = 0,
.End = this->pitch * this->height
};
status = ID3D12Resource_Map(*tex->tex, 0, &range, map);
if (FAILED(status))
{
DEBUG_WINERROR("Failed to map the texture", status);
return CAPTURE_RESULT_ERROR;
}
return CAPTURE_RESULT_OK;
}
static CaptureResult d3d12_writeFrame(int textureIndex, FrameBuffer * frame)
{
framebuffer_set_write_ptr(frame, this->height * this->pitch);
return CAPTURE_RESULT_OK;
}
static void d3d12_unmapTexture(unsigned textureIndex)
{
struct D3D12Texture * tex = &this->texture[textureIndex];
D3D12_RANGE range =
{
.Begin = 0,
.End = 0
};
ID3D12Resource_Unmap(*tex->tex, 0, &range);
}
static void d3d12_preRelease(void)
@@ -632,6 +673,7 @@ struct DXGICopyBackend copyBackendD3D12 =
.copyRect = d3d12_copyRect,
.postCopy = d3d12_postCopy,
.mapTexture = d3d12_mapTexture,
.writeFrame = d3d12_writeFrame,
.unmapTexture = d3d12_unmapTexture,
.preRelease = d3d12_preRelease,
};

View File

@@ -125,6 +125,7 @@ struct DXGIInterface
D3D_FEATURE_LEVEL featureLevel;
IDXGIOutputDuplication ** dup;
int maxTextures;
void * ivshmemBase;
Texture * texture;
int texRIndex;
int texWIndex;
@@ -275,7 +276,10 @@ static void dxgi_initOptions(void)
ppEarlyInit();
}
static bool dxgi_create(CaptureGetPointerBuffer getPointerBufferFn, CapturePostPointerBuffer postPointerBufferFn)
static bool dxgi_create(
void * ivshmemBase,
CaptureGetPointerBuffer getPointerBufferFn,
CapturePostPointerBuffer postPointerBufferFn)
{
DEBUG_ASSERT(!this);
this = calloc(1, sizeof(*this));
@@ -302,6 +306,7 @@ static bool dxgi_create(CaptureGetPointerBuffer getPointerBufferFn, CapturePostP
this->allowRGB24 = option_get_bool("dxgi", "allowRGB24");
this->dwmFlush = option_get_bool("dxgi", "dwmFlush");
this->disableDamage = option_get_bool("dxgi", "disableDamage");
this->ivshmemBase = ivshmemBase;
this->texture = calloc(this->maxTextures, sizeof(*this->texture));
this->getPointerBufferFn = getPointerBufferFn;
this->postPointerBufferFn = postPointerBufferFn;
@@ -360,7 +365,7 @@ static bool initVertexShader(void)
return true;
}
static bool dxgi_init(void)
static bool dxgi_init(unsigned * alignSize)
{
DEBUG_ASSERT(this);
@@ -800,7 +805,11 @@ static bool dxgi_init(void)
{
if (!strcasecmp(copyBackend, backends[i]->code))
{
if (!backends[i]->create(this->maxTextures))
if (!backends[i]->create(
this->ivshmemBase,
alignSize,
LGMP_Q_FRAME_LEN,
this->maxTextures))
{
DEBUG_ERROR("Failed to initialize selected capture backend: %s", backends[i]->name);
backends[i]->free();
@@ -1052,7 +1061,8 @@ static void computeTexDamage(Texture * tex)
}
}
static CaptureResult dxgi_capture(void)
static CaptureResult dxgi_capture(unsigned frameBufferIndex,
FrameBuffer * frameBuffer)
{
DEBUG_ASSERT(this);
DEBUG_ASSERT(this->initialized);
@@ -1101,6 +1111,7 @@ static CaptureResult dxgi_capture(void)
{
if (result == CAPTURE_RESULT_ERROR)
DEBUG_WINERROR("AcquireNextFrame failed", status);
goto exit;
}
@@ -1230,7 +1241,8 @@ static CaptureResult dxgi_capture(void)
computeFrameDamage(tex);
computeTexDamage(tex);
if (!this->backend->preCopy(dst, this->texWIndex))
if (!this->backend->preCopy(dst, this->texWIndex,
frameBufferIndex, frameBuffer))
{
result = CAPTURE_RESULT_ERROR;
goto exit;
@@ -1440,39 +1452,48 @@ static CaptureResult dxgi_getFrame(FrameBuffer * frame, int frameIndex)
Texture * tex = &this->texture[this->texRIndex];
FrameDamage * damage = &this->frameDamage[frameIndex];
if (tex->damageRectsCount == 0 ||
damage->count < 0 ||
damage->count + tex->damageRectsCount > KVMFR_MAX_DAMAGE_RECTS)
if (this->backend->writeFrame)
{
// damage all
framebuffer_write(frame, tex->map, this->pitch * this->dataHeight);
CaptureResult result = this->backend->writeFrame(frameIndex, frame);
if (result != CAPTURE_RESULT_OK)
return result;
}
else
{
memcpy(damage->rects + damage->count, tex->damageRects,
tex->damageRectsCount * sizeof(*tex->damageRects));
damage->count += tex->damageRectsCount;
if (this->outputFormat == CAPTURE_FMT_BGR_32)
if (tex->damageRectsCount == 0 ||
damage->count < 0 ||
damage->count + tex->damageRectsCount > KVMFR_MAX_DAMAGE_RECTS)
{
FrameDamageRect scaledDamageRects[damage->count];
for (int i = 0; i < ARRAYSIZE(scaledDamageRects); i++) {
FrameDamageRect rect = damage->rects[i];
int originalX = rect.x;
int scaledX = originalX * 3 / 4;
rect.x = scaledX;
rect.width = (((originalX + rect.width) * 3 + 3) / 4) - scaledX;
scaledDamageRects[i] = rect;
}
rectsBufferToFramebuffer(scaledDamageRects, damage->count, this->bpp, frame,
this->pitch, this->dataHeight, tex->map, this->pitch);
// damage all
framebuffer_write(frame, tex->map, this->pitch * this->dataHeight);
}
else
{
rectsBufferToFramebuffer(damage->rects, damage->count, this->bpp, frame,
this->pitch, this->dataHeight, tex->map, this->pitch);
memcpy(damage->rects + damage->count, tex->damageRects,
tex->damageRectsCount * sizeof(*tex->damageRects));
damage->count += tex->damageRectsCount;
if (this->outputFormat == CAPTURE_FMT_BGR_32)
{
FrameDamageRect scaledDamageRects[damage->count];
for (int i = 0; i < ARRAYSIZE(scaledDamageRects); i++) {
FrameDamageRect rect = damage->rects[i];
int originalX = rect.x;
int scaledX = originalX * 3 / 4;
rect.x = scaledX;
rect.width = (((originalX + rect.width) * 3 + 3) / 4) - scaledX;
scaledDamageRects[i] = rect;
}
rectsBufferToFramebuffer(scaledDamageRects, damage->count, this->bpp, frame,
this->pitch, this->dataHeight, tex->map, this->pitch);
}
else
{
rectsBufferToFramebuffer(damage->rects, damage->count, this->bpp, frame,
this->pitch, this->dataHeight, tex->map, this->pitch);
}
}
}

View File

@@ -185,8 +185,9 @@ static void nvfbc_initOptions(void)
}
static bool nvfbc_create(
CaptureGetPointerBuffer getPointerBufferFn,
CapturePostPointerBuffer postPointerBufferFn)
void * ivshmemBase,
CaptureGetPointerBuffer getPointerBufferFn,
CapturePostPointerBuffer postPointerBufferFn)
{
if (!NvFBCInit())
return false;
@@ -220,7 +221,7 @@ static void updateScale(void)
this->targetHeight = this->height;
}
static bool nvfbc_init(void)
static bool nvfbc_init(unsigned * alignSize)
{
int adapterIndex = option_get_int("nvfbc", "adapterIndex");
@@ -447,7 +448,8 @@ static void nvfbc_free(void)
NvFBCFree();
}
static CaptureResult nvfbc_capture(void)
static CaptureResult nvfbc_capture(unsigned frameBufferIndex,
FrameBuffer * frameBuffer)
{
// this is a bit of a hack as it causes this thread to block until the next
// present keeping us locked with the refresh rate of the monitor being