d12: implement indirectCopy fallback for compatibility

When the heapTest fails for DMA copies to shared memory, fall back to a
conventional CPU copy via mapped resources. While this is less optimal,
it is still faster than the older DXGI capture backend.
This commit is contained in:
Geoffrey McRae 2025-02-19 16:01:24 +11:00
parent c169d4ab23
commit 77f6054f0a

View File

@ -57,6 +57,7 @@ struct D12Interface
void * ivshmemBase; void * ivshmemBase;
ID3D12Heap ** ivshmemHeap; ID3D12Heap ** ivshmemHeap;
bool indirectCopy;
CaptureGetPointerBuffer getPointerBufferFn; CaptureGetPointerBuffer getPointerBufferFn;
CapturePostPointerBuffer postPointerBufferFn; CapturePostPointerBuffer postPointerBufferFn;
@ -69,6 +70,7 @@ struct D12Interface
D12FrameFormat captureFormat; D12FrameFormat captureFormat;
unsigned formatVer; unsigned formatVer;
unsigned pitch; unsigned pitch;
unsigned bpp;
// output format tracking // output format tracking
D12FrameFormat dstFormat; D12FrameFormat dstFormat;
@ -91,6 +93,8 @@ struct D12Interface
FrameBuffer * frameBuffer; FrameBuffer * frameBuffer;
// the resource backed by the framebuffer // the resource backed by the framebuffer
ID3D12Resource ** resource; ID3D12Resource ** resource;
// the mapped resource if indirectCopy is in use
void * map;
} }
frameBuffers[0]; frameBuffers[0];
}; };
@ -118,7 +122,8 @@ static bool d12_heapTest(ID3D12Device3 * device, ID3D12Heap * heap);
static ID3D12Resource * d12_frameBufferToResource( static ID3D12Resource * d12_frameBufferToResource(
unsigned frameBufferIndex, unsigned frameBufferIndex,
FrameBuffer * frameBuffer, FrameBuffer * frameBuffer,
unsigned size); unsigned size,
void ** map);
// implementation // implementation
@ -159,6 +164,13 @@ static void d12_initOptions(void)
.type = OPTION_TYPE_BOOL, .type = OPTION_TYPE_BOOL,
.value.x_bool = false .value.x_bool = false
}, },
{
.module = "d12",
.name = "indirectCopy",
.description = "Force the less optimal indirect copy method",
.type = OPTION_TYPE_BOOL,
.value.x_bool = false
},
{0} {0}
}; };
@ -183,10 +195,11 @@ static bool d12_create(
this->debug = option_get_bool("d12", "debug" ); this->debug = option_get_bool("d12", "debug" );
this->trackDamage = option_get_bool("d12", "trackDamage" ); this->trackDamage = option_get_bool("d12", "trackDamage" );
this->indirectCopy = option_get_bool("d12", "indirectCopy");
DEBUG_INFO( DEBUG_INFO(
"debug:%d trackDamage:%d", "debug:%d trackDamage:%d indirectCopy:%d",
this->debug, this->trackDamage); this->debug, this->trackDamage, this->indirectCopy);
this->d3d12 = LoadLibrary("d3d12.dll"); this->d3d12 = LoadLibrary("d3d12.dll");
if (!this->d3d12) if (!this->d3d12)
@ -352,9 +365,11 @@ static bool d12_init(void * ivshmemBase, unsigned * alignSize)
*device, D3D12_COMMAND_LIST_TYPE_COMPUTE, &this->computeCommand, L"Compute")) *device, D3D12_COMMAND_LIST_TYPE_COMPUTE, &this->computeCommand, L"Compute"))
goto exit; goto exit;
comRef_defineLocal(ID3D12Heap, ivshmemHeap);
if (!this->indirectCopy)
{
// Create the IVSHMEM heap // Create the IVSHMEM heap
this->ivshmemBase = ivshmemBase; this->ivshmemBase = ivshmemBase;
comRef_defineLocal(ID3D12Heap, ivshmemHeap);
DEBUG_TRACE("ID3D12Device3_OpenExistingHeapFromAddress"); DEBUG_TRACE("ID3D12Device3_OpenExistingHeapFromAddress");
hr = ID3D12Device3_OpenExistingHeapFromAddress( hr = ID3D12Device3_OpenExistingHeapFromAddress(
*device, ivshmemBase, &IID_ID3D12Heap, (void **)ivshmemHeap); *device, ivshmemBase, &IID_ID3D12Heap, (void **)ivshmemHeap);
@ -375,9 +390,10 @@ static bool d12_init(void * ivshmemBase, unsigned * alignSize)
DEBUG_TRACE("d12_heapTest"); DEBUG_TRACE("d12_heapTest");
if (!d12_heapTest(*device, *ivshmemHeap)) if (!d12_heapTest(*device, *ivshmemHeap))
{ {
DEBUG_ERROR( this->indirectCopy = true;
"Unable to create resources in the IVSHMEM heap, is REBAR working?"); DEBUG_WARN("Unable to create resources in the IVSHMEM heap, "
goto exit; "falling back to indirect copy");
}
} }
// initialize the backend // initialize the backend
@ -413,7 +429,9 @@ static bool d12_init(void * ivshmemBase, unsigned * alignSize)
comRef_toGlobal(this->device , device ); comRef_toGlobal(this->device , device );
comRef_toGlobal(this->copyQueue , copyQueue ); comRef_toGlobal(this->copyQueue , copyQueue );
comRef_toGlobal(this->computeQueue, computeQueue ); comRef_toGlobal(this->computeQueue, computeQueue );
comRef_toGlobal(this->ivshmemHeap , ivshmemHeap );
if (!this->indirectCopy)
comRef_toGlobal(this->ivshmemHeap, ivshmemHeap);
DEBUG_TRACE("Init success"); DEBUG_TRACE("Init success");
result = true; result = true;
@ -441,6 +459,13 @@ static bool d12_deinit(void)
{ {
bool result = true; bool result = true;
if (this->indirectCopy)
{
for(int i = 0; i < this->frameBufferCount; ++i)
if (this->frameBuffers[i].map)
ID3D12Resource_Unmap(*this->frameBuffers[i].resource, 0, NULL);
}
D12Effect * effect; D12Effect * effect;
vector_forEach(effect, &this->effects) vector_forEach(effect, &this->effects)
d12_effectFree(&effect); d12_effectFree(&effect);
@ -606,7 +631,7 @@ static CaptureResult d12_waitFrame(unsigned frameBufferIndex,
this->pitch = layout.Footprint.RowPitch; this->pitch = layout.Footprint.RowPitch;
const unsigned maxRows = maxFrameSize / layout.Footprint.RowPitch; const unsigned maxRows = maxFrameSize / layout.Footprint.RowPitch;
const unsigned bpp = this->dstFormat.format == CAPTURE_FMT_RGBA16F ? 8 : 4; this->bpp = this->dstFormat.format == CAPTURE_FMT_RGBA16F ? 8 : 4;
frame->formatVer = this->formatVer; frame->formatVer = this->formatVer;
frame->screenWidth = srcFormat.width; frame->screenWidth = srcFormat.width;
@ -617,7 +642,7 @@ static CaptureResult d12_waitFrame(unsigned frameBufferIndex,
frame->frameHeight = this->dstFormat.height; frame->frameHeight = this->dstFormat.height;
frame->truncated = maxRows < this->dstFormat.desc.Height; frame->truncated = maxRows < this->dstFormat.desc.Height;
frame->pitch = this->pitch; frame->pitch = this->pitch;
frame->stride = this->pitch / bpp; frame->stride = this->pitch / this->bpp;
frame->format = this->dstFormat.format; frame->format = this->dstFormat.format;
frame->hdr = this->dstFormat.colorSpace == frame->hdr = this->dstFormat.colorSpace ==
DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020; DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020;
@ -673,6 +698,9 @@ static CaptureResult d12_getFrame(unsigned frameBufferIndex,
comRef_defineLocal(ID3D12Resource, src); comRef_defineLocal(ID3D12Resource, src);
DEBUG_TRACE("d12_backendFetch"); DEBUG_TRACE("d12_backendFetch");
*src = d12_backendFetch(this->backend, frameBufferIndex, &desc); *src = d12_backendFetch(this->backend, frameBufferIndex, &desc);
unsigned rectCount = 0;
FrameDamageRect allRects[this->nbDirtyRects + desc.nbDirtyRects];
if (!*src) if (!*src)
{ {
DEBUG_ERROR("D12 backend failed to produce an expected frame: %u", DEBUG_ERROR("D12 backend failed to produce an expected frame: %u",
@ -680,9 +708,11 @@ static CaptureResult d12_getFrame(unsigned frameBufferIndex,
goto exit; goto exit;
} }
void * map;
comRef_defineLocal(ID3D12Resource, dst) comRef_defineLocal(ID3D12Resource, dst)
DEBUG_TRACE("d12_frameBufferToResource"); DEBUG_TRACE("d12_frameBufferToResource");
*dst = d12_frameBufferToResource(frameBufferIndex, frameBuffer, maxFrameSize); *dst = d12_frameBufferToResource(frameBufferIndex, frameBuffer, maxFrameSize,
&map);
if (!*dst) if (!*dst)
goto exit; goto exit;
@ -759,15 +789,12 @@ static CaptureResult d12_getFrame(unsigned frameBufferIndex,
{ {
DEBUG_TRACE("Damage aware update"); DEBUG_TRACE("Damage aware update");
FrameDamageRect allRects[this->nbDirtyRects + desc.nbDirtyRects];
unsigned count = 0;
/* we must update the rects that were dirty in the prior frame also, /* we must update the rects that were dirty in the prior frame also,
* otherwise the frame in memory will not be consistent when areas need to * otherwise the frame in memory will not be consistent when areas need to
* be redrawn by the client, such as under the cursor */ * be redrawn by the client, such as under the cursor */
for(const RECT * rect = this->dirtyRects; for(const RECT * rect = this->dirtyRects;
rect < this->dirtyRects + this->nbDirtyRects; ++rect) rect < this->dirtyRects + this->nbDirtyRects; ++rect)
allRects[count++] = (FrameDamageRect){ allRects[rectCount++] = (FrameDamageRect){
.x = rect->left, .x = rect->left,
.y = rect->top, .y = rect->top,
.width = rect->right - rect->left, .width = rect->right - rect->left,
@ -777,7 +804,7 @@ static CaptureResult d12_getFrame(unsigned frameBufferIndex,
/* add the new dirtyRects to the array */ /* add the new dirtyRects to the array */
for(const RECT * rect = desc.dirtyRects; for(const RECT * rect = desc.dirtyRects;
rect < desc.dirtyRects + desc.nbDirtyRects; ++rect) rect < desc.dirtyRects + desc.nbDirtyRects; ++rect)
allRects[count++] = (FrameDamageRect){ allRects[rectCount++] = (FrameDamageRect){
.x = rect->left, .x = rect->left,
.y = rect->top, .y = rect->top,
.width = rect->right - rect->left, .width = rect->right - rect->left,
@ -785,10 +812,10 @@ static CaptureResult d12_getFrame(unsigned frameBufferIndex,
}; };
/* resolve the rects */ /* resolve the rects */
count = rectsMergeOverlapping(allRects, count); rectCount = rectsMergeOverlapping(allRects, rectCount);
/* copy all the rects */ /* copy all the rects */
for(FrameDamageRect * rect = allRects; rect < allRects + count; ++rect) for(FrameDamageRect * rect = allRects; rect < allRects + rectCount; ++rect)
{ {
D3D12_BOX box = D3D12_BOX box =
{ {
@ -832,9 +859,30 @@ static CaptureResult d12_getFrame(unsigned frameBufferIndex,
DEBUG_TRACE("Fence wait"); DEBUG_TRACE("Fence wait");
d12_commandGroupWait(&this->copyCommand); d12_commandGroupWait(&this->copyCommand);
if (this->indirectCopy)
{
if (rectCount == 0)
{
framebuffer_write(frameBuffer, map,
this->pitch * this->dstFormat.desc.Height);
}
else
{
/* copy all the rects */
rectsBufferToFramebuffer(allRects, rectCount, this->bpp, frameBuffer,
this->pitch, this->dstFormat.desc.Height, map, this->pitch);
// signal the frame is complete // signal the frame is complete
framebuffer_set_write_ptr(frameBuffer, framebuffer_set_write_ptr(frameBuffer,
this->dstFormat.desc.Height * this->pitch); this->dstFormat.desc.Height * this->pitch);
}
}
else
{
// signal the frame is complete
framebuffer_set_write_ptr(frameBuffer,
this->dstFormat.desc.Height * this->pitch);
}
// reset the command queues // reset the command queues
if (this->effectsActive) if (this->effectsActive)
@ -1031,7 +1079,7 @@ exit:
} }
static ID3D12Resource * d12_frameBufferToResource(unsigned frameBufferIndex, static ID3D12Resource * d12_frameBufferToResource(unsigned frameBufferIndex,
FrameBuffer * frameBuffer, unsigned size) FrameBuffer * frameBuffer, unsigned size, void ** map)
{ {
ID3D12Resource * result = NULL; ID3D12Resource * result = NULL;
comRef_scopePush(10); comRef_scopePush(10);
@ -1042,6 +1090,7 @@ static ID3D12Resource * d12_frameBufferToResource(unsigned frameBufferIndex,
if (fb->resource && fb->frameBuffer == frameBuffer && fb->size >= size) if (fb->resource && fb->frameBuffer == frameBuffer && fb->size >= size)
{ {
result = *fb->resource; result = *fb->resource;
*map = fb->map;
ID3D12Resource_AddRef(result); ID3D12Resource_AddRef(result);
goto exit; goto exit;
} }
@ -1049,6 +1098,70 @@ static ID3D12Resource * d12_frameBufferToResource(unsigned frameBufferIndex,
fb->size = size; fb->size = size;
fb->frameBuffer = frameBuffer; fb->frameBuffer = frameBuffer;
comRef_defineLocal(ID3D12Resource, resource);
HRESULT hr;
if (this->indirectCopy)
{
if (fb->map)
{
ID3D12Resource_Unmap(*fb->resource, 0, NULL);
fb->map = NULL;
}
D3D12_HEAP_PROPERTIES heapProps =
{
.Type = D3D12_HEAP_TYPE_READBACK,
.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN,
.CreationNodeMask = 1,
.VisibleNodeMask = 1
};
D3D12_RESOURCE_DESC desc =
{
.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
.Alignment = 0,
.Width = size,
.Height = 1,
.DepthOrArraySize = 1,
.MipLevels = 1,
.Format = DXGI_FORMAT_UNKNOWN,
.SampleDesc.Count = 1,
.SampleDesc.Quality = 0,
.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
.Flags = D3D12_RESOURCE_FLAG_NONE
};
hr = ID3D12Device3_CreateCommittedResource(
*this->device,
&heapProps,
D3D12_HEAP_FLAG_NONE,
&desc,
D3D12_RESOURCE_STATE_COPY_DEST,
NULL,
&IID_ID3D12Resource,
(void **)resource);
if (FAILED(hr))
{
DEBUG_WINERROR("Failed to create the intermediate ID3D12Resource", hr);
goto exit;
}
fb->map = NULL;
D3D12_RANGE range = {0, 0};
HRESULT hr = ID3D12Resource_Map(*resource, 0, &range, &fb->map);
if (FAILED(hr) || !fb->map)
{
DEBUG_WINERROR("Failed to map readback resource", hr);
goto exit;
}
*map = fb->map;
}
else
{
D3D12_RESOURCE_DESC desc = D3D12_RESOURCE_DESC desc =
{ {
.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
@ -1064,11 +1177,11 @@ static ID3D12Resource * d12_frameBufferToResource(unsigned frameBufferIndex,
.Flags = D3D12_RESOURCE_FLAG_ALLOW_CROSS_ADAPTER .Flags = D3D12_RESOURCE_FLAG_ALLOW_CROSS_ADAPTER
}; };
comRef_defineLocal(ID3D12Resource, resource); hr = ID3D12Device3_CreatePlacedResource(
HRESULT hr = ID3D12Device3_CreatePlacedResource(
*this->device, *this->device,
*this->ivshmemHeap, *this->ivshmemHeap,
(uintptr_t)framebuffer_get_data(frameBuffer) - (uintptr_t)this->ivshmemBase, (uintptr_t)framebuffer_get_data(frameBuffer) -
(uintptr_t)this->ivshmemBase,
&desc, &desc,
D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_DEST,
NULL, NULL,
@ -1080,6 +1193,7 @@ static ID3D12Resource * d12_frameBufferToResource(unsigned frameBufferIndex,
DEBUG_WINERROR("Failed to create the FrameBuffer ID3D12Resource", hr); DEBUG_WINERROR("Failed to create the FrameBuffer ID3D12Resource", hr);
goto exit; goto exit;
} }
}
// cache the resource // cache the resource
comRef_toGlobal(fb->resource, resource); comRef_toGlobal(fb->resource, resource);