From 60b01566e101d510418791f9b9d348a788057025 Mon Sep 17 00:00:00 2001 From: Geoffrey McRae Date: Fri, 2 Feb 2024 02:37:17 +1100 Subject: [PATCH] [host] d12: implement initial RGB24 support --- common/include/common/util.h | 5 + .../Windows/capture/D12/CMakeLists.txt | 1 + host/platform/Windows/capture/D12/backend.h | 6 +- .../platform/Windows/capture/D12/backend/dd.c | 17 +- .../Windows/capture/D12/command_group.c | 20 +- .../Windows/capture/D12/command_group.h | 4 + host/platform/Windows/capture/D12/d12.c | 216 ++++++---- host/platform/Windows/capture/D12/d12.h | 70 ++++ host/platform/Windows/capture/D12/effect.h | 75 ++++ .../Windows/capture/D12/effect/rgb24.c | 379 ++++++++++++++++++ 10 files changed, 704 insertions(+), 89 deletions(-) create mode 100644 host/platform/Windows/capture/D12/effect.h create mode 100644 host/platform/Windows/capture/D12/effect/rgb24.c diff --git a/common/include/common/util.h b/common/include/common/util.h index 2c9d9d2b..271c42f3 100644 --- a/common/include/common/util.h +++ b/common/include/common/util.h @@ -21,6 +21,8 @@ #ifndef _H_LG_COMMON_UTIL_ #define _H_LG_COMMON_UTIL_ +#include + #ifndef min #define min(a,b) ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); \ _a < _b ? 
_a : _b; }) @@ -43,4 +45,7 @@ #define unlikely(expr) __builtin_expect(!!(expr), 0) #define likely(expr) __builtin_expect(!!(expr), 1) +#define _STR(x) #x +#define STR(x) _STR(x) + #endif diff --git a/host/platform/Windows/capture/D12/CMakeLists.txt b/host/platform/Windows/capture/D12/CMakeLists.txt index 06c00bb1..3d1e820c 100644 --- a/host/platform/Windows/capture/D12/CMakeLists.txt +++ b/host/platform/Windows/capture/D12/CMakeLists.txt @@ -5,6 +5,7 @@ add_library(capture_D12 STATIC d12.c command_group.c backend/dd.c + effect/rgb24.c ) target_link_libraries(capture_D12 diff --git a/host/platform/Windows/capture/D12/backend.h b/host/platform/Windows/capture/D12/backend.h index 01238fe4..59010db9 100644 --- a/host/platform/Windows/capture/D12/backend.h +++ b/host/platform/Windows/capture/D12/backend.h @@ -57,8 +57,8 @@ struct D12Backend unsigned frameBufferIndex); }; -static inline bool d12_backendCreate( - D12Backend * backend, D12Backend ** instance, unsigned frameBuffers) +static inline bool d12_backendCreate(const D12Backend * backend, + D12Backend ** instance, unsigned frameBuffers) { if (!backend->create(instance, frameBuffers)) return false; @@ -90,6 +90,6 @@ static inline ID3D12Resource * d12_backendFetch(D12Backend * instance, // Backend defines -extern D12Backend D12Backend_DD; +extern const D12Backend D12Backend_DD; #endif diff --git a/host/platform/Windows/capture/D12/backend/dd.c b/host/platform/Windows/capture/D12/backend/dd.c index 0cef10c3..1eb3a4f1 100644 --- a/host/platform/Windows/capture/D12/backend/dd.c +++ b/host/platform/Windows/capture/D12/backend/dd.c @@ -41,7 +41,11 @@ typedef struct DDCacheInfo { D3D11_TEXTURE2D_DESC format; - ID3D11Texture2D ** srcTex; + + /* this value is likely released, only used to check if the texture supplied + by DD is different, do not rely on it pointing to valid memory! 
*/ + ID3D11Texture2D * srcTex; + ID3D12Resource ** d12Res; ID3D11Fence ** fence; ID3D12Fence ** d12Fence; @@ -386,7 +390,10 @@ retry: // if this was not a frame update, go back and try again if (frameInfo.LastPresentTime.QuadPart == 0) + { + comRef_release(res); goto retry; + } exit: comRef_scopePop(); @@ -605,7 +612,7 @@ static bool d12_dd_getCache(DDInstance * this, } // check for a resource match - if (*cache->srcTex != srcTex) + if (cache->srcTex != srcTex) continue; // check if the match is not valid @@ -715,8 +722,8 @@ static bool d12_dd_convertResource(DDInstance * this, CloseHandle(sharedHandle); // store the details - ID3D11Texture2D_AddRef(srcTex); - comRef_toGlobal(cache->srcTex , &srcTex ); + cache->srcTex = srcTex; + comRef_toGlobal(cache->d12Res , dst ); comRef_toGlobal(cache->fence , fence ); comRef_toGlobal(cache->d12Fence, d12Fence); @@ -730,7 +737,7 @@ exit: return result; } -D12Backend D12Backend_DD = +const D12Backend D12Backend_DD = { .name = "Desktop Duplication", .codeName = "DD", diff --git a/host/platform/Windows/capture/D12/command_group.c b/host/platform/Windows/capture/D12/command_group.c index 928a1714..a56ba9a5 100644 --- a/host/platform/Windows/capture/D12/command_group.c +++ b/host/platform/Windows/capture/D12/command_group.c @@ -129,13 +129,21 @@ bool d12_commandGroupExecute(ID3D12CommandQueue * queue, D12CommandGroup * grp) return false; } - if (ID3D12Fence_GetCompletedValue(*grp->fence) < grp->fenceValue) - { - ID3D12Fence_SetEventOnCompletion(*grp->fence, grp->fenceValue, grp->event); - WaitForSingleObject(grp->event, INFINITE); - } + return true; +} - hr = ID3D12CommandAllocator_Reset(*grp->allocator); +void d12_commandGroupWait(D12CommandGroup * grp) +{ + if (ID3D12Fence_GetCompletedValue(*grp->fence) >= grp->fenceValue) + return; + + ID3D12Fence_SetEventOnCompletion(*grp->fence, grp->fenceValue, grp->event); + WaitForSingleObject(grp->event, INFINITE); +} + +bool d12_commandGroupReset(D12CommandGroup * grp) +{ + HRESULT hr = 
ID3D12CommandAllocator_Reset(*grp->allocator); if (FAILED(hr)) { DEBUG_WINERROR("Failed to reset the command allocator", hr); diff --git a/host/platform/Windows/capture/D12/command_group.h b/host/platform/Windows/capture/D12/command_group.h index 119a566d..7010881d 100644 --- a/host/platform/Windows/capture/D12/command_group.h +++ b/host/platform/Windows/capture/D12/command_group.h @@ -42,4 +42,8 @@ void d12_commandGroupFree(D12CommandGroup * grp); bool d12_commandGroupExecute(ID3D12CommandQueue * queue, D12CommandGroup * grp); +void d12_commandGroupWait(D12CommandGroup * grp); + +bool d12_commandGroupReset(D12CommandGroup * grp); + #endif diff --git a/host/platform/Windows/capture/D12/d12.c b/host/platform/Windows/capture/D12/d12.c index 6bed0d0b..5bf70736 100644 --- a/host/platform/Windows/capture/D12/d12.c +++ b/host/platform/Windows/capture/D12/d12.c @@ -28,6 +28,7 @@ #include "com_ref.h" #include "backend.h" +#include "effect.h" #include "command_group.h" #include @@ -35,30 +36,17 @@ #include // definitions - -typedef HRESULT (*D3D12CreateDevice_t)( - IUnknown *pAdapter, - D3D_FEATURE_LEVEL MinimumFeatureLevel, - REFIID riid, - void **ppDevice -); - -typedef HRESULT (*D3D12GetDebugInterface_t)( - REFIID riid, - void **ppvDebug -); - struct D12Interface { - HMODULE d3d12; - D3D12CreateDevice_t D3D12CreateDevice; - D3D12GetDebugInterface_t D3D12GetDebugInterface; + HMODULE d3d12; IDXGIFactory2 ** factory; ID3D12Device3 ** device; - ID3D12CommandQueue ** commandQueue; + ID3D12CommandQueue ** copyQueue; + ID3D12CommandQueue ** computeQueue; D12CommandGroup copyCommand; + D12CommandGroup computeCommand; void * ivshmemBase; ID3D12Heap ** ivshmemHeap; @@ -67,14 +55,19 @@ struct D12Interface CapturePostPointerBuffer postPointerBufferFn; D12Backend * backend; + D12Effect * rgb24; // capture format tracking - D3D12_RESOURCE_DESC lastFormat; + D3D12_RESOURCE_DESC captureFormat; unsigned formatVer; + // output format tracking + D3D12_RESOURCE_DESC dstFormat; + // options 
bool debug; + unsigned frameBufferCount; // must be last struct { @@ -90,8 +83,11 @@ struct D12Interface // gloabls +struct DX12 DX12 = {0}; ComScope * d12_comScope = NULL; +// defines + // locals static struct D12Interface * this = NULL; @@ -108,19 +104,6 @@ static ID3D12Resource * d12_frameBufferToResource( FrameBuffer * frameBuffer, unsigned size); -// workarounds - -static D3D12_HEAP_DESC _ID3D12Heap_GetDesc(ID3D12Heap* This) -{ - D3D12_HEAP_DESC __ret; - return *This->lpVtbl->GetDesc(This, &__ret); -} - -static D3D12_RESOURCE_DESC _ID3D12Resource_GetDesc(ID3D12Resource* This) { - D3D12_RESOURCE_DESC __ret; - return *This->lpVtbl->GetDesc(This,&__ret); -} - // implementation static const char * d12_getName(void) @@ -154,12 +137,16 @@ static bool d12_create( return false; } - this->D3D12CreateDevice = (D3D12CreateDevice_t) + DX12.D3D12CreateDevice = (typeof(DX12.D3D12CreateDevice)) GetProcAddress(this->d3d12, "D3D12CreateDevice"); - this->D3D12GetDebugInterface = (D3D12GetDebugInterface_t) + DX12.D3D12GetDebugInterface = (typeof(DX12.D3D12GetDebugInterface)) GetProcAddress(this->d3d12, "D3D12GetDebugInterface"); + DX12.D3D12SerializeVersionedRootSignature = + (typeof(DX12.D3D12SerializeVersionedRootSignature)) + GetProcAddress(this->d3d12, "D3D12SerializeVersionedRootSignature"); + this->getPointerBufferFn = getPointerBufferFn; this->postPointerBufferFn = postPointerBufferFn; @@ -171,6 +158,7 @@ static bool d12_create( return false; } + this->frameBufferCount = frameBuffers; return true; } @@ -202,7 +190,7 @@ static bool d12_init(void * ivshmemBase, unsigned * alignSize) if (this->debug) { comRef_defineLocal(ID3D12Debug1, debug); - hr = this->D3D12GetDebugInterface(&IID_ID3D12Debug1, (void **)debug); + hr = DX12.D3D12GetDebugInterface(&IID_ID3D12Debug1, (void **)debug); if (FAILED(hr)) { DEBUG_WINERROR("D3D12GetDebugInterface", hr); @@ -216,7 +204,7 @@ static bool d12_init(void * ivshmemBase, unsigned * alignSize) // create the D3D12 device 
comRef_defineLocal(ID3D12Device3, device); - hr = this->D3D12CreateDevice( + hr = DX12.D3D12CreateDevice( (IUnknown *)*adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device3, @@ -237,10 +225,10 @@ static bool d12_init(void * ivshmemBase, unsigned * alignSize) .Flags = D3D12_COMMAND_QUEUE_FLAG_NONE, }; - comRef_defineLocal(ID3D12CommandQueue, commandQueue); + comRef_defineLocal(ID3D12CommandQueue, copyQueue); retryCreateCommandQueue: hr = ID3D12Device3_CreateCommandQueue( - *device, &queueDesc, &IID_ID3D12CommandQueue, (void **)commandQueue); + *device, &queueDesc, &IID_ID3D12CommandQueue, (void **)copyQueue); if (FAILED(hr)) { if (queueDesc.Priority == D3D12_COMMAND_QUEUE_PRIORITY_GLOBAL_REALTIME) @@ -250,15 +238,37 @@ retryCreateCommandQueue: goto retryCreateCommandQueue; } - DEBUG_WINERROR("Failed to create ID3D12CommandQueue", hr); + DEBUG_WINERROR("Failed to create ID3D12CommandQueue (copy)", hr); goto exit; } - ID3D12CommandQueue_SetName(*commandQueue, L"Command Queue"); + ID3D12CommandQueue_SetName(*copyQueue, L"Copy"); + + // create the compute queue, using the same priority as the copy queue + D3D12_COMMAND_QUEUE_DESC computeQueueDesc = + { + .Type = D3D12_COMMAND_LIST_TYPE_COMPUTE, + .Flags = D3D12_COMMAND_QUEUE_FLAG_NONE, + }; + computeQueueDesc.Priority = queueDesc.Priority; + + comRef_defineLocal(ID3D12CommandQueue, computeQueue); + hr = ID3D12Device3_CreateCommandQueue( + *device, &computeQueueDesc, &IID_ID3D12CommandQueue, (void **)computeQueue); + if (FAILED(hr)) + { + DEBUG_WINERROR("Failed to create the ID3D12CommandQueue (compute)", hr); + goto exit; + } + ID3D12CommandQueue_SetName(*computeQueue, L"Compute"); if (!d12_commandGroupCreate( *device, D3D12_COMMAND_LIST_TYPE_COPY, &this->copyCommand, L"Copy")) goto exit; + if (!d12_commandGroupCreate( + *device, D3D12_COMMAND_LIST_TYPE_COMPUTE, &this->computeCommand, L"Compute")) + goto exit; + // Create the IVSHMEM heap this->ivshmemBase = ivshmemBase; comRef_defineLocal(ID3D12Heap, ivshmemHeap); @@ -271,17 +281,21 @@ retryCreateCommandQueue: } //
Adjust the alignSize based on the required heap alignment - D3D12_HEAP_DESC heapDesc = _ID3D12Heap_GetDesc(*ivshmemHeap); + D3D12_HEAP_DESC heapDesc = ID3D12Heap_GetDesc(*ivshmemHeap); *alignSize = heapDesc.Alignment; // initialize the backend if (!d12_backendInit(this->backend, this->debug, *device, *adapter, *output)) goto exit; - comRef_toGlobal(this->factory , factory ); - comRef_toGlobal(this->device , device ); - comRef_toGlobal(this->commandQueue, commandQueue); - comRef_toGlobal(this->ivshmemHeap , ivshmemHeap ); + if (!d12_effectCreate(&D12Effect_RGB24, &this->rgb24, *device)) + goto exit; + + comRef_toGlobal(this->factory , factory ); + comRef_toGlobal(this->device , device ); + comRef_toGlobal(this->copyQueue , copyQueue ); + comRef_toGlobal(this->computeQueue, computeQueue ); + comRef_toGlobal(this->ivshmemHeap , ivshmemHeap ); result = true; @@ -300,11 +314,29 @@ static void d12_stop(void) static bool d12_deinit(void) { bool result = true; - if (!this->backend->deinit(this->backend)) + d12_effectFree(&this->rgb24); + + if (!d12_backendDeinit(this->backend)) result = false; - d12_commandGroupFree(&this->copyCommand); + d12_commandGroupFree(&this->copyCommand ); + d12_commandGroupFree(&this->computeCommand); + + IDXGIFactory2 * factory = *this->factory; + IDXGIFactory2_AddRef(factory); comRef_freeScope(&d12_comScope); + if (IDXGIFactory2_Release(factory) != 0) + DEBUG_WARN("MEMORY LEAK"); + + // zero the framebuffers + memset(this->frameBuffers, 0, + sizeof(*this->frameBuffers) * this->frameBufferCount); + + /* zero the formats so we properly reinit, otherwise we won't detect the format + change and set up the effect chain */ + memset(&this->captureFormat, 0, sizeof(this->captureFormat)); + memset(&this->dstFormat , 0, sizeof(this->dstFormat )); + return result; } @@ -338,28 +370,47 @@ static CaptureResult d12_waitFrame(unsigned frameBufferIndex, goto exit; } - D3D12_RESOURCE_DESC desc = _ID3D12Resource_GetDesc(*src); - if (desc.Width !=
this->lastFormat.Width || - desc.Height != this->lastFormat.Height || - desc.Format != this->lastFormat.Format) + + D3D12_RESOURCE_DESC srcFormat = ID3D12Resource_GetDesc(*src); + D3D12_RESOURCE_DESC dstFormat = srcFormat; + + // if the input format changed, reconfigure the effects + if (dstFormat.Width != this->captureFormat.Width || + dstFormat.Height != this->captureFormat.Height || + dstFormat.Format != this->captureFormat.Format) { - ++this->formatVer; - memcpy(&this->lastFormat, &desc, sizeof(desc)); + this->captureFormat = dstFormat; + + //TODO: loop through an effect array + if (!d12_effectSetFormat(this->rgb24, *this->device, &srcFormat, &dstFormat)) + { + DEBUG_ERROR("Failed to set the effect input format"); + goto exit; + } + + // if the output format changed + if (dstFormat.Width != this->dstFormat.Width || + dstFormat.Height != this->dstFormat.Height || + dstFormat.Format != this->dstFormat.Format) + { + ++this->formatVer; + this->dstFormat = dstFormat; + } } - const unsigned int maxRows = maxFrameSize / (desc.Width * 4); + const unsigned int maxRows = maxFrameSize / (dstFormat.Width * 4); frame->formatVer = this->formatVer; - frame->screenWidth = desc.Width; - frame->screenHeight = desc.Height; - frame->dataWidth = desc.Width; - frame->dataHeight = min(maxRows, desc.Height); - frame->frameWidth = desc.Width; - frame->frameHeight = desc.Height; - frame->truncated = maxRows < desc.Height; - frame->pitch = desc.Width * 4; - frame->stride = desc.Width; - frame->format = CAPTURE_FMT_BGRA; + frame->screenWidth = srcFormat.Width; + frame->screenHeight = srcFormat.Height; + frame->dataWidth = dstFormat.Width; + frame->dataHeight = min(maxRows, dstFormat.Height); + frame->frameWidth = srcFormat.Width; + frame->frameHeight = srcFormat.Height; + frame->truncated = maxRows < dstFormat.Height; + frame->pitch = dstFormat.Width * 4; + frame->stride = dstFormat.Width; + frame->format = CAPTURE_FMT_BGR_32; frame->hdr = false; frame->hdrPQ = false; frame->rotation = 
CAPTURE_ROT_0; @@ -376,7 +427,7 @@ static CaptureResult d12_getFrame(unsigned frameBufferIndex, FrameBuffer * frameBuffer, const size_t maxFrameSize) { CaptureResult result = CAPTURE_RESULT_ERROR; - comRef_scopePush(2); + comRef_scopePush(3); comRef_defineLocal(ID3D12Resource, src); *src = d12_backendFetch(this->backend, frameBufferIndex); @@ -392,11 +443,19 @@ static CaptureResult d12_getFrame(unsigned frameBufferIndex, if (!*dst) goto exit; + // place a fence into the compute queue + result = d12_backendSync(this->backend, *this->computeQueue); + if (result != CAPTURE_RESULT_OK) + goto exit; + + ID3D12Resource * next = *src; + next = d12_effectRun( + this->rgb24, *this->device, *this->computeCommand.gfxList, next); + // copy into the framebuffer resource - D3D12_RESOURCE_DESC desc = _ID3D12Resource_GetDesc(*src); D3D12_TEXTURE_COPY_LOCATION srcLoc = { - .pResource = *src, + .pResource = next, .Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, .SubresourceIndex = 0 }; @@ -410,11 +469,11 @@ static CaptureResult d12_getFrame(unsigned frameBufferIndex, .Offset = 0, .Footprint = { - .Format = desc.Format, - .Width = desc.Width, - .Height = desc.Height, + .Format = this->dstFormat.Format, + .Width = this->dstFormat.Width, + .Height = this->dstFormat.Height, .Depth = 1, - .RowPitch = desc.Width * 4 + .RowPitch = this->dstFormat.Width * 4 } } }; @@ -422,14 +481,21 @@ static CaptureResult d12_getFrame(unsigned frameBufferIndex, ID3D12GraphicsCommandList_CopyTextureRegion( *this->copyCommand.gfxList, &dstLoc, 0, 0, 0, &srcLoc, NULL); - // allow the backend to insert a fence into the command queue if it needs it - result = d12_backendSync(this->backend, *this->commandQueue); - if (result != CAPTURE_RESULT_OK) + // execute all the commands + d12_commandGroupExecute(*this->computeQueue, &this->computeCommand); + d12_commandGroupWait(&this->computeCommand); + if (!d12_commandGroupReset(&this->computeCommand)) goto exit; - d12_commandGroupExecute(*this->commandQueue, 
&this->copyCommand); + d12_commandGroupExecute(*this->copyQueue , &this->copyCommand ); + d12_commandGroupWait(&this->copyCommand); + if (!d12_commandGroupReset(&this->copyCommand)) + goto exit; + + // signal the frame is complete + framebuffer_set_write_ptr(frameBuffer, + this->dstFormat.Height * this->dstFormat.Width * 4); - framebuffer_set_write_ptr(frameBuffer, desc.Height * desc.Width * 4); result = CAPTURE_RESULT_OK; exit: diff --git a/host/platform/Windows/capture/D12/d12.h b/host/platform/Windows/capture/D12/d12.h index 826d4a17..151ef986 100644 --- a/host/platform/Windows/capture/D12/d12.h +++ b/host/platform/Windows/capture/D12/d12.h @@ -24,6 +24,8 @@ #include "com_ref.h" #include "interface/capture.h" +#include + extern ComScope * d12_comScope; #define comRef_toGlobal(dst, src) \ _comRef_toGlobal(d12_comScope, dst, src) @@ -33,4 +35,72 @@ extern ComScope * d12_comScope; void d12_updatePointer( CapturePointer * pointer, void * shape, size_t shapeSize); +// DirectX12 library functions + +struct DX12 +{ + PFN_D3D12_CREATE_DEVICE D3D12CreateDevice; + PFN_D3D12_GET_DEBUG_INTERFACE D3D12GetDebugInterface; + PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE + D3D12SerializeVersionedRootSignature; +}; + +extern struct DX12 DX12; + +#ifdef ID3D12Heap_GetDesc +#undef ID3D12Heap_GetDesc +static inline D3D12_HEAP_DESC ID3D12Heap_GetDesc(ID3D12Heap* This) +{ + D3D12_HEAP_DESC __ret; + return *This->lpVtbl->GetDesc(This, &__ret); +} +#endif + +#ifdef ID3D12Resource_GetDesc +#undef ID3D12Resource_GetDesc +static inline D3D12_RESOURCE_DESC ID3D12Resource_GetDesc(ID3D12Resource* This) { + D3D12_RESOURCE_DESC __ret; + return *This->lpVtbl->GetDesc(This,&__ret); +} +#endif + +#ifndef ID3DBlob_GetBufferPointer +#define ID3DBlob_GetBufferPointer ID3D10Blob_GetBufferPointer +#endif + +#ifndef ID3DBlob_GetBufferSize +#define ID3DBlob_GetBufferSize ID3D10Blob_GetBufferSize +#endif + +#ifdef ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart +#undef 
ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart +static inline D3D12_CPU_DESCRIPTOR_HANDLE + ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart( + ID3D12DescriptorHeap* This) +{ + D3D12_CPU_DESCRIPTOR_HANDLE __ret; + return *This->lpVtbl->GetCPUDescriptorHandleForHeapStart(This,&__ret); +} +#endif + +#ifdef ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart +#undef ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart +static inline D3D12_GPU_DESCRIPTOR_HANDLE + ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart( + ID3D12DescriptorHeap* This) +{ + D3D12_GPU_DESCRIPTOR_HANDLE __ret; + return *This->lpVtbl->GetGPUDescriptorHandleForHeapStart(This,&__ret); +} +#endif + +#ifdef ID3D12Resource_GetDesc +#undef ID3D12Resource_GetDesc +static inline D3D12_RESOURCE_DESC ID3D12Resource_GetDesc(ID3D12Resource* This) +{ + D3D12_RESOURCE_DESC __ret; + return *This->lpVtbl->GetDesc(This,&__ret); +} +#endif + #endif diff --git a/host/platform/Windows/capture/D12/effect.h b/host/platform/Windows/capture/D12/effect.h new file mode 100644 index 00000000..bd0cad09 --- /dev/null +++ b/host/platform/Windows/capture/D12/effect.h @@ -0,0 +1,75 @@ +/** + * Looking Glass + * Copyright © 2017-2024 The Looking Glass Authors + * https://looking-glass.io + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _H_D12_EFFECT_ +#define _H_D12_EFFECT_ + +#include +#include + +typedef struct D12Effect D12Effect; + +struct D12Effect +{ + const char * name; + + bool (*create)(D12Effect ** instance, ID3D12Device3 * device); + + void (*free)(D12Effect ** instance); + + // set the input format, and get the output format of the effect + bool (*setFormat)(D12Effect * effect, + ID3D12Device3 * device, + const D3D12_RESOURCE_DESC * src, + D3D12_RESOURCE_DESC * dst); + + ID3D12Resource * (*run)(D12Effect * effect, + ID3D12Device3 * device, ID3D12GraphicsCommandList * commandList, + ID3D12Resource * src); +}; + +static inline bool d12_effectCreate(const D12Effect * effect, + D12Effect ** instance, ID3D12Device3 * device) +{ + if (!effect->create(instance, device)) + return false; + memcpy(*instance, effect, sizeof(*effect)); + return true; +} + +static inline void d12_effectFree(D12Effect ** instance) + { (*instance)->free(instance); } + +static inline bool d12_effectSetFormat(D12Effect * effect, + ID3D12Device3 * device, + const D3D12_RESOURCE_DESC * src, + D3D12_RESOURCE_DESC * dst) + { return effect->setFormat(effect, device, src, dst); } + +static inline ID3D12Resource * d12_effectRun(D12Effect * effect, + ID3D12Device3 * device, ID3D12GraphicsCommandList * commandList, + ID3D12Resource * src) + { return effect->run(effect, device, commandList, src); } + +// effect defines + +extern const D12Effect D12Effect_RGB24; + +#endif diff --git a/host/platform/Windows/capture/D12/effect/rgb24.c b/host/platform/Windows/capture/D12/effect/rgb24.c new file mode 100644 index 00000000..d325b20a --- /dev/null +++ b/host/platform/Windows/capture/D12/effect/rgb24.c @@ -0,0 +1,379 @@ +#include "effect.h" + +#include "d12.h" +#include "command_group.h" + +#include 
"com_ref.h" +#include "common/debug.h" +#include "common/windebug.h" +#include "common/array.h" + +#include + +typedef struct TestInstance +{ + D12Effect base; + + ID3D12RootSignature ** rootSignature; + ID3D12PipelineState ** pso; + ID3D12DescriptorHeap ** descHeap; + + unsigned threadsX, threadsY; + ID3D12Resource ** dst; +} +TestInstance; + +#define THREADS 8 + +static bool d12_effect_rgb24Create(D12Effect ** instance, ID3D12Device3 * device) +{ + TestInstance * this = calloc(1, sizeof(*this)); + if (!this) + { + DEBUG_ERROR("out of memory"); + return false; + } + + bool result = false; + HRESULT hr; + comRef_scopePush(10); + + // shader resource view + D3D12_DESCRIPTOR_RANGE descriptorRanges[2] = + { + { + .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV, + .NumDescriptors = 1, + .BaseShaderRegister = 0, + .RegisterSpace = 0, + .OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND + }, + { + .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV, + .NumDescriptors = 1, + .BaseShaderRegister = 0, + .RegisterSpace = 0, + .OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND + } + }; + + // discriptor table + D3D12_ROOT_PARAMETER rootParams[1] = + { + { + .ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, + .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + .DescriptorTable = + { + .NumDescriptorRanges = ARRAY_LENGTH(descriptorRanges), + .pDescriptorRanges = descriptorRanges + } + } + }; + + // root signature + D3D12_VERSIONED_ROOT_SIGNATURE_DESC rootSignatureDesc = + { + .Version = D3D_ROOT_SIGNATURE_VERSION_1, + .Desc_1_0 = + { + .NumParameters = ARRAY_LENGTH(rootParams), + .pParameters = rootParams, + .NumStaticSamplers = 0, + .pStaticSamplers = NULL, + .Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE + } + }; + + // Serialize the root signature + comRef_defineLocal(ID3DBlob, blob ); + comRef_defineLocal(ID3DBlob, error); + hr = DX12.D3D12SerializeVersionedRootSignature( + &rootSignatureDesc, blob, error); + if (FAILED(hr)) + { + 
DEBUG_WINERROR("Failed to serialize the root signature", hr); + if (*error) DEBUG_ERROR("%s", (const char *)ID3DBlob_GetBufferPointer(*error)); + goto exit; + } + + // Create the root signature + comRef_defineLocal(ID3D12RootSignature, rootSignature); + hr = ID3D12Device_CreateRootSignature( + device, + 0, + ID3DBlob_GetBufferPointer(*blob), + ID3DBlob_GetBufferSize(*blob), + &IID_ID3D12RootSignature, + (void **)rootSignature); + if (FAILED(hr)) + { + DEBUG_WINERROR("Failed to create the root signature", hr); + goto exit; + } + + // Compile the shader + const char * testCode = + "Texture2D<float4> src : register(t0);\n" + "RWTexture2D<float4> dst : register(u0);\n" + "\n" + "[numthreads(" STR(THREADS) ", " STR(THREADS) ", 1)]\n" + "void main(uint3 dt : SV_DispatchThreadID)\n" + "{\n" + " uint fstInputX = (dt.x * 4) / 3;\n" + " float4 color0 = src[uint2(fstInputX, dt.y)];\n" + "\n" + " uint sndInputX = fstInputX + 1;\n" + " float4 color3 = src[uint2(sndInputX, dt.y)];\n" + "\n" + " uint xmod3 = dt.x % 3;\n" + "\n" + " float4 color1 = xmod3 <= 1 ? color0 : color3;\n" + " float4 color2 = xmod3 == 0 ? color0 : color3;\n" + "\n" + " float b = color0.bgr[xmod3];\n" + " float g = color1.grb[xmod3];\n" + " float r = color2.rbg[xmod3];\n" + " float a = color3.bgr[xmod3];\n" + "\n" + " dst[dt.xy] = float4(r, g, b, a);\n" + "}\n"; + + bool debug = false; + hr = D3DCompile( + testCode, strlen(testCode), + NULL, NULL, NULL, "main", "cs_5_0", + debug ?
(D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION) : 0, + 0, blob, error); + if (FAILED(hr)) + { + DEBUG_ERROR("Failed to compile the shader"); + DEBUG_ERROR("%s", (const char *)ID3DBlob_GetBufferPointer(*error)); + goto exit; + } + + // Create the PSO + D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = + { + .pRootSignature = *rootSignature, + .CS = + { + .pShaderBytecode = ID3DBlob_GetBufferPointer(*blob), + .BytecodeLength = ID3DBlob_GetBufferSize (*blob) + } + }; + + comRef_defineLocal(ID3D12PipelineState, pso); + hr = ID3D12Device3_CreateComputePipelineState( + device, &psoDesc, &IID_ID3D12PipelineState, (void **)pso); + if (FAILED(hr)) + { + DEBUG_WINERROR("Failed to create the PSO", hr); + goto exit; + } + + // Create the descriptor heap + D3D12_DESCRIPTOR_HEAP_DESC descHeapDesc = + { + .Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + .NumDescriptors = ARRAY_LENGTH(descriptorRanges), + .Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, + .NodeMask = 0 + }; + + comRef_defineLocal(ID3D12DescriptorHeap, descHeap); + hr = ID3D12Device3_CreateDescriptorHeap( + device, &descHeapDesc, &IID_ID3D12DescriptorHeap, (void **)descHeap); + if (FAILED(hr)) + { + DEBUG_WINERROR("Failed to create the parameter heap", hr); + goto exit; + } + + comRef_toGlobal(this->rootSignature, rootSignature); + comRef_toGlobal(this->pso , pso ); + comRef_toGlobal(this->descHeap , descHeap ); + + result = true; + +exit: + if (result) + *instance = &this->base; + else + free(this); + + comRef_scopePop(); + return result; +} + +static void d12_effect_rgb24Free(D12Effect ** instance) +{ + TestInstance * this = UPCAST(TestInstance, *instance); + + free(this); +} + +static bool d12_effect_rgb24SetFormat(D12Effect * effect, + ID3D12Device3 * device, + const D3D12_RESOURCE_DESC * src, + D3D12_RESOURCE_DESC * dst) +{ + TestInstance * this = UPCAST(TestInstance, effect); + comRef_scopePush(1); + + bool result = false; + HRESULT hr; + + if (src->Format != DXGI_FORMAT_B8G8R8A8_UNORM) + { + 
DEBUG_ERROR("RGB24 requires DXGI_FORMAT_B8G8R8A8_UNORM input"); + goto exit; + } + + D3D12_HEAP_PROPERTIES heapProps = + { + .Type = D3D12_HEAP_TYPE_DEFAULT, + .CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN, + .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN, + .CreationNodeMask = 1, + .VisibleNodeMask = 1 + }; + + const unsigned packedPitch = ALIGN_TO(src->Width * 3, 4); + D3D12_RESOURCE_DESC desc = + { + .Format = DXGI_FORMAT_B8G8R8A8_UNORM, + .Width = ALIGN_TO(packedPitch / 4, 64), + .Height = (src->Width * src->Height) / (packedPitch / 3), + .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D, + .Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, + .MipLevels = 1, + .DepthOrArraySize = 1, + .SampleDesc.Count = 1 + }; + + comRef_defineLocal(ID3D12Resource, res); + hr = ID3D12Device3_CreateCommittedResource( + device, &heapProps, D3D12_HEAP_FLAG_CREATE_NOT_ZEROED, &desc, + D3D12_RESOURCE_STATE_COPY_SOURCE, NULL, &IID_ID3D12Resource, + (void **)res); + + if (FAILED(hr)) + { + DEBUG_ERROR("Failed to create the destination texture"); + goto exit; + } + + comRef_toGlobal(this->dst, res); + this->threadsX = (desc.Width + (THREADS-1)) / THREADS; + this->threadsY = (desc.Height + (THREADS-1)) / THREADS; + + *dst = desc; + result = true; + +exit: + comRef_scopePop(); + return result; +} + +static ID3D12Resource * d12_effect_rgb24Run(D12Effect * effect, + ID3D12Device3 * device, ID3D12GraphicsCommandList * commandList, + ID3D12Resource * src) +{ + TestInstance * this = UPCAST(TestInstance, effect); + + // transition the destination texture to unordered access so we can write to it + { + D3D12_RESOURCE_BARRIER barrier = + { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = + { + .pResource = *this->dst, + .StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE, + .StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS, + .Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES + } + }; + 
ID3D12GraphicsCommandList_ResourceBarrier(commandList, 1, &barrier); + } + + // get the heap handle + D3D12_CPU_DESCRIPTOR_HANDLE cpuSrvUavHandle = + ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(*this->descHeap); + + // descriptor for input SRV + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = + { + .Format = DXGI_FORMAT_B8G8R8A8_UNORM, + .ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D, + .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + .Texture2D.MipLevels = 1 + }; + ID3D12Device3_CreateShaderResourceView( + device, src, &srvDesc, cpuSrvUavHandle); + + // move to the next slot + cpuSrvUavHandle.ptr += ID3D12Device3_GetDescriptorHandleIncrementSize( + device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + + // descriptor for the output UAV + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = + { + .Format = DXGI_FORMAT_B8G8R8A8_UNORM, + .ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D + }; + ID3D12Device3_CreateUnorderedAccessView( + device, *this->dst, NULL, &uavDesc, cpuSrvUavHandle); + + // bind the descriptor heap to the pipeline + ID3D12GraphicsCommandList_SetDescriptorHeaps(commandList, 1, this->descHeap); + + // set the pipeline state + ID3D12GraphicsCommandList_SetPipelineState(commandList, *this->pso); + + // set the root signature on the command list + ID3D12GraphicsCommandList_SetComputeRootSignature( + commandList, *this->rootSignature); + + // get the GPU side handle for our heap + D3D12_GPU_DESCRIPTOR_HANDLE gpuSrvUavHandle = + ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(*this->descHeap); + + // bind the descriptor tables to the root signature + ID3D12GraphicsCommandList_SetComputeRootDescriptorTable( + commandList, 0, gpuSrvUavHandle); + + ID3D12GraphicsCommandList_Dispatch( + commandList, this->threadsX, this->threadsY, 1); + + // transition the destination texture to a copy source for the next stage + { + D3D12_RESOURCE_BARRIER barrier = + { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = 
D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = + { + .pResource = *this->dst, + .StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS, + .StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE, + .Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES + } + }; + ID3D12GraphicsCommandList_ResourceBarrier(commandList, 1, &barrier); + } + + // return the output buffer + return *this->dst; +} + +const D12Effect D12Effect_RGB24 = +{ + .name = "RGB24", + .create = d12_effect_rgb24Create, + .free = d12_effect_rgb24Free, + .setFormat = d12_effect_rgb24SetFormat, + .run = d12_effect_rgb24Run +};