diff --git a/common/include/common/KVMFR.h b/common/include/common/KVMFR.h index 54fe5b11..abfb32f6 100644 --- a/common/include/common/KVMFR.h +++ b/common/include/common/KVMFR.h @@ -28,7 +28,7 @@ #include "types.h" #define KVMFR_MAGIC "KVMFR---" -#define KVMFR_VERSION 19 +#define KVMFR_VERSION 20 #define KVMFR_MAX_DAMAGE_RECTS 64 @@ -149,6 +149,8 @@ typedef struct KVMFRFrame FrameType type; // the frame data type uint32_t screenWidth; // the client's screen width uint32_t screenHeight; // the client's screen height + uint32_t dataWidth; // the packed width of the frame data + uint32_t dataHeight; // the packed height of the frame data uint32_t frameWidth; // the frame width uint32_t frameHeight; // the frame height FrameRotation rotation; // the frame rotation diff --git a/common/include/common/types.h b/common/include/common/types.h index f266d955..afdd61d9 100644 --- a/common/include/common/types.h +++ b/common/include/common/types.h @@ -55,6 +55,7 @@ typedef enum FrameType FRAME_TYPE_RGBA , // RGBA interleaved: R,G,B,A 32bpp FRAME_TYPE_RGBA10 , // RGBA interleaved: R,G,B,A 10,10,10,2 bpp FRAME_TYPE_RGBA16F , // RGBA interleaved: R,G,B,A 16,16,16,16 bpp float + FRAME_TYPE_BGR , // BGR interleaved: B,G,R 24bpp FRAME_TYPE_MAX , // sentinel value } FrameType; diff --git a/common/src/KVMFR.c b/common/src/KVMFR.c index 81eb8a7c..01c26cd9 100644 --- a/common/src/KVMFR.c +++ b/common/src/KVMFR.c @@ -26,5 +26,6 @@ const char * FrameTypeStr[FRAME_TYPE_MAX] = "FRAME_TYPE_BGRA", "FRAME_TYPE_RGBA", "FRAME_TYPE_RGBA10", - "FRAME_TYPE_RGBA16F" + "FRAME_TYPE_RGBA16F", + "FRAME_TYPE_BGR" }; diff --git a/host/include/interface/capture.h b/host/include/interface/capture.h index cffd760d..9fb4ca22 100644 --- a/host/include/interface/capture.h +++ b/host/include/interface/capture.h @@ -48,6 +48,7 @@ typedef enum CaptureFormat CAPTURE_FMT_RGBA , CAPTURE_FMT_RGBA10 , CAPTURE_FMT_RGBA16F, + CAPTURE_FMT_BGR , // pointer formats CAPTURE_FMT_COLOR , @@ -69,18 +70,21 @@ CaptureRotation; typedef struct CaptureFrame { - unsigned int formatVer; - unsigned int screenWidth; - unsigned int screenHeight; - unsigned int frameWidth; - unsigned int frameHeight; - bool truncated; - unsigned int pitch; - unsigned int stride; - CaptureFormat format; - bool hdr; - bool hdrPQ; - CaptureRotation rotation; + unsigned formatVer; + unsigned screenWidth; // actual screen width + unsigned screenHeight; // actual screen height + unsigned dataWidth; // the width of the packed frame data + unsigned dataHeight; // the height of the packed frame data + unsigned frameWidth; // width of the frame image + unsigned frameHeight; // height of the frame image + unsigned pitch; // total width of one row of data in bytes + unsigned stride; // total width of one row of data in pixels + CaptureFormat format; // the data format of the frame + bool truncated; // true if the frame data is truncated + bool hdr; // true if the frame format is HDR + bool hdrPQ; // true if the frame format is PQ transformed + CaptureRotation rotation; // output rotation of the frame + uint32_t damageRectsCount; FrameDamageRect damageRects[KVMFR_MAX_DAMAGE_RECTS]; } @@ -94,9 +98,9 @@ typedef struct CapturePointer bool shapeUpdate; CaptureFormat format; - unsigned int hx, hy; - unsigned int width, height; - unsigned int pitch; + unsigned hx, hy; + unsigned width, height; + unsigned pitch; } CapturePointer; @@ -123,6 +127,6 @@ typedef struct CaptureInterface CaptureResult (*capture )(void); CaptureResult (*waitFrame )(CaptureFrame * frame, const size_t maxFrameSize); - CaptureResult (*getFrame )(FrameBuffer * frame, const unsigned int height, int frameIndex); + CaptureResult (*getFrame )(FrameBuffer * frame, int frameIndex); } CaptureInterface; diff --git a/host/platform/Windows/capture/DXGI/CMakeLists.txt b/host/platform/Windows/capture/DXGI/CMakeLists.txt index 6b78bbcd..1b60dc2f 100644 --- a/host/platform/Windows/capture/DXGI/CMakeLists.txt +++ b/host/platform/Windows/capture/DXGI/CMakeLists.txt @@ -10,6 +10,7 @@ add_library(capture_DXGI STATIC src/com_ref.c src/pp/sdrwhitelevel.c + src/pp/rgb24.c ) add_definitions("-DCOBJMACROS -DINITGUID") diff --git a/host/platform/Windows/capture/DXGI/src/d3d11.c b/host/platform/Windows/capture/DXGI/src/d3d11.c index e6ec122b..d931c1b9 100644 --- a/host/platform/Windows/capture/DXGI/src/d3d11.c +++ b/host/platform/Windows/capture/DXGI/src/d3d11.c @@ -35,9 +35,7 @@ struct D3D11Backend struct D3D11TexImpl { - ID3D11Texture2D ** gpu; - ID3D11Texture2D ** cpu; - ID3D11ShaderResourceView ** srv; + ID3D11Texture2D ** cpu; }; #define TEXIMPL(x) ((struct D3D11TexImpl *)(x).impl) @@ -49,7 +47,6 @@ static void d3d11_free(void); static bool d3d11_create(struct DXGIInterface * intf) { - HRESULT status; dxgi = intf; DEBUG_ASSERT(!this); @@ -61,32 +58,24 @@ static bool d3d11_create(struct DXGIInterface * intf) } this->avgMapTime = runningavg_new(10); + return true; +} - D3D11_TEXTURE2D_DESC gpuTexDesc = - { - .Width = dxgi->width, - .Height = dxgi->height, - .MipLevels = dxgi->downsampleLevel + 1, - .ArraySize = 1, - .SampleDesc.Count = 1, - .SampleDesc.Quality = 0, - .Usage = D3D11_USAGE_DEFAULT, - .Format = dxgi->dxgiFormat, - .BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE, - .CPUAccessFlags = 0, - .MiscFlags = D3D11_RESOURCE_MISC_GENERATE_MIPS - }; +static bool d3d11_configure(unsigned width, unsigned height, DXGI_FORMAT format, + unsigned * pitch) +{ + HRESULT status; D3D11_TEXTURE2D_DESC cpuTexDesc = { - .Width = dxgi->targetWidth, - .Height = dxgi->targetHeight, + .Width = width, + .Height = height, .MipLevels = 1, .ArraySize = 1, .SampleDesc.Count = 1, .SampleDesc.Quality = 0, .Usage = D3D11_USAGE_STAGING, - .Format = dxgi->dxgiFormat, + .Format = format, .BindFlags = 0, .CPUAccessFlags = D3D11_CPU_ACCESS_READ, .MiscFlags = 0 @@ -111,22 +100,6 @@ static bool d3d11_create(struct DXGIInterface * intf) DEBUG_WINERROR("Failed to create CPU texture", status); goto fail; } - - if (!dxgi->downsampleLevel) - continue; - - status = ID3D11Device_CreateTexture2D(*dxgi->device, &gpuTexDesc, NULL, - (ID3D11Texture2D **)comRef_newGlobal(&teximpl->gpu)); - - if (FAILED(status)) - { - DEBUG_WINERROR("Failed to create GPU texture", status); - goto fail; - } - - ID3D11Device_CreateShaderResourceView(*dxgi->device, - *(ID3D11Resource **)teximpl->gpu, NULL, - (ID3D11ShaderResourceView **)comRef_newGlobal(&teximpl->srv)); } // map the texture simply to get the pitch and stride @@ -141,16 +114,13 @@ static bool d3d11_create(struct DXGIInterface * intf) goto fail; } - dxgi->pitch = mapping.RowPitch; - dxgi->stride = mapping.RowPitch / dxgi->bpp; - ID3D11DeviceContext_Unmap(*dxgi->deviceContext, *(ID3D11Resource **)TEXIMPL(dxgi->texture[0])->cpu, 0); + *pitch = mapping.RowPitch; return true; fail: - d3d11_free(); return false; } @@ -199,76 +169,12 @@ static void copyFrameFull(Texture * tex, ID3D11Texture2D * src) } } -static void copyFrameDownsampled(Texture * tex, ID3D11Texture2D * src) -{ - struct D3D11TexImpl * teximpl = TEXIMPL(*tex); - ID3D11Texture2D * dst = *teximpl->gpu; - - if (tex->texDamageCount < 0) - ID3D11DeviceContext_ResolveSubresource(*dxgi->deviceContext, - (ID3D11Resource *)dst, 0, - (ID3D11Resource *)src, 0, - dxgi->dxgiFormat); - else - { - for (int i = 0; i < tex->texDamageCount; ++i) - { - FrameDamageRect * rect = tex->texDamageRects + i; - D3D11_BOX box = - { - .left = rect->x << dxgi->downsampleLevel, - .top = rect->y << dxgi->downsampleLevel, - .front = 0, - .back = 1, - .right = (rect->x + rect->width ) << dxgi->downsampleLevel, - .bottom = (rect->y + rect->height) << dxgi->downsampleLevel, - }; - ID3D11DeviceContext_CopySubresourceRegion(*dxgi->deviceContext, - (ID3D11Resource *)dst, 0, box.left, box.top, 0, - (ID3D11Resource *)src, 0, &box); - } - } - - ID3D11DeviceContext_GenerateMips(*dxgi->deviceContext, *teximpl->srv); - if (tex->texDamageCount < 0) - ID3D11DeviceContext_CopySubresourceRegion(*dxgi->deviceContext, - *(ID3D11Resource **)teximpl->cpu, 0, 0, 0, 0, - (ID3D11Resource * )dst , dxgi->downsampleLevel, NULL); - else - { - for (int i = 0; i < tex->texDamageCount; ++i) - { - FrameDamageRect * rect = tex->texDamageRects + i; - D3D11_BOX box = - { - .left = rect->x, - .top = rect->y, - .front = 0, - .back = 1, - .right = rect->x + rect->width , - .bottom = rect->y + rect->height, - }; - - ID3D11DeviceContext_CopySubresourceRegion(*dxgi->deviceContext, - *(ID3D11Resource **)teximpl->cpu, 0, box.left, box.top, 0, - (ID3D11Resource * )dst , dxgi->downsampleLevel, &box); - } - } -} - static bool d3d11_copyFrame(Texture * tex, ID3D11Texture2D * src) { - struct D3D11TexImpl * teximpl = TEXIMPL(*tex); - INTERLOCKED_SECTION(dxgi->deviceContextLock, { tex->copyTime = microtime(); - - if (teximpl->gpu && *teximpl->gpu) - copyFrameDownsampled(tex, src); - else - copyFrameFull(tex, src); - + copyFrameFull(tex, src); ID3D11DeviceContext_Flush(*dxgi->deviceContext); }); return true; @@ -339,6 +245,7 @@ struct DXGICopyBackend copyBackendD3D11 = { .name = "Direct3D 11", .code = "d3d11", .create = d3d11_create, + .configure = d3d11_configure, .free = d3d11_free, .copyFrame = d3d11_copyFrame, .mapTexture = d3d11_mapTexture, diff --git a/host/platform/Windows/capture/DXGI/src/d3d12.c b/host/platform/Windows/capture/DXGI/src/d3d12.c index 3eb3dd47..f953602e 100644 --- a/host/platform/Windows/capture/DXGI/src/d3d12.c +++ b/host/platform/Windows/capture/DXGI/src/d3d12.c @@ -95,8 +95,8 @@ static bool d3d12_create(struct DXGIInterface * intf) { DEBUG_WARN("The D3D12 backend does not support downsampling yet"); dxgi->downsampleLevel = 0; - dxgi->targetWidth = dxgi->width; - dxgi->targetHeight = dxgi->height; + dxgi->outputWidth = dxgi->width; + dxgi->outputHeight = dxgi->height; } if (dxgi->debug) diff --git a/host/platform/Windows/capture/DXGI/src/dxgi.c b/host/platform/Windows/capture/DXGI/src/dxgi.c index 2d26caa9..80b8e52c 100644 --- a/host/platform/Windows/capture/DXGI/src/dxgi.c +++ b/host/platform/Windows/capture/DXGI/src/dxgi.c @@ -51,6 +51,7 @@ //post processers extern const DXGIPostProcess DXGIPP_SDRWhiteLevel; +extern const DXGIPostProcess DXGIPP_RGB24; typedef struct { @@ -58,6 +59,9 @@ typedef struct ID3D11ShaderResourceView * srv; const DXGIPostProcess * pp; void * opaque; + + bool configured; + int rows, cols; } PostProcessInstance; @@ -89,7 +93,10 @@ static bool dxgi_deinit(void); static CaptureResult dxgi_releaseFrame(void); static bool ppInit(const DXGIPostProcess * pp, bool shareable); -static ID3D11Texture2D * ppRun(Texture * tex, ID3D11Texture2D * src); +static ID3D11Texture2D * ppRun(Texture * tex, ID3D11Texture2D * src, + int * width, int * height, + int * rows , int * cols, + CaptureFormat * format); static void ppFreeAll(void); // implementation @@ -301,7 +308,7 @@ static bool initVertexShader(void) // compile and create the vertex shader comRef_defineLocal(ID3DBlob, byteCode); - if (!compileShader(byteCode, "main", "vs_5_0", vshaderSrc)) + if (!compileShader(byteCode, "main", "vs_5_0", vshaderSrc, NULL)) return false; comRef_defineLocal(ID3D11VertexShader, vshader); @@ -530,7 +537,8 @@ static bool dxgi_init(void) *tmp, D3D_DRIVER_TYPE_UNKNOWN, NULL, - D3D11_CREATE_DEVICE_VIDEO_SUPPORT, + D3D11_CREATE_DEVICE_VIDEO_SUPPORT | + (this->debug ? D3D11_CREATE_DEVICE_DEBUG : 0), featureLevels, featureLevelCount, D3D11_SDK_VERSION, (ID3D11Device **)comRef_newGlobal(&this->device), @@ -729,7 +737,7 @@ static bool dxgi_init(void) this->hdr = this->dxgiColorSpace == DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020; } - this->bpp = 4; + // set the initial format switch(this->dxgiFormat) { case DXGI_FORMAT_B8G8R8A8_UNORM: @@ -740,10 +748,12 @@ static bool dxgi_init(void) this->format = CAPTURE_FMT_RGBA; break; - // we convert to HDR10 to save bandwidth + case DXGI_FORMAT_R10G10B10A2_UNORM: + this->format = CAPTURE_FMT_RGBA10; + break; + case DXGI_FORMAT_R16G16B16A16_FLOAT: - this->dxgiFormat = DXGI_FORMAT_R10G10B10A2_UNORM; - this->format = CAPTURE_FMT_RGBA10; + this->format = CAPTURE_FMT_RGBA16F; break; default: @@ -752,8 +762,8 @@ static bool dxgi_init(void) } this->downsampleLevel = 0; - this->targetWidth = this->width; - this->targetHeight = this->height; + this->outputWidth = this->width; + this->outputHeight = this->height; DownsampleRule * rule, * match = NULL; vector_forEachRef(rule, &downsampleRules) @@ -770,11 +780,11 @@ static bool dxgi_init(void) { DEBUG_INFO("Matched downsample rule %d", rule->id); this->downsampleLevel = match->level; - this->targetWidth >>= match->level; - this->targetHeight >>= match->level; + this->outputWidth >>= match->level; + this->outputHeight >>= match->level; } - DEBUG_INFO("Request Size : %u x %u", this->targetWidth, this->targetHeight); + DEBUG_INFO("Request Size : %u x %u", this->outputWidth, this->outputHeight); const char * copyBackend = option_get_string("dxgi", "copyBackend"); for (int i = 0; i < ARRAY_LENGTH(backends); ++i) @@ -792,7 +802,7 @@ static bool dxgi_init(void) } } - DEBUG_INFO("Output Size : %u x %u", this->targetWidth, this->targetHeight); + DEBUG_INFO("Output Size : %u x %u", this->outputWidth, this->outputHeight); if (!this->backend) { @@ -812,23 +822,21 @@ static bool dxgi_init(void) if (!initVertexShader()) goto fail; - const D3D11_VIEWPORT vp = - { - .TopLeftX = 0.0f, - .TopLeftY = 0.0f, - .Width = this->width, - .Height = this->height, - .MinDepth = 0.0f, - .MaxDepth = 1.0f, - }; - ID3D11DeviceContext_RSSetViewports(*this->deviceContext, 1, &vp); - // if HDR add the SDRWhiteLevel post processor to correct the output - if (this->hdr && !ppInit(&DXGIPP_SDRWhiteLevel, - this->backend != ©BackendD3D11)) + if (this->hdr) { - DEBUG_ERROR("Failed to initialize the SDRWhiteLevel post processor"); - goto fail; + if (!ppInit(&DXGIPP_SDRWhiteLevel, this->backend != ©BackendD3D11)) + { + DEBUG_ERROR("Failed to initialize the SDRWhiteLevel post processor"); + goto fail; + } + } + else + { + // only support DX11 for this atm + if (this->backend == ©BackendD3D11) + if (!ppInit(&DXGIPP_RGB24, false)) + DEBUG_WARN("Failed to initialize the RGB24 post processor"); } for (int i = 0; i < LGMP_Q_FRAME_LEN; ++i) @@ -874,6 +882,7 @@ static bool dxgi_deinit(void) ppFreeAll(); if (this->backend) { + this->backendConfigured = false; this->backend->free(); this->backend = NULL; } @@ -1121,9 +1130,75 @@ static CaptureResult dxgi_capture(void) computeFrameDamage(tex); computeTexDamage(tex); - ID3D11Texture2D * dst = ppRun(tex, *src); - if (dst != *src) - ID3D11DeviceContext_Flush(*this->deviceContext); + // run any postprocessors + int width = this->width; + int height = this->height; + int cols = this->width; + int rows = this->height; + CaptureFormat format = this->format; + ID3D11Texture2D *dst = ppRun( + tex, *src, &width, &height, &cols, &rows, &format); + + if (!this->backendConfigured) + { + switch(format) + { + case CAPTURE_FMT_RGBA: + this->bpp = 4; + this->dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM; + break; + + case CAPTURE_FMT_BGRA: + this->bpp = 4; + this->dxgiFormat = DXGI_FORMAT_B8G8R8A8_UNORM; + break; + + case CAPTURE_FMT_RGBA10: + this->bpp = 4; + this->dxgiFormat = DXGI_FORMAT_R10G10B10A2_UNORM; + break; + + case CAPTURE_FMT_RGBA16F: + this->bpp = 8; + this->dxgiFormat = DXGI_FORMAT_R16G16B16A16_FLOAT; + break; + + case CAPTURE_FMT_BGR: + this->bpp = 4; + this->dxgiFormat = DXGI_FORMAT_B8G8R8A8_UNORM; + break; + + case CAPTURE_FMT_COLOR : + case CAPTURE_FMT_MONO : + case CAPTURE_FMT_MASKED: + case CAPTURE_FMT_MAX : + DEBUG_ERROR("Unsupported input format"); + result = CAPTURE_RESULT_ERROR; + goto exit; + } + + unsigned pitch = 0; + LG_LOCK(this->deviceContextLock); + if (!this->backend->configure(cols, rows, this->dxgiFormat, &pitch)) + { + LG_UNLOCK(this->deviceContextLock); + DEBUG_ERROR("Failed to configure the copy backend"); + result = CAPTURE_RESULT_ERROR; + goto exit; + } + LG_UNLOCK(this->deviceContextLock); + + DEBUG_ASSERT(pitch && "copy backend did not return the pitch"); + + this->backendConfigured = true; + this->outputWidth = width; + this->outputHeight = height; + this->outputFormat = format; + this->dataWidth = cols; + this->dataHeight = rows; + this->pitch = pitch; + this->stride = pitch / this->bpp; + } if (!this->backend->copyFrame(tex, dst)) { @@ -1265,17 +1340,19 @@ static CaptureResult dxgi_waitFrame(CaptureFrame * frame, const size_t maxFrameS tex->state = TEXTURE_STATE_MAPPED; - const unsigned int maxHeight = maxFrameSize / this->pitch; + const unsigned int maxRows = maxFrameSize / this->pitch; frame->formatVer = tex->formatVer; frame->screenWidth = this->width; frame->screenHeight = this->height; - frame->frameWidth = this->targetWidth; - frame->frameHeight = min(maxHeight, this->targetHeight); - frame->truncated = maxHeight < this->targetHeight; + frame->dataWidth = this->dataWidth; + frame->dataHeight = min(maxRows, this->dataHeight); + frame->frameWidth = this->outputWidth; + frame->frameHeight = this->outputHeight; + frame->truncated = maxRows < this->dataHeight; frame->pitch = this->pitch; frame->stride = this->stride; - frame->format = this->format; + frame->format = this->outputFormat; frame->hdr = this->hdr; frame->hdrPQ = false; frame->rotation = this->rotation; @@ -1288,8 +1365,7 @@ static CaptureResult dxgi_waitFrame(CaptureFrame * frame, const size_t maxFrameS return CAPTURE_RESULT_OK; } -static CaptureResult dxgi_getFrame(FrameBuffer * frame, - const unsigned int height, int frameIndex) +static CaptureResult dxgi_getFrame(FrameBuffer * frame, int frameIndex) { DEBUG_ASSERT(this); DEBUG_ASSERT(this->initialized); @@ -1301,14 +1377,14 @@ static CaptureResult dxgi_getFrame(FrameBuffer * frame, damage->count + tex->damageRectsCount > KVMFR_MAX_DAMAGE_RECTS; if (damageAll) - framebuffer_write(frame, tex->map, this->pitch * height); + framebuffer_write(frame, tex->map, this->pitch * this->dataHeight); else { memcpy(damage->rects + damage->count, tex->damageRects, tex->damageRectsCount * sizeof(*tex->damageRects)); damage->count += tex->damageRectsCount; rectsBufferToFramebuffer(damage->rects, damage->count, this->bpp, frame, - this->pitch, height, tex->map, this->pitch); + this->pitch, this->dataHeight, tex->map, this->pitch); } for (int i = 0; i < LGMP_Q_FRAME_LEN; ++i) @@ -1373,22 +1449,27 @@ static CaptureResult dxgi_releaseFrame(void) static bool ppInit(const DXGIPostProcess * pp, bool shareable) { - if (!pp->setup(this->device, this->deviceContext, this->output)) + if (!pp->setup(this->device, this->deviceContext, this->output, shareable)) return false; for(int i = 0; i < this->maxTextures; ++i) { PostProcessInstance inst = { .pp = pp }; - if (!pp->init(&inst.opaque, this->width, this->height, shareable)) + if (!pp->init(&inst.opaque)) + { + DEBUG_ERROR("Failed to init a post processor"); return false; - + } vector_push(&this->texture[i].pp, &inst); } return true; } -static ID3D11Texture2D * ppRun(Texture * tex, ID3D11Texture2D * src) +static ID3D11Texture2D * ppRun(Texture * tex, ID3D11Texture2D * src, + int * width, int * height, + int * rows, int * cols, + CaptureFormat * format) { PostProcessInstance * inst; vector_forEachRef(inst, &tex->pp) @@ -1419,12 +1500,42 @@ static ID3D11Texture2D * ppRun(Texture * tex, ID3D11Texture2D * src) if (FAILED(status)) { DEBUG_WINERROR("Failed to create the source resource view", status); - continue; + return NULL; } inst->src = src; } + LG_LOCK(this->deviceContextLock); + if (!inst->configured) + { + if (!inst->pp->configure(inst->opaque, + width, height, + cols , rows, + format)) + { + LG_UNLOCK(this->deviceContextLock); + DEBUG_ERROR("setFormat failed on a post processor"); + return NULL; + } + + inst->configured = true; + inst->rows = *rows; + inst->cols = *cols; + } + + // set the viewport + const D3D11_VIEWPORT vp = + { + .TopLeftX = 0.0f, + .TopLeftY = 0.0f, + .Width = inst->cols, + .Height = inst->rows, + .MinDepth = 0.0f, + .MaxDepth = 1.0f, + }; + ID3D11DeviceContext_RSSetViewports(*this->deviceContext, 1, &vp); + // set the vertex shader ID3D11DeviceContext_VSSetShader( *this->deviceContext, *this->vshader, NULL, 0); @@ -1432,9 +1543,19 @@ static ID3D11Texture2D * ppRun(Texture * tex, ID3D11Texture2D * src) // run the post processor ID3D11Texture2D * out = inst->pp->run(inst->opaque, inst->srv); - // if the post processor returned a different texture then draw to run it + // if the post processor failed + if (!out) + { + LG_UNLOCK(this->deviceContextLock); + return NULL; + } + + // if the post processor did nothing, just continue if (out == src) + { + LG_UNLOCK(this->deviceContextLock); continue; + } // draw the full screen quad ID3D11DeviceContext_IASetPrimitiveTopology( @@ -1443,6 +1564,7 @@ static ID3D11Texture2D * ppRun(Texture * tex, ID3D11Texture2D * src) // the output is now the input src = out; + LG_UNLOCK(this->deviceContextLock); } return src; diff --git a/host/platform/Windows/capture/DXGI/src/dxgi_capture.h b/host/platform/Windows/capture/DXGI/src/dxgi_capture.h index 6b7572f4..269fd756 100644 --- a/host/platform/Windows/capture/DXGI/src/dxgi_capture.h +++ b/host/platform/Windows/capture/DXGI/src/dxgi_capture.h @@ -96,19 +96,21 @@ struct DXGIInterface DXGI_COLOR_SPACE_TYPE dxgiColorSpace; ID3D11VertexShader ** vshader; struct DXGICopyBackend * backend; + bool backendConfigured; CaptureGetPointerBuffer getPointerBufferFn; CapturePostPointerBuffer postPointerBufferFn; LGEvent * frameEvent; unsigned int formatVer; - unsigned int width , targetWidth ; - unsigned int height, targetHeight; + unsigned int width , outputWidth , dataWidth; + unsigned int height, outputHeight, dataHeight; unsigned int downsampleLevel; unsigned int pitch; unsigned int stride; + unsigned int padding; unsigned int bpp; - CaptureFormat format; + CaptureFormat format, outputFormat; CaptureRotation rotation; int lastPointerX, lastPointerY; @@ -122,6 +124,8 @@ struct DXGICopyBackend const char * name; const char * code; bool (*create)(struct DXGIInterface * intf); + bool (*configure)(unsigned width, unsigned height, + DXGI_FORMAT format, unsigned * pitch); void (*free)(void); bool (*copyFrame)(Texture * tex, ID3D11Texture2D * src); CaptureResult (*mapTexture)(Texture * tex); diff --git a/host/platform/Windows/capture/DXGI/src/pp.h b/host/platform/Windows/capture/DXGI/src/pp.h index 4a537807..71ac36b4 100644 --- a/host/platform/Windows/capture/DXGI/src/pp.h +++ b/host/platform/Windows/capture/DXGI/src/pp.h @@ -21,6 +21,9 @@ #include #include +#include "interface/capture.h" +#include "common/locking.h" + typedef struct { /* the friendly name of the processor for debugging */ @@ -33,14 +36,17 @@ typedef struct bool (*setup)( ID3D11Device ** device, ID3D11DeviceContext ** context, - IDXGIOutput ** output); + IDXGIOutput ** output, + bool shareable); /* instance initialization */ - bool (*init)( - void ** opaque, - int width, - int height, - bool shareable); + bool (*init)(void ** opaque); + + /* showtime configuration */ + bool (*configure)(void * opaque, + int * width, int * height, // the image dimensions + int * cols , int * rows , // the texture dimensions for packed data + CaptureFormat * type); /* perform the processing */ ID3D11Texture2D * (*run)(void * opaque, ID3D11ShaderResourceView * srv); diff --git a/host/platform/Windows/capture/DXGI/src/pp/rgb24.c b/host/platform/Windows/capture/DXGI/src/pp/rgb24.c new file mode 100644 index 00000000..59cde845 --- /dev/null +++ b/host/platform/Windows/capture/DXGI/src/pp/rgb24.c @@ -0,0 +1,251 @@ +/** + * Looking Glass + * Copyright © 2017-2023 The Looking Glass Authors + * https://looking-glass.io + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "pp.h" +#include "com_ref.h" +#include "util.h" + +#include "common/debug.h" +#include "common/windebug.h" + +#include + +typedef struct RGB24 +{ + ID3D11Device ** device; + ID3D11DeviceContext ** context; + bool shareable; + + int size; + ID3D11PixelShader ** pshader; +} +RGB24; +static RGB24 this = {0}; + +typedef struct +{ + ID3D11Texture2D ** tex; + ID3D11RenderTargetView ** target; +} +RGB24Inst; + + +static bool rgb24_setup( + ID3D11Device ** device, + ID3D11DeviceContext ** context, + IDXGIOutput ** output, + bool shareable +) +{ + this.device = device; + this.context = context; + this.shareable = shareable; + return true; +} + +static void rgb24_finish(void) +{ + memset(&this, 0, sizeof(this)); +} + +static bool rgb24_configure(void * opaque, + int * width, int * height, + int * cols , int * rows , + CaptureFormat * format) +{ + RGB24Inst * inst = (RGB24Inst *)opaque; + + HRESULT status; + comRef_scopePush(); + + if (!this.pshader) + { + int pixels = (*cols * *rows) * 3 / 4; + this.size = (((int)ceil(sqrt(pixels))) + 0x3F) & ~0x3F; + + char sWidth[6], sHeight[6], sSize[6]; + snprintf(sWidth , sizeof(sWidth ), "%d", *width ); + snprintf(sHeight, sizeof(sHeight), "%d", *height); + snprintf(sSize , sizeof(sSize ), "%d", this.size); + + const D3D_SHADER_MACRO defines[] = + { + {"INPUT_WIDTH" , sWidth }, + {"INPUT_HEIGHT" , sHeight}, + {"OUTPUT_SIZE" , sSize }, + {NULL, NULL} + }; + + static const char * pshaderSrc = + "Texture2D gInputTexture : register(t0);\n" + "\n" + "float4 main(\n" + " float4 position : SV_POSITION,\n" + " float2 texCoord : TEXCOORD0) : SV_TARGET\n" + "{\n" + " uint outputIdx = uint(texCoord.y * OUTPUT_SIZE) * OUTPUT_SIZE +\n" + " uint(texCoord.x * OUTPUT_SIZE);\n" + "\n" + " uint fst = (outputIdx * 4) / 3;\n" + " float4 color0 = gInputTexture.Load(\n" + " uint3(fst % INPUT_WIDTH, fst / INPUT_WIDTH, 0));\n" + "\n" + " uint snd = fst + 1;\n" + " float4 color3 = gInputTexture.Load(\n" + " uint3(snd % INPUT_WIDTH, snd / INPUT_WIDTH, 0));\n" + "\n" + " uint outputIdxMod3 = outputIdx % 3;\n" + "\n" + " float4 color1 = outputIdxMod3 <= 1 ? color0 : color3;\n" + " float4 color2 = outputIdxMod3 == 0 ? color0 : color3;\n" + "\n" + " float b = color0.bgr[outputIdxMod3];\n" + " float g = color1.grb[outputIdxMod3];\n" + " float r = color2.rbg[outputIdxMod3];\n" + " float a = color3.bgr[outputIdxMod3];\n" + " return float4(r, g, b, a);\n" + "}\n"; + + comRef_defineLocal(ID3DBlob, byteCode); + if (!compileShader(byteCode, "main", "ps_5_0", pshaderSrc, defines)) + goto fail; + + comRef_defineLocal(ID3D11PixelShader, pshader); + HRESULT status = ID3D11Device_CreatePixelShader( + *this.device, + ID3D10Blob_GetBufferPointer(*byteCode), + ID3D10Blob_GetBufferSize (*byteCode), + NULL, + pshader); + + if (FAILED(status)) + { + DEBUG_WINERROR("Failed to create the pixel shader", status); + goto fail; + } + + comRef_toGlobal(this.pshader, pshader); + } + + // This texture is actually going to contain the packed BGR24 output + D3D11_TEXTURE2D_DESC texDesc = + { + .Width = this.size, + .Height = this.size, + .MipLevels = 1, + .ArraySize = 1, + .SampleDesc.Count = 1, + .SampleDesc.Quality = 0, + .Usage = D3D11_USAGE_DEFAULT, + .Format = DXGI_FORMAT_B8G8R8A8_UNORM, + .BindFlags = D3D11_BIND_RENDER_TARGET | + D3D11_BIND_SHADER_RESOURCE, + .CPUAccessFlags = 0, + .MiscFlags = 0 + }; + + // allow texture sharing with other backends + if (this.shareable) + texDesc.MiscFlags |= + D3D11_RESOURCE_MISC_SHARED | + D3D11_RESOURCE_MISC_SHARED_NTHANDLE; + + comRef_defineLocal(ID3D11Texture2D, tex); + status = ID3D11Device_CreateTexture2D( + *this.device, &texDesc, NULL, tex); + + if (FAILED(status)) + { + DEBUG_WINERROR("Failed to create the output texture", status); + goto fail; + } + + comRef_defineLocal(ID3D11RenderTargetView, target); + status = ID3D11Device_CreateRenderTargetView( + *this.device, *(ID3D11Resource **)tex, NULL, target); + + if (FAILED(status)) + { + DEBUG_WINERROR("Failed to create the render target view", status); + goto fail; + } + + *cols = this.size; + *rows = this.size; + *format = CAPTURE_FMT_BGR; + + comRef_toGlobal(inst->tex , tex ); + comRef_toGlobal(inst->target, target ); + + comRef_scopePop(); + return true; + +fail: + comRef_scopePop(); + return false; +} + +static bool rgb24_init(void ** opaque) +{ + RGB24Inst * inst = (RGB24Inst *)calloc(1, sizeof(*inst)); + if (!inst) + { + DEBUG_ERROR("Failed to allocate memory"); + return false; + } + + *opaque = inst; + return true; +} + +static void rgb24_free(void * opaque) +{ + RGB24Inst * inst = (RGB24Inst *)opaque; + comRef_release(inst->target); + comRef_release(inst->tex ); + free(inst); +} + +static ID3D11Texture2D * rgb24_run(void * opaque, + ID3D11ShaderResourceView * srv) +{ + RGB24Inst * inst = (RGB24Inst *)opaque; + + // set the pixel shader & resources + ID3D11DeviceContext_PSSetShader(*this.context, *this.pshader, NULL, 0); + ID3D11DeviceContext_PSSetShaderResources(*this.context, 0, 1, &srv); + + // set the render target + ID3D11DeviceContext_OMSetRenderTargets(*this.context, 1, inst->target, NULL); + + return *inst->tex; +} + +DXGIPostProcess DXGIPP_RGB24 = +{ + .name = "RGB24", + .earlyInit = NULL, + .setup = rgb24_setup, + .init = rgb24_init, + .free = rgb24_free, + .configure = rgb24_configure, + .run = rgb24_run, + .finish = rgb24_finish +}; diff --git a/host/platform/Windows/capture/DXGI/src/pp/sdrwhitelevel.c b/host/platform/Windows/capture/DXGI/src/pp/sdrwhitelevel.c index 067f3352..5bb3c2fd 100644 --- a/host/platform/Windows/capture/DXGI/src/pp/sdrwhitelevel.c +++ b/host/platform/Windows/capture/DXGI/src/pp/sdrwhitelevel.c @@ -32,6 +32,7 @@ typedef struct SDRWhiteLevel ID3D11Device ** device; ID3D11DeviceContext ** context; + bool shareable; ID3D11PixelShader ** pshader; ID3D11SamplerState ** sampler; ID3D11Buffer ** buffer; @@ -40,7 +41,7 @@ typedef struct SDRWhiteLevel float sdrWhiteLevel; } SDRWhiteLevel; -SDRWhiteLevel this = {0}; +static SDRWhiteLevel this = {0}; typedef struct { @@ -60,15 +61,17 @@ static void updateConsts(void); static bool sdrWhiteLevel_setup( ID3D11Device ** device, ID3D11DeviceContext ** context, - IDXGIOutput ** output + IDXGIOutput ** output, + bool shareable ) { bool result = false; comRef_scopePush(); HRESULT status; - this.device = device; - this.context = context; + this.device = device; + this.context = context; + this.shareable = shareable; comRef_defineLocal(IDXGIOutput6, output6); status = IDXGIOutput_QueryInterface( @@ -106,7 +109,7 @@ static bool sdrWhiteLevel_setup( "}\n"; comRef_defineLocal(ID3DBlob, byteCode); - if (!compileShader(byteCode, "main", "ps_5_0", pshaderSrc)) + if (!compileShader(byteCode, "main", "ps_5_0", pshaderSrc, NULL)) goto exit; comRef_defineLocal(ID3D11PixelShader, pshader); @@ -178,11 +181,7 @@ static void sdrWhiteLevel_finish(void) memset(&this, 0, sizeof(this)); } -static bool sdrWhiteLevel_init( - void ** opaque, - int width, - int height, - bool shareable) +static bool sdrWhiteLevel_init(void ** opaque) { SDRWhiteLevelInst * inst = (SDRWhiteLevelInst *)calloc(1, sizeof(*inst)); if (!inst) @@ -191,13 +190,48 @@ static bool sdrWhiteLevel_init( return false; } + *opaque = inst; + return true; +} + +static void sdrWhiteLevel_free(void * opaque) +{ + SDRWhiteLevelInst * inst = (SDRWhiteLevelInst *)opaque; + comRef_release(inst->target); + comRef_release(inst->tex ); + free(inst); +} + +static void updateConsts(void) +{ + float nits = getSDRWhiteLevel(&this.displayPathInfo); + if (nits == this.sdrWhiteLevel) + return; + + this.sdrWhiteLevel = nits; + + struct ShaderConsts consts = { .sdrWhiteLevel = 80.0f / nits }; + ID3D11DeviceContext_UpdateSubresource( + *this.context, *(ID3D11Resource**)this.buffer, + 0, NULL, &consts, 0, 0); +} + +static bool sdrWhiteLevel_configure(void * opaque, + int * width, int * height, + int * cols , int * rows, + CaptureFormat * format) +{ + SDRWhiteLevelInst * inst = (SDRWhiteLevelInst *)opaque; + if (inst->tex) + return true; + comRef_scopePush(); // create the output texture D3D11_TEXTURE2D_DESC texDesc = { - .Width = width, - .Height = height, + .Width = *width, + .Height = *height, .MipLevels = 1, .ArraySize = 1, .SampleDesc.Count = 1, @@ -211,7 +245,7 @@ static bool sdrWhiteLevel_init( }; // allow texture sharing with other backends - if (shareable) + if (this.shareable) texDesc.MiscFlags |= D3D11_RESOURCE_MISC_SHARED | D3D11_RESOURCE_MISC_SHARED_NTHANDLE; @@ -236,41 +270,19 @@ static bool sdrWhiteLevel_init( goto fail; } - *opaque = inst; comRef_toGlobal(inst->tex , tex ); comRef_toGlobal(inst->target, target); comRef_scopePop(); + + *format = CAPTURE_FMT_RGBA10; return true; fail: comRef_scopePop(); - free(inst); return false; } -static void sdrWhiteLevel_free(void * opaque) -{ - SDRWhiteLevelInst * inst = (SDRWhiteLevelInst *)opaque; - comRef_release(inst->target); - comRef_release(inst->tex ); - free(inst); -} - -static void updateConsts(void) -{ - float nits = getSDRWhiteLevel(&this.displayPathInfo); - if (nits == this.sdrWhiteLevel) - return; - - this.sdrWhiteLevel = nits; - - struct ShaderConsts consts = { .sdrWhiteLevel = 80.0f / nits }; - ID3D11DeviceContext_UpdateSubresource( - *this.context, *(ID3D11Resource**)this.buffer, - 0, NULL, &consts, 0, 0); -} - static ID3D11Texture2D * sdrWhiteLevel_run(void * opaque, ID3D11ShaderResourceView * srv) { @@ -299,6 +311,7 @@ DXGIPostProcess DXGIPP_SDRWhiteLevel = .setup = sdrWhiteLevel_setup, .init = sdrWhiteLevel_init, .free = sdrWhiteLevel_free, + .configure = sdrWhiteLevel_configure, .run = sdrWhiteLevel_run, .finish = sdrWhiteLevel_finish }; diff --git a/host/platform/Windows/capture/DXGI/src/util.c b/host/platform/Windows/capture/DXGI/src/util.c index e91fb788..6036ecb6 100644 --- a/host/platform/Windows/capture/DXGI/src/util.c +++ b/host/platform/Windows/capture/DXGI/src/util.c @@ -196,18 +196,18 @@ const char * getDXGIColorSpaceTypeStr(DXGI_COLOR_SPACE_TYPE type) } bool compileShader(ID3DBlob ** dst, const char * entry, const char * target, - const char * code) + const char * code, const D3D_SHADER_MACRO * defines) { ID3DBlob * errors; HRESULT status = D3DCompile( code, strlen(code), NULL, - NULL, + defines, NULL, entry, target, - D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION, + 0,//D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION, 0, dst, &errors); diff --git a/host/platform/Windows/capture/DXGI/src/util.h b/host/platform/Windows/capture/DXGI/src/util.h index 059a0afd..2e23700f 100644 --- a/host/platform/Windows/capture/DXGI/src/util.h +++ b/host/platform/Windows/capture/DXGI/src/util.h @@ -25,7 +25,7 @@ const char * getDXGIFormatStr(DXGI_FORMAT format); const char * getDXGIColorSpaceTypeStr(DXGI_COLOR_SPACE_TYPE type); bool compileShader(ID3DBlob ** dst, const char * entry, const char * target, - const char * code); + const char * code, const D3D_SHADER_MACRO * defines); bool getDisplayPathInfo(HMONITOR monitor, DISPLAYCONFIG_PATH_INFO * info); diff --git a/host/platform/Windows/capture/NVFBC/src/nvfbc.c b/host/platform/Windows/capture/NVFBC/src/nvfbc.c index 006e9db0..65173be4 100644 --- a/host/platform/Windows/capture/NVFBC/src/nvfbc.c +++ b/host/platform/Windows/capture/NVFBC/src/nvfbc.c @@ -75,6 +75,7 @@ struct iface unsigned int maxWidth , maxHeight; unsigned int width , height; + unsigned int frameHeight; bool resChanged, scale; unsigned int targetWidth, targetHeight; @@ -744,12 +745,15 @@ static CaptureResult nvfbc_waitFrame(CaptureFrame * frame, } const unsigned int maxHeight = maxFrameSize / (this->shmStride * 4); + this->frameHeight = min(maxHeight, this->grabHeight); frame->formatVer = this->formatVer; frame->screenWidth = this->width; frame->screenHeight = this->height; + frame->dataWidth = this->width; + frame->dataHeight = this->height; frame->frameWidth = this->grabWidth; - frame->frameHeight = min(maxHeight, this->grabHeight); + frame->frameHeight = this->frameHeight; frame->truncated = maxHeight < this->grabHeight; frame->pitch = this->shmStride * 4; frame->stride = this->shmStride; @@ -764,8 +768,7 @@ static CaptureResult nvfbc_waitFrame(CaptureFrame * frame, return CAPTURE_RESULT_OK; } -static CaptureResult nvfbc_getFrame(FrameBuffer * frame, - const unsigned int height, int frameIndex) +static CaptureResult nvfbc_getFrame(FrameBuffer * frame, int frameIndex) { const unsigned int h = DIFF_MAP_DIM(this->grabHeight, this->diffShift); const unsigned int w = DIFF_MAP_DIM(this->grabWidth, this->diffShift); @@ -779,7 +782,8 @@ static CaptureResult nvfbc_getFrame(FrameBuffer * frame, for (unsigned int y = 0; y < h; ++y) { const unsigned int ystart = y << this->diffShift; - const unsigned int yend = min(height, (y + 1) << this->diffShift); + const unsigned int yend = min(this->frameHeight, (y + 1) + << this->diffShift); for (unsigned int x = 0; x < w; ) { @@ -804,9 +808,9 @@ static CaptureResult nvfbc_getFrame(FrameBuffer * frame, } else if (this->grabStride != this->shmStride) { - for (int y = 0; y < height; y += 64) + for (int y = 0; y < this->frameHeight; y += 64) { - int yend = min(height, y + 128); + int yend = min(this->frameHeight, y + 128); rectCopyUnaligned(frameData, this->frameBuffer, y, yend, 0, this->shmStride * 4, this->grabStride * 4, this->grabWidth * 4); framebuffer_set_write_ptr(frame, yend * this->shmStride * 4); @@ -816,7 +820,7 @@ static CaptureResult nvfbc_getFrame(FrameBuffer * frame, framebuffer_write( frame, this->frameBuffer, - height * this->grabInfo.dwBufferWidth * 4 + this->frameHeight * this->grabInfo.dwBufferWidth * 4 ); for (int i = 0; i < LGMP_Q_FRAME_LEN; ++i) diff --git a/host/src/app.c b/host/src/app.c index cd6f51dc..ff5f8635 100644 --- a/host/src/app.c +++ b/host/src/app.c @@ -250,8 +250,13 @@ static bool sendFrame(void) switch(frame.format) { - case CAPTURE_FMT_BGRA : fi->type = FRAME_TYPE_BGRA ; break; - case CAPTURE_FMT_RGBA : fi->type = FRAME_TYPE_RGBA ; break; + case CAPTURE_FMT_BGRA: + fi->type = FRAME_TYPE_BGRA; + break; + + case CAPTURE_FMT_RGBA: + fi->type = FRAME_TYPE_RGBA; + break; case CAPTURE_FMT_RGBA10: fi->type = FRAME_TYPE_RGBA10; @@ -262,6 +267,10 @@ static bool sendFrame(void) flags |= FRAME_FLAG_HDR; break; + case CAPTURE_FMT_BGR: + fi->type = FRAME_TYPE_BGR; + break; + default: DEBUG_ERROR("Unsupported frame format %d, skipping frame", frame.format); return true; @@ -292,6 +301,8 @@ static bool sendFrame(void) fi->frameSerial = app.frameSerial++; fi->screenWidth = frame.screenWidth; fi->screenHeight = frame.screenHeight; + fi->dataWidth = frame.dataWidth; + fi->dataHeight = frame.dataHeight; fi->frameWidth = frame.frameWidth; fi->frameHeight = frame.frameHeight; fi->stride = frame.stride; @@ -317,7 +328,7 @@ static bool sendFrame(void) return true; } - app.iface->getFrame(fb, frame.frameHeight, app.frameIndex); + app.iface->getFrame(fb, app.frameIndex); return true; }