[host] dxgi: add support for downsampling the capture before sending

This is an experimental and incomplete feature for those using
supersampling. Any capture wider than 1920 and taller than 1200 will be
downsampled to half its width and height before being copied out of the
GPU, to save on memory bandwidth.

Unfinished! It has issues with damage tracking and currently cannot
be configured. Only the dx11 (D3D11) backend has been tested at this
point; everything else will likely have problems or crash.
This commit is contained in:
Geoffrey McRae 2022-05-01 19:45:44 +10:00
parent 132d0e3c42
commit 3134ec84de
6 changed files with 195 additions and 49 deletions

View File

@ -28,7 +28,7 @@
#include "types.h" #include "types.h"
#define KVMFR_MAGIC "KVMFR---" #define KVMFR_MAGIC "KVMFR---"
#define KVMFR_VERSION 18 #define KVMFR_VERSION 19
#define KVMFR_MAX_DAMAGE_RECTS 64 #define KVMFR_MAX_DAMAGE_RECTS 64
@ -127,7 +127,8 @@ KVMFRCursor;
enum enum
{ {
FRAME_FLAG_BLOCK_SCREENSAVER = 0x1, FRAME_FLAG_BLOCK_SCREENSAVER = 0x1,
FRAME_FLAG_REQUEST_ACTIVATION = 0x2 FRAME_FLAG_REQUEST_ACTIVATION = 0x2,
FRAME_FLAG_TRUNCATED = 0x4 // ivshmem was too small for the frame
}; };
typedef uint32_t KVMFRFrameFlags; typedef uint32_t KVMFRFrameFlags;
@ -137,9 +138,10 @@ typedef struct KVMFRFrame
uint32_t formatVer; // the frame format version number uint32_t formatVer; // the frame format version number
uint32_t frameSerial; // the unique frame number uint32_t frameSerial; // the unique frame number
FrameType type; // the frame data type FrameType type; // the frame data type
uint32_t width; // the frame width uint32_t screenWidth; // the client's screen width
uint32_t height; // the frame height uint32_t screenHeight; // the client's screen height
uint32_t realHeight; // the real height if the frame was truncated due to low mem uint32_t frameWidth; // the frame width
uint32_t frameHeight; // the frame height
FrameRotation rotation; // the frame rotation FrameRotation rotation; // the frame rotation
uint32_t stride; // the row stride (zero if compressed data) uint32_t stride; // the row stride (zero if compressed data)
uint32_t pitch; // the row pitch (stride in bytes or the compressed frame size) uint32_t pitch; // the row pitch (stride in bytes or the compressed frame size)

View File

@ -63,9 +63,11 @@ CaptureRotation;
typedef struct CaptureFrame typedef struct CaptureFrame
{ {
unsigned int formatVer; unsigned int formatVer;
unsigned int width; unsigned int screenWidth;
unsigned int height; unsigned int screenHeight;
unsigned int realHeight; unsigned int frameWidth;
unsigned int frameHeight;
bool truncated;
unsigned int pitch; unsigned int pitch;
unsigned int stride; unsigned int stride;
CaptureFormat format; CaptureFormat format;

View File

@ -32,6 +32,15 @@ struct D3D11Backend
uint64_t usleepMapTime; uint64_t usleepMapTime;
}; };
struct D3D11TexImpl
{
ID3D11Texture2D * gpu;
ID3D11Texture2D * cpu;
ID3D11ShaderResourceView * srv;
};
#define TEXIMPL(x) ((struct D3D11TexImpl *)(x).impl)
static struct DXGIInterface * dxgi = NULL; static struct DXGIInterface * dxgi = NULL;
static struct D3D11Backend * this = NULL; static struct D3D11Backend * this = NULL;
@ -52,33 +61,88 @@ static bool d3d11_create(struct DXGIInterface * intf)
this->avgMapTime = runningavg_new(10); this->avgMapTime = runningavg_new(10);
D3D11_TEXTURE2D_DESC texDesc; const bool downsample =
memset(&texDesc, 0, sizeof(texDesc)); dxgi->targetWidth == dxgi->width / 2 &&
texDesc.Width = dxgi->width; dxgi->targetHeight == dxgi->height / 2;
texDesc.Height = dxgi->height;
texDesc.MipLevels = 1; if (!downsample)
texDesc.ArraySize = 1; {
texDesc.SampleDesc.Count = 1; dxgi->targetWidth = dxgi->width;
texDesc.SampleDesc.Quality = 0; dxgi->targetHeight = dxgi->height;
texDesc.Usage = D3D11_USAGE_STAGING; }
texDesc.Format = dxgi->dxgiFormat;
texDesc.BindFlags = 0; D3D11_TEXTURE2D_DESC gpuTexDesc =
texDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; {
texDesc.MiscFlags = 0; .Width = dxgi->width,
.Height = dxgi->height,
.MipLevels = 2,
.ArraySize = 1,
.SampleDesc.Count = 1,
.SampleDesc.Quality = 0,
.Usage = D3D11_USAGE_DEFAULT,
.Format = dxgi->dxgiFormat,
.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE,
.CPUAccessFlags = 0,
.MiscFlags = D3D11_RESOURCE_MISC_GENERATE_MIPS
};
D3D11_TEXTURE2D_DESC cpuTexDesc =
{
.Width = dxgi->targetWidth,
.Height = dxgi->targetHeight,
.MipLevels = 1,
.ArraySize = 1,
.SampleDesc.Count = 1,
.SampleDesc.Quality = 0,
.Usage = D3D11_USAGE_STAGING,
.Format = dxgi->dxgiFormat,
.BindFlags = 0,
.CPUAccessFlags = D3D11_CPU_ACCESS_READ,
.MiscFlags = 0
};
for (int i = 0; i < dxgi->maxTextures; ++i) for (int i = 0; i < dxgi->maxTextures; ++i)
{ {
status = ID3D11Device_CreateTexture2D(dxgi->device, &texDesc, NULL, (ID3D11Texture2D **)&dxgi->texture[i].impl); if (!(dxgi->texture[i].impl =
if (FAILED(status)) (struct D3D11TexImpl *)calloc(sizeof(struct D3D11TexImpl), 1)))
{ {
DEBUG_WINERROR("Failed to create texture", status); DEBUG_ERROR("Failed to allocate D3D11TexImpl struct");
goto fail; goto fail;
} }
struct D3D11TexImpl * teximpl = TEXIMPL(dxgi->texture[i]);
status = ID3D11Device_CreateTexture2D(dxgi->device, &cpuTexDesc, NULL,
&teximpl->cpu);
if (FAILED(status))
{
DEBUG_WINERROR("Failed to create CPU texture", status);
goto fail;
}
if (!downsample)
continue;
status = ID3D11Device_CreateTexture2D(dxgi->device, &gpuTexDesc, NULL,
&teximpl->gpu);
if (FAILED(status))
{
DEBUG_WINERROR("Failed to create GPU texture", status);
goto fail;
}
ID3D11Device_CreateShaderResourceView(dxgi->device,
(ID3D11Resource *)teximpl->gpu, NULL, &teximpl->srv);
} }
// map the texture simply to get the pitch and stride // map the texture simply to get the pitch and stride
D3D11_MAPPED_SUBRESOURCE mapping; D3D11_MAPPED_SUBRESOURCE mapping;
status = ID3D11DeviceContext_Map(dxgi->deviceContext, (ID3D11Resource *)dxgi->texture[0].impl, 0, D3D11_MAP_READ, 0, &mapping); status = ID3D11DeviceContext_Map(dxgi->deviceContext,
(ID3D11Resource *)TEXIMPL(dxgi->texture[0])->cpu, 0,
D3D11_MAP_READ, 0, &mapping);
if (FAILED(status)) if (FAILED(status))
{ {
DEBUG_WINERROR("Failed to map the texture", status); DEBUG_WINERROR("Failed to map the texture", status);
@ -87,7 +151,10 @@ static bool d3d11_create(struct DXGIInterface * intf)
dxgi->pitch = mapping.RowPitch; dxgi->pitch = mapping.RowPitch;
dxgi->stride = mapping.RowPitch / dxgi->bpp; dxgi->stride = mapping.RowPitch / dxgi->bpp;
ID3D11DeviceContext_Unmap(dxgi->deviceContext, (ID3D11Resource *)dxgi->texture[0].impl, 0);
ID3D11DeviceContext_Unmap(dxgi->deviceContext,
(ID3D11Resource *)TEXIMPL(dxgi->texture[0])->cpu, 0);
return true; return true;
fail: fail:
@ -100,8 +167,22 @@ static void d3d11_free(void)
DEBUG_ASSERT(this); DEBUG_ASSERT(this);
for (int i = 0; i < dxgi->maxTextures; ++i) for (int i = 0; i < dxgi->maxTextures; ++i)
if (dxgi->texture[i].impl) {
ID3D11Texture2D_Release((ID3D11Texture2D *) dxgi->texture[i].impl); struct D3D11TexImpl * teximpl = TEXIMPL(dxgi->texture[i]);
if (!teximpl)
continue;
if (teximpl->cpu)
ID3D11Texture2D_Release(teximpl->cpu);
if (teximpl->gpu)
ID3D11Texture2D_Release(teximpl->gpu);
if (teximpl->srv)
ID3D11ShaderResourceView_Release(teximpl->srv);
free(teximpl);
}
runningavg_free(&this->avgMapTime); runningavg_free(&this->avgMapTime);
free(this); free(this);
@ -110,28 +191,66 @@ static void d3d11_free(void)
static bool d3d11_copyFrame(Texture * tex, ID3D11Texture2D * src) static bool d3d11_copyFrame(Texture * tex, ID3D11Texture2D * src)
{ {
struct D3D11TexImpl * teximpl = TEXIMPL(*tex);
ID3D11Texture2D * dst = teximpl->gpu ? teximpl->gpu : teximpl->cpu;
INTERLOCKED_SECTION(dxgi->deviceContextLock, INTERLOCKED_SECTION(dxgi->deviceContextLock,
{ {
tex->copyTime = microtime(); tex->copyTime = microtime();
if (tex->texDamageCount < 0) if (tex->texDamageCount < 0)
ID3D11DeviceContext_CopyResource(dxgi->deviceContext, ID3D11DeviceContext_CopyResource(dxgi->deviceContext,
(ID3D11Resource *)tex->impl, (ID3D11Resource *)src); (ID3D11Resource *)dst, (ID3D11Resource *)src);
else else
{ {
for (int i = 0; i < tex->texDamageCount; ++i) for (int i = 0; i < tex->texDamageCount; ++i)
{ {
FrameDamageRect * rect = tex->texDamageRects + i; FrameDamageRect * rect = tex->texDamageRects + i;
D3D11_BOX box = { D3D11_BOX box =
.left = rect->x, .top = rect->y, .front = 0, .back = 1, {
.right = rect->x + rect->width, .bottom = rect->y + rect->height, .left = rect->x,
.top = rect->y,
.front = 0,
.back = 1,
.right = rect->x + rect->width,
.bottom = rect->y + rect->height,
}; };
ID3D11DeviceContext_CopySubresourceRegion(dxgi->deviceContext, ID3D11DeviceContext_CopySubresourceRegion(dxgi->deviceContext,
(ID3D11Resource *)tex->impl, 0, rect->x, rect->y, 0, (ID3D11Resource *)dst, 0, rect->x, rect->y, 0,
(ID3D11Resource *)src, 0, &box); (ID3D11Resource *)src, 0, &box);
} }
} }
if (teximpl->gpu)
{
ID3D11DeviceContext_GenerateMips(dxgi->deviceContext, teximpl->srv);
if (tex->texDamageCount < 0)
ID3D11DeviceContext_CopySubresourceRegion(dxgi->deviceContext,
(ID3D11Resource *)teximpl->cpu, 0, 0, 0, 0,
(ID3D11Resource *)dst , 1, NULL);
else
{
for (int i = 0; i < tex->texDamageCount; ++i)
{
FrameDamageRect * rect = tex->texDamageRects + i;
D3D11_BOX box =
{
.left = rect->x / 2,
.top = rect->y / 2,
.front = 0,
.back = 1,
.right = (rect->x + rect->width ) / 2,
.bottom = (rect->y + rect->height) / 2,
};
ID3D11DeviceContext_CopySubresourceRegion(dxgi->deviceContext,
(ID3D11Resource *)teximpl->cpu, 0, rect->x / 2, rect->y / 2, 0,
(ID3D11Resource *)dst , 1, &box);
}
}
}
ID3D11DeviceContext_Flush(dxgi->deviceContext); ID3D11DeviceContext_Flush(dxgi->deviceContext);
}); });
return true; return true;
@ -139,6 +258,7 @@ static bool d3d11_copyFrame(Texture * tex, ID3D11Texture2D * src)
static CaptureResult d3d11_mapTexture(Texture * tex) static CaptureResult d3d11_mapTexture(Texture * tex)
{ {
struct D3D11TexImpl * teximpl = TEXIMPL(*tex);
D3D11_MAPPED_SUBRESOURCE map; D3D11_MAPPED_SUBRESOURCE map;
// sleep until it's close to time to map // sleep until it's close to time to map
@ -152,8 +272,8 @@ static CaptureResult d3d11_mapTexture(Texture * tex)
HRESULT status; HRESULT status;
INTERLOCKED_SECTION(dxgi->deviceContextLock, { INTERLOCKED_SECTION(dxgi->deviceContextLock, {
status = ID3D11DeviceContext_Map(dxgi->deviceContext, (ID3D11Resource *) tex->impl, status = ID3D11DeviceContext_Map(dxgi->deviceContext,
0, D3D11_MAP_READ, 0x100000L, &map); (ID3D11Resource *)teximpl->cpu, 0, D3D11_MAP_READ, 0x100000L, &map);
}); });
if (status == DXGI_ERROR_WAS_STILL_DRAWING) if (status == DXGI_ERROR_WAS_STILL_DRAWING)
{ {
@ -183,8 +303,11 @@ static CaptureResult d3d11_mapTexture(Texture * tex)
static void d3d11_unmapTexture(Texture * tex) static void d3d11_unmapTexture(Texture * tex)
{ {
struct D3D11TexImpl * teximpl = TEXIMPL(*tex);
INTERLOCKED_SECTION(dxgi->deviceContextLock, { INTERLOCKED_SECTION(dxgi->deviceContextLock, {
ID3D11DeviceContext_Unmap(dxgi->deviceContext, (ID3D11Resource *) tex->impl, 0); ID3D11DeviceContext_Unmap(dxgi->deviceContext,
(ID3D11Resource *)teximpl->cpu, 0);
}); });
tex->map = NULL; tex->map = NULL;
} }

View File

@ -390,6 +390,16 @@ static bool dxgi_init(void)
break; break;
} }
this->targetWidth = this->width;
this->targetHeight = this->height;
//TODO: add logic here
if (this->width > 1920 && this->height > 1200)
{
this->targetWidth /= 2;
this->targetHeight /= 2;
}
switch(outputDesc.Rotation) switch(outputDesc.Rotation)
{ {
case DXGI_MODE_ROTATION_ROTATE90: case DXGI_MODE_ROTATION_ROTATE90:
@ -413,6 +423,7 @@ static bool dxgi_init(void)
DEBUG_INFO("Feature Level : 0x%x" , this->featureLevel); DEBUG_INFO("Feature Level : 0x%x" , this->featureLevel);
DEBUG_INFO("Capture Size : %u x %u", this->width, this->height); DEBUG_INFO("Capture Size : %u x %u", this->width, this->height);
DEBUG_INFO("Target Size : %u x %u", this->targetWidth, this->targetHeight);
DEBUG_INFO("AcquireLock : %s" , this->useAcquireLock ? "enabled" : "disabled"); DEBUG_INFO("AcquireLock : %s" , this->useAcquireLock ? "enabled" : "disabled");
DEBUG_INFO("Debug mode : %s" , this->debug ? "enabled" : "disabled"); DEBUG_INFO("Debug mode : %s" , this->debug ? "enabled" : "disabled");
@ -1016,9 +1027,11 @@ static CaptureResult dxgi_waitFrame(CaptureFrame * frame, const size_t maxFrameS
const unsigned int maxHeight = maxFrameSize / this->pitch; const unsigned int maxHeight = maxFrameSize / this->pitch;
frame->formatVer = tex->formatVer; frame->formatVer = tex->formatVer;
frame->width = this->width; frame->screenWidth = this->width;
frame->height = maxHeight > this->height ? this->height : maxHeight; frame->screenHeight = this->height;
frame->realHeight = this->height; frame->frameWidth = this->targetWidth;
frame->frameHeight = min(maxHeight, this->targetHeight);
frame->truncated = maxHeight < this->targetHeight;
frame->pitch = this->pitch; frame->pitch = this->pitch;
frame->stride = this->stride; frame->stride = this->stride;
frame->format = this->format; frame->format = this->format;

View File

@ -93,8 +93,8 @@ struct DXGIInterface
LGEvent * frameEvent; LGEvent * frameEvent;
unsigned int formatVer; unsigned int formatVer;
unsigned int width; unsigned int width , targetWidth ;
unsigned int height; unsigned int height, targetHeight;
unsigned int pitch; unsigned int pitch;
unsigned int stride; unsigned int stride;
unsigned int bpp; unsigned int bpp;

View File

@ -269,9 +269,10 @@ static bool sendFrame(void)
fi->formatVer = frame.formatVer; fi->formatVer = frame.formatVer;
fi->frameSerial = app.frameSerial++; fi->frameSerial = app.frameSerial++;
fi->width = frame.width; fi->screenWidth = frame.screenWidth;
fi->height = frame.height; fi->screenHeight = frame.screenHeight;
fi->realHeight = frame.realHeight; fi->frameWidth = frame.frameWidth;
fi->frameHeight = frame.frameHeight;
fi->stride = frame.stride; fi->stride = frame.stride;
fi->pitch = frame.pitch; fi->pitch = frame.pitch;
fi->offset = app.pageSize - FrameBufferStructSize; fi->offset = app.pageSize - FrameBufferStructSize;
@ -279,11 +280,15 @@ static bool sendFrame(void)
(os_blockScreensaver() ? (os_blockScreensaver() ?
FRAME_FLAG_BLOCK_SCREENSAVER : 0) | FRAME_FLAG_BLOCK_SCREENSAVER : 0) |
(os_getAndClearPendingActivationRequest() ? (os_getAndClearPendingActivationRequest() ?
FRAME_FLAG_REQUEST_ACTIVATION : 0); FRAME_FLAG_REQUEST_ACTIVATION : 0) |
app.frameValid = true; (frame.truncated ?
FRAME_FLAG_TRUNCATED : 0);
fi->damageRectsCount = frame.damageRectsCount; fi->damageRectsCount = frame.damageRectsCount;
memcpy(fi->damageRects, frame.damageRects, frame.damageRectsCount * sizeof(FrameDamageRect)); memcpy(fi->damageRects, frame.damageRects,
frame.damageRectsCount * sizeof(FrameDamageRect));
app.frameValid = true;
// put the framebuffer on the border of the next page // put the framebuffer on the border of the next page
// this is to allow for aligned DMA transfers by the receiver // this is to allow for aligned DMA transfers by the receiver
@ -291,13 +296,14 @@ static bool sendFrame(void)
framebuffer_prepare(fb); framebuffer_prepare(fb);
/* we post and then get the frame, this is intentional! */ /* we post and then get the frame, this is intentional! */
if ((status = lgmpHostQueuePost(app.frameQueue, 0, app.frameMemory[app.frameIndex])) != LGMP_OK) if ((status = lgmpHostQueuePost(app.frameQueue, 0,
app.frameMemory[app.frameIndex])) != LGMP_OK)
{ {
DEBUG_ERROR("%s", lgmpStatusString(status)); DEBUG_ERROR("%s", lgmpStatusString(status));
return true; return true;
} }
app.iface->getFrame(fb, frame.height, app.frameIndex); app.iface->getFrame(fb, frame.frameHeight, app.frameIndex);
return true; return true;
} }