[host] dxgi: add support for downsampling the capture before sending

This is an experimental and incomplete feature for those using
supersampling. Any capture larger than 1920x1200 is downsampled to
half its width and height on the GPU before it is copied out, to
save on memory bandwidth.

Unfinished! It has issues with damage tracking and currently cannot
be configured. Only dx11 has been tested at this point; everything
else will likely have problems or crash.
This commit is contained in:
Geoffrey McRae 2022-05-01 19:45:44 +10:00
parent 132d0e3c42
commit 3134ec84de
6 changed files with 195 additions and 49 deletions

View File

@ -28,7 +28,7 @@
#include "types.h"
#define KVMFR_MAGIC "KVMFR---"
#define KVMFR_VERSION 18
#define KVMFR_VERSION 19
#define KVMFR_MAX_DAMAGE_RECTS 64
@ -127,7 +127,8 @@ KVMFRCursor;
// Per-frame flag bits. Each value is a distinct bit so several flags can be
// combined with bitwise OR into a single KVMFRFrameFlags value.
enum
{
FRAME_FLAG_BLOCK_SCREENSAVER = 0x1,
FRAME_FLAG_REQUEST_ACTIVATION = 0x2
FRAME_FLAG_REQUEST_ACTIVATION = 0x2,
FRAME_FLAG_TRUNCATED = 0x4 // ivshmem was too small for the frame
};
// storage type for an OR'd combination of the FRAME_FLAG_* bits above
typedef uint32_t KVMFRFrameFlags;
@ -137,9 +138,10 @@ typedef struct KVMFRFrame
uint32_t formatVer; // the frame format version number
uint32_t frameSerial; // the unique frame number
FrameType type; // the frame data type
uint32_t width; // the frame width
uint32_t height; // the frame height
uint32_t realHeight; // the real height if the frame was truncated due to low mem
uint32_t screenWidth; // the client's screen width
uint32_t screenHeight; // the client's screen height
uint32_t frameWidth; // the frame width
uint32_t frameHeight; // the frame height
FrameRotation rotation; // the frame rotation
uint32_t stride; // the row stride (zero if compressed data)
uint32_t pitch; // the row pitch (stride in bytes or the compressed frame size)

View File

@ -63,9 +63,11 @@ CaptureRotation;
typedef struct CaptureFrame
{
unsigned int formatVer;
unsigned int width;
unsigned int height;
unsigned int realHeight;
unsigned int screenWidth;
unsigned int screenHeight;
unsigned int frameWidth;
unsigned int frameHeight;
bool truncated;
unsigned int pitch;
unsigned int stride;
CaptureFormat format;

View File

@ -32,6 +32,15 @@ struct D3D11Backend
uint64_t usleepMapTime;
};
// Per-texture state of the D3D11 backend, stored behind the texture's `impl`
// pointer. Allocated zeroed in d3d11_create, so members that are never created
// stay NULL.
struct D3D11TexImpl
{
// default-usage texture at the full capture size with two mip levels; only
// created when downsampling, NULL otherwise
ID3D11Texture2D * gpu;
// staging texture with CPU read access at the target size; this is the
// texture that gets mapped
ID3D11Texture2D * cpu;
// shader resource view of `gpu`, passed to GenerateMips; only created when
// downsampling
ID3D11ShaderResourceView * srv;
};
// Casts the `impl` member of a texture to the D3D11 backend's per-texture
// state. `x` is the texture object itself (not a pointer to it), e.g.
// TEXIMPL(*tex) or TEXIMPL(dxgi->texture[i]).
#define TEXIMPL(x) ((struct D3D11TexImpl *)(x).impl)
static struct DXGIInterface * dxgi = NULL;
static struct D3D11Backend * this = NULL;
@ -52,33 +61,88 @@ static bool d3d11_create(struct DXGIInterface * intf)
this->avgMapTime = runningavg_new(10);
D3D11_TEXTURE2D_DESC texDesc;
memset(&texDesc, 0, sizeof(texDesc));
texDesc.Width = dxgi->width;
texDesc.Height = dxgi->height;
texDesc.MipLevels = 1;
texDesc.ArraySize = 1;
texDesc.SampleDesc.Count = 1;
texDesc.SampleDesc.Quality = 0;
texDesc.Usage = D3D11_USAGE_STAGING;
texDesc.Format = dxgi->dxgiFormat;
texDesc.BindFlags = 0;
texDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
texDesc.MiscFlags = 0;
const bool downsample =
dxgi->targetWidth == dxgi->width / 2 &&
dxgi->targetHeight == dxgi->height / 2;
if (!downsample)
{
dxgi->targetWidth = dxgi->width;
dxgi->targetHeight = dxgi->height;
}
D3D11_TEXTURE2D_DESC gpuTexDesc =
{
.Width = dxgi->width,
.Height = dxgi->height,
.MipLevels = 2,
.ArraySize = 1,
.SampleDesc.Count = 1,
.SampleDesc.Quality = 0,
.Usage = D3D11_USAGE_DEFAULT,
.Format = dxgi->dxgiFormat,
.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE,
.CPUAccessFlags = 0,
.MiscFlags = D3D11_RESOURCE_MISC_GENERATE_MIPS
};
D3D11_TEXTURE2D_DESC cpuTexDesc =
{
.Width = dxgi->targetWidth,
.Height = dxgi->targetHeight,
.MipLevels = 1,
.ArraySize = 1,
.SampleDesc.Count = 1,
.SampleDesc.Quality = 0,
.Usage = D3D11_USAGE_STAGING,
.Format = dxgi->dxgiFormat,
.BindFlags = 0,
.CPUAccessFlags = D3D11_CPU_ACCESS_READ,
.MiscFlags = 0
};
for (int i = 0; i < dxgi->maxTextures; ++i)
{
status = ID3D11Device_CreateTexture2D(dxgi->device, &texDesc, NULL, (ID3D11Texture2D **)&dxgi->texture[i].impl);
if (FAILED(status))
if (!(dxgi->texture[i].impl =
(struct D3D11TexImpl *)calloc(sizeof(struct D3D11TexImpl), 1)))
{
DEBUG_WINERROR("Failed to create texture", status);
DEBUG_ERROR("Failed to allocate D3D11TexImpl struct");
goto fail;
}
struct D3D11TexImpl * teximpl = TEXIMPL(dxgi->texture[i]);
status = ID3D11Device_CreateTexture2D(dxgi->device, &cpuTexDesc, NULL,
&teximpl->cpu);
if (FAILED(status))
{
DEBUG_WINERROR("Failed to create CPU texture", status);
goto fail;
}
if (!downsample)
continue;
status = ID3D11Device_CreateTexture2D(dxgi->device, &gpuTexDesc, NULL,
&teximpl->gpu);
if (FAILED(status))
{
DEBUG_WINERROR("Failed to create GPU texture", status);
goto fail;
}
ID3D11Device_CreateShaderResourceView(dxgi->device,
(ID3D11Resource *)teximpl->gpu, NULL, &teximpl->srv);
}
// map the texture simply to get the pitch and stride
D3D11_MAPPED_SUBRESOURCE mapping;
status = ID3D11DeviceContext_Map(dxgi->deviceContext, (ID3D11Resource *)dxgi->texture[0].impl, 0, D3D11_MAP_READ, 0, &mapping);
status = ID3D11DeviceContext_Map(dxgi->deviceContext,
(ID3D11Resource *)TEXIMPL(dxgi->texture[0])->cpu, 0,
D3D11_MAP_READ, 0, &mapping);
if (FAILED(status))
{
DEBUG_WINERROR("Failed to map the texture", status);
@ -87,7 +151,10 @@ static bool d3d11_create(struct DXGIInterface * intf)
dxgi->pitch = mapping.RowPitch;
dxgi->stride = mapping.RowPitch / dxgi->bpp;
ID3D11DeviceContext_Unmap(dxgi->deviceContext, (ID3D11Resource *)dxgi->texture[0].impl, 0);
ID3D11DeviceContext_Unmap(dxgi->deviceContext,
(ID3D11Resource *)TEXIMPL(dxgi->texture[0])->cpu, 0);
return true;
fail:
@ -100,8 +167,22 @@ static void d3d11_free(void)
DEBUG_ASSERT(this);
for (int i = 0; i < dxgi->maxTextures; ++i)
if (dxgi->texture[i].impl)
ID3D11Texture2D_Release((ID3D11Texture2D *) dxgi->texture[i].impl);
{
struct D3D11TexImpl * teximpl = TEXIMPL(dxgi->texture[i]);
if (!teximpl)
continue;
if (teximpl->cpu)
ID3D11Texture2D_Release(teximpl->cpu);
if (teximpl->gpu)
ID3D11Texture2D_Release(teximpl->gpu);
if (teximpl->srv)
ID3D11ShaderResourceView_Release(teximpl->srv);
free(teximpl);
}
runningavg_free(&this->avgMapTime);
free(this);
@ -110,28 +191,66 @@ static void d3d11_free(void)
// Copies the captured frame `src` into the textures owned by `tex`.
//
// The copy goes into the GPU texture when one exists (downsampling), otherwise
// straight into the CPU staging texture. When a GPU texture exists its mips are
// regenerated and subresource 1 is then copied into the CPU staging texture.
// All device context calls are made inside the deviceContextLock section and
// the context is flushed before leaving it.
//
// tex: the destination texture; its copyTime is updated and its damage
//      rectangles (texDamageCount / texDamageRects) select what is copied
// src: the source texture to copy from
//
// Returns true in the code visible here.
static bool d3d11_copyFrame(Texture * tex, ID3D11Texture2D * src)
{
struct D3D11TexImpl * teximpl = TEXIMPL(*tex);
// prefer the GPU texture so the mip chain can be generated from it
ID3D11Texture2D * dst = teximpl->gpu ? teximpl->gpu : teximpl->cpu;
INTERLOCKED_SECTION(dxgi->deviceContextLock,
{
tex->copyTime = microtime();
// a negative damage count copies the whole resource
// (presumably meaning the damage is unknown - TODO confirm)
if (tex->texDamageCount < 0)
ID3D11DeviceContext_CopyResource(dxgi->deviceContext,
(ID3D11Resource *)tex->impl, (ID3D11Resource *)src);
(ID3D11Resource *)dst, (ID3D11Resource *)src);
else
{
// otherwise copy only the damaged rectangles, at the same position in the
// destination as in the source
for (int i = 0; i < tex->texDamageCount; ++i)
{
FrameDamageRect * rect = tex->texDamageRects + i;
D3D11_BOX box = {
.left = rect->x, .top = rect->y, .front = 0, .back = 1,
.right = rect->x + rect->width, .bottom = rect->y + rect->height,
D3D11_BOX box =
{
.left = rect->x,
.top = rect->y,
.front = 0,
.back = 1,
.right = rect->x + rect->width,
.bottom = rect->y + rect->height,
};
ID3D11DeviceContext_CopySubresourceRegion(dxgi->deviceContext,
(ID3D11Resource *)tex->impl, 0, rect->x, rect->y, 0,
(ID3D11Resource *)dst, 0, rect->x, rect->y, 0,
(ID3D11Resource *)src, 0, &box);
}
}
// downsampling path: only taken when a GPU texture was created
if (teximpl->gpu)
{
ID3D11DeviceContext_GenerateMips(dxgi->deviceContext, teximpl->srv);
// source subresource 1 of the GPU texture is copied to the CPU texture;
// with the two mip levels requested at creation this should be the half
// size mip - TODO confirm
if (tex->texDamageCount < 0)
ID3D11DeviceContext_CopySubresourceRegion(dxgi->deviceContext,
(ID3D11Resource *)teximpl->cpu, 0, 0, 0, 0,
(ID3D11Resource *)dst , 1, NULL);
else
{
for (int i = 0; i < tex->texDamageCount; ++i)
{
FrameDamageRect * rect = tex->texDamageRects + i;
// damage rectangle scaled to the half size mip using integer division
// NOTE(review): right/bottom are rounded down, so a rectangle ending on
// an odd coordinate may leave its last half-size row/column uncopied -
// confirm whether these should round up instead
D3D11_BOX box =
{
.left = rect->x / 2,
.top = rect->y / 2,
.front = 0,
.back = 1,
.right = (rect->x + rect->width ) / 2,
.bottom = (rect->y + rect->height) / 2,
};
ID3D11DeviceContext_CopySubresourceRegion(dxgi->deviceContext,
(ID3D11Resource *)teximpl->cpu, 0, rect->x / 2, rect->y / 2, 0,
(ID3D11Resource *)dst , 1, &box);
}
}
}
// submit the queued copy commands
ID3D11DeviceContext_Flush(dxgi->deviceContext);
});
return true;
@ -139,6 +258,7 @@ static bool d3d11_copyFrame(Texture * tex, ID3D11Texture2D * src)
static CaptureResult d3d11_mapTexture(Texture * tex)
{
struct D3D11TexImpl * teximpl = TEXIMPL(*tex);
D3D11_MAPPED_SUBRESOURCE map;
// sleep until it's close to time to map
@ -152,8 +272,8 @@ static CaptureResult d3d11_mapTexture(Texture * tex)
HRESULT status;
INTERLOCKED_SECTION(dxgi->deviceContextLock, {
status = ID3D11DeviceContext_Map(dxgi->deviceContext, (ID3D11Resource *) tex->impl,
0, D3D11_MAP_READ, 0x100000L, &map);
status = ID3D11DeviceContext_Map(dxgi->deviceContext,
(ID3D11Resource *)teximpl->cpu, 0, D3D11_MAP_READ, 0x100000L, &map);
});
if (status == DXGI_ERROR_WAS_STILL_DRAWING)
{
@ -183,8 +303,11 @@ static CaptureResult d3d11_mapTexture(Texture * tex)
// Unmaps subresource 0 of the texture's CPU staging texture and clears the
// texture's `map` pointer. The Unmap call is made inside the
// deviceContextLock section.
//
// tex: the texture to unmap
static void d3d11_unmapTexture(Texture * tex)
{
struct D3D11TexImpl * teximpl = TEXIMPL(*tex);
INTERLOCKED_SECTION(dxgi->deviceContextLock, {
ID3D11DeviceContext_Unmap(dxgi->deviceContext, (ID3D11Resource *) tex->impl, 0);
ID3D11DeviceContext_Unmap(dxgi->deviceContext,
(ID3D11Resource *)teximpl->cpu, 0);
});
// the mapping is no longer valid once the texture is unmapped
tex->map = NULL;
}

View File

@ -390,6 +390,16 @@ static bool dxgi_init(void)
break;
}
this->targetWidth = this->width;
this->targetHeight = this->height;
//TODO: add logic here
if (this->width > 1920 && this->height > 1200)
{
this->targetWidth /= 2;
this->targetHeight /= 2;
}
switch(outputDesc.Rotation)
{
case DXGI_MODE_ROTATION_ROTATE90:
@ -413,6 +423,7 @@ static bool dxgi_init(void)
DEBUG_INFO("Feature Level : 0x%x" , this->featureLevel);
DEBUG_INFO("Capture Size : %u x %u", this->width, this->height);
DEBUG_INFO("Target Size : %u x %u", this->targetWidth, this->targetHeight);
DEBUG_INFO("AcquireLock : %s" , this->useAcquireLock ? "enabled" : "disabled");
DEBUG_INFO("Debug mode : %s" , this->debug ? "enabled" : "disabled");
@ -1016,9 +1027,11 @@ static CaptureResult dxgi_waitFrame(CaptureFrame * frame, const size_t maxFrameS
const unsigned int maxHeight = maxFrameSize / this->pitch;
frame->formatVer = tex->formatVer;
frame->width = this->width;
frame->height = maxHeight > this->height ? this->height : maxHeight;
frame->realHeight = this->height;
frame->screenWidth = this->width;
frame->screenHeight = this->height;
frame->frameWidth = this->targetWidth;
frame->frameHeight = min(maxHeight, this->targetHeight);
frame->truncated = maxHeight < this->targetHeight;
frame->pitch = this->pitch;
frame->stride = this->stride;
frame->format = this->format;

View File

@ -93,8 +93,8 @@ struct DXGIInterface
LGEvent * frameEvent;
unsigned int formatVer;
unsigned int width;
unsigned int height;
unsigned int width , targetWidth ;
unsigned int height, targetHeight;
unsigned int pitch;
unsigned int stride;
unsigned int bpp;

View File

@ -269,9 +269,10 @@ static bool sendFrame(void)
fi->formatVer = frame.formatVer;
fi->frameSerial = app.frameSerial++;
fi->width = frame.width;
fi->height = frame.height;
fi->realHeight = frame.realHeight;
fi->screenWidth = frame.screenWidth;
fi->screenHeight = frame.screenHeight;
fi->frameWidth = frame.frameWidth;
fi->frameHeight = frame.frameHeight;
fi->stride = frame.stride;
fi->pitch = frame.pitch;
fi->offset = app.pageSize - FrameBufferStructSize;
@ -279,11 +280,15 @@ static bool sendFrame(void)
(os_blockScreensaver() ?
FRAME_FLAG_BLOCK_SCREENSAVER : 0) |
(os_getAndClearPendingActivationRequest() ?
FRAME_FLAG_REQUEST_ACTIVATION : 0);
app.frameValid = true;
FRAME_FLAG_REQUEST_ACTIVATION : 0) |
(frame.truncated ?
FRAME_FLAG_TRUNCATED : 0);
fi->damageRectsCount = frame.damageRectsCount;
memcpy(fi->damageRects, frame.damageRects, frame.damageRectsCount * sizeof(FrameDamageRect));
memcpy(fi->damageRects, frame.damageRects,
frame.damageRectsCount * sizeof(FrameDamageRect));
app.frameValid = true;
// put the framebuffer on the border of the next page
// this is to allow for aligned DMA transfers by the receiver
@ -291,13 +296,14 @@ static bool sendFrame(void)
framebuffer_prepare(fb);
/* we post and then get the frame, this is intentional! */
if ((status = lgmpHostQueuePost(app.frameQueue, 0, app.frameMemory[app.frameIndex])) != LGMP_OK)
if ((status = lgmpHostQueuePost(app.frameQueue, 0,
app.frameMemory[app.frameIndex])) != LGMP_OK)
{
DEBUG_ERROR("%s", lgmpStatusString(status));
return true;
}
app.iface->getFrame(fb, frame.height, app.frameIndex);
app.iface->getFrame(fb, frame.frameHeight, app.frameIndex);
return true;
}