LookingGlass/common/src/framebuffer.c

176 lines
4.3 KiB
C

/*
KVMGFX Client - A KVM Client for VGA Passthrough
Copyright (C) 2017-2019 Geoffrey McRae <geoff@hostfission.com>
https://looking-glass.hostfission.com
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "common/framebuffer.h"
#include "common/debug.h"
#include <string.h>
#include <stdatomic.h>
#include <emmintrin.h>
#include <smmintrin.h>
/* Number of bytes (1 MiB) framebuffer_write copies between intermediate
 * updates of the shared write pointer. */
#define FB_CHUNK_SIZE 1048576
/**
 * Frame buffer header followed directly by the frame bytes.
 */
struct stFrameBuffer
{
  /* write pointer: count of bytes of `data` the writer has published so far */
  atomic_uint_least32_t wp;

  /* frame bytes; a C99 flexible array member (replaces the GNU `[0]`
   * extension), so it contributes nothing to sizeof and its storage must be
   * supplied by whoever allocates the structure */
  uint8_t data[];
};
/* Size in bytes of the FrameBuffer type, exported as a variable.
 * NOTE(review): FrameBuffer is declared in common/framebuffer.h; presumably it
 * is a typedef of struct stFrameBuffer, in which case this is the header size
 * without any frame bytes - confirm against the header. */
const size_t FrameBufferStructSize = sizeof(FrameBuffer);
/**
 * Busy-wait until the write pointer shows that at least `size` bytes have been
 * published.
 *
 * @param frame the frame buffer to watch
 * @param size  the number of bytes to wait for
 *
 * This spins without sleeping or yielding and only returns once the write
 * pointer reaches `size`.
 */
void framebuffer_wait(const FrameBuffer * frame, size_t size)
{
  /* compare with `<` rather than `!=`: the writer only publishes the pointer at
   * FB_CHUNK_SIZE multiples and at the final total, so it can step past `size`
   * without ever being equal to it, which would make a `!=` test spin forever.
   * This also matches the "at least" test used by the read functions. */
  while(atomic_load_explicit(&frame->wp, memory_order_acquire) < size) {}
}
/**
 * Copy a frame out of the frame buffer one row at a time, waiting for each row
 * to be published by the writer before it is copied.
 *
 * @param frame    the frame buffer to read from
 * @param dst      destination memory
 * @param dstpitch distance in bytes between the starts of consecutive rows in
 *                 `dst`
 * @param height   number of rows to copy
 * @param width    row width in pixels
 * @param bpp      bytes per pixel; `width * bpp` bytes are copied per row
 * @param pitch    distance in bytes between the starts of consecutive rows in
 *                 the frame buffer
 * @return always true
 *
 * Whole 16-byte blocks are moved with SSE streaming loads/stores, which
 * require 16-byte aligned addresses on both sides; any remaining bytes of the
 * row are copied with memcpy.
 */
bool framebuffer_read(const FrameBuffer * frame, void * dst, size_t dstpitch,
    size_t height, size_t width, size_t bpp, size_t pitch)
{
  uint8_t        *d         = (uint8_t*)dst;
  uint_least32_t  rp        = 0;
  const size_t    linewidth = width * bpp;
  const size_t    blocks    = linewidth / 16;
  const size_t    left      = linewidth % 16;

  for(size_t y = 0; y < height; ++y)
  {
    uint_least32_t wp;

    /* spin until the writer is at least one full row ahead of the reader */
    do
      wp = atomic_load_explicit(&frame->wp, memory_order_acquire);
    while(wp - rp < pitch);

    /* the counter is size_t to match `blocks`; an int counter would be
     * compared across signedness and could overflow */
    __m128i * s = (__m128i *)(frame->data + rp);
    for(size_t i = 0; i < blocks; ++i, ++s, d += 16)
      _mm_stream_si128((__m128i *)d, _mm_stream_load_si128(s));

    /* tail of the row that does not fill a 16-byte block */
    if (left)
      memcpy(d, frame->data + rp + blocks * 16, left);

    rp += pitch;

    /* `d` has already advanced by the block bytes, step the remainder of the
     * destination row */
    d += dstpitch - blocks * 16;
  }

  return true;
}
/**
 * Hand the frame to a callback one row at a time, waiting for each row to be
 * published by the writer before the callback is invoked.
 *
 * @param frame  the frame buffer to read from
 * @param height number of rows to deliver
 * @param width  row width in pixels
 * @param bpp    bytes per pixel; each call receives `width * bpp` bytes
 * @param pitch  distance in bytes between the starts of consecutive rows in
 *               the frame buffer
 * @param fn     callback invoked as fn(opaque, rowData, rowBytes)
 * @param opaque passed through to `fn` untouched
 * @return false as soon as `fn` returns false, otherwise true once every row
 *         has been delivered
 */
bool framebuffer_read_fn(const FrameBuffer * frame, size_t height, size_t width,
    size_t bpp, size_t pitch, FrameBufferReadFn fn, void * opaque)
{
  const size_t   rowBytes = width * bpp;
  uint_least32_t offset   = 0;

  for(size_t row = 0; row < height; ++row)
  {
    /* spin until the writer is at least one full row ahead of the reader */
    for(;;)
    {
      const uint_least32_t written =
        atomic_load_explicit(&frame->wp, memory_order_acquire);

      if (written - offset >= pitch)
        break;
    }

    if (!fn(opaque, frame->data + offset, rowBytes))
      return false;

    offset += pitch;
  }

  return true;
}
/**
 * Reset the frame buffer's write pointer to zero so that a new frame can be
 * written from the beginning.
 *
 * @param frame the frame buffer to reset
 */
void framebuffer_prepare(FrameBuffer * frame)
{
  const uint_least32_t start = 0;

  atomic_store_explicit(&frame->wp, start, memory_order_release);
}
/**
 * Copy `size` bytes from `src` into the frame buffer, publishing progress
 * through the write pointer so a concurrent reader can follow along.
 *
 * @param frame the frame buffer to fill; data is written from offset 0
 * @param src   source bytes
 * @param size  number of bytes to copy
 * @return always true
 *
 * Whole 16-byte blocks are moved with SSE streaming loads/stores, which
 * require 16-byte aligned addresses on both sides; any remaining bytes are
 * copied with memcpy. The write pointer is updated every FB_CHUNK_SIZE bytes
 * and once more with the final total.
 *
 * NOTE(review): the write pointer is a 32-bit-class atomic while the running
 * total is a size_t - sizes beyond its range would be truncated; confirm that
 * callers never pass such sizes.
 */
bool framebuffer_write(FrameBuffer * frame, const void * src, size_t size)
{
  __m128i * s  = (__m128i *)src;
  __m128i * d  = (__m128i *)frame->data;
  size_t    wp = 0;

  /* bulk copy, 64 bytes per iteration */
  while(size >= 64)
  {
    const __m128i v1 = _mm_stream_load_si128(s++);
    const __m128i v2 = _mm_stream_load_si128(s++);
    const __m128i v3 = _mm_stream_load_si128(s++);
    const __m128i v4 = _mm_stream_load_si128(s++);

    _mm_stream_si128(d++, v1);
    _mm_stream_si128(d++, v2);
    _mm_stream_si128(d++, v3);
    _mm_stream_si128(d++, v4);

    size -= 64;
    wp   += 64;

    if (wp % FB_CHUNK_SIZE == 0)
    {
      /* non-temporal stores are weakly ordered and are not held back by the
       * ordinary store that publishes the pointer; fence first so the reader
       * cannot observe the new pointer before the data it covers */
      _mm_sfence();
      atomic_store_explicit(&frame->wp, wp, memory_order_release);
    }
  }

  /* up to three remaining whole 16-byte blocks */
  while(size >= 16)
  {
    _mm_stream_si128(d++, _mm_stream_load_si128(s++));
    size -= 16;
    wp   += 16;
  }

  /* trailing bytes that do not fill a 16-byte block */
  if (size)
  {
    memcpy(frame->data + wp, s, size);
    wp += size;
  }

  /* make every streamed block globally visible before the final publication */
  _mm_sfence();
  atomic_store_explicit(&frame->wp, wp, memory_order_release);
  return true;
}