mirror of
https://github.com/gnif/LookingGlass.git
synced 2024-11-26 15:27:19 +00:00
176 lines
4.3 KiB
C
176 lines
4.3 KiB
C
/*
KVMGFX Client - A KVM Client for VGA Passthrough
Copyright (C) 2017-2019 Geoffrey McRae <geoff@hostfission.com>
https://looking-glass.hostfission.com

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*/
|
|
|
|
#include "common/framebuffer.h"
|
|
#include "common/debug.h"
|
|
|
|
#include <string.h>
|
|
#include <stdatomic.h>
|
|
#include <emmintrin.h>
|
|
#include <smmintrin.h>
|
|
|
|
/* Number of bytes framebuffer_write copies between intermediate updates of the
 * frame's write position (1048576 bytes = 1 MiB). */
#define FB_CHUNK_SIZE 1048576
|
|
|
|
/**
 * Frame buffer header followed directly by the frame bytes.
 *
 * wp   - write position: the number of bytes of `data` that the writer has
 *        published so far. It is stored by framebuffer_prepare and
 *        framebuffer_write and polled by the wait/read functions.
 * data - the frame bytes themselves; declared as a C99 flexible array member
 *        so the storage simply continues past the end of the header.
 */
struct stFrameBuffer
{
  atomic_uint_least32_t wp;
  uint8_t               data[];
};
|
|
|
|
/* Size in bytes of the FrameBuffer structure, exported as a constant object.
 * NOTE(review): presumably provided so that code which only sees FrameBuffer
 * as an incomplete type can still learn the header size - confirm against the
 * declaration in common/framebuffer.h. */
const size_t FrameBufferStructSize = sizeof(FrameBuffer);
|
|
|
|
void framebuffer_wait(const FrameBuffer * frame, size_t size)
|
|
{
|
|
while(atomic_load_explicit(&frame->wp, memory_order_acquire) != size) {}
|
|
}
|
|
|
|
|
|
bool framebuffer_read(const FrameBuffer * frame, void * dst, size_t dstpitch,
|
|
size_t height, size_t width, size_t bpp, size_t pitch)
|
|
{
|
|
uint8_t *d = (uint8_t*)dst;
|
|
uint_least32_t rp = 0;
|
|
size_t y = 0;
|
|
const size_t linewidth = width * bpp;
|
|
const size_t blocks = linewidth / 16;
|
|
const size_t left = linewidth % 16;
|
|
|
|
while(y < height)
|
|
{
|
|
uint_least32_t wp;
|
|
|
|
/* spinlock */
|
|
do
|
|
wp = atomic_load_explicit(&frame->wp, memory_order_acquire);
|
|
while(wp - rp < pitch);
|
|
|
|
__m128i * s = (__m128i *)(frame->data + rp);
|
|
for(int i = 0; i < blocks; ++i, ++s, d += 16)
|
|
_mm_stream_si128((__m128i *)d, _mm_stream_load_si128(s));
|
|
|
|
if (left)
|
|
memcpy(d, frame->data + rp + blocks * 16, left);
|
|
|
|
rp += pitch;
|
|
d += dstpitch - blocks * 16;
|
|
++y;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/**
 * Hand the frame to a callback one row at a time, waiting for each row to be
 * published by the writer before the callback is invoked for it.
 *
 * @param frame  the frame buffer to read from
 * @param height number of rows to deliver
 * @param width  row width; `width * bpp` is the length passed to `fn`
 * @param bpp    multiplier applied to `width` to obtain the row length
 * @param pitch  distance in bytes between the starts of consecutive rows in
 *               the frame buffer; also the amount of newly published data
 *               waited for before each row is delivered
 * @param fn     callback, invoked as fn(opaque, rowStart, rowLength)
 * @param opaque passed through to `fn` untouched
 * @return false as soon as `fn` returns a false value, otherwise true once all
 *         rows have been delivered
 */
bool framebuffer_read_fn(const FrameBuffer * frame, size_t height, size_t width,
    size_t bpp, size_t pitch, FrameBufferReadFn fn, void * opaque)
{
  const size_t rowBytes = width * bpp;
  uint_least32_t offset = 0;

  for(size_t row = 0; row < height; ++row)
  {
    /* spin until the write position is at least one pitch ahead of us */
    for(;;)
    {
      const uint_least32_t written =
        atomic_load_explicit(&frame->wp, memory_order_acquire);

      if (!(written - offset < pitch))
        break;
    }

    if (!fn(opaque, frame->data + offset, rowBytes))
      return false;

    offset += pitch;
  }

  return true;
}
|
|
|
|
/**
|
|
* Prepare the framebuffer for writing
|
|
*/
|
|
void framebuffer_prepare(FrameBuffer * frame)
|
|
{
|
|
atomic_store_explicit(&frame->wp, 0, memory_order_release);
|
|
}
|
|
|
|
/**
 * Copy `size` bytes from `src` into the frame's data area, publishing progress
 * through the frame's write position so readers can consume data while the
 * copy is still running.
 *
 * The bulk of the copy uses streaming (non-temporal) loads and stores, which
 * require `src` and the frame's data area to be 16-byte aligned. The write
 * position is updated every FB_CHUNK_SIZE bytes and once more when the copy is
 * complete.
 *
 * @param frame the frame buffer to write into
 * @param src   source bytes
 * @param size  number of bytes to copy
 * @return always true
 */
bool framebuffer_write(FrameBuffer * frame, const void * src, size_t size)
{
  __m128i * s = (__m128i *)src;
  __m128i * d = (__m128i *)frame->data;
  size_t wp = 0;

  /* copy in chunks of 64 bytes */
  while(size > 63)
  {
    const __m128i v1 = _mm_stream_load_si128(s++);
    const __m128i v2 = _mm_stream_load_si128(s++);
    const __m128i v3 = _mm_stream_load_si128(s++);
    const __m128i v4 = _mm_stream_load_si128(s++);
    _mm_stream_si128(d++, v1);
    _mm_stream_si128(d++, v2);
    _mm_stream_si128(d++, v3);
    _mm_stream_si128(d++, v4);

    size -= 64;
    wp   += 64;

    if (wp % FB_CHUNK_SIZE == 0)
    {
      /* Non-temporal stores are weakly ordered and are not held back by the
       * release store below; fence so the data is globally visible before
       * the new write position is. */
      _mm_sfence();
      atomic_store_explicit(&frame->wp, wp, memory_order_release);
    }
  }

  /* remaining whole 16-byte blocks (at most three) */
  while(size > 15)
  {
    _mm_stream_si128(d++, _mm_stream_load_si128(s++));
    size -= 16;
    wp   += 16;
  }

  /* trailing bytes that do not fill a whole 16-byte block */
  if (size)
  {
    memcpy(frame->data + wp, s, size);
    wp += size;
  }

  /* same ordering requirement as above for the final publication */
  _mm_sfence();
  atomic_store_explicit(&frame->wp, wp, memory_order_release);
  return true;
}
|