/*
KVMGFX Client - A KVM Client for VGA Passthrough
Copyright (C) 2017-2019 Geoffrey McRae
https://looking-glass.hostfission.com

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*/

#include "common/framebuffer.h"
#include "common/debug.h"

#include <string.h>     /* memcpy */
#include <stdatomic.h>  /* atomic_load_explicit, atomic_store_explicit */
#include <emmintrin.h>  /* SSE2: _mm_mfence, _mm_store_si128, _mm_storeu_si128 */
#include <smmintrin.h>  /* SSE4.1: _mm_stream_load_si128 */

/* framebuffer_write publishes its write pointer each time it crosses a
 * multiple of this many bytes (and once more when the copy is complete) */
#define FB_CHUNK_SIZE 1048576

/*
 * A frame that can be consumed while it is still being produced.
 *
 * wp   - number of bytes of `data` published so far by the writer
 * data - the frame bytes, stored immediately after the header
 */
struct stFrameBuffer
{
  atomic_uint_least32_t wp;
  uint8_t               data[]; /* C99 flexible array member */
};

const size_t FrameBufferStructSize = sizeof(FrameBuffer);

/**
 * Busy-wait until the write pointer of the frame is exactly `size`.
 *
 * Note that the comparison is for equality, the call does not return if the
 * write pointer never takes the value `size`.
 */
void framebuffer_wait(const FrameBuffer * frame, size_t size)
{
  while(atomic_load_explicit(&frame->wp, memory_order_acquire) != size) {}
}

/**
 * Copy `height` lines out of the framebuffer into `dst`, waiting for each
 * line to be published by the writer before it is copied.
 *
 * @param frame    the framebuffer to read from
 * @param dst      the destination buffer
 * @param dstpitch bytes between the start of consecutive lines in `dst`
 * @param height   number of lines to copy
 * @param width    pixels per line
 * @param bpp      bytes per pixel (multiplied with `width` to obtain the
 *                 number of bytes copied per line)
 * @param pitch    bytes between the start of consecutive lines in the frame
 * @return always true
 *
 * NOTE(review): _mm_stream_load_si128 requires a 16-byte aligned address, so
 * this relies on `frame->data + n * pitch` being 16-byte aligned - confirm
 * against how the frame is placed in memory by the caller.
 */
bool framebuffer_read(const FrameBuffer * frame, void * restrict dst,
    size_t dstpitch, size_t height, size_t width, size_t bpp, size_t pitch)
{
  uint8_t * restrict d         = (uint8_t*)dst;
  uint_least32_t     rp        = 0;
  const size_t       linewidth = width * bpp;
  const size_t       blocks    = linewidth / 64; /* whole 64-byte blocks    */
  const size_t       left      = linewidth % 64; /* trailing bytes per line */

  for(size_t y = 0; y < height; ++y)
  {
    uint_least32_t wp;

    /* spinlock */
    do
      wp = atomic_load_explicit(&frame->wp, memory_order_acquire);
    while(wp - rp < pitch);

    _mm_mfence();

    /* the cast drops const as some compilers declare the stream load
     * intrinsic with a non-const pointer argument */
    __m128i * restrict s = (__m128i *)(frame->data + rp);
    for(size_t i = 0; i < blocks; ++i)
    {
      __m128i * out = (__m128i *)d;

      /* load the full 64 bytes before storing any of them */
      __m128i v1 = _mm_stream_load_si128(s + 0);
      __m128i v2 = _mm_stream_load_si128(s + 1);
      __m128i v3 = _mm_stream_load_si128(s + 2);
      __m128i v4 = _mm_stream_load_si128(s + 3);

      /* unaligned stores, no alignment is demanded of the destination */
      _mm_storeu_si128(out + 0, v1);
      _mm_storeu_si128(out + 1, v2);
      _mm_storeu_si128(out + 2, v3);
      _mm_storeu_si128(out + 3, v4);

      d += 64;
      s += 4;
    }

    if (left)
    {
      memcpy(d, s, left);
      d += left;
    }

    rp += pitch;
    d  += dstpitch - linewidth; /* skip the destination line padding */
  }

  return true;
}

/**
 * Feed the framebuffer to `fn` one line at a time, waiting for each line to be
 * published by the writer before it is handed over.
 *
 * For every line `fn` is called with `opaque`, a pointer to the start of the
 * line inside the frame and the line length in bytes (`width * bpp`).
 *
 * @return false as soon as `fn` returns false, otherwise true once `height`
 *         lines have been processed
 */
bool framebuffer_read_fn(const FrameBuffer * frame, size_t height, size_t width,
    size_t bpp, size_t pitch, FrameBufferReadFn fn, void * opaque)
{
  uint_least32_t rp        = 0;
  const size_t   linewidth = width * bpp;

  for(size_t y = 0; y < height; ++y)
  {
    uint_least32_t wp;

    /* spinlock */
    do
      wp = atomic_load_explicit(&frame->wp, memory_order_acquire);
    while(wp - rp < pitch);

    if (!fn(opaque, frame->data + rp, linewidth))
      return false;

    rp += pitch;
  }

  return true;
}

/**
 * Prepare the framebuffer for writing
 *
 * Resets the write pointer to zero.
 */
void framebuffer_prepare(FrameBuffer * frame)
{
  atomic_store_explicit(&frame->wp, 0, memory_order_release);
}

/**
 * Copy `size` bytes from `src` into the framebuffer, publishing the write
 * pointer as the copy progresses so that readers can follow along.
 *
 * The write pointer is published each time it reaches a multiple of
 * FB_CHUNK_SIZE and once more after the final byte has been copied.
 *
 * @return always true
 *
 * NOTE(review): _mm_stream_load_si128 and _mm_store_si128 both require 16-byte
 * aligned addresses, so this relies on `src` and `frame->data` being 16-byte
 * aligned - confirm against the callers.
 * NOTE(review): the write pointer is tracked as a size_t but published through
 * a 32-bit atomic - presumably frames are always smaller than 4GiB, verify.
 */
bool framebuffer_write(FrameBuffer * frame, const void * restrict src,
    size_t size)
{
  /* the cast drops const as some compilers declare the stream load intrinsic
   * with a non-const pointer argument */
  __m128i * restrict s  = (__m128i *)src;
  __m128i * restrict d  = (__m128i *)frame->data;
  size_t             wp = 0;

  _mm_mfence();

  /* copy in chunks */
  while(size > 63)
  {
    /* load the full 64 bytes before storing any of them */
    __m128i v1 = _mm_stream_load_si128(s + 0);
    __m128i v2 = _mm_stream_load_si128(s + 1);
    __m128i v3 = _mm_stream_load_si128(s + 2);
    __m128i v4 = _mm_stream_load_si128(s + 3);

    _mm_store_si128(d + 0, v1);
    _mm_store_si128(d + 1, v2);
    _mm_store_si128(d + 2, v3);
    _mm_store_si128(d + 3, v4);

    s    += 4;
    d    += 4;
    size -= 64;
    wp   += 64;

    /* let the readers know that another chunk is available */
    if (wp % FB_CHUNK_SIZE == 0)
      atomic_store_explicit(&frame->wp, wp, memory_order_release);
  }

  /* copy the tail that does not fill a 64-byte block */
  if(size)
  {
    memcpy(frame->data + wp, s, size);
    wp += size;
  }

  /* publish the final position */
  atomic_store_explicit(&frame->wp, wp, memory_order_release);

  return true;
}