/*
KVMGFX Client - A KVM Client for VGA Passthrough
Copyright (C) 2017-2019 Geoffrey McRae
https://looking-glass.hostfission.com

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*/

#include "common/framebuffer.h"
#include "common/debug.h"

#include <string.h>
#include <stdatomic.h>
#include <xmmintrin.h>
#include <emmintrin.h>
#include <smmintrin.h>

/*
 * framebuffer_write publishes its write position every time the number of
 * bytes copied by its 64-byte loop reaches a multiple of this value.
 */
#define FB_CHUNK_SIZE 1048576

/*
 * A frame in flight: `wp` is the number of bytes of `data` that the writer has
 * published so far, `data` is the payload that immediately follows it.
 *
 * NOTE(review): the SSE streaming intrinsics used below require 16-byte
 * aligned addresses, while `data` sits directly after the 32-bit `wp`.
 * Presumably the caller places the structure so that `data` (not the struct
 * itself) is suitably aligned - confirm against the allocation site.
 */
struct stFrameBuffer
{
  atomic_uint_least32_t wp;
  uint8_t               data[];
};

/* size of the header that precedes the payload, exported for other modules */
const size_t FrameBufferStructSize = sizeof(FrameBuffer);

/**
 * Busy-wait until the published write position is exactly `size`.
 *
 * This spins without yielding and only terminates when `wp == size`.
 */
void framebuffer_wait(const FrameBuffer * frame, size_t size)
{
  while(atomic_load_explicit(&frame->wp, memory_order_acquire) != size) {}
}

/**
 * Copy `height` lines out of the framebuffer into `dst` while it may still be
 * being written.
 *
 * Before each line is copied this spins until at least `pitch` bytes beyond
 * the current read position have been published. Each line is `width * bpp`
 * bytes; source lines are `pitch` bytes apart and destination lines are
 * `dstpitch` bytes apart.
 *
 * Whole 16-byte blocks are moved with streaming loads/stores, so the source
 * and destination line starts must be 16-byte aligned whenever a line holds at
 * least one full block. Any remaining tail (< 16 bytes) is copied with memcpy.
 *
 * @return always true
 */
bool framebuffer_read(const FrameBuffer * frame, void * dst, size_t dstpitch,
    size_t height, size_t width, size_t bpp, size_t pitch)
{
  uint8_t       *d         = (uint8_t*)dst;
  uint_least32_t rp        = 0;
  size_t         y         = 0;
  const size_t   linewidth = width * bpp;
  const size_t   blocks    = linewidth / 16;
  const size_t   left      = linewidth % 16;

  while(y < height)
  {
    uint_least32_t wp;

    /* spinlock: wait for the writer to publish at least one more full line */
    do
      wp = atomic_load_explicit(&frame->wp, memory_order_acquire);
    while(wp - rp < pitch);

    __m128i * s = (__m128i *)(frame->data + rp);

    /* size_t index: `blocks` is a size_t, avoid a signed/unsigned compare */
    for(size_t i = 0; i < blocks; ++i, ++s, d += 16)
      _mm_stream_si128((__m128i *)d, _mm_stream_load_si128(s));

    /* d now points just past the last full block of this line */
    if (left)
      memcpy(d, frame->data + rp + blocks * 16, left);

    rp += pitch;
    /* d already advanced by blocks * 16; move it to the next dst line */
    d  += dstpitch - blocks * 16;
    ++y;
  }

  return true;
}

/**
 * Feed `height` lines of the framebuffer to a callback while it may still be
 * being written.
 *
 * Before each line this spins until at least `pitch` bytes beyond the current
 * read position have been published, then calls
 * `fn(opaque, line_start, width * bpp)`. Source lines are `pitch` bytes apart.
 *
 * @return false as soon as `fn` returns false, otherwise true once every line
 *         has been delivered
 */
bool framebuffer_read_fn(const FrameBuffer * frame, size_t height, size_t width,
    size_t bpp, size_t pitch, FrameBufferReadFn fn, void * opaque)
{
  uint_least32_t rp        = 0;
  size_t         y         = 0;
  const size_t   linewidth = width * bpp;

  while(y < height)
  {
    uint_least32_t wp;

    /* spinlock: wait for the writer to publish at least one more full line */
    do
      wp = atomic_load_explicit(&frame->wp, memory_order_acquire);
    while(wp - rp < pitch);

    if (!fn(opaque, frame->data + rp, linewidth))
      return false;

    rp += pitch;
    ++y;
  }

  return true;
}

/**
 * Prepare the framebuffer for writing by resetting the published write
 * position to zero.
 */
void framebuffer_prepare(FrameBuffer * frame)
{
  atomic_store_explicit(&frame->wp, 0, memory_order_release);
}

/**
 * Copy `size` bytes from `src` into the framebuffer payload and publish the
 * progress through `wp` so that concurrent readers can follow along.
 *
 * The bulk is copied 64 bytes at a time with streaming loads/stores, then
 * 48/32/16-byte remainders, and finally any tail (< 16 bytes) with memcpy.
 * `src` and the payload must therefore be 16-byte aligned whenever `size` is
 * at least 16. The write position is published every FB_CHUNK_SIZE bytes
 * during the 64-byte loop and once more at the end with the final total.
 *
 * Non-temporal stores are weakly ordered and are not ordered by the release
 * store to `wp` on x86, so a store fence is issued before every publication;
 * otherwise a reader could observe the new `wp` before the data it covers.
 *
 * @return always true
 */
bool framebuffer_write(FrameBuffer * frame, const void * src, size_t size)
{
  __m128i * s  = (__m128i *)src;
  __m128i * d  = (__m128i *)frame->data;
  size_t    wp = 0;

  /* copy in chunks */
  while(size > 63)
  {
    const __m128i v1 = _mm_stream_load_si128(s++);
    const __m128i v2 = _mm_stream_load_si128(s++);
    const __m128i v3 = _mm_stream_load_si128(s++);
    const __m128i v4 = _mm_stream_load_si128(s++);

    _mm_stream_si128(d++, v1);
    _mm_stream_si128(d++, v2);
    _mm_stream_si128(d++, v3);
    _mm_stream_si128(d++, v4);

    size -= 64;
    wp   += 64;

    if (wp % FB_CHUNK_SIZE == 0)
    {
      /* make the streamed data globally visible before advertising it */
      _mm_sfence();
      atomic_store_explicit(&frame->wp, wp, memory_order_release);
    }
  }

  if (size > 47)
  {
    const __m128i v1 = _mm_stream_load_si128(s++);
    const __m128i v2 = _mm_stream_load_si128(s++);
    const __m128i v3 = _mm_stream_load_si128(s++);

    _mm_stream_si128(d++, v1);
    _mm_stream_si128(d++, v2);
    _mm_stream_si128(d++, v3);

    size -= 48;
    wp   += 48;
  }

  if (size > 31)
  {
    const __m128i v1 = _mm_stream_load_si128(s++);
    const __m128i v2 = _mm_stream_load_si128(s++);

    _mm_stream_si128(d++, v1);
    _mm_stream_si128(d++, v2);

    size -= 32;
    wp   += 32;
  }

  if (size > 15)
  {
    const __m128i v1 = _mm_stream_load_si128(s++);

    _mm_stream_si128(d++, v1);

    size -= 16;
    wp   += 16;
  }

  /* fewer than 16 bytes remain, too small for a streaming store */
  if(size)
  {
    memcpy(frame->data + wp, s, size);
    wp += size;
  }

  /* make the streamed data globally visible before advertising it */
  _mm_sfence();
  atomic_store_explicit(&frame->wp, wp, memory_order_release);
  return true;
}