From 2329e993eeb493c0f8c3d9414543ce90bd041d13 Mon Sep 17 00:00:00 2001 From: Geoffrey McRae Date: Mon, 3 Aug 2020 11:44:24 +1000 Subject: [PATCH] [common] fixed framebuffer write SIMD code performance --- common/src/framebuffer.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/common/src/framebuffer.c b/common/src/framebuffer.c index 88f02d2e..2d291f97 100644 --- a/common/src/framebuffer.c +++ b/common/src/framebuffer.c @@ -111,21 +111,25 @@ void framebuffer_prepare(FrameBuffer * frame) bool framebuffer_write(FrameBuffer * frame, const void * src, size_t size) { __m128i * s = (__m128i *)src; + size_t wp = 0; /* copy in chunks */ while(size > 15) { - _mm_stream_si128((__m128i *)(frame->data + frame->wp), _mm_load_si128(s)); - atomic_fetch_add_explicit(&frame->wp, 16, memory_order_release); - ++s; + _mm_stream_si128((__m128i *)(frame->data + wp), _mm_loadu_si128(s++)); size -= 16; + wp += 16; + + if (wp % FB_CHUNK_SIZE == 0) + atomic_store_explicit(&frame->wp, wp, memory_order_release); } if(size) { memcpy(frame->data + frame->wp, s, size); - atomic_fetch_add_explicit(&frame->wp, size, memory_order_release); + wp += size; } + atomic_store_explicit(&frame->wp, wp, memory_order_release); return true; }