From 85b8c12abf3d5d527b334da041405d24a8811605 Mon Sep 17 00:00:00 2001 From: Geoffrey McRae Date: Mon, 3 Aug 2020 12:33:08 +1000 Subject: [PATCH] [common] adjust framebuffer read/write strategy for better cache usage --- VERSION | 2 +- common/src/framebuffer.c | 31 +++++++++++++++++++++---------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/VERSION b/VERSION index 2430f828..40183f68 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -B2-rc2-17-gaa32c5ffad+1 \ No newline at end of file +B2-rc2-19-g7af053497e+1 \ No newline at end of file diff --git a/common/src/framebuffer.c b/common/src/framebuffer.c index 99bb3535..b89ace17 100644 --- a/common/src/framebuffer.c +++ b/common/src/framebuffer.c @@ -118,10 +118,15 @@ bool framebuffer_write(FrameBuffer * frame, const void * src, size_t size) /* copy in chunks */ while(size > 63) { - _mm_stream_si128(d++, _mm_stream_load_si128(s++)); - _mm_stream_si128(d++, _mm_stream_load_si128(s++)); - _mm_stream_si128(d++, _mm_stream_load_si128(s++)); - _mm_stream_si128(d++, _mm_stream_load_si128(s++)); + const __m128i v1 = _mm_stream_load_si128(s++); + const __m128i v2 = _mm_stream_load_si128(s++); + const __m128i v3 = _mm_stream_load_si128(s++); + const __m128i v4 = _mm_stream_load_si128(s++); + _mm_stream_si128(d++, v1); + _mm_stream_si128(d++, v2); + _mm_stream_si128(d++, v3); + _mm_stream_si128(d++, v4); + size -= 64; wp += 64; @@ -131,24 +136,30 @@ bool framebuffer_write(FrameBuffer * frame, const void * src, size_t size) if (size > 47) { - _mm_stream_si128(d++, _mm_stream_load_si128(s++)); - _mm_stream_si128(d++, _mm_stream_load_si128(s++)); - _mm_stream_si128(d++, _mm_stream_load_si128(s++)); + const __m128i v1 = _mm_stream_load_si128(s++); + const __m128i v2 = _mm_stream_load_si128(s++); + const __m128i v3 = _mm_stream_load_si128(s++); + _mm_stream_si128(d++, v1); + _mm_stream_si128(d++, v2); + _mm_stream_si128(d++, v3); size -= 48; wp += 48; } if (size > 31) { - _mm_stream_si128(d++, _mm_stream_load_si128(s++)); - _mm_stream_si128(d++, _mm_stream_load_si128(s++)); + const __m128i v1 = _mm_stream_load_si128(s++); + const __m128i v2 = _mm_stream_load_si128(s++); + _mm_stream_si128(d++, v1); + _mm_stream_si128(d++, v2); size -= 32; wp += 32; } if (size > 15) { - _mm_stream_si128(d++, _mm_stream_load_si128(s++)); + const __m128i v1 = _mm_stream_load_si128(s++); + _mm_stream_si128(d++, v1); size -= 16; wp += 16; }