[common] adjust framebuffer read/write strategy for better cache usage

This commit is contained in:
Geoffrey McRae 2020-08-03 12:33:08 +10:00
parent 7af053497e
commit 85b8c12abf
2 changed files with 22 additions and 11 deletions

View File

@@ -1 +1 @@
B2-rc2-17-gaa32c5ffad+1 B2-rc2-19-g7af053497e+1

View File

@@ -118,10 +118,15 @@ bool framebuffer_write(FrameBuffer * frame, const void * src, size_t size)
/* copy in chunks */ /* copy in chunks */
while(size > 63) while(size > 63)
{ {
_mm_stream_si128(d++, _mm_stream_load_si128(s++)); const __m128i v1 = _mm_stream_load_si128(s++);
_mm_stream_si128(d++, _mm_stream_load_si128(s++)); const __m128i v2 = _mm_stream_load_si128(s++);
_mm_stream_si128(d++, _mm_stream_load_si128(s++)); const __m128i v3 = _mm_stream_load_si128(s++);
_mm_stream_si128(d++, _mm_stream_load_si128(s++)); const __m128i v4 = _mm_stream_load_si128(s++);
_mm_stream_si128(d++, v1);
_mm_stream_si128(d++, v2);
_mm_stream_si128(d++, v3);
_mm_stream_si128(d++, v4);
size -= 64; size -= 64;
wp += 64; wp += 64;
@@ -131,24 +136,30 @@ bool framebuffer_write(FrameBuffer * frame, const void * src, size_t size)
if (size > 47) if (size > 47)
{ {
_mm_stream_si128(d++, _mm_stream_load_si128(s++)); const __m128i v1 = _mm_stream_load_si128(s++);
_mm_stream_si128(d++, _mm_stream_load_si128(s++)); const __m128i v2 = _mm_stream_load_si128(s++);
_mm_stream_si128(d++, _mm_stream_load_si128(s++)); const __m128i v3 = _mm_stream_load_si128(s++);
_mm_stream_si128(d++, v1);
_mm_stream_si128(d++, v2);
_mm_stream_si128(d++, v3);
size -= 48; size -= 48;
wp += 48; wp += 48;
} }
if (size > 31) if (size > 31)
{ {
_mm_stream_si128(d++, _mm_stream_load_si128(s++)); const __m128i v1 = _mm_stream_load_si128(s++);
_mm_stream_si128(d++, _mm_stream_load_si128(s++)); const __m128i v2 = _mm_stream_load_si128(s++);
_mm_stream_si128(d++, v1);
_mm_stream_si128(d++, v2);
size -= 32; size -= 32;
wp += 32; wp += 32;
} }
if (size > 15) if (size > 15)
{ {
_mm_stream_si128(d++, _mm_stream_load_si128(s++)); const __m128i v1 = _mm_stream_load_si128(s++);
_mm_stream_si128(d++, v1);
size -= 16; size -= 16;
wp += 16; wp += 16;
} }