From 750cab83a3265c101cd38109c249679a6eacd381 Mon Sep 17 00:00:00 2001 From: Geoffrey McRae Date: Sun, 19 Nov 2023 00:18:48 +1100 Subject: [PATCH] Revert "[common] add AVX/AVX2 memory copy implementations" This reverts commit e61678ef1b898b8d443e6a4cb00fe912f52d4647. GCC only supports multi-versioning in C++ --- common/include/common/rects.h | 14 +++++- common/src/framebuffer.c | 84 +---------------------------------- common/src/rects.c | 39 ---------------- 3 files changed, 13 insertions(+), 124 deletions(-) diff --git a/common/include/common/rects.h b/common/include/common/rects.h index 738530dd..b238c711 100644 --- a/common/include/common/rects.h +++ b/common/include/common/rects.h @@ -27,8 +27,18 @@ #include "common/framebuffer.h" #include "common/types.h" -void rectCopyUnaligned(uint8_t * dst, const uint8_t * src, - int ystart, int yend, int dx, int dstPitch, int srcPitch, int width); +inline static void rectCopyUnaligned(uint8_t * dst, const uint8_t * src, + int ystart, int yend, int dx, int dstPitch, int srcPitch, int width) +{ + src += ystart * srcPitch + dx; + dst += ystart * dstPitch + dx; + for (int i = ystart; i < yend; ++i) + { + memcpy(dst, src, width); + src += srcPitch; + dst += dstPitch; + } +} void rectsBufferToFramebuffer(FrameDamageRect * rects, int count, int bpp, FrameBuffer * frame, int dstPitch, int height, diff --git a/common/src/framebuffer.c b/common/src/framebuffer.c index 8f6f379a..c743d5cf 100644 --- a/common/src/framebuffer.c +++ b/common/src/framebuffer.c @@ -29,7 +29,6 @@ #include #include #include -#include #include bool framebuffer_wait(const FrameBuffer * frame, size_t size) @@ -166,9 +165,7 @@ void framebuffer_prepare(FrameBuffer * frame) atomic_store_explicit(&frame->wp, 0, memory_order_release); } -__attribute__((target("default"))) -bool framebuffer_write(FrameBuffer * frame, const void * restrict src, - size_t size) +bool framebuffer_write(FrameBuffer * frame, const void * restrict src, size_t size) { #ifdef FB_PROFILE static RunningAvg ra = NULL; @@ -225,85 +222,6 @@ bool framebuffer_write(FrameBuffer * frame, const void * restrict src, return true; } -#if 1 -__attribute__((target("avx2"))) -bool framebuffer_write(FrameBuffer *frame, const void *restrict src, size_t size) -{ -#ifdef FB_PROFILE - static RunningAvg ra = NULL; - static int raCount = 0; - const uint64_t ts = microtime(); - if (!ra) - ra = runningavg_new(100); -#endif - - __m256i *restrict s = (__m256i *)src; - __m256i *restrict d = (__m256i *)frame->data; - size_t wp = 0; - - _mm_mfence(); - - /* copy in chunks */ - while (size > 127) - { - __m256i *_d = (__m256i *)d; - __m256i *_s = (__m256i *)s; - __m256i v1 = _mm256_stream_load_si256(_s + 0); - __m256i v2 = _mm256_stream_load_si256(_s + 1); - __m256i v3 = _mm256_stream_load_si256(_s + 2); - __m256i v4 = _mm256_stream_load_si256(_s + 3); - - _mm256_stream_si256(_d + 0, v1); - _mm256_stream_si256(_d + 1, v2); - _mm256_stream_si256(_d + 2, v3); - _mm256_stream_si256(_d + 3, v4); - - s += 4; - d += 4; - size -= 128; - wp += 128; - - if (wp % FB_CHUNK_SIZE == 0) - atomic_store_explicit(&frame->wp, wp, memory_order_release); - } - - if (size > 63) - { - __m256i *_d = (__m256i *)d; - __m256i *_s = (__m256i *)s; - __m256i v1 = _mm256_stream_load_si256(_s); - __m256i v2 = _mm256_stream_load_si256(_s + 1); - - _mm256_stream_si256(_d, v1); - _mm256_stream_si256(_d + 1, v2); - - s += 2; - d += 2; - size -= 64; - wp += 64; - - if (wp % FB_CHUNK_SIZE == 0) - atomic_store_explicit(&frame->wp, wp, memory_order_release); - } - - if (size) - { - memcpy(frame->data + wp, s, size); - wp += size; - } - - atomic_store_explicit(&frame->wp, wp, memory_order_release); - -#ifdef FB_PROFILE - runningavg_push(ra, microtime() - ts); - if (++raCount % 100 == 0) - DEBUG_INFO("Average Copy Time: %.2fμs", runningavg_calc(ra)); -#endif - - return true; -} -#endif - const uint8_t * framebuffer_get_buffer(const FrameBuffer * frame) { return frame->data; diff --git a/common/src/rects.c b/common/src/rects.c index 59be63f8..eaa645ae 100644 --- a/common/src/rects.c +++ b/common/src/rects.c @@ -22,45 +22,6 @@ #include "common/util.h" #include -#include - -__attribute__((target("default"))) -void rectCopyUnaligned(uint8_t * dst, const uint8_t * src, - int ystart, int yend, int dx, int dstPitch, int srcPitch, int width) -{ - src += ystart * srcPitch + dx; - dst += ystart * dstPitch + dx; - for (int i = ystart; i < yend; ++i) - { - memcpy(dst, src, width); - src += srcPitch; - dst += dstPitch; - } -} - -__attribute__((target("avx"))) -void rectCopyUnaligned(uint8_t * dst, const uint8_t * src, - int ystart, int yend, int dx, int dstPitch, int srcPitch, int width) -{ - src += ystart * srcPitch + dx; - dst += ystart * dstPitch + dx; - for (int i = ystart; i < yend; ++i) - { - int col; - for(col = 0; col <= width - 32; col += 32) - { - _mm_prefetch(src + col + 256, _MM_HINT_T0); - __m256i srcData = _mm256_loadu_si256((__m256i*)(src + col)); - _mm256_storeu_si256((__m256i*)(dst + col), srcData); - } - - for(; col < width; ++col) - dst[col] = src[col]; - - src += srcPitch; - dst += dstPitch; - } -} struct Corner {