mirror of
				https://github.com/gnif/LookingGlass.git
				synced 2025-10-31 20:52:09 +00:00 
			
		
		
		
	[common] add AVX/AVX2 memory copy implementations
This commit is contained in:
		| @@ -27,18 +27,8 @@ | ||||
| #include "common/framebuffer.h" | ||||
| #include "common/types.h" | ||||
|  | ||||
| inline static void rectCopyUnaligned(uint8_t * dst, const uint8_t * src, | ||||
|     int ystart, int yend, int dx, int dstPitch, int srcPitch, int width) | ||||
| { | ||||
|   src += ystart * srcPitch + dx; | ||||
|   dst += ystart * dstPitch + dx; | ||||
|   for (int i = ystart; i < yend; ++i) | ||||
|   { | ||||
|     memcpy(dst, src, width); | ||||
|     src += srcPitch; | ||||
|     dst += dstPitch; | ||||
|  } | ||||
| } | ||||
| void rectCopyUnaligned(uint8_t * dst, const uint8_t * src, | ||||
|   int ystart, int yend, int dx, int dstPitch, int srcPitch, int width); | ||||
|  | ||||
| void rectsBufferToFramebuffer(FrameDamageRect * rects, int count, int bpp, | ||||
|   FrameBuffer * frame, int dstPitch, int height, | ||||
|   | ||||
| @@ -29,6 +29,7 @@ | ||||
| #include <string.h> | ||||
| #include <emmintrin.h> | ||||
| #include <smmintrin.h> | ||||
| #include <immintrin.h> | ||||
| #include <unistd.h> | ||||
|  | ||||
| bool framebuffer_wait(const FrameBuffer * frame, size_t size) | ||||
| @@ -165,7 +166,9 @@ void framebuffer_prepare(FrameBuffer * frame) | ||||
|   atomic_store_explicit(&frame->wp, 0, memory_order_release); | ||||
| } | ||||
|  | ||||
| bool framebuffer_write(FrameBuffer * frame, const void * restrict src, size_t size) | ||||
| __attribute__((target("default"))) | ||||
| bool framebuffer_write(FrameBuffer * frame, const void * restrict src, | ||||
|     size_t size) | ||||
| { | ||||
| #ifdef FB_PROFILE | ||||
|   static RunningAvg ra = NULL; | ||||
| @@ -222,6 +225,85 @@ bool framebuffer_write(FrameBuffer * frame, const void * restrict src, size_t si | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| #if 1 | ||||
| __attribute__((target("avx2"))) | ||||
| bool framebuffer_write(FrameBuffer *frame, const void *restrict src, size_t size) | ||||
| { | ||||
| #ifdef FB_PROFILE | ||||
|     static RunningAvg ra = NULL; | ||||
|     static int raCount = 0; | ||||
|     const uint64_t ts = microtime(); | ||||
|     if (!ra) | ||||
|         ra = runningavg_new(100); | ||||
| #endif | ||||
|  | ||||
|   __m256i *restrict s = (__m256i *)src; | ||||
|   __m256i *restrict d = (__m256i *)frame->data; | ||||
|   size_t wp = 0; | ||||
|  | ||||
|   _mm_mfence(); | ||||
|  | ||||
|   /* copy in chunks */ | ||||
|   while (size > 127) | ||||
|   { | ||||
|     __m256i *_d = (__m256i *)d; | ||||
|     __m256i *_s = (__m256i *)s; | ||||
|     __m256i v1 = _mm256_stream_load_si256(_s + 0); | ||||
|     __m256i v2 = _mm256_stream_load_si256(_s + 1); | ||||
|     __m256i v3 = _mm256_stream_load_si256(_s + 2); | ||||
|     __m256i v4 = _mm256_stream_load_si256(_s + 3); | ||||
|  | ||||
|     _mm256_stream_si256(_d + 0, v1); | ||||
|     _mm256_stream_si256(_d + 1, v2); | ||||
|     _mm256_stream_si256(_d + 2, v3); | ||||
|     _mm256_stream_si256(_d + 3, v4); | ||||
|  | ||||
|     s += 4; | ||||
|     d += 4; | ||||
|     size -= 128; | ||||
|     wp += 128; | ||||
|  | ||||
|     if (wp % FB_CHUNK_SIZE == 0) | ||||
|       atomic_store_explicit(&frame->wp, wp, memory_order_release); | ||||
|   } | ||||
|  | ||||
|   if (size > 63) | ||||
|   { | ||||
|     __m256i *_d = (__m256i *)d; | ||||
|     __m256i *_s = (__m256i *)s; | ||||
|     __m256i v1 = _mm256_stream_load_si256(_s); | ||||
|     __m256i v2 = _mm256_stream_load_si256(_s + 1); | ||||
|  | ||||
|     _mm256_stream_si256(_d, v1); | ||||
|     _mm256_stream_si256(_d + 1, v2); | ||||
|  | ||||
|     s += 2; | ||||
|     d += 2; | ||||
|     size -= 64; | ||||
|     wp += 64; | ||||
|  | ||||
|     if (wp % FB_CHUNK_SIZE == 0) | ||||
|       atomic_store_explicit(&frame->wp, wp, memory_order_release); | ||||
|   } | ||||
|  | ||||
|   if (size) | ||||
|   { | ||||
|     memcpy(frame->data + wp, s, size); | ||||
|     wp += size; | ||||
|   } | ||||
|  | ||||
|   atomic_store_explicit(&frame->wp, wp, memory_order_release); | ||||
|  | ||||
| #ifdef FB_PROFILE | ||||
|   runningavg_push(ra, microtime() - ts); | ||||
|   if (++raCount % 100 == 0) | ||||
|     DEBUG_INFO("Average Copy Time: %.2fμs", runningavg_calc(ra)); | ||||
| #endif | ||||
|  | ||||
|   return true; | ||||
| } | ||||
| #endif | ||||
|  | ||||
| const uint8_t * framebuffer_get_buffer(const FrameBuffer * frame) | ||||
| { | ||||
|   return frame->data; | ||||
|   | ||||
| @@ -22,6 +22,45 @@ | ||||
| #include "common/util.h" | ||||
|  | ||||
| #include <stdlib.h> | ||||
| #include <immintrin.h> | ||||
|  | ||||
| __attribute__((target("default"))) | ||||
| void rectCopyUnaligned(uint8_t * dst, const uint8_t * src, | ||||
|     int ystart, int yend, int dx, int dstPitch, int srcPitch, int width) | ||||
| { | ||||
|   src += ystart * srcPitch + dx; | ||||
|   dst += ystart * dstPitch + dx; | ||||
|   for (int i = ystart; i < yend; ++i) | ||||
|   { | ||||
|     memcpy(dst, src, width); | ||||
|     src += srcPitch; | ||||
|     dst += dstPitch; | ||||
|   } | ||||
| } | ||||
|  | ||||
| __attribute__((target("avx"))) | ||||
| void rectCopyUnaligned(uint8_t * dst, const uint8_t * src, | ||||
|     int ystart, int yend, int dx, int dstPitch, int srcPitch, int width) | ||||
| { | ||||
|   src += ystart * srcPitch + dx; | ||||
|   dst += ystart * dstPitch + dx; | ||||
|   for (int i = ystart; i < yend; ++i) | ||||
|   { | ||||
|     int col; | ||||
|     for(col = 0; col <= width - 32; col += 32) | ||||
|     { | ||||
|       _mm_prefetch(src + col + 256, _MM_HINT_T0); | ||||
|       __m256i srcData = _mm256_loadu_si256((__m256i*)(src + col)); | ||||
|       _mm256_storeu_si256((__m256i*)(dst + col), srcData); | ||||
|     } | ||||
|  | ||||
|     for(; col < width; ++col) | ||||
|       dst[col] = src[col]; | ||||
|  | ||||
|     src += srcPitch; | ||||
|     dst += dstPitch; | ||||
|  } | ||||
| } | ||||
|  | ||||
| struct Corner | ||||
| { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Geoffrey McRae
					Geoffrey McRae