mirror of
https://github.com/gnif/LookingGlass.git
synced 2024-11-21 21:17:19 +00:00
Revert "[common] add AVX/AVX2 memory copy implementations"
This reverts commit e61678ef1b
.
GCC only supports multi-versioning in C++
This commit is contained in:
parent
e61678ef1b
commit
750cab83a3
@ -27,8 +27,18 @@
|
|||||||
#include "common/framebuffer.h"
|
#include "common/framebuffer.h"
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
|
|
||||||
void rectCopyUnaligned(uint8_t * dst, const uint8_t * src,
|
inline static void rectCopyUnaligned(uint8_t * dst, const uint8_t * src,
|
||||||
int ystart, int yend, int dx, int dstPitch, int srcPitch, int width);
|
int ystart, int yend, int dx, int dstPitch, int srcPitch, int width)
|
||||||
|
{
|
||||||
|
src += ystart * srcPitch + dx;
|
||||||
|
dst += ystart * dstPitch + dx;
|
||||||
|
for (int i = ystart; i < yend; ++i)
|
||||||
|
{
|
||||||
|
memcpy(dst, src, width);
|
||||||
|
src += srcPitch;
|
||||||
|
dst += dstPitch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void rectsBufferToFramebuffer(FrameDamageRect * rects, int count, int bpp,
|
void rectsBufferToFramebuffer(FrameDamageRect * rects, int count, int bpp,
|
||||||
FrameBuffer * frame, int dstPitch, int height,
|
FrameBuffer * frame, int dstPitch, int height,
|
||||||
|
@ -29,7 +29,6 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
#include <smmintrin.h>
|
#include <smmintrin.h>
|
||||||
#include <immintrin.h>
|
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
bool framebuffer_wait(const FrameBuffer * frame, size_t size)
|
bool framebuffer_wait(const FrameBuffer * frame, size_t size)
|
||||||
@ -166,9 +165,7 @@ void framebuffer_prepare(FrameBuffer * frame)
|
|||||||
atomic_store_explicit(&frame->wp, 0, memory_order_release);
|
atomic_store_explicit(&frame->wp, 0, memory_order_release);
|
||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((target("default")))
|
bool framebuffer_write(FrameBuffer * frame, const void * restrict src, size_t size)
|
||||||
bool framebuffer_write(FrameBuffer * frame, const void * restrict src,
|
|
||||||
size_t size)
|
|
||||||
{
|
{
|
||||||
#ifdef FB_PROFILE
|
#ifdef FB_PROFILE
|
||||||
static RunningAvg ra = NULL;
|
static RunningAvg ra = NULL;
|
||||||
@ -225,85 +222,6 @@ bool framebuffer_write(FrameBuffer * frame, const void * restrict src,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 1
|
|
||||||
__attribute__((target("avx2")))
|
|
||||||
bool framebuffer_write(FrameBuffer *frame, const void *restrict src, size_t size)
|
|
||||||
{
|
|
||||||
#ifdef FB_PROFILE
|
|
||||||
static RunningAvg ra = NULL;
|
|
||||||
static int raCount = 0;
|
|
||||||
const uint64_t ts = microtime();
|
|
||||||
if (!ra)
|
|
||||||
ra = runningavg_new(100);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
__m256i *restrict s = (__m256i *)src;
|
|
||||||
__m256i *restrict d = (__m256i *)frame->data;
|
|
||||||
size_t wp = 0;
|
|
||||||
|
|
||||||
_mm_mfence();
|
|
||||||
|
|
||||||
/* copy in chunks */
|
|
||||||
while (size > 127)
|
|
||||||
{
|
|
||||||
__m256i *_d = (__m256i *)d;
|
|
||||||
__m256i *_s = (__m256i *)s;
|
|
||||||
__m256i v1 = _mm256_stream_load_si256(_s + 0);
|
|
||||||
__m256i v2 = _mm256_stream_load_si256(_s + 1);
|
|
||||||
__m256i v3 = _mm256_stream_load_si256(_s + 2);
|
|
||||||
__m256i v4 = _mm256_stream_load_si256(_s + 3);
|
|
||||||
|
|
||||||
_mm256_stream_si256(_d + 0, v1);
|
|
||||||
_mm256_stream_si256(_d + 1, v2);
|
|
||||||
_mm256_stream_si256(_d + 2, v3);
|
|
||||||
_mm256_stream_si256(_d + 3, v4);
|
|
||||||
|
|
||||||
s += 4;
|
|
||||||
d += 4;
|
|
||||||
size -= 128;
|
|
||||||
wp += 128;
|
|
||||||
|
|
||||||
if (wp % FB_CHUNK_SIZE == 0)
|
|
||||||
atomic_store_explicit(&frame->wp, wp, memory_order_release);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (size > 63)
|
|
||||||
{
|
|
||||||
__m256i *_d = (__m256i *)d;
|
|
||||||
__m256i *_s = (__m256i *)s;
|
|
||||||
__m256i v1 = _mm256_stream_load_si256(_s);
|
|
||||||
__m256i v2 = _mm256_stream_load_si256(_s + 1);
|
|
||||||
|
|
||||||
_mm256_stream_si256(_d, v1);
|
|
||||||
_mm256_stream_si256(_d + 1, v2);
|
|
||||||
|
|
||||||
s += 2;
|
|
||||||
d += 2;
|
|
||||||
size -= 64;
|
|
||||||
wp += 64;
|
|
||||||
|
|
||||||
if (wp % FB_CHUNK_SIZE == 0)
|
|
||||||
atomic_store_explicit(&frame->wp, wp, memory_order_release);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (size)
|
|
||||||
{
|
|
||||||
memcpy(frame->data + wp, s, size);
|
|
||||||
wp += size;
|
|
||||||
}
|
|
||||||
|
|
||||||
atomic_store_explicit(&frame->wp, wp, memory_order_release);
|
|
||||||
|
|
||||||
#ifdef FB_PROFILE
|
|
||||||
runningavg_push(ra, microtime() - ts);
|
|
||||||
if (++raCount % 100 == 0)
|
|
||||||
DEBUG_INFO("Average Copy Time: %.2fμs", runningavg_calc(ra));
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
const uint8_t * framebuffer_get_buffer(const FrameBuffer * frame)
|
const uint8_t * framebuffer_get_buffer(const FrameBuffer * frame)
|
||||||
{
|
{
|
||||||
return frame->data;
|
return frame->data;
|
||||||
|
@ -22,45 +22,6 @@
|
|||||||
#include "common/util.h"
|
#include "common/util.h"
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <immintrin.h>
|
|
||||||
|
|
||||||
__attribute__((target("default")))
|
|
||||||
void rectCopyUnaligned(uint8_t * dst, const uint8_t * src,
|
|
||||||
int ystart, int yend, int dx, int dstPitch, int srcPitch, int width)
|
|
||||||
{
|
|
||||||
src += ystart * srcPitch + dx;
|
|
||||||
dst += ystart * dstPitch + dx;
|
|
||||||
for (int i = ystart; i < yend; ++i)
|
|
||||||
{
|
|
||||||
memcpy(dst, src, width);
|
|
||||||
src += srcPitch;
|
|
||||||
dst += dstPitch;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
__attribute__((target("avx")))
|
|
||||||
void rectCopyUnaligned(uint8_t * dst, const uint8_t * src,
|
|
||||||
int ystart, int yend, int dx, int dstPitch, int srcPitch, int width)
|
|
||||||
{
|
|
||||||
src += ystart * srcPitch + dx;
|
|
||||||
dst += ystart * dstPitch + dx;
|
|
||||||
for (int i = ystart; i < yend; ++i)
|
|
||||||
{
|
|
||||||
int col;
|
|
||||||
for(col = 0; col <= width - 32; col += 32)
|
|
||||||
{
|
|
||||||
_mm_prefetch(src + col + 256, _MM_HINT_T0);
|
|
||||||
__m256i srcData = _mm256_loadu_si256((__m256i*)(src + col));
|
|
||||||
_mm256_storeu_si256((__m256i*)(dst + col), srcData);
|
|
||||||
}
|
|
||||||
|
|
||||||
for(; col < width; ++col)
|
|
||||||
dst[col] = src[col];
|
|
||||||
|
|
||||||
src += srcPitch;
|
|
||||||
dst += dstPitch;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct Corner
|
struct Corner
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user