[x86] use a proper call/ret to obtain the current IP

See: https://blogs.msdn.microsoft.com/oldnewthing/20041216-00/?p=36973
This commit is contained in:
Geoffrey McRae 2018-05-19 16:16:01 +10:00
parent f63c8043af
commit cd6caea4b0

View File

@ -30,6 +30,15 @@ Place, Suite 330, Boston, MA 02111-1307 USA
#define OP(...) #__VA_ARGS__ "\n\t" #define OP(...) #__VA_ARGS__ "\n\t"
/*
 * get_pc: i386-only helper that returns its own return address in %eax.
 *
 * A `call get_pc` pushes the address of the instruction following the call
 * onto the stack; get_pc copies that value into %eax without popping it and
 * then returns normally, so the call/ret pair stays balanced. memcpySSE uses
 * the result as the base address for its computed jump into the block table.
 *
 * The routine takes no arguments and writes only %eax.
 *
 * NOTE(review): the symbol is exported with `.global`. If this file is a
 * header that ends up included by more than one translation unit, each of
 * them would emit its own definition of get_pc -- confirm this cannot cause
 * duplicate-symbol errors at link time.
 */
#ifdef __i386__
__asm__ (
/* export the symbol so the `call get_pc` in the inline asm can resolve it */
".global get_pc\n\t"
"get_pc:\n\t"
/* top of stack holds the return address pushed by the caller's `call` */
"mov (%esp), %eax \n\t"
/* return to the caller with the address left in %eax */
"ret"
);
#endif
inline static void memcpySSE(void *dst, const void * src, size_t length) inline static void memcpySSE(void *dst, const void * src, size_t length)
{ {
#if !defined(NATIVE_MEMCPY) && (defined(__x86_64__) || defined(__i386__)) #if !defined(NATIVE_MEMCPY) && (defined(__x86_64__) || defined(__i386__))
@ -37,12 +46,18 @@ inline static void memcpySSE(void *dst, const void * src, size_t length)
return; return;
#ifdef __x86_64__ #ifdef __x86_64__
void * end = dst + (length & ~0xFF); const void * end = dst + (length & ~0xFF);
size_t off = (15 - ((length & 0xFF) >> 4)); size_t off = (15 - ((length & 0xFF) >> 4));
off = (off < 8) ? off * 16 : 7 * 16 + (off - 7) * 10; off = (off < 8) ? off * 16 : 7 * 16 + (off - 7) * 10;
#else #else
void * end = dst + (length & ~0x7F); const void * end = dst + (length & ~0x7F);
size_t off = (7 - ((length & 0x7F) >> 4)) * 10; const size_t off = (7 - ((length & 0x7F) >> 4)) * 10;
#endif
#ifdef __x86_64__
#define REG "rax"
#else
#define REG "eax"
#endif #endif
__asm__ __volatile__ ( __asm__ __volatile__ (
@ -100,16 +115,14 @@ inline static void memcpySSE(void *dst, const void * src, size_t length)
// copy any remaining 16 byte blocks // copy any remaining 16 byte blocks
#ifdef __x86_64__ #ifdef __x86_64__
"leaq (%%rip), %[end]\n\t" "leaq (%%rip), %%rax\n\t"
"Offset_%=:\n\t"
#else #else
"call .+5 \n\t" "call get_pc\n\t"
"Offset_%=:\n\t"
"pop %[end] \n\t"
#endif #endif
"add $(BlockTable_%= - Offset_%=), %[end]\n\t" "Offset_%=:\n\t"
"add %[off],%[end] \n\t" "add $(BlockTable_%= - Offset_%=), %%" REG "\n\t"
"jmp *%[end] \n\t" "add %[off],%%" REG " \n\t"
"jmp *%%" REG " \n\t"
"BlockTable_%=:\n\t" "BlockTable_%=:\n\t"
#ifdef __x86_64__ #ifdef __x86_64__
@ -148,10 +161,11 @@ inline static void memcpySSE(void *dst, const void * src, size_t length)
"nop\n\t" "nop\n\t"
: [dst]"+r" (dst), : [dst]"+r" (dst),
[src]"+r" (src), [src]"+r" (src)
[end]"+r" (end) : [off]"r" (off),
: [off]"r" (off) [end]"r" (end)
: "xmm0", : REG,
"xmm0",
"xmm1", "xmm1",
"xmm2", "xmm2",
"xmm3", "xmm3",
@ -168,10 +182,15 @@ inline static void memcpySSE(void *dst, const void * src, size_t length)
"xmm13", "xmm13",
"xmm14", "xmm14",
"xmm15", "xmm15",
"rax",
#else
"eax",
#endif #endif
"memory" "memory"
); );
#undef REG
//copy any remaining bytes //copy any remaining bytes
for(size_t i = (length & 0xF); i; --i) for(size_t i = (length & 0xF); i; --i)
((uint8_t *)dst)[length - i] = ((uint8_t *)dst)[length - i] =