More optimizations

This commit is contained in:
Alcaro 2016-12-25 20:12:44 +01:00
parent d8e4ec5fb7
commit bee5465b02
4 changed files with 89 additions and 25 deletions

View File

@ -308,6 +308,8 @@ class null_t_impl {};
#ifdef _WIN32
void* memmem(const void * haystack, size_t haystacklen, const void * needle, size_t needlelen);
#endif
//Returns the offset of the first byte that differs between a and b, or 'len' if the first 'len' bytes are all equal.
size_t memcmp_d(const void * a, const void * b, size_t len);
//msvc:

58
arlib/memcmp_d.cpp Normal file
View File

@ -0,0 +1,58 @@
#include "global.h"
#include <string.h>
//Returns the number of leading bytes that are identical in a and b, i.e. the offset of the
// first differing byte, or len if the first len bytes all match.
//Both buffers are only read, never written, and must be readable for len bytes.
//Long inputs whose pointers share the same alignment are compared one machine word at a time;
// everything else falls back to a plain byte loop.
size_t memcmp_d(const void * a, const void * b, size_t len)
{
	//the alignment math and the word loads must use the same type, so both are uintptr_t
	const size_t wordsize = sizeof(uintptr_t);
	const size_t alignmask = wordsize-1;
	const uint8_t* ab = (const uint8_t*)a;
	const uint8_t* bb = (const uint8_t*)b;
	
	//word compares need both pointers to reach a word boundary at the same offset,
	// and aren't worth the setup for short inputs
	if ((((uintptr_t)ab^(uintptr_t)bb)&alignmask) || len<wordsize*2)
	{
		size_t i;
		for (i=0;i<len && ab[i]==bb[i];i++) {}
		return i;
	}
	
	//bytes before the first word boundary; always less than wordsize, and therefore less than len
	//(0-x rather than -x, to avoid unary minus on an unsigned value)
	const size_t align = (0-(uintptr_t)ab) & alignmask;
	size_t i;
	for (i=0;i<align;i++)
	{
		if (ab[i]!=bb[i]) return i;
	}
	
	//end of the last whole word inside the buffers
	const size_t wordend = align + (len-align)/wordsize*wordsize;
	while (i<wordend)
	{
		//memcpy rather than dereferencing a casted pointer: reading byte buffers through a wider
		// integer type violates strict aliasing. Optimizing compilers emit a single load for this.
		uintptr_t aw;
		uintptr_t bw;
		memcpy(&aw, ab+i, sizeof(aw));
		memcpy(&bw, bb+i, sizeof(bw));
		if (aw!=bw) break;
		i += wordsize;
	}
	
	//locate the exact differing byte inside the mismatching word, and handle the unaligned tail
	while (i<len && ab[i]==bb[i]) i++;
	return i;
}
//size_t memcmp_d(const void * a, const void * b, size_t len)
//{
//const uint8_t* ab = (uint8_t*)a;
//const uint8_t* bb = (uint8_t*)b;
//
//size_t i;
//for (i=0;i<len && ab[i]==bb[i];i++) {}
//return i;
//}
//size_t memcmp_d(const void * a, const void * b, size_t len)
//{
//size_t r1 = memcmp_dfast(a, b, len);
//size_t r2 = memcmp_dslow(a, b, len);
//if (r1!=r2)
//{
//size_t r3 = memcmp_dfast(a, b, len);
//printf("%zu:%zu:%zu\n",r1,r2,r3);
//abort();
//}
//return r2;
//}

View File

@ -92,11 +92,12 @@ exit:
//There are no known cases where LIPS wins over this.
result create(array<byte> source, arrayview<byte> target, array<byte>& patchmem)
result create(array<byte> sourcea, arrayview<byte> target, array<byte>& patchmem)
{
int truesourcelen=source.size();
int truesourcelen=sourcea.size();
int targetlen=target.size();
source.resize(target.size());
sourcea.resize(target.size());
arrayview<byte> source = sourcea;
//const unsigned char * source=sourcemem.ptr();
//const unsigned char * target=targetmem.ptr();
@ -118,7 +119,9 @@ result create(array<byte> source, arrayview<byte> target, array<byte>& patchmem)
while (offset<targetlen)
{
//skip unchanged bytes
while (offset<targetlen && source[offset]==target[offset]) offset++;
offset += memcmp_d(source.slice(offset, targetlen-offset).ptr(),
target.slice(offset, targetlen-offset).ptr(),
targetlen-offset);
//how many bytes to edit
int thislen=0;
@ -215,7 +218,7 @@ result create(array<byte> source, arrayview<byte> target, array<byte>& patchmem)
//don't copy unchanged bytes at the end of a block
if (offset+thislen!=targetlen)
{
while (target[offset+thislen-1]==source[offset+thislen-1])
while (thislen>1 && target[offset+thislen-1]==source[offset+thislen-1])
{
thislen--;
}

View File

@ -131,8 +131,8 @@ static void simpletests()
testcall(createtest(one0, one1, base+record+1, 21));
testcall(createtest(seq256, seq128, base+trunc, 23));
testcall(createtest(seq128, seq256, base+record+128, 153));
testcall(createtest(empty, seq256nul4, base+record+255+4, 282));
testcall(createtest(empty, seq256nul5, base+record+255+5, 283)); // strange how this one is bigger
testcall(createtest(empty, seq256nul4, base+record+255+6, 282)); // mistuned IPS heuristics?
testcall(createtest(empty, seq256nul5, base+record+255+6, 283)); // strange how this one is bigger
testcall(createtest(empty, seq256nul6, base+record+255+6, 282)); // guess the heuristics don't like EOF
testcall(createtest(empty, seq256nul7, base+record+255+record+1, 282));
testcall(createtest(empty, seq256b4, base+record+255+4, 282));
@ -155,7 +155,7 @@ test("IPS")
testips=true;
testbps=false;
//simpletests();
simpletests();
}
test("BPS")
@ -163,13 +163,13 @@ test("BPS")
testips=false;
testbps=true;
//simpletests();
simpletests();
}
test("the big ones")
{
testips=true;
//testbps=true;
testbps=true;
testbps=false;
array<byte> smw = file::read("patch/test/smw.sfc");
@ -187,21 +187,22 @@ test("the big ones")
assert_eq(smwhack.size(), 4194304);
testcall(createtest(smw, smwhack, 3302746, 2077386));
//array<byte> sm64hack;
//r = bps::apply(sm64_bps, sm64, sm64hack);
//assert_eq(r, e_ok);
//assert_eq(sm64hack.size(), 50331648);
//testcall(createtest(sm64, sm64hack, -1, 6788133));
//this is the only UPS test, UPS is pretty much an easter egg in Flips
//array<byte> dlhack;
//r = ups::apply(dl_ups, dl, dlhack);
//assert_eq(r, e_ok);
//assert_eq(dlhack.size(), 3145728);
//array<byte> dl2;
//r = ups::apply(dl_ups, dlhack, dl2);
//assert_eq(r, e_ok);
//assert(dl == dl2);
//testcall(createtest(dl, dlhack, 852134, 817190));
array<byte> dlhack;
r = ups::apply(dl_ups, dl, dlhack);
assert_eq(r, e_ok);
assert_eq(dlhack.size(), 3145728);
array<byte> dl2;
r = ups::apply(dl_ups, dlhack, dl2);
assert_eq(r, e_ok);
assert(dl == dl2);
testcall(createtest(dl, dlhack, 852124, 817190));
array<byte> sm64hack;
r = bps::apply(sm64_bps, sm64, sm64hack);
assert_eq(r, e_ok);
assert_eq(sm64hack.size(), 50331648);
testbps=false; // too slow
testcall(createtest(sm64, sm64hack, -1, 6788133));
}
}