mirror of
https://github.com/Alcaro/Flips.git
synced 2026-04-17 15:25:59 -05:00
Rewrite this one to Arlib API, and wipe out moremem
This commit is contained in:
parent
8d1780bb29
commit
0e31df85fd
4
Makefile
4
Makefile
|
|
@ -4,4 +4,8 @@ ARWUTF = 1
|
|||
|
||||
EXTRAOBJ += obj/divsufsort-c$(OBJSUFFIX).o
|
||||
SOURCES += patch/*.cpp
|
||||
|
||||
DOMAINS += LDSS
|
||||
SOURCES_LDSS := libdivsufsort-2.0.1/lib/divsufsort.c libdivsufsort-2.0.1/lib/sssort.c libdivsufsort-2.0.1/lib/trsort.c
|
||||
CFLAGS_LDSS := -Ilibdivsufsort-2.0.1/include -DHAVE_CONFIG_H
|
||||
include arlib/Makefile
|
||||
|
|
|
|||
|
|
@ -1,10 +1,4 @@
|
|||
#include "libbps.h"
|
||||
#include "arlib/crc32.h"
|
||||
#include "arlib/file.h"
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "patch.h"
|
||||
|
||||
//These two give minor performance penalties and will print some random stuff to stdout.
|
||||
//The former will verify the correctness of the output patch, the latter will print some performance data.
|
||||
|
|
@ -19,10 +13,6 @@
|
|||
//If it's something else, get a non-broken array calculator.
|
||||
#define EOF_IS_LAST false
|
||||
|
||||
#if defined(TEST_CORRECT) || defined(TEST_PERF)
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
//Algorithm description:
|
||||
//
|
||||
//This is heavily built upon suffix sorting; the implementation I use, libdivsufsort, claims
|
||||
|
|
@ -31,10 +21,14 @@
|
|||
//
|
||||
//The program starts by taking an equal amount of the source file and target file, concatenates that
|
||||
// with target first, and suffix sorts it.
|
||||
//It also calculates a reverse index, such that reverse[sorted[i]]==i.
|
||||
//
|
||||
//To find a match, it goes to reverse[outpos], and scans sorted[] up and down for the closest entry
|
||||
// that either starts before the current output position, or is somewhere in the source file.
|
||||
//To find a match, it finds the sortpos where sorted[sortpos]==outpos. This is a binary search; it's
|
||||
// called O(n) times, with O(log n) comparisons per iteration. Each comparison is potentially O(n),
|
||||
// but for each matched byte, another iteration is removed from the outer loop, so the comparisons
|
||||
// can be considered O(1) each; the sum is O(n log n).
|
||||
//
|
||||
//After it's found sortpos, it scans sorted[] up and down for the closest entry that either starts
|
||||
// before the current output position, or is somewhere in the source file.
|
||||
//As the source file comes last, the end-of-file marker (whose value is outside the range of a byte)
|
||||
// is guaranteed to not be in the way for a better match.
|
||||
//This is called O(n) times, and averages O(1) as at least 50% of sorted[] is in range. However, it
|
||||
|
|
@ -42,8 +36,8 @@
|
|||
//
|
||||
//It then checks which of the two candidates is superior, by checking how far they match each
|
||||
// other, and then checking if the upper one has another correct byte.
|
||||
//This is potentially O(n), but for each matched byte, another iteration is removed from the outer
|
||||
// loop, so the sum of all calls is O(n).
|
||||
//This is potentially O(n), but like the binary search, long matches reduce the outer loop. The sum
|
||||
// is O(n).
|
||||
//
|
||||
//When the program approaches the end of the sorted area, it re-sorts twice as much as last time.
|
||||
// This gives O(log n) calls to the suffix sorter.
|
||||
|
|
@ -53,15 +47,15 @@
|
|||
//
|
||||
//Many details were omitted from the above, but that's the basic setup.
|
||||
//
|
||||
//Thus, the program is O(max(n log n, n, n) = n log n) average and O(max(n log n, n^2, n) = n^2)
|
||||
// worst case.
|
||||
//Thus, the program is O(max(n log n, n log n, n, n)) = O(n log n) average and O(max(n log n, n log n,
|
||||
// n^2, n)) = O(n^2) worst case.
|
||||
//
|
||||
//I conclude that the task of finding, understanding and implementing a sub-O(n^2) algorithm for
|
||||
// delta patching is resolved.
|
||||
|
||||
|
||||
//Known cases where this function does not emit the optimal encoding:
|
||||
//If a match in the target file would extend further than target_search_size, it is often skipped.
|
||||
//If a match in the target file would extend further than target_search_size, it's cut off.
|
||||
// Penalty: O(log n), with extremely low constants (it'd require a >256B match to be exactly there).
|
||||
// Even for big files, the penalty is very likely to remain zero; even hitting double-digit bytes
|
||||
// would require a file designed exactly for that.
|
||||
|
|
@ -70,20 +64,23 @@
|
|||
//However, due to better heuristics and others' performance optimizations, this one still beats its
|
||||
// competitors.
|
||||
|
||||
//TODO: test multiple same-length matches
|
||||
// but only for lengths <= 64,
|
||||
|
||||
|
||||
//Possible optimizations:
|
||||
//divsufsort() takes approximately 2/3 of the total time. create_reverse_index() takes roughly a third of the remainder.
|
||||
//divsufsort() takes approximately 1/2 of the total time.
|
||||
//Each iteration takes four times as long as the previous one.
|
||||
//If each iteration takes 4 times as long as the previous one, then the last one takes 3/4 of the total time.
|
||||
//Since divsufsort+create_reverse_index doesn't depend on anything else, the last iteration can be split off to its own thread.
|
||||
//Since divsufsort doesn't depend on anything else, the last iteration can be split off to its own thread.
|
||||
//This would split it to
|
||||
//Search, non-final: 2/9 * 1/4 = 2/36
|
||||
//Search, final: 2/9 * 3/4 = 6/36
|
||||
//Sort+rev, non-final: 7/9 * 1/4 = 7/36
|
||||
//Sort+rev, final: 7/9 * 3/4 = 21/36
|
||||
//Search, non-final: 1/2 * 1/4 = 1/8
|
||||
//Search, final: 1/2 * 3/4 = 3/8
|
||||
//Sort+rev, non-final: 1/2 * 1/4 = 1/8
|
||||
//Sort+rev, final: 1/2 * 3/4 = 3/8
|
||||
//All non-final must be done sequentially. Both Sort Final and non-final must be done before Search Final can start.
|
||||
//This means the final time, if Sort Final is split off, is
|
||||
//max(7/36+2/36, 21/36) + 6/36 = 27/36 = 3/4
|
||||
//max(1/8+1/8, 3/8) + 3/8 = 6/8 = 3/4
|
||||
//of the original time.
|
||||
//Due to
|
||||
//- the considerable complexity costs (OpenMP doesn't seem able to represent the "insert a wait in
|
||||
|
|
@ -94,13 +91,13 @@
|
|||
// and that the small ones are not, as that'd starve the big one
|
||||
//I deem a possible 25% boost not worthwhile.
|
||||
|
||||
|
||||
//Both sorting algorithms claim O(1) memory use (in addition to the bytes and the output). In
|
||||
// addition to that, this algorithm uses (source.len+target.len)*(sizeof(uint8_t)+2*sizeof(off_t))
|
||||
// bytes of memory, plus the input and output files, plus the patch.
|
||||
//For most hardware, this is 9*(source.len+target.len), or 5*(source+target) for the slim one.
|
||||
//Both sorting algorithms claim O(1) memory use, in addition to the in/outputs. For most hardware,
|
||||
// this adds up to 5*(source.len+target.len) bytes.
|
||||
//If the output is stored to disk, that's all this algorithm needs as well.
|
||||
|
||||
|
||||
namespace patch { namespace bps {
|
||||
//TODO: HEAVY cleanups needed here
|
||||
#include "sais.cpp"
|
||||
template<typename sais_index_type>
|
||||
static void sufsort(sais_index_type* SA, const uint8_t* T, sais_index_type n) {
|
||||
|
|
@ -116,7 +113,7 @@ static void sufsort(sais_index_type* SA, const uint8_t* T, sais_index_type n) {
|
|||
//I'd prefer to let them allocate from an array I give it, but divsuf doesn't allow that, and there
|
||||
// are only half a dozen allocations per call anyways.
|
||||
|
||||
//This ends up in libdivsufsort if available, otherwise lite.
|
||||
//This ends up in libdivsufsort if available, otherwise sais.cpp.
|
||||
#include "divsufsort.h"
|
||||
static void sufsort(int32_t* SA, uint8_t* T, int32_t n)
|
||||
{
|
||||
|
|
@ -140,6 +137,18 @@ template<typename T> static T max(T a, T b) { return a<b ? b : a; }
|
|||
|
||||
|
||||
namespace {
|
||||
//class filecache {
|
||||
// file& f;
|
||||
// uint32_t crc32;
|
||||
// int bytes_used;
|
||||
// uint8_t bytes[65536];
|
||||
//
|
||||
// void append(const uint8_t * data, size_t len)
|
||||
// {
|
||||
//
|
||||
// }
|
||||
//};
|
||||
|
||||
struct bps_creator {
|
||||
uint8_t* out;
|
||||
size_t outlen;
|
||||
|
|
@ -206,7 +215,7 @@ struct bps_creator {
|
|||
|
||||
size_t numtargetread;
|
||||
|
||||
bps_creator(file* source, file* target, struct mem metadata)
|
||||
bps_creator(const file& source, const file& target, struct mem metadata)
|
||||
{
|
||||
outlen = 0;
|
||||
outbuflen = 128;
|
||||
|
|
@ -220,12 +229,10 @@ struct bps_creator {
|
|||
numtargetread = 0;
|
||||
|
||||
append((const uint8_t*)"BPS1", 4);
|
||||
appendnum(source->len);
|
||||
appendnum(target->len);
|
||||
appendnum(source.size());
|
||||
appendnum(target.size());
|
||||
appendnum(metadata.len);
|
||||
append(metadata.ptr, metadata.len);
|
||||
|
||||
setProgress(NULL, NULL);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -342,23 +349,7 @@ struct bps_creator {
|
|||
}
|
||||
|
||||
|
||||
bool (*prog_func)(void* userdata, size_t done, size_t total);
|
||||
void* prog_dat;
|
||||
|
||||
static bool prog_func_null(void* userdata, size_t done, size_t total) { return true; }
|
||||
|
||||
void setProgress(bool (*progress)(void* userdata, size_t done, size_t total), void* userdata)
|
||||
{
|
||||
if (!progress) progress = prog_func_null;
|
||||
|
||||
prog_func=progress;
|
||||
prog_dat=userdata;
|
||||
}
|
||||
|
||||
bool progress(size_t done, size_t total)
|
||||
{
|
||||
return prog_func(prog_dat, done, total);
|
||||
}
|
||||
function<bool(size_t done, size_t total)> progress;
|
||||
|
||||
|
||||
void finish(const uint8_t* source, const uint8_t* target)
|
||||
|
|
@ -369,9 +360,9 @@ struct bps_creator {
|
|||
puts("ERROR: patch creates wrong ROM size"),abort();
|
||||
#endif
|
||||
|
||||
appendnum32(crc32(source, sourcelen));
|
||||
appendnum32(crc32(target, targetlen));
|
||||
appendnum32(crc32(out, outlen));
|
||||
appendnum32(crc32(arrayview<byte>(source, sourcelen)));
|
||||
appendnum32(crc32(arrayview<byte>(target, targetlen)));
|
||||
appendnum32(crc32(arrayview<byte>(out, outlen)));
|
||||
}
|
||||
|
||||
struct mem getpatch()
|
||||
|
|
@ -545,10 +536,8 @@ static void create_buckets(const uint8_t* data, off_t* index, off_t len, off_t*
|
|||
}
|
||||
|
||||
template<typename off_t>
|
||||
static off_t find_index(off_t pos, const uint8_t* data, off_t datalen, const off_t* index, const off_t* reverse, off_t* buckets)
|
||||
static off_t find_index(off_t pos, const uint8_t* data, off_t datalen, const off_t* index, const off_t* buckets)
|
||||
{
|
||||
if (reverse) return reverse[pos];
|
||||
|
||||
//if (datalen<2) return 0;
|
||||
uint16_t bucket = read2(data+pos, datalen-pos);
|
||||
//printf("p=%i b=%i\n",pos,bucket);
|
||||
|
|
@ -654,29 +643,29 @@ off_t lerp(off_t x, off_t y, float frac)
|
|||
}
|
||||
|
||||
template<typename off_t>
|
||||
static bpserror bps_create_suf_core(file* source, file* target, bool moremem, struct bps_creator * out)
|
||||
static result create_suf_core(const file& source, const file& target, struct bps_creator * out)
|
||||
{
|
||||
#define error(which) do { err = which; goto error; } while(0)
|
||||
bpserror err;
|
||||
result err;
|
||||
|
||||
size_t realsourcelen = source->len;
|
||||
size_t realtargetlen = target->len;
|
||||
size_t realsourcelen = source.size();
|
||||
size_t realtargetlen = target.size();
|
||||
|
||||
size_t overflowtest = realsourcelen + realtargetlen;
|
||||
|
||||
//source+target length is bigger than size_t
|
||||
if (overflowtest < realsourcelen) return bps_too_big;
|
||||
if (overflowtest < realsourcelen) return e_too_big;
|
||||
|
||||
//source+target doesn't fit in unsigned off_t
|
||||
if ((size_t)(off_t)overflowtest != overflowtest) return bps_too_big;
|
||||
if ((size_t)(off_t)overflowtest != overflowtest) return e_too_big;
|
||||
|
||||
//source+target doesn't fit in signed off_t
|
||||
if ((off_t)overflowtest < 0) return bps_too_big;
|
||||
if ((off_t)overflowtest < 0) return e_too_big;
|
||||
|
||||
//the mallocs would overflow
|
||||
if (realsourcelen+realtargetlen >= SIZE_MAX/sizeof(off_t)) return bps_too_big;
|
||||
if (realsourcelen+realtargetlen >= SIZE_MAX/sizeof(off_t)) return e_too_big;
|
||||
|
||||
if (realsourcelen+realtargetlen >= out->maxsize()) return bps_too_big;
|
||||
if (realsourcelen+realtargetlen >= out->maxsize()) return e_too_big;
|
||||
|
||||
|
||||
off_t sourcelen = realsourcelen;
|
||||
|
|
@ -686,19 +675,14 @@ static bpserror bps_create_suf_core(file* source, file* target, bool moremem, st
|
|||
|
||||
off_t* sorted = (off_t*)malloc(sizeof(off_t)*(realsourcelen+realtargetlen));
|
||||
|
||||
off_t* sorted_inverse = NULL;
|
||||
if (moremem) sorted_inverse = (off_t*)malloc(sizeof(off_t)*(realsourcelen+realtargetlen));
|
||||
off_t* buckets = (off_t*)malloc(sizeof(off_t)*65537);
|
||||
|
||||
off_t* buckets = NULL;
|
||||
if (!sorted_inverse) buckets = (off_t*)malloc(sizeof(off_t)*65537);
|
||||
|
||||
if (!sorted || !mem_joined || (!sorted_inverse && !buckets))
|
||||
if (!sorted || !mem_joined || !buckets)
|
||||
{
|
||||
free(mem_joined);
|
||||
free(sorted);
|
||||
free(sorted_inverse);
|
||||
free(buckets);
|
||||
return bps_out_of_mem;
|
||||
return e_out_of_mem;
|
||||
}
|
||||
|
||||
//sortedsize is how much of the target file is sorted
|
||||
|
|
@ -720,35 +704,32 @@ static bpserror bps_create_suf_core(file* source, file* target, bool moremem, st
|
|||
reindex:
|
||||
|
||||
//this isn't an exact science
|
||||
const float percSort = sorted_inverse ? 0.67 : 0.50;
|
||||
const float percInv = sorted_inverse ? 0.11 : 0.10;
|
||||
//const float percFind = sorted_inverse ? 0.22 : 0.40; // unused
|
||||
const float percSort = 0.50;
|
||||
const float percBuck = 0.10;
|
||||
//const float percFind = 0.40; // unused
|
||||
|
||||
const size_t progPreSort = lerp(prevsortedsize, sortedsize, 0);
|
||||
const size_t progPreInv = lerp(prevsortedsize, sortedsize, percSort);
|
||||
const size_t progPreFind = lerp(prevsortedsize, sortedsize, percSort+percInv);
|
||||
const size_t progPreBuck = lerp(prevsortedsize, sortedsize, percSort);
|
||||
const size_t progPreFind = lerp(prevsortedsize, sortedsize, percSort+percBuck);
|
||||
|
||||
prevsortedsize = sortedsize;
|
||||
|
||||
if (!out->progress(progPreSort, targetlen)) error(bps_canceled);
|
||||
if (out->progress(progPreSort, targetlen)) error(e_canceled);
|
||||
|
||||
if (target->read(mem_joined, 0, sortedsize) < (size_t)sortedsize) error(bps_io);
|
||||
if (source->read(mem_joined+sortedsize, 0, sourcelen) < (size_t)sourcelen) error(bps_io);
|
||||
if (target.read(arrayvieww<byte>(mem_joined, sortedsize), 0) < (size_t)sortedsize) error(e_io);
|
||||
if (source.read(arrayvieww<byte>(mem_joined+sortedsize, sourcelen), 0) < (size_t)sourcelen) error(e_io);
|
||||
out->move_target(mem_joined);
|
||||
sufsort(sorted, mem_joined, sortedsize+sourcelen);
|
||||
|
||||
if (!out->progress(progPreInv, targetlen)) error(bps_canceled);
|
||||
if (out->progress(progPreBuck, targetlen)) error(e_canceled);
|
||||
|
||||
if (sorted_inverse)
|
||||
create_reverse_index(sorted, sorted_inverse, sortedsize+sourcelen);
|
||||
else
|
||||
create_buckets(mem_joined, sorted, sortedsize+sourcelen, buckets);
|
||||
create_buckets(mem_joined, sorted, sortedsize+sourcelen, buckets);
|
||||
|
||||
if (!out->progress(progPreFind, targetlen)) error(bps_canceled);
|
||||
if (out->progress(progPreFind, targetlen)) error(e_canceled);
|
||||
}
|
||||
|
||||
off_t matchlen = 0;
|
||||
off_t matchpos = adjust_match(find_index(outpos, mem_joined, sortedsize+sourcelen, sorted, sorted_inverse, buckets),
|
||||
off_t matchpos = adjust_match(find_index(outpos, mem_joined, sortedsize+sourcelen, sorted, buckets),
|
||||
mem_joined+outpos, sortedsize-outpos,
|
||||
mem_joined,sortedsize+sourcelen, outpos,sortedsize,
|
||||
sorted, sortedsize+sourcelen,
|
||||
|
|
@ -771,11 +752,10 @@ static bpserror bps_create_suf_core(file* source, file* target, bool moremem, st
|
|||
|
||||
out->finish(mem_joined+sortedsize, mem_joined);
|
||||
|
||||
err = bps_ok;
|
||||
err = e_ok;
|
||||
|
||||
error:
|
||||
free(buckets);
|
||||
free(sorted_inverse);
|
||||
free(sorted);
|
||||
free(mem_joined);
|
||||
|
||||
|
|
@ -783,38 +763,41 @@ error:
|
|||
}
|
||||
|
||||
|
||||
template<typename T> static bpserror bps_create_suf_pick(file* source, file* target, bool moremem, struct bps_creator * bps);
|
||||
template<> bpserror bps_create_suf_pick<uint32_t>(file* source, file* target, bool moremem, struct bps_creator * bps)
|
||||
template<typename T> static result create_suf_pick(const file& source, const file& target, struct bps_creator * bps);
|
||||
template<> result create_suf_pick<uint32_t>(const file& source, const file& target, struct bps_creator * bps)
|
||||
{
|
||||
return bps_create_suf_core<int32_t>(source, target, moremem, bps);
|
||||
return create_suf_core<int32_t>(source, target, bps);
|
||||
}
|
||||
template<> bpserror bps_create_suf_pick<uint64_t>(file* source, file* target, bool moremem, struct bps_creator * bps)
|
||||
template<> result create_suf_pick<uint64_t>(const file& source, const file& target, struct bps_creator * bps)
|
||||
{
|
||||
bpserror err = bps_create_suf_core<int32_t>(source, target, moremem, bps);
|
||||
if (err==bps_too_big) err = bps_create_suf_core<int64_t>(source, target, moremem, bps);
|
||||
result err = create_suf_core<int32_t>(source, target, bps);
|
||||
if (err==e_too_big) err = create_suf_core<int64_t>(source, target, bps);
|
||||
return err;
|
||||
}
|
||||
|
||||
//This one picks a function based on 32-bit integers if that fits. This halves memory use for common inputs.
|
||||
//It also handles some stuff related to the BPS headers and footers.
|
||||
extern "C"
|
||||
bpserror bps_create_delta(file* source, file* target, struct mem metadata, struct mem * patchmem,
|
||||
bool (*progress)(void* userdata, size_t done, size_t total), void* userdata, bool moremem)
|
||||
result create(const file& source, const file& target, const file& metadata, file& patch,
|
||||
function<bool(size_t done, size_t total)> progress)
|
||||
{
|
||||
bps_creator bps(source, target, metadata);
|
||||
bps.setProgress(progress, userdata);
|
||||
mem metamem = metadata.mmap();
|
||||
bps_creator bps(source, target, metamem);
|
||||
metadata.unmap(metamem.v());
|
||||
bps.progress = progress;
|
||||
|
||||
size_t maindata = bps.outlen;
|
||||
|
||||
//off_t must be signed
|
||||
bpserror err = bps_create_suf_pick<size_t>(source, target, moremem, &bps);
|
||||
if (err!=bps_ok) return err;
|
||||
result err = create_suf_pick<size_t>(source, target, &bps);
|
||||
if (err!=e_ok) return err;
|
||||
|
||||
*patchmem = bps.getpatch();
|
||||
mem patchmem = bps.getpatch();
|
||||
patch.write(patchmem.v());
|
||||
free(patchmem.ptr);
|
||||
|
||||
while ((patchmem->ptr[maindata]&0x80) == 0x00) maindata++;
|
||||
if (maindata==patchmem->len-12-1) return bps_identical;
|
||||
return bps_ok;
|
||||
while ((patchmem.ptr[maindata]&0x80) == 0x00) maindata++;
|
||||
if (maindata==patchmem.len-12-1) return e_identical;
|
||||
return e_ok;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -876,3 +859,4 @@ printf("%i/%i=%f\n",match_len_tot,match_len_n,(float)match_len_tot/match_len_n);
|
|||
#endif
|
||||
}
|
||||
#endif
|
||||
}}
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
#include "patch.h"
|
||||
|
||||
namespace patch { namespace bps {
|
||||
//TODO: HEAVY cleanups needed here
|
||||
static uint32_t read32(uint8_t * ptr)
|
||||
{
|
||||
uint32_t out;
|
||||
|
|
|
|||
|
|
@ -74,6 +74,7 @@ struct info {
|
|||
//Deprecated
|
||||
struct mem {
|
||||
mem() : ptr(NULL), len(0) {}
|
||||
mem(uint8_t* ptr, size_t len) : ptr(ptr), len(len) {}
|
||||
mem(arrayview<byte> v) : ptr((byte*)v.ptr()), len(v.size()) {}
|
||||
arrayvieww<byte> v() { return arrayvieww<byte>(ptr, len); }
|
||||
uint8_t * ptr;
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user