pokeplatinum/tools/csv2bin/Manifest.cpp
2023-07-27 20:22:38 +02:00

266 lines
7.6 KiB
C++

#include "Manifest.h"
// Cache of constant tables, keyed by (header file path, name prefix). Each
// value maps a constant's name to its integer value. ColumnSpec::_init looks
// a key up here before scanning a header and stores every freshly parsed
// table back under the same key.
//
// The cache starts with one built-in entry: the pseudo-header "bool" with an
// empty prefix, mapping "false" -> 0 and "true" -> 1. Manifest::read selects
// it when a manifest line names "bool" as its header.
std::map<std::pair<fs::path, std::string>, std::map<std::string, int>> HeaderCache {
{{"bool", ""}, {
{"false", 0},
{"true", 1}
}}
};
// Splits a width spec of the form "<int>" or "<int>.<int>" into its two parts.
//
//   width: text to parse; it is only read, never modified.
//   bytes: receives the integer before the '.', or the whole string when
//          there is no '.'.
//   bits:  receives the integer after the '.', or 0 when there is no '.'.
//
// Malformed numbers are reported by std::stoi, which throws
// std::invalid_argument or std::out_of_range.
void ColumnSpec::translate_width(std::string &width, int &bytes, int &bits) {
    // Keep the position in the string's own size_type. Narrowing it to int
    // only compared equal to npos by way of an implicit conversion back to an
    // unsigned type.
    const std::string::size_type dotpos = width.find('.');

    // substr clamps the count, so npos here means "the whole string".
    bytes = std::stoi(width.substr(0, dotpos));
    bits = (dotpos == std::string::npos) ? 0 : std::stoi(width.substr(dotpos + 1));
}
// Stores the column's width code and bit count and, when a header file is
// given, loads the name -> value constants for that (header, prefix) pair.
//
//   _width:     value stored in `width`.
//   headerfile: header to scan for constants; nothing is loaded when empty.
//   prefix:     only `#define` names starting with this text are collected.
//               It is spliced into a regular expression unescaped, so regex
//               metacharacters in it are interpreted as such.
//   _nbits:     value stored in `nbits`.
//
// Tables are shared through HeaderCache, so each (header, prefix) pair is
// parsed at most once. A header that cannot be opened yields (and caches) an
// empty table. std::stoi may throw std::out_of_range for oversized values.
void ColumnSpec::_init(int _width, const fs::path &headerfile, const std::string &prefix, int _nbits) {
    width = _width;
    nbits = _nbits;
    if (headerfile.empty()) {
        return;
    }

    // A plain lookup instead of at() + catch: a cache miss is the normal
    // first-use path, not an exceptional one.
    const auto key = std::make_pair(headerfile, prefix);
    const auto cached = HeaderCache.find(key);
    if (cached != HeaderCache.end()) {
        constants = cached->second;
        return;
    }

    std::ifstream handle(headerfile);
    const std::regex pattern("#define +(" + prefix + "\\w+) +(\\d+)$");
    std::string line;
    std::smatch results;
    while (std::getline(handle, line)) {
        // regex_match must consume the whole line, so a trailing '\r' left by
        // CRLF line endings would make every #define fail to match.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        if (std::regex_match(line, results, pattern)) {
            constants[results[1].str()] = std::stoi(results[2].str());
        }
    }
    HeaderCache[key] = constants;
}
// Builds a column spec from an already-decoded width code.
// All four arguments are forwarded unchanged to _init, which stores the width
// and bit count and loads constants when `headerfile` is non-empty.
ColumnSpec::ColumnSpec(int _width, const fs::path& headerfile, const std::string &prefix, int _nbits) {
_init(_width, headerfile, prefix, _nbits);
}
// Builds a column spec from a textual width spec.
//
// Accepted forms of `_width`:
//   "skip"           -> width code `skip`.
//   "pad<N>[.<B>]"   -> width code `pad | N`, bit count B.
//   "s<W>[.<B>]" or
//   "u<W>[.<B>]"     -> W must be 8, 16, 32 or 64; the width code is W / 8,
//                       negated for 's' and positive for 'u' (presumably
//                       signed vs. unsigned), with bit count B.
// B is 0 when the ".<B>" part is absent.
//
//   headerfile, prefix: forwarded to _init to load the column's constants.
//
// Throws std::invalid_argument when the spec matches none of the forms above;
// std::stoi may also throw while parsing the numbers.
ColumnSpec::ColumnSpec(std::string &_width, const fs::path &headerfile, const std::string &prefix) {
    int nbit;
    int nbytes;

    if (_width == "skip") {
        _init(skip, headerfile, prefix);
        return;
    }

    if (_width.substr(0, 3) == "pad") {
        // Locals are deliberately not named with a double underscore: such
        // identifiers are reserved for the implementation.
        std::string pad_size = _width.substr(3);
        translate_width(pad_size, nbytes, nbit);
        _init(pad | nbytes, headerfile, prefix, nbit);
        return;
    }

    // The first character selects the sign of the resulting width code.
    int sign;
    switch (_width[0]) {
    case 's':
        sign = -1;
        break;
    case 'u':
        sign = 1;
        break;
    default:
        throw std::invalid_argument("width param must be a valid fixed-width type spec");
    }

    std::string type_size = _width.substr(1);
    translate_width(type_size, nbytes, nbit);

    // At this point `nbytes` still holds the type width as written in the
    // spec (8/16/32/64); dividing by 8 turns it into the stored width code.
    switch (nbytes) {
    case 8:
    case 16:
    case 32:
    case 64:
        sign *= nbytes / 8;
        break;
    default:
        throw std::invalid_argument("width param must be a valid fixed-width type spec");
    }

    _init(sign, headerfile, prefix, nbit);
}
// Constructs a manifest by immediately loading `filename` via read().
// `header_dirs` is passed through to read(), which searches those directories
// for the header files named in the manifest.
Manifest::Manifest(const fs::path &filename, std::vector<fs::path> &header_dirs) {
read(filename, header_dirs);
}
// Loads column definitions from a manifest file, one column per line.
//
// Each line has the colon-separated form
//     name:width[:header[:prefix]]
// where `width` is a ColumnSpec width spec, `header` names a file to look up
// in `header_dirs` (or the literal "bool" for the built-in table), and
// `prefix` restricts which constants are taken from that header.
//
//   filename:    manifest file to read.
//   header_dirs: directories searched, in order, for `header`; the first
//                directory containing it wins. If none contains it the column
//                is created without a header file.
//
// Trailing whitespace is stripped from every line and blank lines are
// skipped. Columns are appended to `colnames` in file order.
//
// Throws std::invalid_argument on a duplicate column name (or an invalid
// width spec, from ColumnSpec) and std::out_of_range when a non-blank line
// has no width field.
void Manifest::read(const fs::path &filename, std::vector<fs::path> &header_dirs) {
    std::ifstream strm(filename);
    std::string line;
    while (std::getline(strm, line)) {
        // find_last_not_of returns npos for an all-whitespace line; npos + 1
        // wraps to 0, so such a line is erased completely.
        line.erase(line.find_last_not_of(" \t\r\n") + 1);
        if (line.empty()) {
            // A blank line describes no column; without this it would fail
            // below with an uninformative out_of_range from tokens.at(1).
            continue;
        }

        // Tokenize with colons. An empty field (e.g. "a::b") is kept as an
        // empty token.
        std::vector<std::string> tokens;
        std::size_t start = 0;
        while (true) {
            const std::size_t colon = line.find(':', start);
            tokens.emplace_back(line.substr(start, colon - start));
            if (colon == std::string::npos) {
                break;
            }
            start = colon + 1;
        }

        const std::string name = tokens.at(0);
        if (mapping.count(name) != 0) {
            throw std::invalid_argument("duplicate column in manifest: " + name);
        }

        std::string width = tokens.at(1);
        fs::path headerfile;
        std::string prefix;
        if (tokens.size() > 2) {
            if (tokens.size() > 3) {
                prefix = tokens[3];
            }
            fs::path header_name = tokens[2];
            if (header_name == "bool") {
                // Pseudo-header backed by the pre-seeded HeaderCache entry.
                headerfile = "bool";
            } else {
                for (auto &root : header_dirs) {
                    if (fs::exists(root / header_name)) {
                        headerfile = root / header_name;
                        break;
                    }
                }
            }
        }

        mapping[name] = ColumnSpec(width, headerfile, prefix);
        colnames.emplace_back(name);
    }
}
// Returns a mutable reference to the ColumnSpec stored under `name`.
// NOTE(review): this goes through mapping's operator[]; if `mapping` is a
// std::map-style container, an unknown name silently inserts a
// default-constructed ColumnSpec instead of failing — confirm that is intended.
ColumnSpec &Manifest::operator[](const std::string &name) {
return mapping[name];
}
size_t Manifest::size(const int alignment) const {
size_t ret = 0;
size_t bitpos = 0;
for (const auto & name : colnames) {
auto &spec = mapping.at(name);
if (spec.is_skipped()) {
continue;
}
size_t bytect = spec.size();
size_t aln = spec.get_alignment();
size_t bitct = spec.num_bits();
if (bitpos != 0 && bitct == 0) {
bitpos = 0;
ret++;
}
if (bitpos == 0 && aln != 1) {
ret += aln - 1;
ret &= ~(aln - 1);
}
if (bitct != 0) {
bitpos += bitct;
if (bitpos >= 8 * bytect) {
ret += bytect;
bitpos -= 8 * bytect;
}
} else {
ret += bytect;
}
}
// Word align
if (alignment != 0) {
ret += alignment - 1;
ret &= ~(alignment - 1);
}
return ret;
}
// Binds the converter to a manifest and a CSV file and prepares its row buffer.
//
//   _manifest: column layout; its size() determines the buffer length.
//   _csvFile:  CSV rows that to_strings() fills in and to_bytes() reads from.
//   _padval:   byte value to_bytes() uses to fill the unused tail of the buffer.
BufferedRowConverter::BufferedRowConverter(Manifest &_manifest, CsvFile &_csvFile, unsigned char _padval):
manifest(_manifest),
csvFile(_csvFile),
padval(_padval)
{
buffer.resize(manifest.size());
// NOTE(review): carriage_return() is defined elsewhere; presumably it resets
// per-row cursor state. The cursors are explicitly zeroed after it either way.
carriage_return();
byte_cursor = 0;
bit_cursor = 0;
row_cursor = 0;
}
// Reads one record from `strm` into the converter's buffer, decodes it into
// the current CSV row via to_strings(), and advances the converter with its
// postfix ++.
//
// If the stream cannot supply a full buffer (the read leaves the stream in a
// failed state), nothing is decoded and the converter is not advanced, so a
// stale or partial buffer is never turned into a CSV row. Callers can test
// the returned stream as usual.
//
// Returns `strm`. Exceptions thrown by to_strings() propagate.
std::ifstream &operator>>(std::ifstream &strm, BufferedRowConverter &cvtr) {
    strm.read(reinterpret_cast<char *>(cvtr.buffer.data()), cvtr.buffer.size());
    if (!strm) {
        return strm;
    }
    cvtr.to_strings();
    cvtr++;
    return strm;
}
// Encodes the converter's current CSV row into its buffer via to_bytes(),
// writes the whole buffer to `strm`, and advances the converter with its
// postfix ++. Returns `strm`.
std::ofstream &operator<<(std::ofstream &strm, BufferedRowConverter &cvtr) {
    // The buffer must be filled before its bytes are handed to the stream.
    cvtr.to_bytes();

    const char *const raw = reinterpret_cast<const char *>(cvtr.buffer.data());
    strm.write(raw, cvtr.buffer.size());

    cvtr++;
    return strm;
}
// Decodes the record currently held in the buffer into the CSV row at
// `row_cursor`, one manifest column at a time.
//
// - Skipped columns occupy no bytes; their cell is set to spec[row_cursor].
// - Padding columns are consumed from the buffer but produce no cell; a
//   warning is printed to stderr when the padding bytes are not zero.
// - Every other column is read with get() and translated through the
//   spec's operator[] into its cell text.
//
// Throws std::out_of_range when `row_cursor` is past the last CSV row, or
// (from row.at) when the row has fewer cells than the manifest needs.
void BufferedRowConverter::to_strings() {
    if (row_cursor >= csvFile.nrow()) {
        throw std::out_of_range("invalid row idx");
    }

    std::vector<std::string> &row = csvFile[row_cursor];
    size_t column_i = 0;

    // Iterate by reference; copying each column name per field is wasted work.
    for (const auto &colname : manifest.colnames) {
        const ColumnSpec &spec = manifest[colname];
        if (spec.is_skipped()) {
            row.at(column_i++) = spec[row_cursor];
            continue;
        }

        // Align with the column's alignment, exactly as to_bytes() and
        // Manifest::size() do. Aligning with the column's size instead lets
        // the decode layout drift from the encode layout whenever the two
        // values differ.
        align(spec.get_alignment(), spec.num_bits());

        unsigned long long val = get(spec.type(), spec.num_bits());
        if (spec.is_padding()) {
            if (val != 0) {
                std::cerr << "csv2bin warning: nonzero data in padding field may result in data loss" << std::endl;
            }
        } else {
            row.at(column_i++) = spec[val];
        }

        advance(spec.size(), spec.num_bits());
    }
}
// Encodes the CSV row at `row_cursor` into the buffer, one manifest column
// at a time, then fills the rest of the buffer with `padval`.
//
// - Skipped columns consume a CSV cell but write nothing.
// - Padding columns write zero and consume no CSV cell.
// - Every other column translates its cell text through the spec's
//   operator[] and writes the result with set().
//
// Throws std::out_of_range when `row_cursor` is past the last CSV row, or
// (from row.at) when the row has fewer cells than the manifest needs.
void BufferedRowConverter::to_bytes() {
    if (row_cursor >= csvFile.nrow()) {
        throw std::out_of_range("invalid row idx");
    }

    std::vector<std::string> &row = csvFile[row_cursor];
    size_t column_i = 0;

    // Iterate by reference; copying each column name per field is wasted work.
    for (const auto &colname : manifest.colnames) {
        const ColumnSpec &spec = manifest[colname];
        if (spec.is_skipped()) {
            column_i++;
            continue;
        }

        align(spec.get_alignment(), spec.num_bits());

        unsigned long long val;
        if (spec.is_padding()) {
            val = 0;
        } else {
            val = spec[row.at(column_i++)];
        }

        set(val, spec.type(), spec.num_bits());
        advance(spec.size(), spec.num_bits());
    }

    // A bit run left open by the final columns has not moved the byte cursor
    // yet; step over it using the last column's size. bit_cursor is non-zero
    // only after at least one column was processed, so colnames is not empty.
    if (bit_cursor != 0) {
        const ColumnSpec &spec = manifest[manifest.colnames.back()];
        byte_cursor += spec.size();
    }

    // Fill whatever remains of the buffer with the pad value.
    while (byte_cursor < buffer.size()) {
        buffer[byte_cursor++] = padval;
    }
}