#include "Manifest.h"

// Standard headers used directly by this translation unit.
#include <cstddef>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <map>
#include <regex>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

// NOTE(review): this file was recovered from a copy in which every `<...>`
// template argument list had been stripped. The argument lists below were
// reconstructed from how the values are used in this file; confirm each one
// against the declarations in Manifest.h before merging.

/// Process-wide cache of parsed header constants, keyed by (header file, prefix).
/// It is pre-seeded with the pseudo-header "bool" (empty prefix), which maps
/// "false" -> 0 and "true" -> 1.
// NOTE(review): key/value types are reconstructed (see note above) — confirm.
std::map<std::pair<fs::path, std::string>, std::map<std::string, int>> HeaderCache {
    {{"bool", ""}, {
        {"false", 0},
        {"true", 1}
    }}
};

/// Splits a width string of the form "N" or "N.M" into its two integer parts.
///
/// @param width  text to parse
/// @param bytes  receives N (the part before the '.', or the whole string)
/// @param bits   receives M, or 0 when there is no '.'
/// @throws std::invalid_argument / std::out_of_range from std::stoi when a
///         part is not a parsable integer.
void ColumnSpec::translate_width(std::string &width, int &bytes, int &bits) {
    // Keep the position in the string's own size type so that the comparison
    // with npos is exact instead of relying on int <-> size_t wrap-around.
    const std::string::size_type dotpos = width.find('.');
    bytes = std::stoi(width.substr(0, dotpos));
    bits = (dotpos == std::string::npos) ? 0 : std::stoi(width.substr(dotpos + 1));
}

/// Shared constructor body: stores the width code and bit count and, when a
/// header file is given, loads the named constants for (headerfile, prefix).
///
/// Constants are taken from HeaderCache when present. Otherwise the header is
/// scanned line by line for `#define <prefix>NAME <digits>` and the result is
/// stored in HeaderCache. A header that cannot be opened yields (and caches)
/// an empty constant table.
///
/// @throws std::out_of_range from std::stoi if a matched value overflows int.
void ColumnSpec::_init(int _width, const fs::path &headerfile, const std::string &prefix, int _nbits) {
    width = _width;
    nbits = _nbits;
    if (headerfile.empty()) {
        return;
    }

    // Cache hit: reuse the previously parsed table.
    const auto cached = HeaderCache.find({headerfile, prefix});
    if (cached != HeaderCache.end()) {
        constants = cached->second;
        return;
    }

    // Cache miss: parse the header. The prefix is spliced into the pattern
    // verbatim, so regex metacharacters in it are interpreted as regex syntax.
    std::ifstream handle(headerfile);
    const std::regex pattern("#define +(" + prefix + "\\w+) +(\\d+)$");
    std::string line;
    std::smatch results;
    while (std::getline(handle, line)) {
        if (std::regex_match(line, results, pattern)) {
            constants[results[1]] = std::stoi(results[2]);
        }
    }
    HeaderCache[{headerfile, prefix}] = constants;
}

/// Builds a column spec from an already-encoded width code.
ColumnSpec::ColumnSpec(int _width, const fs::path &headerfile, const std::string &prefix, int _nbits) {
    _init(_width, headerfile, prefix, _nbits);
}

/// Builds a column spec from a textual width description.
///
/// Accepted forms, as handled below:
///   - "skip"            -> width code `skip`
///   - "padN" / "padN.M" -> width code `pad | N`, bit count M
///   - "sW"   / "sW.M"   -> width code -(W / 8), bit count M
///   - "uW"   / "uW.M"   -> width code +(W / 8), bit count M
/// where W must be one of 8, 16, 32, 64.
///
/// @throws std::invalid_argument for any other leading character or width W
///         (numeric parse failures from std::stoi propagate as well).
ColumnSpec::ColumnSpec(std::string &_width, const fs::path &headerfile, const std::string &prefix) {
    if (_width == "skip") {
        _init(skip, headerfile, prefix);
        return;
    }

    int nbytes;
    int nbit;

    if (_width.substr(0, 3) == "pad") {
        std::string pad_width = _width.substr(3);
        translate_width(pad_width, nbytes, nbit);
        _init(pad | nbytes, headerfile, prefix, nbit);
        return;
    }

    int sign;
    switch (_width[0]) {
        case 's':
            sign = -1;
            break;
        case 'u':
            sign = 1;
            break;
        default:
            throw std::invalid_argument("width param must be a valid fixed-width type spec");
    }

    std::string type_width = _width.substr(1);
    // For s/u types the number parsed here is a width in bits (8/16/32/64);
    // it is converted to a byte count below.
    translate_width(type_width, nbytes, nbit);
    switch (nbytes) {
        case 8:
        case 16:
        case 32:
        case 64:
            sign *= nbytes / 8;
            break;
        default:
            throw std::invalid_argument("width param must be a valid fixed-width type spec");
    }
    _init(sign, headerfile, prefix, nbit);
}

/// Constructs a manifest by reading `filename`; see Manifest::read.
// NOTE(review): element type of header_dirs is reconstructed — confirm.
Manifest::Manifest(const fs::path &filename, std::vector<fs::path> &header_dirs) {
    read(filename, header_dirs);
}

/// Reads a manifest file, one column per line, in the form
/// `name:width[:header[:prefix]]`.
///
/// Trailing whitespace is trimmed from each line and blank lines are skipped.
/// The header name "bool" selects the built-in bool table; any other header
/// name is resolved against each entry of `header_dirs` in order, using the
/// first directory in which the file exists. If none matches, no header is
/// used.
///
/// @throws std::invalid_argument on a duplicate column name or invalid width.
/// @throws std::out_of_range when a non-blank line has no width field.
// NOTE(review): element type of header_dirs is reconstructed — confirm.
void Manifest::read(const fs::path &filename, std::vector<fs::path> &header_dirs) {
    std::ifstream strm(filename);
    std::string line;
    while (std::getline(strm, line)) {
        // find_last_not_of returns npos for an all-whitespace line; npos + 1
        // wraps to 0, so such a line trims to the empty string.
        line = line.substr(0, line.find_last_not_of(" \t\r\n") + 1);
        if (line.empty()) {
            // A blank line carries no column; previously it failed with
            // std::out_of_range on the missing width field.
            continue;
        }

        // Tokenize with colons
        std::vector<std::string> tokens;
        std::string::size_type start = 0;
        std::string::size_type colon;
        do {
            colon = line.find(':', start);
            // When colon is npos the length wraps to a huge value, which
            // substr clamps to "rest of the line".
            tokens.emplace_back(line.substr(start, colon - start));
            start = colon + 1;
        } while (colon != std::string::npos);

        const std::string name = tokens.at(0);
        if (mapping.count(name) != 0) {
            throw std::invalid_argument("duplicate column in manifest: " + name);
        }

        std::string width = tokens.at(1);
        fs::path headerfile;
        std::string prefix;
        if (tokens.size() > 2) {
            if (tokens.size() > 3) {
                prefix = tokens[3];
            }
            const fs::path header_name = tokens[2];
            if (header_name == "bool") {
                headerfile = "bool";
            } else {
                for (auto &root : header_dirs) {
                    if (fs::exists(root / header_name)) {
                        headerfile = root / header_name;
                        break;
                    }
                }
            }
        }

        mapping[name] = ColumnSpec(width, headerfile, prefix);
        colnames.emplace_back(name);
    }
}

/// Returns the spec for column `name`. Uses the map's operator[], so looking
/// up an unknown name inserts a default-constructed ColumnSpec.
ColumnSpec &Manifest::operator[](const std::string &name) {
    return mapping[name];
}

/// Computes the packed size in bytes of one row described by this manifest.
///
/// Columns are visited in manifest order and skipped columns contribute
/// nothing. The masking arithmetic assumes each column alignment, and
/// `alignment` itself, is a power of two.
///
/// @param alignment  if non-zero, the total is rounded up to a multiple of it
// NOTE(review): a bit-field run that ends the manifest without filling its
// storage unit adds nothing here, while to_bytes advances past it by the last
// column's size — confirm this is intended.
size_t Manifest::size(const int alignment) const {
    size_t ret = 0;
    size_t bitpos = 0;
    for (const auto &name : colnames) {
        const auto &spec = mapping.at(name);
        if (spec.is_skipped()) {
            continue;
        }
        const size_t bytect = spec.size();
        const size_t aln = spec.get_alignment();
        const size_t bitct = spec.num_bits();

        // A whole-byte column following a partial bit-field closes that field.
        if (bitpos != 0 && bitct == 0) {
            bitpos = 0;
            ret++;
        }
        // Round up to the column's alignment when not inside a bit-field.
        if (bitpos == 0 && aln != 1) {
            ret += aln - 1;
            ret &= ~(aln - 1);
        }
        if (bitct != 0) {
            // Bit-field: only account for the storage unit once it is full.
            bitpos += bitct;
            if (bitpos >= 8 * bytect) {
                ret += bytect;
                bitpos -= 8 * bytect;
            }
        } else {
            ret += bytect;
        }
    }

    // Word align
    if (alignment != 0) {
        ret += alignment - 1;
        ret &= ~(alignment - 1);
    }
    return ret;
}

/// Binds a converter to a manifest and a CSV file and sizes the row buffer to
/// one packed row (manifest.size()). All cursors start at zero.
BufferedRowConverter::BufferedRowConverter(Manifest &_manifest, CsvFile &_csvFile, unsigned char _padval):
    manifest(_manifest), csvFile(_csvFile), padval(_padval)
{
    buffer.resize(manifest.size());
    carriage_return();
    byte_cursor = 0;
    bit_cursor = 0;
    row_cursor = 0;
}

/// Reads one packed row from `strm` into the converter's buffer, decodes it
/// into the current CSV row, then post-increments the converter.
/// The stream state is not checked after the read.
std::ifstream &operator>>(std::ifstream &strm, BufferedRowConverter &cvtr) {
    strm.read(reinterpret_cast<char *>(cvtr.buffer.data()),
              static_cast<std::streamsize>(cvtr.buffer.size()));
    cvtr.to_strings();
    cvtr++;
    return strm;
}

/// Encodes the current CSV row into the converter's buffer, writes the buffer
/// to `strm`, then post-increments the converter.
std::ofstream &operator<<(std::ofstream &strm, BufferedRowConverter &cvtr) {
    cvtr.to_bytes();
    strm.write(reinterpret_cast<const char *>(cvtr.buffer.data()),
               static_cast<std::streamsize>(cvtr.buffer.size()));
    cvtr++;
    return strm;
}

/// Decodes the buffer into the CSV row at `row_cursor`.
///
/// Skipped columns are filled from `spec[row_cursor]`; padding columns are
/// consumed from the buffer but produce no CSV cell (a warning is printed to
/// stderr if the padding is non-zero); every other column is converted with
/// `spec[value]`.
///
/// @throws std::out_of_range if `row_cursor` is past the last CSV row, or if
///         the row has fewer cells than the manifest has output columns.
void BufferedRowConverter::to_strings() {
    if (row_cursor >= csvFile.nrow()) {
        throw std::out_of_range("invalid row idx");
    }
    auto &row = csvFile[row_cursor];
    size_t column_i = 0;
    for (const auto &colname : manifest.colnames) {
        const ColumnSpec &spec = manifest[colname];
        if (spec.is_skipped()) {
            row.at(column_i++) = spec[row_cursor];
            continue;
        }

        // Align on the column's alignment, matching to_bytes and
        // Manifest::size, so reader and writer agree on the row layout
        // (this previously passed spec.size()).
        align(spec.get_alignment(), spec.num_bits());
        const unsigned long long val = get(spec.type(), spec.num_bits());
        if (spec.is_padding()) {
            if (val != 0) {
                std::cerr << "csv2bin warning: nonzero data in padding field may result in data loss" << std::endl;
            }
        } else {
            row.at(column_i++) = spec[val];
        }
        advance(spec.size(), spec.num_bits());
    }
}

/// Encodes the CSV row at `row_cursor` into the buffer.
///
/// Skipped columns consume a CSV cell but write nothing; padding columns
/// write zero; every other column writes `spec[cell]`. Whatever remains of
/// the buffer after the last column is filled with `padval`.
///
/// @throws std::out_of_range if `row_cursor` is past the last CSV row, or if
///         the row has fewer cells than the manifest expects.
void BufferedRowConverter::to_bytes() {
    if (row_cursor >= csvFile.nrow()) {
        throw std::out_of_range("invalid row idx");
    }
    auto &row = csvFile[row_cursor];
    size_t column_i = 0;
    for (const auto &colname : manifest.colnames) {
        const ColumnSpec &spec = manifest[colname];
        if (spec.is_skipped()) {
            column_i++;
            continue;
        }

        align(spec.get_alignment(), spec.num_bits());
        unsigned long long val = 0;
        if (!spec.is_padding()) {
            val = spec[row.at(column_i++)];
        }
        set(val, spec.type(), spec.num_bits());
        advance(spec.size(), spec.num_bits());
    }

    // A row ending inside a bit-field still occupies that field's storage;
    // step past it using the last column's size.
    if (bit_cursor != 0) {
        const ColumnSpec &spec = manifest[*manifest.colnames.crbegin()];
        byte_cursor += spec.size();
    }
    while (byte_cursor < buffer.size()) {
        buffer[byte_cursor++] = padval;
    }
}