diff --git a/.gitignore b/.gitignore index b92c391471..26b6ef123b 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,7 @@ /subprojects/meson-*/ /subprojects/nitrorom/ /subprojects/metroskrew/ +/subprojects/yyjson-*/ /subprojects/.wraplock # Legacy mwrap compiler diff --git a/subprojects/packagefiles/yyjson_patch/always-native.patch b/subprojects/packagefiles/yyjson_patch/always-native.patch new file mode 100644 index 0000000000..9d85d856cd --- /dev/null +++ b/subprojects/packagefiles/yyjson_patch/always-native.patch @@ -0,0 +1,12 @@ +diff --git a/meson.build b/meson.build +index c16d925..e67cbaf 100644 +--- a/meson.build ++++ b/meson.build +@@ -32,6 +32,7 @@ + c_args: [c_args], + include_directories: yyjson_inc, + version: meson.project_version(), ++ native: true, + ) + + yyjson_dep = declare_dependency( diff --git a/subprojects/packagefiles/yyjson_patch/arm-f64_pow10_table-unused.patch b/subprojects/packagefiles/yyjson_patch/arm-f64_pow10_table-unused.patch new file mode 100644 index 0000000000..39e72949da --- /dev/null +++ b/subprojects/packagefiles/yyjson_patch/arm-f64_pow10_table-unused.patch @@ -0,0 +1,21 @@ +diff --git a/src/yyjson.c b/src/yyjson.c +index 837997c..5bd56e6 100644 +--- a/src/yyjson.c 2026-03-16 07:30:08 ++++ b/src/yyjson.c 2026-03-16 07:30:18 +@@ -1189,12 +1189,16 @@ + + /** Maximum pow10 exponent that can be represented exactly as a float64. */ + #define F64_POW10_MAX_EXACT_EXP 22 ++ ++#if YYJSON_DOUBLE_MATH_CORRECT + + /** Cached pow10 table. */ + static const f64 f64_pow10_table[F64_POW10_MAX_EXACT_EXP + 1] = { + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, + 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22 + }; ++ ++#endif + + /** Maximum pow10 exponent that can be represented exactly as a uint64. 
*/ + #define U64_POW10_MAX_EXACT_EXP 19 diff --git a/subprojects/packagefiles/yyjson_patch/feat-value-spans.patch b/subprojects/packagefiles/yyjson_patch/feat-value-spans.patch new file mode 100644 index 0000000000..70e6d4685a --- /dev/null +++ b/subprojects/packagefiles/yyjson_patch/feat-value-spans.patch @@ -0,0 +1,1029 @@ +diff --git a/src/yyjson.c b/src/yyjson.c +index c16d925..e67cbaf 100644 +--- a/src/yyjson.c ++++ b/src/yyjson.c +@@ -18,6 +18,9 @@ + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. ++ ++ Additional changes to add per-value byte-spans made by ++ Copyright (c) 2025 + *============================================================================*/ + + #include "yyjson.h" +@@ -3101,10 +3104,12 @@ static_inline bool has_rflag(yyjson_read_flag flg, yyjson_read_flag chk, + *============================================================================*/ + + /** Read `true` literal, `*ptr[0]` should be `t`. */ +-static_inline bool read_true(u8 **ptr, yyjson_val *val) { ++static_inline bool read_true(u8 **ptr, yyjson_val *val, u8 *beg) { + u8 *cur = *ptr; + if (likely(byte_match_4(cur, "true"))) { + val->tag = YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_TRUE; ++ val->beg = cur - beg; ++ val->end = val->beg + 3; + *ptr = cur + 4; + return true; + } +@@ -3112,10 +3117,12 @@ static_inline bool read_true(u8 **ptr, yyjson_val *val) { + } + + /** Read `false` literal, `*ptr[0]` should be `f`. 
*/ +-static_inline bool read_false(u8 **ptr, yyjson_val *val) { ++static_inline bool read_false(u8 **ptr, yyjson_val *val, u8 *beg) { + u8 *cur = *ptr; + if (likely(byte_match_4(cur + 1, "alse"))) { + val->tag = YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_FALSE; ++ val->beg = cur - beg; ++ val->end = val->beg + 4; + *ptr = cur + 5; + return true; + } +@@ -3123,10 +3130,12 @@ static_inline bool read_false(u8 **ptr, yyjson_val *val) { + } + + /** Read `null` literal, `*ptr[0]` should be `n`. */ +-static_inline bool read_null(u8 **ptr, yyjson_val *val) { ++static_inline bool read_null(u8 **ptr, yyjson_val *val, u8 *beg) { + u8 *cur = *ptr; + if (likely(byte_match_4(cur, "null"))) { + val->tag = YYJSON_TYPE_NULL; ++ val->beg = cur - beg; ++ val->end = val->beg + 3; /* inclusive end: index of the final 'l', as in read_true */ + *ptr = cur + 4; + return true; + } +@@ -3134,7 +3143,7 @@ static_inline bool read_null(u8 **ptr, yyjson_val *val) { + } + + /** Read `Inf` or `Infinity` literal (ignoring case). */ +-static_inline bool read_inf(u8 **ptr, u8 **pre, ++static_inline bool read_inf(u8 **ptr, u8 **pre, u8 *beg, + yyjson_read_flag flg, yyjson_val *val) { + u8 *hdr = *ptr; + u8 *cur = *ptr; +@@ -3163,9 +3172,13 @@ static_inline bool read_inf(u8 **ptr, u8 **pre, + *pre = cur; /* save end position for current raw string */ + val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW; + val->uni.str = (const char *)hdr; ++ val->beg = hdr - beg; ++ val->end = cur - beg - 1; + } else { + val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; + val->uni.u64 = f64_bits_inf(sign); ++ val->beg = hdr - beg; ++ val->end = cur - beg - 1; + } + return true; + } +@@ -3173,7 +3186,7 @@ static_inline bool read_inf(u8 **ptr, u8 **pre, + } + + /** Read `NaN` literal (ignoring case).
*/ +-static_inline bool read_nan(u8 **ptr, u8 **pre, ++static_inline bool read_nan(u8 **ptr, u8 **pre, u8 *beg, + yyjson_read_flag flg, yyjson_val *val) { + u8 *hdr = *ptr; + u8 *cur = *ptr; +@@ -3191,9 +3204,13 @@ static_inline bool read_nan(u8 **ptr, u8 **pre, + *pre = cur; /* save end position for current raw string */ + val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW; + val->uni.str = (const char *)hdr; ++ val->beg = hdr - beg; ++ val->end = cur - beg - 1; + } else { + val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; + val->uni.u64 = f64_bits_nan(sign); ++ val->beg = hdr - beg; ++ val->end = cur - beg - 1; + } + return true; + } +@@ -3201,15 +3218,15 @@ static_inline bool read_nan(u8 **ptr, u8 **pre, + } + + /** Read `Inf`, `Infinity` or `NaN` literal (ignoring case). */ +-static_inline bool read_inf_or_nan(u8 **ptr, u8 **pre, ++static_inline bool read_inf_or_nan(u8 **ptr, u8 **pre, u8 *beg, + yyjson_read_flag flg, yyjson_val *val) { +- if (read_inf(ptr, pre, flg, val)) return true; +- if (read_nan(ptr, pre, flg, val)) return true; ++ if (read_inf(ptr, pre, beg, flg, val)) return true; ++ if (read_nan(ptr, pre, beg, flg, val)) return true; + return false; + } + + /** Read a JSON number as raw string. 
*/ +-static_noinline bool read_num_raw(u8 **ptr, u8 **pre, yyjson_read_flag flg, ++static_noinline bool read_num_raw(u8 **ptr, u8 **pre, u8 *beg, yyjson_read_flag flg, + yyjson_val *val, const char **msg) { + #define return_err(_pos, _msg) do { \ + *msg = _msg; *end = _pos; return false; \ +@@ -3218,6 +3235,8 @@ static_noinline bool read_num_raw(u8 **ptr, u8 **pre, yyjson_read_flag flg, + #define return_raw() do { \ + val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW; \ + val->uni.str = (const char *)hdr; \ ++ val->beg = hdr - beg; \ ++ val->end = cur - beg - 1; \ + **pre = '\0'; *pre = cur; *end = cur; return true; \ + } while (false) + +@@ -3240,7 +3259,7 @@ static_noinline bool read_num_raw(u8 **ptr, u8 **pre, yyjson_read_flag flg, + } + } + if (has_allow(INF_AND_NAN)) { +- if (read_inf_or_nan(ptr, pre, flg, val)) return true; ++ if (read_inf_or_nan(ptr, pre, beg, flg, val)) return true; + } + return_err(cur, "no digit after sign"); + } +@@ -3293,7 +3312,7 @@ read_double: + } + + /** Read a hex number. */ +-static_noinline bool read_num_hex(u8 **ptr, u8 **pre, yyjson_read_flag flg, ++static_noinline bool read_num_hex(u8 **ptr, u8 **pre, u8 *beg, yyjson_read_flag flg, + yyjson_val *val, const char **msg) { + u8 *hdr = *ptr; + u8 *cur = *ptr; +@@ -3331,6 +3350,8 @@ static_noinline bool read_num_hex(u8 **ptr, u8 **pre, yyjson_read_flag flg, + **pre = '\0'; + val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW; + val->uni.str = (const char *)hdr; ++ val->beg = hdr - beg; ++ val->end = cur - beg - 1; + *pre = cur; *end = cur; + return true; + } +@@ -3338,6 +3359,8 @@ static_noinline bool read_num_hex(u8 **ptr, u8 **pre, yyjson_read_flag flg, + + val->tag = YYJSON_TYPE_NUM | (u64)((u8)sign << 3); + val->uni.u64 = (u64)(sign ? 
(u64)(~(sig) + 1) : (u64)(sig)); ++ val->beg = hdr - beg; ++ val->end = cur + i - beg - 1; + *end = cur + i; + return true; + } +@@ -3813,7 +3836,7 @@ static_inline u64 diy_fp_to_ieee_raw(diy_fp fp) { + number is infinite, the return value is based on flag. + 3. This function (with inline attribute) may generate a lot of instructions. + */ +-static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, ++static_inline bool read_num(u8 **ptr, u8 **pre, u8 *beg, yyjson_read_flag flg, + yyjson_val *val, const char **msg) { + #define return_err(_pos, _msg) do { \ + *msg = _msg; \ +@@ -3824,24 +3847,32 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, + #define return_0() do { \ + val->tag = YYJSON_TYPE_NUM | (u8)((u8)sign << 3); \ + val->uni.u64 = 0; \ ++ val->beg = hdr - beg; \ ++ val->end = cur - beg - 1; \ + *end = cur; return true; \ + } while (false) + + #define return_i64(_v) do { \ + val->tag = YYJSON_TYPE_NUM | (u8)((u8)sign << 3); \ + val->uni.u64 = (u64)(sign ? (u64)(~(_v) + 1) : (u64)(_v)); \ ++ val->beg = hdr - beg; \ ++ val->end = cur - beg - 1; \ + *end = cur; return true; \ + } while (false) + + #define return_f64(_v) do { \ + val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; \ + val->uni.f64 = sign ? 
-(f64)(_v) : (f64)(_v); \ ++ val->beg = hdr - beg; \ ++ val->end = cur - beg - 1; \ + *end = cur; return true; \ + } while (false) + + #define return_f64_bin(_v) do { \ + val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; \ + val->uni.u64 = ((u64)sign << 63) | (u64)(_v); \ ++ val->beg = hdr - beg; \ ++ val->end = cur - beg - 1; \ + *end = cur; return true; \ + } while (false) + +@@ -3855,6 +3886,8 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, + **pre = '\0'; /* add null-terminator for previous raw string */ \ + val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW; \ + val->uni.str = (const char *)hdr; \ ++ val->beg = hdr - beg; \ ++ val->end = cur - beg - 1; \ + *pre = cur; *end = cur; return true; \ + } while (false) + +@@ -3878,7 +3911,7 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, + + /* read number as raw string if has `YYJSON_READ_NUMBER_AS_RAW` flag */ + if (has_flg(NUMBER_AS_RAW)) { +- return read_num_raw(ptr, pre, flg, val, msg); ++ return read_num_raw(ptr, pre, beg, flg, val, msg); + } + + sign = (*hdr == '-'); +@@ -3897,14 +3930,14 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, + } + } + if (has_allow(INF_AND_NAN)) { +- if (read_inf_or_nan(ptr, pre, flg, val)) return true; ++ if (read_inf_or_nan(ptr, pre, beg, flg, val)) return true; + } + return_err(cur, "no digit after sign"); + } + /* begin with 0 */ + if (likely(!char_is_digit_or_fp(*++cur))) { + if (has_allow(EXT_NUMBER) && char_to_lower(*cur) == 'x') { /* hex */ +- return read_num_hex(ptr, pre, flg, val, msg); ++ return read_num_hex(ptr, pre, beg, flg, val, msg); + } + return_0(); + } +@@ -4447,7 +4480,7 @@ digi_finish: + This is a fallback function if the custom number reader is disabled. + This function use libc's strtod() to read floating-point number. 
+ */ +-static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, ++static_inline bool read_num(u8 **ptr, u8 **pre, u8 *beg, yyjson_read_flag flg, + yyjson_val *val, const char **msg) { + #define return_err(_pos, _msg) do { \ + *msg = _msg; \ +@@ -4458,24 +4491,32 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, + #define return_0() do { \ + val->tag = YYJSON_TYPE_NUM | (u64)((u8)sign << 3); \ + val->uni.u64 = 0; \ ++ val->beg = hdr - beg; \ ++ val->end = cur - beg - 1; \ + *end = cur; return true; \ + } while (false) + + #define return_i64(_v) do { \ + val->tag = YYJSON_TYPE_NUM | (u64)((u8)sign << 3); \ + val->uni.u64 = (u64)(sign ? (u64)(~(_v) + 1) : (u64)(_v)); \ ++ val->beg = hdr - beg; \ ++ val->end = cur - beg - 1; \ + *end = cur; return true; \ + } while (false) + + #define return_f64(_v) do { \ + val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; \ + val->uni.f64 = sign ? -(f64)(_v) : (f64)(_v); \ ++ val->beg = hdr - beg; \ ++ val->end = cur - beg - 1; \ + *end = cur; return true; \ + } while (false) + + #define return_f64_bin(_v) do { \ + val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; \ + val->uni.u64 = ((u64)sign << 63) | (u64)(_v); \ ++ val->beg = hdr - beg; \ ++ val->end = cur - beg - 1; \ + *end = cur; return true; \ + } while (false) + +@@ -4488,6 +4529,8 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, + #define return_raw() do { \ + val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW; \ + val->uni.str = (const char *)hdr; \ ++ val->beg = hdr - beg; \ ++ val->end = cur - beg - 1; \ + **pre = '\0'; *pre = cur; *end = cur; return true; \ + } while (false) + +@@ -4501,7 +4544,7 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, + + /* read number as raw string if has `YYJSON_READ_NUMBER_AS_RAW` flag */ + if (has_flg(NUMBER_AS_RAW)) { +- return read_num_raw(ptr, pre, flg, val, msg); ++ return read_num_raw(ptr, pre, beg, flg, val, msg); + } + + sign = (*hdr 
== '-'); +@@ -4521,7 +4564,7 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, + } + } + if (has_allow(INF_AND_NAN)) { +- if (read_inf_or_nan(ptr, pre, flg, val)) return true; ++ if (read_inf_or_nan(ptr, pre, beg, flg, val)) return true; + } + return_err(cur, "no digit after sign"); + } +@@ -4533,7 +4576,7 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, + if (!char_is_fp(*cur)) { + if (has_allow(EXT_NUMBER) && + (*cur == 'x' || *cur == 'X')) { /* hex integer */ +- return read_num_hex(ptr, pre, flg, val, msg); ++ return read_num_hex(ptr, pre, beg, flg, val, msg); + } + return_0(); + } +@@ -4721,8 +4764,9 @@ static_inline bool read_uni_esc(u8 **src_ptr, u8 **dst_ptr, const char **msg) { + @param con Continuation for incremental parsing. + @return Whether success. + */ +-static_inline bool read_str_opt(u8 quo, u8 **ptr, u8 *eof, yyjson_read_flag flg, +- yyjson_val *val, const char **msg, u8 *con[2]) { ++static_inline bool read_str_opt(u8 quo, u8 **ptr, u8 *beg, u8 *eof, ++ yyjson_read_flag flg, yyjson_val *val, ++ const char **msg, u8 *con[2]) { + /* + GCC may sometimes load variables into registers too early, causing + unnecessary instructions and performance degradation. This inline assembly +@@ -4808,6 +4852,8 @@ skip_ascii_end: + val->tag = ((u64)(src - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_STR | + (quo == '"' ? 
YYJSON_SUBTYPE_NOESC : 0); + val->uni.str = (const char *)hdr; ++ val->beg = hdr - beg - 1; ++ val->end = src - beg; /* src is the closing quote: inclusive end, matching the other readers */ + *src = '\0'; + *end = src + 1; + if (con) con[0] = con[1] = NULL; +@@ -4930,6 +4976,8 @@ copy_escape: + } else if (likely(*src == quo)) { + val->tag = ((u64)(dst - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_STR; + val->uni.str = (const char *)hdr; ++ val->beg = hdr - beg - 1; ++ val->end = src - beg; /* src is the closing quote: inclusive end, matching the other readers */ + *dst = '\0'; + *end = src + 1; + if (con) con[0] = con[1] = NULL; +@@ -5039,23 +5087,25 @@ copy_utf8: + #undef return_err + } + +-static_inline bool read_str(u8 **ptr, u8 *eof, yyjson_read_flag flg, ++static_inline bool read_str(u8 **ptr, u8 *beg, u8 *eof, yyjson_read_flag flg, + yyjson_val *val, const char **msg) { +- return read_str_opt('\"', ptr, eof, flg, val, msg, NULL); ++ return read_str_opt('\"', ptr, beg, eof, flg, val, msg, NULL); + } + +-static_inline bool read_str_con(u8 **ptr, u8 *eof, yyjson_read_flag flg, +- yyjson_val *val, const char **msg, u8 **con) { +- return read_str_opt('\"', ptr, eof, flg, val, msg, con); ++static_inline bool read_str_con(u8 **ptr, u8 *beg, u8 *eof, ++ yyjson_read_flag flg, yyjson_val *val, ++ const char **msg, u8 **con) { ++ return read_str_opt('\"', ptr, beg, eof, flg, val, msg, con); + } + +-static_noinline bool read_str_sq(u8 **ptr, u8 *eof, yyjson_read_flag flg, +- yyjson_val *val, const char **msg) { +- return read_str_opt('\'', ptr, eof, flg, val, msg, NULL); ++static_noinline bool read_str_sq(u8 **ptr, u8 *beg, u8 *eof, ++ yyjson_read_flag flg, yyjson_val *val, ++ const char **msg) { ++ return read_str_opt('\'', ptr, beg, eof, flg, val, msg, NULL); + } + + /** Read unquoted key (identifier name).
*/ +-static_noinline bool read_str_id(u8 **ptr, u8 *eof, yyjson_read_flag flg, ++static_noinline bool read_str_id(u8 **ptr, u8 *beg, u8 *eof, yyjson_read_flag flg, + u8 **pre, yyjson_val *val, const char **msg) { + #define return_err(_end, _msg) do { \ + *msg = _msg; \ +@@ -5067,6 +5117,8 @@ static_noinline bool read_str_id(u8 **ptr, u8 *eof, yyjson_read_flag flg, + val->tag = ((u64)(_str_end - hdr) << YYJSON_TAG_BIT) | \ + (u64)(YYJSON_TYPE_STR); \ + val->uni.str = (const char *)hdr; \ ++ val->beg = hdr - beg; \ ++ val->end = _str_end - beg - 1; \ + *pre = _str_end; *end = _cur_end; \ + return true; \ + } while (false) +@@ -5245,33 +5297,33 @@ static_noinline yyjson_doc *read_root_single(u8 *hdr, u8 *cur, u8 *eof, + val = val_hdr + hdr_len; + + if (char_is_num(*cur)) { +- if (likely(read_num(&cur, pre, flg, val, &msg))) goto doc_end; ++ if (likely(read_num(&cur, pre, hdr, flg, val, &msg))) goto doc_end; + goto fail_number; + } + if (*cur == '"') { +- if (likely(read_str(&cur, eof, flg, val, &msg))) goto doc_end; ++ if (likely(read_str(&cur, hdr, eof, flg, val, &msg))) goto doc_end; + goto fail_string; + } + if (*cur == 't') { +- if (likely(read_true(&cur, val))) goto doc_end; ++ if (likely(read_true(&cur, val, hdr))) goto doc_end; + goto fail_literal_true; + } + if (*cur == 'f') { +- if (likely(read_false(&cur, val))) goto doc_end; ++ if (likely(read_false(&cur, val, hdr))) goto doc_end; + goto fail_literal_false; + } + if (*cur == 'n') { +- if (likely(read_null(&cur, val))) goto doc_end; ++ if (likely(read_null(&cur, val, hdr))) goto doc_end; + if (has_allow(INF_AND_NAN)) { +- if (read_nan(&cur, pre, flg, val)) goto doc_end; ++ if (read_nan(&cur, pre, hdr, flg, val)) goto doc_end; + } + goto fail_literal_null; + } + if (has_allow(INF_AND_NAN)) { +- if (read_inf_or_nan(&cur, pre, flg, val)) goto doc_end; ++ if (read_inf_or_nan(&cur, pre, hdr, flg, val)) goto doc_end; + } + if (has_allow(SINGLE_QUOTED_STR) && *cur == '\'') { +- if (likely(read_str_sq(&cur, eof, 
flg, val, &msg))) goto doc_end; ++ if (likely(read_str_sq(&cur, hdr, eof, flg, val, &msg))) goto doc_end; + goto fail_string; + } + goto fail_character; +@@ -5383,10 +5435,12 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, u8 *cur, u8 *eof, + if (*cur++ == '{') { + ctn->tag = YYJSON_TYPE_OBJ; + ctn->uni.ofs = 0; ++ ctn->beg = cur - hdr - 1; /* cur was already advanced past '{' by the test above */ + goto obj_key_begin; + } else { + ctn->tag = YYJSON_TYPE_ARR; + ctn->uni.ofs = 0; ++ ctn->beg = cur - hdr - 1; /* cur was already advanced past '[' by the test above */ + goto arr_val_begin; + } + +@@ -5399,6 +5453,7 @@ arr_begin: + val_incr(); + val->tag = YYJSON_TYPE_ARR; + val->uni.ofs = (usize)((u8 *)val - (u8 *)ctn); ++ val->beg = cur - hdr - 1; + + /* push the new array value as current container */ + ctn = val; +@@ -5416,33 +5471,33 @@ arr_val_begin: + if (char_is_num(*cur)) { + val_incr(); + ctn_len++; +- if (likely(read_num(&cur, pre, flg, val, &msg))) goto arr_val_end; ++ if (likely(read_num(&cur, pre, hdr, flg, val, &msg))) goto arr_val_end; + goto fail_number; + } + if (*cur == '"') { + val_incr(); + ctn_len++; +- if (likely(read_str(&cur, eof, flg, val, &msg))) goto arr_val_end; ++ if (likely(read_str(&cur, hdr, eof, flg, val, &msg))) goto arr_val_end; + goto fail_string; + } + if (*cur == 't') { + val_incr(); + ctn_len++; +- if (likely(read_true(&cur, val))) goto arr_val_end; ++ if (likely(read_true(&cur, val, hdr))) goto arr_val_end; + goto fail_literal_true; + } + if (*cur == 'f') { + val_incr(); + ctn_len++; +- if (likely(read_false(&cur, val))) goto arr_val_end; ++ if (likely(read_false(&cur, val, hdr))) goto arr_val_end; + goto fail_literal_false; + } + if (*cur == 'n') { + val_incr(); + ctn_len++; +- if (likely(read_null(&cur, val))) goto arr_val_end; ++ if (likely(read_null(&cur, val, hdr))) goto arr_val_end; + if (has_allow(INF_AND_NAN)) { +- if (read_nan(&cur, pre, flg, val)) goto arr_val_end; ++ if (read_nan(&cur, pre, hdr, flg, val)) goto arr_val_end; + } + goto fail_literal_null; + } +@@ -5461,13 +5516,13 @@ arr_val_begin: + (*cur == 'i' || *cur == 'I' || *cur
== 'N')) { + val_incr(); + ctn_len++; +- if (read_inf_or_nan(&cur, pre, flg, val)) goto arr_val_end; ++ if (read_inf_or_nan(&cur, pre, hdr, flg, val)) goto arr_val_end; + goto fail_character_val; + } + if (has_allow(SINGLE_QUOTED_STR) && *cur == '\'') { + val_incr(); + ctn_len++; +- if (likely(read_str_sq(&cur, eof, flg, val, &msg))) goto arr_val_end; ++ if (likely(read_str_sq(&cur, hdr, eof, flg, val, &msg))) goto arr_val_end; + goto fail_string; + } + if (has_allow(TRIVIA) && char_is_trivia(*cur)) { +@@ -5502,6 +5557,7 @@ arr_end: + /* save the next sibling value offset */ + ctn->uni.ofs = (usize)((u8 *)val - (u8 *)ctn) + sizeof(yyjson_val); + ctn->tag = ((ctn_len) << YYJSON_TAG_BIT) | YYJSON_TYPE_ARR; ++ ctn->end = cur - hdr - 1; + if (unlikely(ctn == ctn_parent)) goto doc_end; + + /* pop parent as current container */ +@@ -5521,6 +5577,7 @@ obj_begin: + val->tag = YYJSON_TYPE_OBJ; + /* offset to the parent */ + val->uni.ofs = (usize)((u8 *)val - (u8 *)ctn); ++ val->beg = cur - hdr - 1; + ctn = val; + ctn_len = 0; + +@@ -5528,7 +5585,7 @@ obj_key_begin: + if (likely(*cur == '"')) { + val_incr(); + ctn_len++; +- if (likely(read_str(&cur, eof, flg, val, &msg))) goto obj_key_end; ++ if (likely(read_str(&cur, hdr, eof, flg, val, &msg))) goto obj_key_end; + goto fail_string; + } + if (likely(*cur == '}')) { +@@ -5545,13 +5602,13 @@ obj_key_begin: + if (has_allow(SINGLE_QUOTED_STR) && *cur == '\'') { + val_incr(); + ctn_len++; +- if (likely(read_str_sq(&cur, eof, flg, val, &msg))) goto obj_key_end; ++ if (likely(read_str_sq(&cur, hdr, eof, flg, val, &msg))) goto obj_key_end; + goto fail_string; + } + if (has_allow(UNQUOTED_KEY) && char_is_id_start(*cur)) { + val_incr(); + ctn_len++; +- if (read_str_id(&cur, eof, flg, pre, val, &msg)) goto obj_key_end; ++ if (read_str_id(&cur, hdr, eof, flg, pre, val, &msg)) goto obj_key_end; + goto fail_string; + } + if (has_allow(TRIVIA) && char_is_trivia(*cur)) { +@@ -5579,13 +5636,13 @@ obj_val_begin: + if (*cur == '"') { + val++; 
+ ctn_len++; +- if (likely(read_str(&cur, eof, flg, val, &msg))) goto obj_val_end; ++ if (likely(read_str(&cur, hdr, eof, flg, val, &msg))) goto obj_val_end; + goto fail_string; + } + if (char_is_num(*cur)) { + val++; + ctn_len++; +- if (likely(read_num(&cur, pre, flg, val, &msg))) goto obj_val_end; ++ if (likely(read_num(&cur, pre, hdr, flg, val, &msg))) goto obj_val_end; + goto fail_number; + } + if (*cur == '{') { +@@ -5599,21 +5656,21 @@ obj_val_begin: + if (*cur == 't') { + val++; + ctn_len++; +- if (likely(read_true(&cur, val))) goto obj_val_end; ++ if (likely(read_true(&cur, val, hdr))) goto obj_val_end; + goto fail_literal_true; + } + if (*cur == 'f') { + val++; + ctn_len++; +- if (likely(read_false(&cur, val))) goto obj_val_end; ++ if (likely(read_false(&cur, val, hdr))) goto obj_val_end; + goto fail_literal_false; + } + if (*cur == 'n') { + val++; + ctn_len++; +- if (likely(read_null(&cur, val))) goto obj_val_end; ++ if (likely(read_null(&cur, val, hdr))) goto obj_val_end; + if (has_allow(INF_AND_NAN)) { +- if (read_nan(&cur, pre, flg, val)) goto obj_val_end; ++ if (read_nan(&cur, pre, hdr, flg, val)) goto obj_val_end; + } + goto fail_literal_null; + } +@@ -5625,13 +5682,13 @@ obj_val_begin: + (*cur == 'i' || *cur == 'I' || *cur == 'N')) { + val++; + ctn_len++; +- if (read_inf_or_nan(&cur, pre, flg, val)) goto obj_val_end; ++ if (read_inf_or_nan(&cur, pre, hdr, flg, val)) goto obj_val_end; + goto fail_character_val; + } + if (has_allow(SINGLE_QUOTED_STR) && *cur == '\'') { + val++; + ctn_len++; +- if (likely(read_str_sq(&cur, eof, flg, val, &msg))) goto obj_val_end; ++ if (likely(read_str_sq(&cur, hdr, eof, flg, val, &msg))) goto obj_val_end; + goto fail_string; + } + if (has_allow(TRIVIA) && char_is_trivia(*cur)) { +@@ -5665,6 +5722,7 @@ obj_end: + /* point to the next value */ + ctn->uni.ofs = (usize)((u8 *)val - (u8 *)ctn) + sizeof(yyjson_val); + ctn->tag = (ctn_len << (YYJSON_TAG_BIT - 1)) | YYJSON_TYPE_OBJ; ++ ctn->end = cur - hdr - 1; + if 
(unlikely(ctn == ctn_parent)) goto doc_end; + ctn = ctn_parent; + ctn_len = (usize)(ctn->tag >> YYJSON_TAG_BIT); +@@ -5787,11 +5845,13 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, u8 *cur, u8 *eof, + if (*cur++ == '{') { + ctn->tag = YYJSON_TYPE_OBJ; + ctn->uni.ofs = 0; ++ ctn->beg = cur - hdr - 1; + if (*cur == '\n') cur++; + goto obj_key_begin; + } else { + ctn->tag = YYJSON_TYPE_ARR; + ctn->uni.ofs = 0; ++ ctn->beg = cur - hdr - 1; + if (*cur == '\n') cur++; + goto arr_val_begin; + } +@@ -5805,6 +5865,7 @@ arr_begin: + val_incr(); + val->tag = YYJSON_TYPE_ARR; + val->uni.ofs = (usize)((u8 *)val - (u8 *)ctn); ++ val->beg = cur - hdr - 1; + + /* push the new array value as current container */ + ctn = val; +@@ -5835,33 +5896,33 @@ arr_val_begin: + if (char_is_num(*cur)) { + val_incr(); + ctn_len++; +- if (likely(read_num(&cur, pre, flg, val, &msg))) goto arr_val_end; ++ if (likely(read_num(&cur, pre, hdr, flg, val, &msg))) goto arr_val_end; + goto fail_number; + } + if (*cur == '"') { + val_incr(); + ctn_len++; +- if (likely(read_str(&cur, eof, flg, val, &msg))) goto arr_val_end; ++ if (likely(read_str(&cur, hdr, eof, flg, val, &msg))) goto arr_val_end; + goto fail_string; + } + if (*cur == 't') { + val_incr(); + ctn_len++; +- if (likely(read_true(&cur, val))) goto arr_val_end; ++ if (likely(read_true(&cur, val, hdr))) goto arr_val_end; + goto fail_literal_true; + } + if (*cur == 'f') { + val_incr(); + ctn_len++; +- if (likely(read_false(&cur, val))) goto arr_val_end; ++ if (likely(read_false(&cur, val, hdr))) goto arr_val_end; + goto fail_literal_false; + } + if (*cur == 'n') { + val_incr(); + ctn_len++; +- if (likely(read_null(&cur, val))) goto arr_val_end; ++ if (likely(read_null(&cur, val, hdr))) goto arr_val_end; + if (has_allow(INF_AND_NAN)) { +- if (read_nan(&cur, pre, flg, val)) goto arr_val_end; ++ if (read_nan(&cur, pre, hdr, flg, val)) goto arr_val_end; + } + goto fail_literal_null; + } +@@ -5880,13 +5941,13 @@ arr_val_begin: + (*cur == 'i' || 
*cur == 'I' || *cur == 'N')) { + val_incr(); + ctn_len++; +- if (read_inf_or_nan(&cur, pre, flg, val)) goto arr_val_end; ++ if (read_inf_or_nan(&cur, pre, hdr, flg, val)) goto arr_val_end; + goto fail_character_val; + } + if (has_allow(SINGLE_QUOTED_STR) && *cur == '\'') { + val_incr(); + ctn_len++; +- if (likely(read_str_sq(&cur, eof, flg, val, &msg))) goto arr_val_end; ++ if (likely(read_str_sq(&cur, hdr, eof, flg, val, &msg))) goto arr_val_end; + goto fail_string; + } + if (has_allow(TRIVIA) && char_is_trivia(*cur)) { +@@ -5925,6 +5986,7 @@ arr_end: + /* save the next sibling value offset */ + ctn->uni.ofs = (usize)((u8 *)val - (u8 *)ctn) + sizeof(yyjson_val); + ctn->tag = ((ctn_len) << YYJSON_TAG_BIT) | YYJSON_TYPE_ARR; ++ ctn->end = cur - hdr - 1; + if (unlikely(ctn == ctn_parent)) goto doc_end; + + /* pop parent as current container */ +@@ -5945,6 +6007,7 @@ obj_begin: + val->tag = YYJSON_TYPE_OBJ; + /* offset to the parent */ + val->uni.ofs = (usize)((u8 *)val - (u8 *)ctn); ++ val->beg = cur - hdr - 1; + ctn = val; + ctn_len = 0; + if (*cur == '\n') cur++; +@@ -5964,7 +6027,7 @@ obj_key_begin: + if (likely(*cur == '"')) { + val_incr(); + ctn_len++; +- if (likely(read_str(&cur, eof, flg, val, &msg))) goto obj_key_end; ++ if (likely(read_str(&cur, hdr, eof, flg, val, &msg))) goto obj_key_end; + goto fail_string; + } + if (likely(*cur == '}')) { +@@ -5981,13 +6044,13 @@ obj_key_begin: + if (has_allow(SINGLE_QUOTED_STR) && *cur == '\'') { + val_incr(); + ctn_len++; +- if (likely(read_str_sq(&cur, eof, flg, val, &msg))) goto obj_key_end; ++ if (likely(read_str_sq(&cur, hdr, eof, flg, val, &msg))) goto obj_key_end; + goto fail_string; + } + if (has_allow(UNQUOTED_KEY) && char_is_id_start(*cur)) { + val_incr(); + ctn_len++; +- if (read_str_id(&cur, eof, flg, pre, val, &msg)) goto obj_key_end; ++ if (read_str_id(&cur, hdr, eof, flg, pre, val, &msg)) goto obj_key_end; + goto fail_string; + } + if (has_allow(TRIVIA) && char_is_trivia(*cur)) { +@@ -6019,13 +6082,13 @@ 
obj_val_begin: + if (*cur == '"') { + val++; + ctn_len++; +- if (likely(read_str(&cur, eof, flg, val, &msg))) goto obj_val_end; ++ if (likely(read_str(&cur, hdr, eof, flg, val, &msg))) goto obj_val_end; + goto fail_string; + } + if (char_is_num(*cur)) { + val++; + ctn_len++; +- if (likely(read_num(&cur, pre, flg, val, &msg))) goto obj_val_end; ++ if (likely(read_num(&cur, pre, hdr, flg, val, &msg))) goto obj_val_end; + goto fail_number; + } + if (*cur == '{') { +@@ -6039,21 +6102,21 @@ obj_val_begin: + if (*cur == 't') { + val++; + ctn_len++; +- if (likely(read_true(&cur, val))) goto obj_val_end; ++ if (likely(read_true(&cur, val, hdr))) goto obj_val_end; + goto fail_literal_true; + } + if (*cur == 'f') { + val++; + ctn_len++; +- if (likely(read_false(&cur, val))) goto obj_val_end; ++ if (likely(read_false(&cur, val, hdr))) goto obj_val_end; + goto fail_literal_false; + } + if (*cur == 'n') { + val++; + ctn_len++; +- if (likely(read_null(&cur, val))) goto obj_val_end; ++ if (likely(read_null(&cur, val, hdr))) goto obj_val_end; + if (has_allow(INF_AND_NAN)) { +- if (read_nan(&cur, pre, flg, val)) goto obj_val_end; ++ if (read_nan(&cur, pre, hdr, flg, val)) goto obj_val_end; + } + goto fail_literal_null; + } +@@ -6065,13 +6128,13 @@ obj_val_begin: + (*cur == 'i' || *cur == 'I' || *cur == 'N')) { + val++; + ctn_len++; +- if (read_inf_or_nan(&cur, pre, flg, val)) goto obj_val_end; ++ if (read_inf_or_nan(&cur, pre, hdr, flg, val)) goto obj_val_end; + goto fail_character_val; + } + if (has_allow(SINGLE_QUOTED_STR) && *cur == '\'') { + val++; + ctn_len++; +- if (likely(read_str_sq(&cur, eof, flg, val, &msg))) goto obj_val_end; ++ if (likely(read_str_sq(&cur, hdr, eof, flg, val, &msg))) goto obj_val_end; + goto fail_string; + } + if (has_allow(TRIVIA) && char_is_trivia(*cur)) { +@@ -6109,6 +6172,7 @@ obj_end: + /* point to the next value */ + ctn->uni.ofs = (usize)((u8 *)val - (u8 *)ctn) + sizeof(yyjson_val); + ctn->tag = (ctn_len << (YYJSON_TAG_BIT - 1)) | 
YYJSON_TYPE_OBJ; ++ ctn->end = cur - hdr - 1; + if (unlikely(ctn == ctn_parent)) goto doc_end; + ctn = ctn_parent; + ctn_len = (usize)(ctn->tag >> YYJSON_TAG_BIT); +@@ -6228,9 +6292,9 @@ yyjson_doc *yyjson_read_opts(char *dat, usize len, + /* read json document */ + if (likely(char_is_ctn(*cur))) { + if (char_is_space(cur[1]) && char_is_space(cur[2])) { +- doc = read_root_pretty(hdr, cur, eof, alc, flg, err); ++ doc = read_root_pretty(hdr, cur, eof, alc, flg, err); // read function + } else { +- doc = read_root_minify(hdr, cur, eof, alc, flg, err); ++ doc = read_root_minify(hdr, cur, eof, alc, flg, err); // read function + } + } else { + doc = read_root_single(hdr, cur, eof, alc, flg, err); +@@ -6425,7 +6489,7 @@ const char *yyjson_read_number(const char *dat, + #endif + + #if YYJSON_DISABLE_FAST_FP_CONV +- if (!read_num(&cur, pre, flg, val, &msg)) { ++ if (!read_num(&cur, pre, hdr, flg, val, &msg)) { + if (dat_len >= sizeof(buf)) alc->free(alc->ctx, hdr); + return_err(cur, INVALID_NUMBER, msg); + } +@@ -6433,7 +6497,7 @@ const char *yyjson_read_number(const char *dat, + if (yyjson_is_raw(val)) val->uni.str = dat; + return dat + (cur - hdr); + #else +- if (!read_num(&cur, pre, flg, val, &msg)) { ++ if (!read_num(&cur, pre, hdr, flg, val, &msg)) { + return_err(cur, INVALID_NUMBER, msg); + } + return (const char *)cur; +@@ -6703,23 +6767,23 @@ doc_begin: + goto arr_val_begin; + } + if (char_is_num(*cur)) { +- if (likely(read_num(&cur, pre, flg, val, &msg))) goto doc_end; ++ if (likely(read_num(&cur, pre, hdr, flg, val, &msg))) goto doc_end; + goto fail_number; + } + if (*cur == '"') { +- if (likely(read_str_con(&cur, end, flg, val, &msg, con))) goto doc_end; ++ if (likely(read_str_con(&cur, hdr, end, flg, val, &msg, con))) goto doc_end; + goto fail_string; + } + if (*cur == 't') { +- if (likely(read_true(&cur, val))) goto doc_end; ++ if (likely(read_true(&cur, val, hdr))) goto doc_end; + goto fail_literal_true; + } + if (*cur == 'f') { +- if (likely(read_false(&cur, 
val))) goto doc_end; ++ if (likely(read_false(&cur, val, hdr))) goto doc_end; + goto fail_literal_false; + } + if (*cur == 'n') { +- if (likely(read_null(&cur, val))) goto doc_end; ++ if (likely(read_null(&cur, val, hdr))) goto doc_end; + goto fail_literal_null; + } + +@@ -6760,32 +6824,32 @@ arr_val_continue: + if (char_is_num(*cur)) { + val_incr(); + ctn_len++; +- if (likely(read_num(&cur, pre, flg, val, &msg))) goto arr_val_maybe_end; ++ if (likely(read_num(&cur, pre, hdr, flg, val, &msg))) goto arr_val_maybe_end; + goto fail_number; + } + if (*cur == '"') { + val_incr(); + ctn_len++; +- if (likely(read_str_con(&cur, end, flg, val, &msg, con))) ++ if (likely(read_str_con(&cur, hdr, end, flg, val, &msg, con))) + goto arr_val_end; + goto fail_string; + } + if (*cur == 't') { + val_incr(); + ctn_len++; +- if (likely(read_true(&cur, val))) goto arr_val_end; ++ if (likely(read_true(&cur, val, hdr))) goto arr_val_end; + goto fail_literal_true; + } + if (*cur == 'f') { + val_incr(); + ctn_len++; +- if (likely(read_false(&cur, val))) goto arr_val_end; ++ if (likely(read_false(&cur, val, hdr))) goto arr_val_end; + goto fail_literal_false; + } + if (*cur == 'n') { + val_incr(); + ctn_len++; +- if (likely(read_null(&cur, val))) goto arr_val_end; ++ if (likely(read_null(&cur, val, hdr))) goto arr_val_end; + goto fail_literal_null; + } + if (*cur == ']') { +@@ -6856,7 +6920,7 @@ obj_key_continue: + if (likely(*cur == '"')) { + val_incr(); + ctn_len++; +- if (likely(read_str_con(&cur, end, flg, val, &msg, con))) ++ if (likely(read_str_con(&cur, hdr, end, flg, val, &msg, con))) + goto obj_key_end; + goto fail_string; + } +@@ -6890,14 +6954,14 @@ obj_val_continue: + if (*cur == '"') { + val++; + ctn_len++; +- if (likely(read_str_con(&cur, end, flg, val, &msg, con))) ++ if (likely(read_str_con(&cur, hdr, end, flg, val, &msg, con))) + goto obj_val_end; + goto fail_string; + } + if (char_is_num(*cur)) { + val++; + ctn_len++; +- if (likely(read_num(&cur, pre, flg, val, &msg))) goto 
obj_val_maybe_end; ++ if (likely(read_num(&cur, pre, hdr, flg, val, &msg))) goto obj_val_maybe_end; + goto fail_number; + } + if (*cur == '{') { +@@ -6911,19 +6975,19 @@ obj_val_continue: + if (*cur == 't') { + val++; + ctn_len++; +- if (likely(read_true(&cur, val))) goto obj_val_end; ++ if (likely(read_true(&cur, val, hdr))) goto obj_val_end; + goto fail_literal_true; + } + if (*cur == 'f') { + val++; + ctn_len++; +- if (likely(read_false(&cur, val))) goto obj_val_end; ++ if (likely(read_false(&cur, val, hdr))) goto obj_val_end; + goto fail_literal_false; + } + if (*cur == 'n') { + val++; + ctn_len++; +- if (likely(read_null(&cur, val))) goto obj_val_end; ++ if (likely(read_null(&cur, val, hdr))) goto obj_val_end; + goto fail_literal_null; + } + if (char_is_space(*cur)) { +diff --git a/src/yyjson.h b/src/yyjson.h +index 5eb6d46..e172efc 100644 +--- a/src/yyjson.h ++++ b/src/yyjson.h +@@ -18,12 +18,18 @@ + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. ++ ++ Additional changes to add per-value byte-spans made by ++ Copyright (c) 2025 + *============================================================================*/ + + /** + @file yyjson.h + @date 2019-03-09 + @author YaoYuan ++ ++ Additional modifications made by github.com/lhearachel to support the location ++ of value-spans. + */ + + #ifndef YYJSON_H +@@ -1094,17 +1100,19 @@ yyjson_api_inline size_t yyjson_read_max_memory_usage(size_t len, + for example: "[1,2,3,4]" size is 9, value count is 5. + 2. Some broken JSON may cost more memory during reading, but fail at end, + for example: "[[[[[[[[". +- 3. yyjson use 16 bytes per value, see struct yyjson_val. ++ 3. yyjson use 32 bytes per value, see struct yyjson_val. + 4. yyjson use dynamic memory with a growth factor of 1.5. + +- The max memory size is (json_size / 2 * 16 * 1.5 + padding). 
++ The max memory size is (json_size / 2 * 32 * 1.5 + padding). + */ +- size_t mul = (size_t)12 + !(flg & YYJSON_READ_INSITU); ++#define memsize (size_t)(32 * 3 / 4) ++ size_t mul = memsize + !(flg & YYJSON_READ_INSITU); + size_t pad = 256; + size_t max = (size_t)(~(size_t)0); + if (flg & YYJSON_READ_STOP_WHEN_DONE) len = len < 256 ? 256 : len; + if (len >= (max - pad - mul) / mul) return 0; + return len * mul + pad; ++#undef memsize + } + + /** +@@ -4760,11 +4768,13 @@ typedef union yyjson_val_uni { + } yyjson_val_uni; + + /** +- Immutable JSON value, 16 bytes. ++ Immutable JSON value, 32 bytes. + */ + struct yyjson_val { + uint64_t tag; /**< type, subtype and length */ + yyjson_val_uni uni; /**< payload */ ++ size_t beg; /**< beginning of value-span */ ++ size_t end; /**< ending of value-span */ + }; + + struct yyjson_doc { +@@ -5233,6 +5243,14 @@ yyjson_api_inline const char *yyjson_get_type_desc(yyjson_val *val) { + } + } + ++yyjson_api_inline size_t yyjson_dist_beg(yyjson_val *val) { ++ return val == NULL ? 0 : val->beg; ++} ++ ++yyjson_api_inline size_t yyjson_dist_end(yyjson_val *val) { ++ return val == NULL ? 0 : val->end; ++} ++ + yyjson_api_inline const char *yyjson_get_raw(yyjson_val *val) { + return yyjson_is_raw(val) ? 
unsafe_yyjson_get_raw(val) : NULL; + } diff --git a/subprojects/packagefiles/yyjson_patch/fix-trailing-comma-errpos.patch b/subprojects/packagefiles/yyjson_patch/fix-trailing-comma-errpos.patch new file mode 100644 index 0000000000..911d1f2ef0 --- /dev/null +++ b/subprojects/packagefiles/yyjson_patch/fix-trailing-comma-errpos.patch @@ -0,0 +1,80 @@ +diff --git a/src/yyjson.c b/src/yyjson.c +index 837997c..5bd56e6 100644 +--- a/src/yyjson.c ++++ b/src/yyjson.c +@@ -5468,7 +5468,7 @@ arr_val_begin: + cur++; + if (likely(ctn_len == 0)) goto arr_end; + if (has_allow(TRAILING_COMMAS)) goto arr_end; +- while (*cur != ',') cur--; ++ do { cur--; } while (*cur != ','); + goto fail_trailing_comma; + } + if (char_is_space(*cur)) { +@@ -5562,7 +5562,7 @@ obj_key_begin: + cur++; + if (likely(ctn_len == 0)) goto obj_end; + if (has_allow(TRAILING_COMMAS)) goto obj_end; +- while (*cur != ',') cur--; ++ do { cur--; } while (*cur != ','); + goto fail_trailing_comma; + } + if (char_is_space(*cur)) { +@@ -5910,7 +5910,7 @@ arr_val_begin: + cur++; + if (likely(ctn_len == 0)) goto arr_end; + if (has_allow(TRAILING_COMMAS)) goto arr_end; +- while (*cur != ',') cur--; ++ do { cur--; } while (*cur != ','); + goto fail_trailing_comma; + } + if (char_is_space(*cur)) { +@@ -6022,7 +6022,7 @@ obj_key_begin: + cur++; + if (likely(ctn_len == 0)) goto obj_end; + if (has_allow(TRAILING_COMMAS)) goto obj_end; +- while (*cur != ',') cur--; ++ do { cur--; } while (*cur != ','); + goto fail_trailing_comma; + } + if (char_is_space(*cur)) { +@@ -6858,7 +6858,7 @@ arr_val_continue: + if (*cur == ']') { + cur++; + if (likely(ctn_len == 0)) goto arr_end; +- while (*cur != ',') cur--; ++ do { cur--; } while (*cur != ','); + goto fail_trailing_comma; + } + if (char_is_space(*cur)) { +@@ -6940,7 +6940,7 @@ obj_key_continue: + if (likely(*cur == '}')) { + cur++; + if (likely(ctn_len == 0)) goto obj_end; +- while (*cur != ',') cur--; ++ do { cur--; } while (*cur != ','); + goto fail_trailing_comma; + } + if 
(char_is_space(*cur)) { +diff --git a/test/test_err_code.c b/test/test_err_code.c +index 21f9414..bebd462 100644 +--- a/test/test_err_code.c ++++ b/test/test_err_code.c +@@ -307,6 +307,17 @@ static void test_read_err_code(void) { + + + ++ // ------------------------------------------------------------------------- ++ // Special case: object member is an array with a trailing comma ++ str = "{\"array\":[1,],\"integer\":35}"; ++ // ^ trailing comma is not allowed ++ memset(&err, -1, sizeof(err)); ++ yyjson_doc_free(yyjson_read_opts((char *)str, strlen(str), 0, NULL, &err)); ++ yy_assert(err.code == YYJSON_READ_ERROR_JSON_STRUCTURE); ++ yy_assert(err.pos == strlen(str) - 16); ++ ++ ++ + // ------------------------------------------------------------------------- + // Invalid comment, such as unclosed multi-line comment. + #if !YYJSON_DISABLE_NON_STANDARD diff --git a/subprojects/yyjson.wrap b/subprojects/yyjson.wrap new file mode 100644 index 0000000000..d2c5eabb88 --- /dev/null +++ b/subprojects/yyjson.wrap @@ -0,0 +1,15 @@ +[wrap-file] +directory = yyjson-0.12.0 +source_url = https://github.com/ibireme/yyjson/archive/refs/tags/0.12.0.tar.gz +source_filename = yyjson-0.12.0.tar.gz +source_hash = b16246f617b2a136c78d73e5e2647c6f1de1313e46678062985bdcf1f40bb75d +patch_filename = yyjson_0.12.0-1_patch.zip +patch_url = https://wrapdb.mesonbuild.com/v2/yyjson_0.12.0-1/get_patch +patch_hash = 014582b328e13671dea64fcb49a795e70142360216a505212b8045134781b3b5 +source_fallback_url = https://github.com/mesonbuild/wrapdb/releases/download/yyjson_0.12.0-1/yyjson-0.12.0.tar.gz +wrapdb_version = 0.12.0-1 + +diff_files = yyjson_patch/always-native.patch, yyjson_patch/fix-trailing-comma-errpos.patch, yyjson_patch/feat-value-spans.patch, yyjson_patch/arm-f64_pow10_table-unused.patch + +[provide] +dependency_names = yyjson diff --git a/tools/dataproc/lib/dataproc.c b/tools/dataproc/lib/dataproc.c new file mode 100644 index 0000000000..8ed12eda0c --- /dev/null +++ 
b/tools/dataproc/lib/dataproc.c @@ -0,0 +1,830 @@ +#include "dataproc.h" + +#include <assert.h> +#include <errno.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "json.h" +#include "private.h" + +static int (*read_func)(datafile_t *df) = NULL; +static void (*free_func)(datafile_t *df) = NULL; +static void* (*get_root_func)(void *ctx) = NULL; +static enum nodetype (*get_type_func)(void *node) = NULL; +static span_t (*get_span_func)(void *node) = NULL; +static bool (*get_bool_func)(void *node) = NULL; +static int (*get_int_func)(void *node) = NULL; +static int64_t (*get_i64_func)(void *node) = NULL; +static uint64_t (*get_u64_func)(void *node) = NULL; +static double (*get_float_func)(void *node) = NULL; +static const char* (*get_string_func)(void *node) = NULL; +static size_t (*get_length_func)(void *node) = NULL; +static void* (*get_elem_func)(void *arr, size_t i) = NULL; +static void* (*get_memb_func)(void *obj, const char *k) = NULL; + +static const char *hl_note = "\033[1;36m"; // bold + cyan +static const char *hl_warning = "\033[1;33m"; // bold + yellow +static const char *hl_error = "\033[1;31m"; // bold + red +static const char *hl_emphasis = "\033[1;39m"; // bold + default color +static const char *hl_reset = "\033[0m"; // clear all formatting + +#define REGISTRY_SIZE 64 + +typedef struct regentry regentry_t; +struct regentry { + const char *type; + lookup_t *table; + size_t size; +}; + +static regentry_t registry[REGISTRY_SIZE] = { 0 }; + +static int slurp(datafile_t *df, const char *filename); + +// ======================================= POOL ALLOCATOR ======================================= // + +#define MEMPAGE_CAPACITY 4096 + +typedef struct mempool mempool_t; +typedef struct mempage mempage_t; + +struct mempool { + mempage_t *head; + mempage_t *tail; +}; + +struct mempage { + char *beg; + char *end; + + mempage_t *next; +}; + +static mempage_t* page_new(void) { + mempage_t *page = malloc(sizeof(*page)); + page->beg =
malloc(MEMPAGE_CAPACITY); + page->end = page->beg; + page->next = NULL; + + return page; +} + +static mempool_t* pool_new(void) { + mempage_t *page = page_new(); + mempool_t *pool = malloc(sizeof(*pool)); + pool->head = page; + pool->tail = page; + return pool; +} + +#define palloc(pool, T, count) pool_alloc(pool, count, sizeof(T), _Alignof(T)) +// Bump-allocate zeroed storage for `items` objects of `size` bytes from the pool's tail page, aligned to `align` (must be a power of two); a fresh page is chained on when the tail page cannot fit the request. +static void* pool_alloc(mempool_t *pool, size_t items, size_t size, size_t align) { + assert(items * size <= MEMPAGE_CAPACITY && "total memory request exceeds maximum pagesize"); + + ptrdiff_t padding = -(uintptr_t)pool->tail->end & (align - 1); // pad from the bump cursor; the page base says nothing about where the next object starts + ptrdiff_t used = pool->tail->end - pool->tail->beg + padding; + ptrdiff_t available = MEMPAGE_CAPACITY - used; + if (available < 0 || items * size > (size_t)available){ + pool->tail->next = page_new(); + pool->tail = pool->tail->next; + } + padding = -(uintptr_t)pool->tail->end & (align - 1); // re-derive: the tail may now be a fresh page with a different cursor + void *ptr = pool->tail->end + padding; + pool->tail->end += padding + (items * size); + return memset(ptr, 0, items * size); +} + +static void pool_free(mempool_t *pool) { + if (!pool) return; + + mempage_t *curr = pool->head; + while (curr) { + mempage_t *next = curr->next; + free(curr->beg); + free(curr); + curr = next; + } + + free(pool); +} + +// ========================================= DIAGNOSTICS ======================================== // + +#define DIAGSIZE 255 + +static char* make_node_diagnostic(datanode_t *dn, const char *fmt, va_list args) { + size_t pathlen = strlen(dn->path); // prepend the node's path and a colon + char *message = malloc(DIAGSIZE + pathlen + 3); + memcpy(message, dn->path, pathlen); + message[pathlen + 0] = ':'; + message[pathlen + 1] = ' '; + vsnprintf(message + pathlen + 2, DIAGSIZE + 1, fmt, args); + + return message; +} + +static void push_diagnostic(datafile_t *df, span_t span, enum diaglevel level, const char *message) { + diagnostic_t *next = malloc(sizeof(*next)); + next->span = span; + next->message = message; + next->level = level; + next->next = NULL; + next->prev = df->diag_tail; + + if
(df->diag_tail == NULL) { + df->diag_head = next; + df->diag_tail = next; + } else { + df->diag_tail->next = next; + df->diag_tail = next; + } +} + +void dp_error(datanode_t *dn, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + push_diagnostic(dn->file, get_span_func(dn->node), DIAG_ERROR, make_node_diagnostic(dn, fmt, args)); + va_end(args); +} + +void dp_warn(datanode_t *dn, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + push_diagnostic(dn->file, get_span_func(dn->node), DIAG_WARNING, make_node_diagnostic(dn, fmt, args)); + va_end(args); +} + +void dp_note(datanode_t *dn, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + push_diagnostic(dn->file, get_span_func(dn->node), DIAG_NOTE, make_node_diagnostic(dn, fmt, args)); + va_end(args); +} + +void dp_gerror(datafile_t *df, size_t beg, size_t end, const char *errfmt, ...) { + va_list args; + va_start(args, errfmt); + + char *message = malloc(DIAGSIZE + 1); + vsnprintf(message, DIAGSIZE + 1, errfmt, args); + push_diagnostic(df, (span_t){ beg, end }, DIAG_ERROR, message); + + va_end(args); +} + +// ======================================= IMPLEMENTATION ======================================= // +// Bind the backend callbacks for `format` and, when NO_COLOR is set to a non-empty value, blank every highlight sequence; returns DATAPROC_E_UNKFORMAT for an unrecognised format. +int dp_init(enum format format) { + const char *nocolor = getenv("NO_COLOR"); + if (nocolor != NULL && nocolor[0] != 0) { + hl_note = hl_warning = hl_error = ""; // blank every severity colour: with hl_reset emptied, a leftover note/warning escape would never be cleared + hl_emphasis = ""; + hl_reset = ""; + } + + switch (format) { + case DATAPROC_F_JSON: + read_func = json_read; + free_func = json_free; + get_root_func = json_get_root; + get_type_func = json_get_type; + get_span_func = json_get_span; + get_bool_func = json_get_bool; + get_int_func = json_get_int; + get_i64_func = json_get_i64; + get_u64_func = json_get_u64; + get_float_func = json_get_float; + get_string_func = json_get_string; + get_length_func = json_get_length; + get_elem_func = json_get_element; + get_memb_func = json_get_member; + break; + + default: return DATAPROC_E_UNKFORMAT; + } + + assert(read_func && "read_func must be
loaded"); + assert(free_func && "free_func must be loaded"); + assert(get_root_func && "get_root_func must be loaded"); + assert(get_type_func && "get_type_func must be loaded"); + assert(get_span_func && "get_span_func must be loaded"); + assert(get_bool_func && "get_bool_func must be loaded"); + assert(get_int_func && "get_int_func must be loaded"); + assert(get_i64_func && "get_i64_func must be loaded"); + assert(get_u64_func && "get_u64_func must be loaded"); + assert(get_float_func && "get_float_func must be loaded"); + assert(get_string_func && "get_string_func must be loaded"); + assert(get_length_func && "get_length_func must be loaded"); + assert(get_elem_func && "get_elem_func must be loaded"); + assert(get_memb_func && "get_memb_func must be loaded"); + return DATAPROC_E_NONE; +} + +int dp_register(lookup_t *table, size_t size, const char *type) { + for (int i = 0; i < REGISTRY_SIZE; i++) { + regentry_t *entry = &registry[i]; + if (entry->type == NULL) { + entry->type = type; + entry->table = table; + entry->size = size; + return DATAPROC_E_NONE; + } + } + + return DATAPROC_E_REGFULL; +} + +static int slurp(datafile_t *df, const char *filename) { + assert(df && "df pointer must not be NULL"); + assert(filename && "filename must not be NULL"); + + df->filename = filename; + df->source = NULL; + + FILE *f = fopen(filename, "rb"); + if (f == NULL) { + dp_gerror(df, 0, 0, "could not open file '%s': %s", filename, strerror(errno)); + return DATAPROC_E_FOPEN; + } + + fseek(f, 0, SEEK_END); + long fsize = ftell(f); + fseek(f, 0, SEEK_SET); + + if (fsize == 0) { fclose(f); return DATAPROC_E_NONE; } + if (fsize < 0) { + dp_gerror(df, 0, 0, "invalid size for file '%s': %s", filename, strerror(errno)); + fclose(f); + return DATAPROC_E_FSIZE; + } + + char *buf = malloc(fsize + 1); + if (buf == NULL) { + dp_gerror(df, 0, 0, "allocation failure for file '%s': %s", filename, strerror(errno)); + fclose(f); + return DATAPROC_E_ALLOC; + } + + fread(buf, 1, fsize, f); +
fclose(f); + buf[fsize] = 0; + + df->source = buf; + df->size = fsize; + + assert(df->filename && "df should know its filename"); + assert(df->source && "df should point to file contents"); + return DATAPROC_E_NONE; +} + +int dp_load(datafile_t *df, const char *filename) { + df->pool = pool_new(); + return slurp(df, filename) || read_func(df); +} + +void dp_free(datafile_t *df) { + assert(df && "df pointer must not be NULL"); + + free_func(df); + free((char *)df->source); + + diagnostic_t *curr = df->diag_head, *next = NULL; + while (curr != NULL) { + next = curr->next; + free((char *)curr->message); + free(curr); + curr = next; + } + + pool_free(df->pool); + + df->source = NULL; + df->diag_head = NULL; + df->diag_tail = NULL; + df->pool = NULL; +} + +typedef struct errpos errpos_t; +struct errpos { + size_t line; + size_t col; + size_t line_head; +}; + +static errpos_t locate(datafile_t *df, diagnostic_t *perr) { + errpos_t pos = { .line = 1, .col = 1, .line_head = 0, }; + const char *p = df->source; + + while (p && *p && (size_t)(p - df->source) < perr->span.beg) { + if (*p == '\n') { + pos.line += 1; + pos.col = 1; + pos.line_head = p - df->source + 1; + } + else pos.col++; + + p++; + } + + return pos; +} + +#define HL_DIAG(...) hl_diag, __VA_ARGS__, hl_reset +#define HL_EMPHASIS(...) hl_emphasis, __VA_ARGS__, hl_reset +#define max(cur, try) cur < try ? 
try : cur + +enum diaglevel dp_report(datafile_t *df) { + assert(df && "df pointer must not be NULL"); + + enum diaglevel max_sev = DIAG_NOTE; + for (diagnostic_t *perr = df->diag_head; perr; perr = perr->next) { + max_sev = max(max_sev, perr->level); + + const char *prefix = NULL, *hl_diag = NULL; + switch (perr->level) { + case DIAG_NOTE: prefix = "note:"; hl_diag = hl_note; break; + case DIAG_WARNING: prefix = "warning:"; hl_diag = hl_warning; break; + case DIAG_ERROR: prefix = "error:"; hl_diag = hl_error; break; + } + + if (perr->span.beg == 0 && perr->span.end == 0) { + fprintf(stderr, "%s%s:%s %s%s%s %s\n", + HL_EMPHASIS(df->filename), + HL_DIAG(prefix), + perr->message); + } + else { + errpos_t pos = locate(df, perr); + fprintf(stderr, "%s%s:%zu:%zu:%s %s%s%s %s\n", + HL_EMPHASIS(df->filename, pos.line, pos.col), + HL_DIAG(prefix), + perr->message); + + const char *p = df->source + pos.line_head; + const char *e = p; + while (e && *e && *e != '\n') e++; + + // TODO: multi-line support for previews + size_t line_tail = e - df->source; + int len_prefix = (int)(perr->span.beg - pos.line_head); + int len_error = perr->span.end < line_tail + ? 
(int)(perr->span.end - perr->span.beg) + : (int)(line_tail - perr->span.beg); + int len_suffix = (int)(line_tail - pos.line_head - len_prefix - len_error); + fprintf(stderr, " %4zu | %.*s%s%.*s%s%.*s\n", + pos.line, + len_prefix, p, + HL_DIAG(len_error, p + len_prefix), + len_suffix, p + len_prefix + len_error); + fprintf(stderr, " | "); + for (int i = 0; i < len_prefix; i++) fputc(' ', stderr); + fputs(hl_diag, stderr); + for (int i = 0; i < len_error; i++) fputc('^', stderr); + fputs(hl_reset, stderr); + for (int i = 0; i < len_suffix; i++) fputc(' ', stderr); + fputc('\n', stderr); + } + } + + return max_sev; +} + +static inline bool is_word(char c) { + return c == '_' + || (c >= 'a' && c <= 'z') + || (c >= 'A' && c <= 'Z') + || (c >= '0' && c <= '9'); +} + +static char* keytok2(const char *s) { + assert(s && "input string must not be NULL"); + while (s && *s && is_word(*s)) s++; + return (char *)s; +} + +#define MAX_TOKLEN (1 << 8) + +static bool queriable(enum nodetype type) { + return type == DATAPROC_T_ARRAY || type == DATAPROC_T_OBJECT; +} + +static datanode_t dp_arrelem_internal(datanode_t dn, size_t i, bool error); +static datanode_t dp_objmemb_internal(datanode_t dn, const char *k, bool error); + +static const char* typename(enum nodetype type) { + switch (type) { + case DATAPROC_T_NONE: return "(none)"; + case DATAPROC_T_NULL: return "a null-literal"; + case DATAPROC_T_BOOLEAN: return "a boolean"; + case DATAPROC_T_INT: return "an integer"; + case DATAPROC_T_FLOAT: return "a float"; + case DATAPROC_T_STRING: return "a string"; + case DATAPROC_T_ARRAY: return "an array"; + case DATAPROC_T_OBJECT: return "an object"; + case DATAPROC_T_MAPPED: return "a mapped constant"; + case DATAPROC_T_ERR: return "(error)"; + default: abort(); + } +} + +enum queryerr { + DATAPROC_Q_OK = 0, + DATAPROC_Q_BADTOKEN, // hit an unexpected token + DATAPROC_Q_RBRACE, // missing a closing r-brace for array-accessor + DATAPROC_Q_ARRINDEX, // array index is not a non-negative 
integer +}; + +static const char* query_errmsg(enum queryerr code) { + switch (code) { + case DATAPROC_Q_OK: return "(query ok)"; + case DATAPROC_Q_BADTOKEN: return "unexpected access-delimiting token"; + case DATAPROC_Q_RBRACE: return "missing closing-brace ']'"; + case DATAPROC_Q_ARRINDEX: return "array index must be a non-negative integer"; + default: abort(); + } +} + +typedef struct querynode querynode_t; +struct querynode { + bool is_object; + union { + const char *key; + size_t idx; + }; + + querynode_t *next; +}; + +static void free_query(querynode_t *q) { + querynode_t *curr = q, *next = NULL; + while (curr != NULL) { + next = curr->next; + free(curr); + curr = next; + } +} + +static enum queryerr parse_query(char *path, querynode_t **out) { +#define errout(ec) do { errc = ec; goto errcleanup; } while (0) + + assert(path && "path query must not be NULL"); + assert(out && "out pointer must not be NULL"); + + if (*path == '$') path++; + + int errc = DATAPROC_Q_OK; + if (*path == '.' && *(path + 1) == 0) { // special case: "." returns the root + *out = NULL; + return errc; + } + + querynode_t *head = calloc(1, sizeof(*head)); + querynode_t *tail = head; + + while (*path) { + char *start = path; // '.' 
or '[' + char *token = path + 1; + char *delim = keytok2(token); + + switch (*start) { + case '.': + tail->is_object = true; + tail->key = token; + break; + + case '[': + if (*delim != ']') errout(DATAPROC_Q_RBRACE); + tail->is_object = false; + + char *end = NULL; + long idx = strtol(token, &end, 10); + tail->idx = idx; + + if (end != delim || idx < 0) errout(DATAPROC_Q_ARRINDEX); + break; + + default: errout(DATAPROC_Q_BADTOKEN); + } + + switch (*delim) { + case '\0': // fall-through + case '.': // fall-through + case '[': path = delim; break; + case ']': path = delim + 1; break; + default: errout(DATAPROC_Q_BADTOKEN); + } + + *start = 0; // terminate the previous token (in case it's a key-string) + if (*path) { // setup for the next token, if needed + tail->next = calloc(1, sizeof(*tail->next)); + tail = tail->next; + } + } + + *out = head; + return DATAPROC_Q_OK; + +#undef errout + +errcleanup: + free_query(head); + return errc; +} + +static datanode_t dp_query(datafile_t *df, const char *path, bool error) { + assert(df && "df pointer must not be NULL"); + assert(path && "path query must not be NULL"); + + datanode_t dn = { .file = df, .path = (char *)path, .node = NULL }; + querynode_t *query = NULL; + char *pathcpy = calloc(strlen(path) + 1, 1); + memcpy(pathcpy, path, strlen(path)); + + int errc = parse_query(pathcpy, &query); + if (errc) { + // NOTE: query syntax errors cannot be ignored + dp_gerror(df, 0, 0, "%s: query syntax error: %s", path, query_errmsg(errc)); + goto cleanup; + } + + dn.node = get_root_func(df->ctx); + dn.type = get_type_func(dn.node); + for (querynode_t *curr = query; curr; curr = curr->next) { + if (!queriable(dn.type)) { + if (error) dp_error(&dn, "expected an array or object, but got %s", typename(dn.type)); + dn.type = DATAPROC_T_ERR; + goto cleanup; + } + + dn = curr->is_object + ? 
dp_objmemb_internal(dn, curr->key, error) + : dp_arrelem_internal(dn, curr->idx, error); + if (dn.type == DATAPROC_T_ERR) goto cleanup; + } + +cleanup: + free(pathcpy); + free_query(query); + return dn; +} + +datanode_t dp_try(datafile_t *df, const char *path) { + return dp_query(df, path, false); +} + +datanode_t dp_get(datafile_t *df, const char *path) { + return dp_query(df, path, true); +} + +#define err_return(e, r) do { dn.file->errc = e; return r; } while (0) +#define asserttype(expected, retval) \ + do { \ + if (dn.type == DATAPROC_T_ERR) return retval; \ + if (dn.type != expected) { \ + dp_error( \ + &dn, \ + "expected %s, but got %s", \ + typename(expected), typename(dn.type) \ + ); \ + return retval; \ + } \ + } while (0) + +bool dp_bool(datanode_t dn) { + asserttype(DATAPROC_T_BOOLEAN, false); + return get_bool_func(dn.node); +} + +long dp_int(datanode_t dn) { + if (dn.type == DATAPROC_T_MAPPED) return dn.mapped; + + asserttype(DATAPROC_T_INT, 0); + return get_int_func(dn.node); +} + +#pragma GCC diagnostic push // Disable warnings that `i < 0` is impossible for unsigned types +#pragma GCC diagnostic ignored "-Wtype-limits" + +#define assert_intrange(min, max) \ + do { \ + if (i < min || i > max) { \ + dp_error( \ + &dn, \ + "expected a value in the range [%d,%u]", \ + min, max \ + ); \ + return 0; \ + } \ + } while (0) + +int8_t dp_s8(datanode_t dn) { + long i = dp_int(dn); + assert_intrange(INT8_MIN, INT8_MAX); + return (int8_t)(i & 0xFF); +} + +int16_t dp_s16(datanode_t dn) { + long i = dp_int(dn); + assert_intrange(INT16_MIN, INT16_MAX); + return (int16_t)(i & 0xFFFF); +} + +int32_t dp_s32(datanode_t dn) { + long i = dp_int(dn); + assert_intrange(INT32_MIN, INT32_MAX); + return (int32_t)(i & 0xFFFFFFFF); +} + +int64_t dp_s64(datanode_t dn) { + if (dn.type == DATAPROC_T_MAPPED) return dn.mapped; + + asserttype(DATAPROC_T_INT, 0); + return get_i64_func(dn.node); +} + +uint8_t dp_u8(datanode_t dn) { + uint64_t i = dp_u64(dn); + assert_intrange(0, 
UINT8_MAX); + return (uint8_t)(i & 0xFF); +} + +uint8_t dp_u8range(datanode_t dn, uint8_t min, uint8_t max) { + uint64_t i = dp_u64(dn); + assert_intrange(min, max); + return (uint8_t)(i & 0xFF); +} + +uint16_t dp_u16(datanode_t dn) { + uint64_t i = dp_u64(dn); + assert_intrange(0, UINT16_MAX); + return (uint16_t)(i & 0xFFFF); +} + +uint16_t dp_u16range(datanode_t dn, uint16_t min, uint16_t max) { + uint64_t i = dp_u64(dn); + assert_intrange(min, max); + return (uint16_t)(i & 0xFFFF); +} + +uint32_t dp_u32(datanode_t dn) { + uint64_t i = dp_u64(dn); + assert_intrange(0, UINT32_MAX); + return (uint32_t)(i & 0xFFFFFFFF); +} + +uint32_t dp_u32range(datanode_t dn, uint32_t min, uint32_t max) { + uint64_t i = dp_u64(dn); + assert_intrange(min, max); + return (uint32_t)(i & 0xFFFFFFFF); +} + +uint64_t dp_u64(datanode_t dn) { + if (dn.type == DATAPROC_T_MAPPED) return dn.mapped; + + asserttype(DATAPROC_T_INT, 0); + return get_u64_func(dn.node); +} + +#pragma GCC diagnostic pop + +double dp_float(datanode_t dn) { + asserttype(DATAPROC_T_FLOAT, 0.0); + return get_float_func(dn.node); +} + +const char* dp_string(datanode_t dn) { + asserttype(DATAPROC_T_STRING, NULL); + return get_string_func(dn.node); +} + +size_t dp_arrlen(datanode_t dn) { + asserttype(DATAPROC_T_ARRAY, 0); + return get_length_func(dn.node); +} + +// Internal call here is only for use by dp_get, which has a direct path to the node +// and thus should not allocate an updated path-string +static datanode_t dp_arrelem_internal(datanode_t dn, size_t i, bool error) { + asserttype(DATAPROC_T_ARRAY, (dn.node = NULL, dn.type = DATAPROC_T_ERR, dn)); + + void *child = get_elem_func(dn.node, i); + if (child == NULL && error) { + dp_error(&dn, "array index %zu is out-of-bounds", i); + } + + return (datanode_t){ + .file = dn.file, + .path = dn.path, + .node = child, + .type = child != NULL ? 
get_type_func(child) : DATAPROC_T_ERR, + }; +} + +datanode_t dp_arrelem(datanode_t dn, size_t i) { + datanode_t elem = dp_arrelem_internal(dn, i, true); + if (elem.type != DATAPROC_T_ERR) { + size_t size = snprintf(NULL, 0, "%s[%zu]", dn.path, i); + elem.path = palloc(dn.file->pool, char, size + 1); + snprintf(elem.path, size + 1, "%s[%zu]", dn.path, i); + } + + return elem; +} + +// Internal call here is only for use by dp_get, which has a direct path to the node +// and thus should not allocate an updated path-string +static datanode_t dp_objmemb_internal(datanode_t dn, const char *k, bool error) { + asserttype(DATAPROC_T_OBJECT, (dn.node = NULL, dn.type = DATAPROC_T_ERR, dn)); + + void *child = get_memb_func(dn.node, k); + if (child == NULL && error) { + dp_error(&dn, "object has no member named \"%s\"", k); + } + + return (datanode_t){ + .file = dn.file, + .path = dn.path, + .node = child, + .type = child != NULL ? get_type_func(child) : DATAPROC_T_ERR, + }; +} + +bool dp_hasmemb(datanode_t dn, const char *k) { + datanode_t elem = dp_objmemb_internal(dn, k, false); + return elem.type != DATAPROC_T_ERR; +} + +datanode_t dp_objmemb(datanode_t dn, const char *k) { + datanode_t elem = dp_objmemb_internal(dn, k, true); + if (elem.type != DATAPROC_T_ERR) { + size_t size = snprintf(NULL, 0, "%s.%s", dn.path, k); + elem.path = palloc(dn.file->pool, char, size + 1); + snprintf(elem.path, size + 1, "%s.%s", dn.path, k); + } + + return elem; +} + +static regentry_t* regfind(const char *type) { + for (int i = 0; i < REGISTRY_SIZE; i++) { + regentry_t *entry = &registry[i]; + if (entry->type == NULL) return NULL; + if (strcmp(entry->type, type) == 0) return entry; + } + + return NULL; +} + +static int lookup_cmp(const void *key, const void *member) { + const char *value = key; + const lookup_t *entry = member; + + return strcmp(value, entry->def); +} + +lookup_t* lookup_find(datanode_t *dnp, const char *type) { + datanode_t dn = *dnp; + + asserttype(DATAPROC_T_STRING,
(dnp->node = NULL, dnp->type = DATAPROC_T_ERR, NULL)); + + regentry_t *entry = regfind(type); + if (entry == NULL) { + dp_error(dnp, "no lookup table registered for \"%s\"", type); + dnp->node = NULL; + dnp->type = DATAPROC_T_ERR; + return NULL; + } + + const char *value = get_string_func(dn.node); + lookup_t *found = bsearch(value, entry->table, entry->size, sizeof(lookup_t), lookup_cmp); + if (!found) { + dp_error(&dn, "expected an identifier for \"%s\"", type); + dnp->node = NULL; + dnp->type = DATAPROC_T_ERR; + return NULL; + } + + return found; +} + +datanode_t dp_lookup(datanode_t dn, const char *type) { + lookup_t *found = lookup_find(&dn, type); + if (found) { + dn.mapped = found->val; + dn.type = DATAPROC_T_MAPPED; + } + + return dn; +} + +datanode_t dp_lookup_s(datanode_t dn, const char *type) { + lookup_find(&dn, type); + return dn; +} diff --git a/tools/dataproc/lib/include/dataproc.h b/tools/dataproc/lib/include/dataproc.h new file mode 100644 index 0000000000..c4dac6ec97 --- /dev/null +++ b/tools/dataproc/lib/include/dataproc.h @@ -0,0 +1,115 @@ +#ifndef DATAPROC_H +#define DATAPROC_H + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +enum format { + DATAPROC_F_NONE = 0, + DATAPROC_F_JSON, +}; + +enum nodetype { + DATAPROC_T_NONE = 0, + DATAPROC_T_NULL, + DATAPROC_T_BOOLEAN, + DATAPROC_T_INT, + DATAPROC_T_FLOAT, + DATAPROC_T_STRING, + DATAPROC_T_ARRAY, + DATAPROC_T_OBJECT, + + DATAPROC_T_MAPPED, + DATAPROC_T_ERR, +}; + +enum { + DATAPROC_E_NONE = 0, + + DATAPROC_E_UNKFORMAT, + DATAPROC_E_FOPEN, + DATAPROC_E_FSIZE, + DATAPROC_E_ALLOC, + DATAPROC_E_BACKEND, + DATAPROC_E_QUERY, + DATAPROC_E_BADTYPE, + DATAPROC_E_REGFULL, + DATAPROC_E_REGNOTFOUND, +}; + +enum diaglevel { + DIAG_NOTE, + DIAG_WARNING, + DIAG_ERROR, +}; + +typedef struct diagnostic diagnostic_t; +typedef struct datafile datafile_t; +typedef struct datanode datanode_t; +typedef struct lookup lookup_t; +typedef struct span span_t; + +struct datafile { + const char *filename; + const char *source; + size_t
size; + + diagnostic_t *diag_head; + diagnostic_t *diag_tail; + + void *ctx; + void *pool; +}; + +struct datanode { + datafile_t *file; + char *path; + enum nodetype type; + union { + void *node; + long mapped; + }; +}; + +struct lookup { + long val; + const char *def; +}; + +int dp_init(enum format format); +int dp_register(lookup_t *table, size_t size, const char *type); +int dp_load(datafile_t *df, const char *filename); +void dp_free(datafile_t *df); + +enum diaglevel dp_report(datafile_t *df); + +void dp_error(datanode_t *dn, const char *fmt, ...) __attribute__((format(printf, 2, 3))); +void dp_warn(datanode_t *dn, const char *fmt, ...) __attribute__((format(printf, 2, 3))); +void dp_note(datanode_t *dn, const char *fmt, ...) __attribute__((format(printf, 2, 3))); + +datanode_t dp_try(datafile_t *df, const char *path); +datanode_t dp_get(datafile_t *df, const char *path); +bool dp_bool(datanode_t dn); +long dp_int(datanode_t dn); +int8_t dp_s8(datanode_t dn); +int16_t dp_s16(datanode_t dn); +int32_t dp_s32(datanode_t dn); +int64_t dp_s64(datanode_t dn); +uint8_t dp_u8(datanode_t dn); +uint8_t dp_u8range(datanode_t dn, uint8_t min, uint8_t max); +uint16_t dp_u16(datanode_t dn); +uint16_t dp_u16range(datanode_t dn, uint16_t min, uint16_t max); +uint32_t dp_u32(datanode_t dn); +uint32_t dp_u32range(datanode_t dn, uint32_t min, uint32_t max); +uint64_t dp_u64(datanode_t dn); +double dp_float(datanode_t dn); +const char* dp_string(datanode_t dn); +size_t dp_arrlen(datanode_t dn); +datanode_t dp_arrelem(datanode_t dn, size_t i); +bool dp_hasmemb(datanode_t dn, const char *k); +datanode_t dp_objmemb(datanode_t dn, const char *k); +datanode_t dp_lookup(datanode_t dn, const char *type); +datanode_t dp_lookup_s(datanode_t dn, const char *type); + +#endif // DATAPROC_H diff --git a/tools/dataproc/lib/json.c b/tools/dataproc/lib/json.c new file mode 100644 index 0000000000..7aa8e03b58 --- /dev/null +++ b/tools/dataproc/lib/json.c @@ -0,0 +1,113 @@ +#include "json.h" + 
+#include <assert.h> +#include <stdlib.h> + +#include "dataproc.h" +#include "private.h" +#include "yyjson.h" + +typedef struct yyjson_ctx yyjson_ctx_t; +struct yyjson_ctx { + yyjson_doc *doc; + yyjson_val *root; +}; + +int json_read(datafile_t *df) { + assert(df && "df pointer must not be NULL"); + assert(df->source && "df must have loaded a source-file"); + + yyjson_read_err yyjerr = { 0 }; + yyjson_ctx_t *yyjctx = malloc(sizeof(*yyjctx)); + if (yyjctx == NULL) return DATAPROC_E_ALLOC; + + yyjctx->doc = yyjson_read_opts((char *)df->source, df->size, 0, NULL, &yyjerr); + if (yyjctx->doc == NULL) { + dp_gerror(df, yyjerr.pos, yyjerr.pos + 1, "JSON parse error: %s", yyjerr.msg); + free(yyjctx); + return DATAPROC_E_BACKEND; + } + + yyjctx->root = yyjson_doc_get_root(yyjctx->doc); + df->ctx = yyjctx; + return DATAPROC_E_NONE; +} + +void json_free(datafile_t *df) { + assert(df && "df pointer must not be NULL"); + + yyjson_ctx_t *yyjctx = df->ctx; + if (yyjctx != NULL) { + yyjson_doc_free(yyjctx->doc); + free(yyjctx); + } + + df->ctx = NULL; +} + +void* json_get_root(void *ctx) { + assert(ctx && "ctx pointer must not be NULL"); + + yyjson_ctx_t *yyjctx = ctx; + return yyjctx->root; +} + +enum nodetype json_get_type(void *_node) { + yyjson_val *node = _node; + switch (yyjson_get_type(node)) { + default: return DATAPROC_T_NONE; + + case YYJSON_TYPE_RAW: return DATAPROC_T_STRING; + case YYJSON_TYPE_NULL: return DATAPROC_T_NULL; + case YYJSON_TYPE_BOOL: return DATAPROC_T_BOOLEAN; + case YYJSON_TYPE_STR: return DATAPROC_T_STRING; + case YYJSON_TYPE_ARR: return DATAPROC_T_ARRAY; + case YYJSON_TYPE_OBJ: return DATAPROC_T_OBJECT; + + case YYJSON_TYPE_NUM: + return yyjson_get_subtype(node) == YYJSON_SUBTYPE_REAL + ? 
DATAPROC_T_FLOAT + : DATAPROC_T_INT; + } +} + +span_t json_get_span(void *_node) { + yyjson_val *node = _node; + return (span_t){ .beg = node->beg, .end = node->end }; +} + +bool json_get_bool(void *node) { + return yyjson_get_bool(node); +} + +int json_get_int(void *node) { + return yyjson_get_int(node); +} + +int64_t json_get_i64(void *node) { + return yyjson_get_sint(node); +} + +uint64_t json_get_u64(void *node) { + return yyjson_get_uint(node); +} + +double json_get_float(void *node) { + return yyjson_get_real(node); +} + +const char* json_get_string(void *node) { + return yyjson_get_str(node); +} + +size_t json_get_length(void *node) { + return yyjson_get_len(node); +} + +void* json_get_element(void *array, size_t i) { + return yyjson_arr_get(array, i); +} + +void* json_get_member(void *object, const char *k) { + return yyjson_obj_get(object, k); +} diff --git a/tools/dataproc/lib/json.h b/tools/dataproc/lib/json.h new file mode 100644 index 0000000000..f0ea6bcfa2 --- /dev/null +++ b/tools/dataproc/lib/json.h @@ -0,0 +1,28 @@ +#ifndef DATAPROC_JSON_H +#define DATAPROC_JSON_H + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#include "dataproc.h" +#include "private.h" + +int json_read(datafile_t *dp); +void json_free(datafile_t *dp); + +void* json_get_root(void *ctx); +enum nodetype json_get_type(void *node); +span_t json_get_span(void *node); + +bool json_get_bool(void *node); +int json_get_int(void *node); +int64_t json_get_i64(void *node); +uint64_t json_get_u64(void *node); +double json_get_float(void *node); +const char* json_get_string(void *node); +size_t json_get_length(void *node); +void* json_get_element(void *array, size_t i); +void* json_get_member(void *object, const char *k); + +#endif // DATAPROC_JSON_H diff --git a/tools/dataproc/lib/private.h b/tools/dataproc/lib/private.h new file mode 100644 index 0000000000..e2f779d8d5 --- /dev/null +++ b/tools/dataproc/lib/private.h @@ -0,0 +1,26 @@ +#ifndef DATAPROC_PRIVATE_H +#define DATAPROC_PRIVATE_H + +#include <stddef.h> + 
+#include "dataproc.h" + +typedef struct span span_t; +struct span { + size_t beg; + size_t end; +}; + +struct diagnostic { + span_t span; + const char *message; + + enum diaglevel level; + diagnostic_t *prev; + diagnostic_t *next; +}; + +__attribute__((format(printf, 4, 5))) +void dp_gerror(datafile_t *df, size_t beg, size_t end, const char *errfmt, ...); + +#endif // DATAPROC_PRIVATE_H diff --git a/tools/dataproc/meson.build b/tools/dataproc/meson.build new file mode 100644 index 0000000000..580f8ab147 --- /dev/null +++ b/tools/dataproc/meson.build @@ -0,0 +1,26 @@ +dataproc_cflags = [ + '-std=gnu17', + '-O3', + '-Wall', + '-Wextra', + '-Wpedantic', + '-Wconversion', + '-Wno-sign-conversion', +] + +dataproc_dep = declare_dependency( + include_directories: include_directories('lib/include'), + link_with: static_library( + 'dataproc', + sources: files( + 'lib/dataproc.c', + 'lib/json.c', + ), + + c_args: dataproc_cflags, + + include_directories: include_directories('lib/include'), + dependencies: dependency('yyjson'), + native: true, + ), +) diff --git a/tools/devtools/gen_compile_commands.py b/tools/devtools/gen_compile_commands.py index 26d397424e..6cde13a715 100755 --- a/tools/devtools/gen_compile_commands.py +++ b/tools/devtools/gen_compile_commands.py @@ -311,6 +311,31 @@ datagen_cpp_commands = [ for file in (homedir / "tools" / "datagen").rglob("*.cpp") ] +dataproc_c_commands = [ + { + "directory": builddir, + "arguments": [ + "gcc", + f"-I{homedir}/subprojects/yyjson-0.12.0/src", + f"-I{homedir}/tools/nitroarc/lib/include", + f"-I{homedir}/tools/dataproc/lib/include", + f"-I{homedir}/include", + f"-I{builddir}", + "-std=gnu17", + "-Wall", + "-Wextra", + "-Wpedantic", + "-Wconversion", + "-Wno-sign-conversion", + "-o", + file.with_suffix(".o"), + file.resolve(), + ], + "file": file.resolve(), + } + for file in (homedir / "tools" / "dataproc").rglob("*.c") +] + with open("compile_commands.json", "w") as ofp: json.dump( asm_commands @@ -322,7 +347,8 @@ with 
open("compile_commands.json", "w") as ofp: + ppwlobby_c_commands + c_commands + datagen_cpp_commands - + nitroarc_c_commands, + + nitroarc_c_commands + + dataproc_c_commands, ofp, default=str, indent=4, diff --git a/tools/meson.build b/tools/meson.build index 72a0e56b0d..3f55b68204 100644 --- a/tools/meson.build +++ b/tools/meson.build @@ -2,6 +2,7 @@ rapidjson_dep = dependency('rapidjson') # Native tools subdir('nitroarc') # Contains a library component that other tools depend on +subdir('dataproc') subdir('csv2bin') subdir('datagen') subdir('fixrom')