diff --git a/lib/struct.c b/lib/struct.c
index ad0bc207..243d51b9 100644
--- a/lib/struct.c
+++ b/lib/struct.c
@@ -372,6 +372,7 @@
 #include "ucode/vallist.h"
 
 static uc_resource_type_t *struct_type;
+static uc_resource_type_t *fmtbuf_type;
 
 typedef struct formatdef {
 	char format;
@@ -395,6 +396,13 @@ typedef struct {
 	formatcode_t codes[];
 } formatstate_t;
 
+typedef struct {
+	uc_resource_t resource;
+	size_t length;
+	size_t capacity;
+	size_t position;
+} formatbuffer_t;
+
 
 /* Define various structs to figure out the alignments of types */
 
@@ -2474,12 +2482,70 @@ parse_format(uc_vm_t *vm, uc_value_t *fmtval)
 	return NULL;
 }
 
-static uc_value_t *
-uc_pack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff)
+/*
+ * Make sure the buffer at *buf can hold at least `length` payload bytes.
+ *
+ * The allocation is laid out like a uc_string_t: the string header, then the
+ * payload, then one terminating NUL byte, so that it can later be turned into
+ * a string value without copying. *bufsz tracks the payload capacity. Newly
+ * gained payload bytes and the terminator are zero-filled. The header itself
+ * is allocated even for a zero-length request, so callers always end up with
+ * a non-NULL buffer on success.
+ *
+ * Returns false after raising a runtime exception on size overflow or
+ * allocation failure; *buf and *bufsz are left untouched in that case.
+ */
+static bool
+grow_buffer(uc_vm_t *vm, void **buf, size_t *bufsz, size_t length)
 {
-	size_t ncode, arg, off;
+	const size_t overhead = sizeof(uc_string_t) + 1;
+
+	if (!*buf || length > *bufsz) {
+		size_t old_size = *bufsz;
+		size_t new_size = length;
+
+		if (*buf != NULL) {
+			new_size = *bufsz;
+
+			while (length > new_size) {
+				if (new_size > SIZE_MAX - (new_size >> 1) - 1) {
+					uc_vm_raise_exception(vm, EXCEPTION_RUNTIME,
+						"Overflow reallocating buffer from %zu to %zu bytes",
+						*bufsz, length);
+
+					return false;
+				}
+
+				/* "+ 1" guarantees progress for capacities 0 and 1 */
+				new_size += (new_size >> 1) + 1;
+			}
+		}
+
+		char *tmp = realloc(*buf, new_size + overhead);
+
+		if (!tmp) {
+			uc_vm_raise_exception(vm, EXCEPTION_RUNTIME,
+				"Error reallocating buffer to %zu+%zu bytes: %m",
+				new_size, overhead);
+
+			return false;
+		}
+
+		memset(tmp + overhead + old_size - 1, 0, new_size - old_size + 1);
+
+		*buf = tmp;
+		*bufsz = new_size;
+	}
+
+	return true;
+}
+
+static bool
+uc_pack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff,
+               void **buf, size_t *pos, size_t *capacity)
+{
+	size_t ncode, arg, off, new_pos;
 	formatcode_t *code;
-	uc_string_t *buf;
 	ssize_t size, n;
 	const void *p;
 
@@ -2504,16 +2570,16 @@ uc_pack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff)
 		}
 	}
 
-	buf = xalloc(sizeof(*buf) + state->size + off + 1);
-	buf->header.type = UC_STRING;
-	buf->header.refcount = 1;
-	buf->length = state->size + off;
+	new_pos = *pos + state->size + off;
+
+	if (!grow_buffer(vm, buf, capacity, new_pos))
+		return false;
 
 	for (ncode = 0, code = &state->codes[0], off = 0;
 	     ncode < state->ncodes;
 	     code = &state->codes[++ncode]) {
 		const formatdef_t *e = code->fmtdef;
-		char *res = buf->str + code->offset + off;
+		char *res = (char *)*buf + sizeof(uc_string_t) + *pos + code->offset + off;
 		ssize_t j = code->repeat;
 
 		while (j--) {
@@ -2526,7 +2592,7 @@ uc_pack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff)
 					uc_vm_raise_exception(vm, EXCEPTION_TYPE,
 						"Argument for '*' must be a string");
 
-					goto err;
+					return false;
 				}
 
 				n = ucv_string_length(v);
@@ -2547,7 +2613,7 @@ uc_pack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff)
 					uc_vm_raise_exception(vm, EXCEPTION_TYPE,
 						"Argument for 's' must be a string");
 
-					goto err;
+					return false;
 				}
 
 				n = ucv_string_length(v);
@@ -2564,7 +2630,7 @@ uc_pack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff)
 					uc_vm_raise_exception(vm, EXCEPTION_TYPE,
 						"Argument for 'p' must be a string");
 
-					goto err;
+					return false;
 				}
 
 				n = ucv_string_length(v);
@@ -2583,61 +2649,36 @@ uc_pack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff)
 			}
 			else {
 				if (!e->pack(vm, res, v, e))
-					goto err;
+					return false;
 			}
 
 			res += size;
 		}
 	}
 
-	return &buf->header;
+	*pos = new_pos;
 
-err:
-	free(buf);
-
-	return NULL;
+	return true;
 }
 
 static uc_value_t *
-uc_unpack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff)
+uc_unpack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state,
+                 const char *buf, long long pos, size_t *rem)
 {
-	uc_value_t *bufval = uc_fn_arg(argoff);
-	uc_value_t *offset = uc_fn_arg(argoff + 1);
-	const char *startfrom = NULL;
-	ssize_t bufrem, size, n;
 	uc_value_t *result;
 	formatcode_t *code;
 	size_t ncode, off;
+	ssize_t size, n;
 
-	if (ucv_type(bufval) != UC_STRING) {
-		uc_vm_raise_exception(vm, EXCEPTION_TYPE,
-			"Buffer value not a string");
+	if (pos < 0)
+		pos += *rem;
 
+	if (pos < 0 || (size_t)pos >= *rem)
 		return NULL;
-	}
-
-	startfrom = ucv_string_get(bufval);
-	bufrem = ucv_string_length(bufval);
-
-	if (offset) {
-		if (ucv_type(offset) != UC_INTEGER) {
-			uc_vm_raise_exception(vm, EXCEPTION_TYPE,
-				"Offset value not an integer");
-
-			return NULL;
-		}
-
-		n = (ssize_t)ucv_int64_get(offset);
-
-		if (n < 0)
-			n += bufrem;
-
-		if (n < 0 || n >= bufrem)
-			return NULL;
-
-		startfrom += n;
-		bufrem -= n;
-	}
+
+	/* honour the start offset: decode from buf[pos] with the remaining bytes */
+	buf += pos;
+	*rem -= pos;
 
 	result = ucv_array_new(vm);
 
@@ -2645,7 +2686,7 @@ uc_unpack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff)
 	     ncode < state->ncodes;
 	     code = &state->codes[++ncode]) {
 		const formatdef_t *e = code->fmtdef;
-		const char *res = startfrom + code->offset + off;
+		const char *res = buf + code->offset + off;
 		ssize_t j = code->repeat;
 
 		while (j--) {
@@ -2654,12 +2695,12 @@ uc_unpack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff)
 			size = code->size;
 
 			if (e->format == '*') {
-				if (size == -1 || size > bufrem)
-					size = bufrem;
+				if (size == -1 || (size_t)size > *rem)
+					size = *rem;
 
 				off += size;
 			}
-			else if (size > bufrem) {
+			else if (size >= 0 && (size_t)size > *rem) {
 				goto fail;
 			}
 
@@ -2684,7 +2725,7 @@ uc_unpack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff)
 			ucv_array_push(result, v);
 
 			res += size;
-			bufrem -= size;
+			*rem -= size;
 		}
 	}
 
@@ -2728,7 +2769,8 @@ static uc_value_t *
 uc_pack(uc_vm_t *vm, size_t nargs)
 {
 	uc_value_t *fmtval = uc_fn_arg(0);
-	uc_value_t *res = NULL;
+	size_t pos = 0, capacity = 0;
+	uc_string_t *us = NULL;
 	formatstate_t *state;
 
 	state = parse_format(vm, fmtval);
@@ -2736,11 +2778,20 @@ uc_pack(uc_vm_t *vm, size_t nargs)
 	if (!state)
 		return NULL;
 
-	res = uc_pack_common(vm, nargs, state, 1);
+	if (!uc_pack_common(vm, nargs, state, 1, (void **)&us, &pos, &capacity)) {
+		free(state);
+		free(us);
+
+		return NULL;
+	}
 
 	free(state);
 
-	return res;
+	us->header.type = UC_STRING;
+	us->header.refcount = 1;
+	us->length = pos;
+
+	return &us->header;
 }
 
 /**
@@ -2780,15 +2831,32 @@ static uc_value_t *
 uc_unpack(uc_vm_t *vm, size_t nargs)
 {
 	uc_value_t *fmtval = uc_fn_arg(0);
+	uc_value_t *bufval = uc_fn_arg(1);
+	uc_value_t *offset = uc_fn_arg(2);
 	uc_value_t *res = NULL;
 	formatstate_t *state;
+	long long pos = 0;
+	size_t rem;
+	char *buf;
+
+	if (ucv_type(bufval) != UC_STRING) {
+		uc_vm_raise_exception(vm, EXCEPTION_TYPE,
+			"Buffer value not a string");
+
+		return NULL;
+	}
+
+	if (offset && !ucv_as_longlong(vm, offset, &pos))
+		return NULL;
 
 	state = parse_format(vm, fmtval);
 
 	if (!state)
 		return NULL;
 
-	res = uc_unpack_common(vm, nargs, state, 1);
+	buf = ucv_string_get(bufval);
+	rem = ucv_string_length(bufval);
+	res = uc_unpack_common(vm, nargs, state, buf, pos, &rem);
 
 	free(state);
 
@@ -2851,14 +2919,6 @@ uc_struct_new(uc_vm_t *vm, size_t nargs)
 	return uc_resource_new(struct_type, state);
 }
 
-static void
-uc_struct_gc(void *ud)
-{
-	formatstate_t *state = ud;
-
-	free(state);
-}
-
 /**
  * Pack given values.
  *
@@ -2884,12 +2944,24 @@ uc_struct_gc(void *ud)
 static uc_value_t *
 uc_struct_pack(uc_vm_t *vm, size_t nargs)
 {
-	formatstate_t **state = uc_fn_this("struct");
+	formatstate_t **state = uc_fn_this("struct.format");
+	size_t pos = 0, capacity = 0;
+	uc_string_t *us = NULL;
 
 	if (!state || !*state)
 		return NULL;
 
-	return uc_pack_common(vm, nargs, *state, 0);
+	if (!uc_pack_common(vm, nargs, *state, 0, (void **)&us, &pos, &capacity)) {
+		free(us);
+
+		return NULL;
+	}
+
+	us->header.type = UC_STRING;
+	us->header.refcount = 1;
+	us->length = pos;
+
+	return &us->header;
 }
 
 /**
@@ -2923,12 +2995,441 @@ uc_struct_pack(uc_vm_t *vm, size_t nargs)
 static uc_value_t *
 uc_struct_unpack(uc_vm_t *vm, size_t nargs)
 {
-	formatstate_t **state = uc_fn_this("struct");
+	formatstate_t **state = uc_fn_this("struct.format");
+	uc_value_t *bufval = uc_fn_arg(0);
+	uc_value_t *offset = uc_fn_arg(1);
+	long long pos = 0;
+	size_t rem;
+	char *buf;
 
 	if (!state || !*state)
 		return NULL;
 
-	return uc_unpack_common(vm, nargs, *state, 0);
+	if (ucv_type(bufval) != UC_STRING) {
+		uc_vm_raise_exception(vm, EXCEPTION_TYPE,
+			"Buffer value not a string");
+
+		return NULL;
+	}
+
+	if (offset && !ucv_as_longlong(vm, offset, &pos))
+		return NULL;
+
+	buf = ucv_string_get(bufval);
+	rem = ucv_string_length(bufval);
+
+	return uc_unpack_common(vm, nargs, *state, buf, pos, &rem);
+}
+
+
+/**
+ * Represents a struct buffer instance created by `buffer()`.
+ *
+ * @class module:struct.buffer
+ * @hideconstructor
+ *
+ * @see {@link module:struct#buffer|buffer()}
+ *
+ * @example
+ *
+ * const buf = struct.buffer();
+ *
+ * buf.put('I', 12345);
+ *
+ * const value = buf.get('I');
+ */
+
+/**
+ * Creates a new struct buffer instance.
+ *
+ * The `buffer()` function creates a new struct buffer object that can be used
+ * for incremental packing and unpacking of binary data. If an initial data
+ * string is provided, the buffer is initialized with this content.
+ *
+ * Note that even when initial data is provided, the buffer position is always
+ * set to zero. This design assumes that the primary intent when initializing
+ * a buffer with data is to read (unpack) from the beginning. If you want to
+ * append data to a pre-initialized buffer, you need to explicitly move the
+ * position to the end, either by calling `end()` or by setting the position
+ * manually with `pos()`.
+ *
+ * Returns a new struct buffer instance.
+ *
+ * @function module:struct#buffer
+ *
+ * @param {string} [initialData]
+ * Optional initial data to populate the buffer with.
+ *
+ * @returns {module:struct.buffer}
+ *
+ * @example
+ * // Create an empty buffer
+ * const emptyBuf = struct.buffer();
+ *
+ * // Create a buffer with initial data
+ * const dataBuf = struct.buffer("\x01\x02\x03\x04");
+ *
+ * // Read from the beginning of the initialized buffer
+ * const value = dataBuf.get('I');
+ *
+ * // Append data to the initialized buffer
+ * dataBuf.end().put('I', 5678);
+ *
+ * // Alternative chained syntax for initializing and appending
+ * const buf = struct.buffer("\x01\x02\x03\x04").end().put('I', 5678);
+ */
+static uc_value_t *
+uc_fmtbuf_new(uc_vm_t *vm, size_t nargs)
+{
+	formatbuffer_t *buffer = xalloc(sizeof(*buffer));
+	uc_value_t *init_data = uc_fn_arg(0);
+
+	buffer->resource.header.type = UC_RESOURCE;
+	buffer->resource.header.refcount = 1;
+	buffer->resource.type = fmtbuf_type;
+
+	if (ucv_type(init_data) == UC_STRING) {
+		char *buf = ucv_string_get(init_data);
+		size_t len = ucv_string_length(init_data);
+
+		if (!grow_buffer(vm, &buffer->resource.data, &buffer->capacity, len)) {
+			free(buffer);
+
+			return NULL;
+		}
+
+		buffer->length = len;
+		memcpy((char *)buffer->resource.data + sizeof(uc_string_t), buf, len);
+	}
+
+	return &buffer->resource.header;
+}
+
+/*
+ * Return the buffer instance the current native call was invoked on, or
+ * NULL if the call frame's `this` context is not a struct buffer resource.
+ */
+static formatbuffer_t *
+formatbuffer_ctx(uc_vm_t *vm)
+{
+	uc_value_t *ctx = vm->callframes.entries[vm->callframes.count - 1].ctx;
+
+	if (ucv_type(ctx) != UC_RESOURCE)
+		return NULL;
+
+	uc_resource_t *res = (uc_resource_t *)ctx;
+
+	if (res->type != fmtbuf_type)
+		return NULL;
+
+	return (formatbuffer_t *)res;
+}
+
+/**
+ * Get or set the current position in the buffer.
+ *
+ * If called without arguments, returns the current position.
+ * If called with a position argument, sets the current position to that value.
+ *
+ * @function module:struct.buffer#pos
+ *
+ * @param {number} [position]
+ * The position to set. If omitted, the current position is returned.
+ *
+ * @returns {number|module:struct.buffer}
+ * If called without arguments, returns the current position.
+ * If called with a position argument, returns the buffer instance for chaining.
+ *
+ * @example
+ * const currentPos = buf.pos();
+ * buf.pos(10); // Set position to 10
+ */
+static uc_value_t *
+uc_fmtbuf_pos(uc_vm_t *vm, size_t nargs)
+{
+	formatbuffer_t *buffer = formatbuffer_ctx(vm);
+	uc_value_t *new_pos = uc_fn_arg(0);
+
+	if (!buffer)
+		return NULL;
+
+	if (new_pos) {
+		long long pos;
+
+		if (!ucv_as_longlong(vm, new_pos, &pos))
+			return NULL;
+
+		if (pos < 0) pos += buffer->length;
+		if (pos < 0) pos = 0;
+
+		if (!grow_buffer(vm, &buffer->resource.data, &buffer->capacity, pos))
+			return NULL;
+
+		buffer->position = pos;
+
+		if (buffer->position > buffer->length)
+			buffer->length = buffer->position;
+
+		return ucv_get(&buffer->resource.header);
+	}
+
+	return ucv_uint64_new(buffer->position);
+}
+
+/**
+ * Set the buffer position to the start (0).
+ *
+ * @function module:struct.buffer#start
+ *
+ * @returns {module:struct.buffer}
+ * The buffer instance.
+ *
+ * @example
+ * buf.start();
+ */
+static uc_value_t *
+uc_fmtbuf_start(uc_vm_t *vm, size_t nargs)
+{
+	formatbuffer_t *buffer = formatbuffer_ctx(vm);
+
+	if (!buffer)
+		return NULL;
+
+	buffer->position = 0;
+
+	return ucv_get(&buffer->resource.header);
+}
+
+/**
+ * Set the buffer position to the end.
+ *
+ * @function module:struct.buffer#end
+ *
+ * @returns {module:struct.buffer}
+ * The buffer instance.
+ *
+ * @example
+ * buf.end();
+ */
+static uc_value_t *
+uc_fmtbuf_end(uc_vm_t *vm, size_t nargs)
+{
+	formatbuffer_t *buffer = formatbuffer_ctx(vm);
+
+	if (!buffer)
+		return NULL;
+
+	buffer->position = buffer->length;
+
+	return ucv_get(&buffer->resource.header);
+}
+
+/**
+ * Pack data into the buffer at the current position.
+ *
+ * The `put()` function packs the given values into the buffer according to
+ * the specified format string, starting at the current buffer position.
+ * The format string follows the same syntax as used in `struct.pack()`.
+ *
+ * For a detailed explanation of the format string syntax, refer to the
+ * ["Format Strings" section]{@link module:struct} in the module
+ * documentation.
+ *
+ * @function module:struct.buffer#put
+ *
+ * @param {string} format
+ * The format string specifying how to pack the data.
+ *
+ * @param {...*} values
+ * The values to pack into the buffer.
+ *
+ * @returns {module:struct.buffer}
+ * The buffer instance.
+ *
+ * @see {@link module:struct#pack|struct.pack()}
+ *
+ * @example
+ * buf.put('II', 1234, 5678);
+ */
+static uc_value_t *
+uc_fmtbuf_put(uc_vm_t *vm, size_t nargs)
+{
+	formatbuffer_t *buffer = formatbuffer_ctx(vm);
+	uc_value_t *fmt = uc_fn_arg(0);
+	formatstate_t *state;
+	bool res;
+
+	if (!buffer)
+		return NULL;
+
+	state = parse_format(vm, fmt);
+
+	if (!state)
+		return NULL;
+
+	res = uc_pack_common(vm, nargs, state, 1,
+		&buffer->resource.data, &buffer->position, &buffer->capacity);
+
+	free(state);
+
+	if (!res)
+		return NULL;
+
+	if (buffer->position > buffer->length)
+		buffer->length = buffer->position;
+
+	return ucv_get(&buffer->resource.header);
+}
+
+/**
+ * Unpack data from the buffer at the current position.
+ *
+ * The `get()` function unpacks data from the buffer according to the
+ * specified format string, starting at the current buffer position.
+ * The format string follows the same syntax as used in `struct.unpack()`.
+ *
+ * For a detailed explanation of the format string syntax, refer to the
+ * ["Format Strings" section]{@link module:struct} in the module documentation.
+ *
+ * @function module:struct.buffer#get
+ *
+ * @param {string} format
+ * The format string specifying how to unpack the data.
+ *
+ * @returns {array}
+ * An array containing the unpacked values.
+ *
+ * @see {@link module:struct#unpack|struct.unpack()}
+ *
+ * @example
+ * const values = buf.get('II');
+ */
+static uc_value_t *
+uc_fmtbuf_get(uc_vm_t *vm, size_t nargs)
+{
+	formatbuffer_t *buffer = formatbuffer_ctx(vm);
+	uc_value_t *fmt = uc_fn_arg(0);
+	formatstate_t *state;
+	uc_value_t *result;
+	size_t rem;
+	char *buf;
+
+	if (!buffer)
+		return NULL;
+
+	state = parse_format(vm, fmt);
+
+	if (!state)
+		return NULL;
+
+	/* only the first `length` bytes hold data; capacity includes slack */
+	rem = buffer->length;
+	buf = (char *)buffer->resource.data + sizeof(uc_string_t);
+
+	result = uc_unpack_common(vm, nargs, state, buf, buffer->position, &rem);
+
+	if (result)
+		buffer->position = buffer->length - rem;
+
+	free(state);
+
+	return result;
+}
+
+/**
+ * Extract a slice of the buffer content.
+ *
+ * The `slice()` function returns a substring of the buffer content
+ * between the specified start and end positions.
+ *
+ * @function module:struct.buffer#slice
+ *
+ * @param {number} [start=0]
+ * The starting position of the slice.
+ *
+ * @param {number} [end=buffer.length]
+ * The ending position of the slice.
+ *
+ * @returns {string}
+ * A string containing the specified slice of the buffer content.
+ *
+ * @example
+ * const slice = buf.slice(4, 8);
+ */
+static uc_value_t *
+uc_fmtbuf_slice(uc_vm_t *vm, size_t nargs)
+{
+	formatbuffer_t *buffer = formatbuffer_ctx(vm);
+	uc_value_t *from = uc_fn_arg(0);
+	uc_value_t *to = uc_fn_arg(1);
+	long long spos, epos;
+	char *buf;
+
+	if (!buffer)
+		return NULL;
+
+	spos = 0;
+	epos = buffer->length;
+
+	if (from && !ucv_as_longlong(vm, from, &spos))
+		return NULL;
+
+	if (to && !ucv_as_longlong(vm, to, &epos))
+		return NULL;
+
+	if (spos < 0) spos += buffer->length;
+	if (spos < 0) spos = 0;
+	if ((size_t)spos > buffer->length) spos = buffer->length;
+
+	if (epos < 0) epos += buffer->length;
+	if (epos < spos) epos = spos;
+	if ((size_t)epos > buffer->length) epos = buffer->length;
+
+	if (!buffer->resource.data)
+		return ucv_string_new_length("", 0);
+
+	buf = (char *)buffer->resource.data + sizeof(uc_string_t) + spos;
+
+	return ucv_string_new_length(buf, epos - spos);
+}
+
+/**
+ * Extract and remove all content from the buffer.
+ *
+ * The `pull()` function returns all content of the buffer as a string
+ * and resets the buffer to an empty state.
+ *
+ * @function module:struct.buffer#pull
+ *
+ * @returns {string}
+ * A string containing all the buffer content.
+ *
+ * @example
+ * const allData = buf.pull();
+ */
+static uc_value_t *
+uc_fmtbuf_pull(uc_vm_t *vm, size_t nargs)
+{
+	formatbuffer_t *buffer = formatbuffer_ctx(vm);
+	uc_string_t *us;
+
+	if (!buffer)
+		return NULL;
+
+	if (!buffer->resource.data)
+		return ucv_string_new_length("", 0);
+
+	us = buffer->resource.data;
+	us->header.type = UC_STRING;
+	us->header.refcount = 1;
+	us->length = buffer->length;
+
+	buffer->resource.data = NULL;
+	buffer->capacity = 0;
+	buffer->position = 0;
+	buffer->length = 0;
+
+	return &us->header;
 }
 
 
@@ -2937,10 +3438,21 @@ static const uc_function_list_t struct_inst_fns[] = {
 	{ "unpack", uc_struct_unpack }
 };
 
+static const uc_function_list_t buffer_inst_fns[] = {
+	{ "pos", uc_fmtbuf_pos },
+	{ "start", uc_fmtbuf_start },
+	{ "end", uc_fmtbuf_end },
+	{ "put", uc_fmtbuf_put },
+	{ "get", uc_fmtbuf_get },
+	{ "slice", uc_fmtbuf_slice },
+	{ "pull", uc_fmtbuf_pull },
+};
+
 static const uc_function_list_t struct_fns[] = {
 	{ "pack", uc_pack },
 	{ "unpack", uc_unpack },
-	{ "new", uc_struct_new }
+	{ "new", uc_struct_new },
+	{ "buffer", uc_fmtbuf_new }
 };
 
 void uc_module_init(uc_vm_t *vm, uc_value_t *scope)
@@ -2949,5 +3461,6 @@ void uc_module_init(uc_vm_t *vm, uc_value_t *scope)
 
 	uc_function_list_register(scope, struct_fns);
 
-	struct_type = uc_type_declare(vm, "struct", struct_inst_fns, uc_struct_gc);
+	struct_type = uc_type_declare(vm, "struct.format", struct_inst_fns, free);
+	fmtbuf_type = uc_type_declare(vm, "struct.buffer", buffer_inst_fns, free);
 }