JSON Parsing, suspicion of memory leak

ikarius · December 21, 2024, 11:44pm

Hi all,

I’m currently trying to parse a JSON file containing an array of CPU tests :

const std = @import("std");

// This file reads a series of JSON files,
// each containing 1000 tests for a given SM83 instruction.
/// Snapshot of the CPU described by one side (`initial` or `final`) of a JSON
/// test case. Field names mirror the JSON keys that `std.json` maps onto them.
const CpuState = struct {
    // 16-bit fields; presumably program counter and stack pointer — confirm
    // against the test-suite documentation.
    pc: u16,
    sp: u16,
    // 8-bit fields; presumably the SM83 registers (with `f` holding the
    // flags) — TODO confirm.
    a: u8,
    b: u8,
    c: u8,
    d: u8,
    e: u8,
    f: u8,
    h: u8,
    l: u8,
    // NOTE(review): looks like an interrupt-enable flag stored as a number — confirm.
    ime: u8,
    // Optional: stays `null` when the JSON object has no `ie` key.
    ie: ?u8 = null,
    // Slice of two-element u16 entries; presumably [address, value] pairs —
    // verify against the JSON data.
    ram: [][2]u16,
};

/// One entry of the JSON test array: a name plus two CPU snapshots.
const OpTest = struct {
    // Test identifier, printed with `{s}` by the test in this file.
    name: []const u8,
    // Presumably the CPU state before the instruction runs — confirm.
    initial: CpuState,
    // Presumably the expected CPU state after the instruction runs — confirm.
    final: CpuState,
};

// Directory holding the JSON test files. It is a relative path, and
// `parseJsonFile` opens files via `std.fs.cwd()`, so it is resolved against
// the current working directory.
const test_path = "./tests/sm83/v1";

/// Reads `filename` (resolved relative to the current working directory) and
/// parses its contents as a JSON array of `OpTest` values.
///
/// Both the temporary file buffer and the parsed data are allocated through
/// `allocator`. The parsed data is owned by the returned `std.json.Parsed`
/// wrapper: the caller must call `deinit()` on it to release everything.
/// JSON keys that have no matching struct field are ignored.
///
/// Returns any error raised while opening or reading the file, or while
/// parsing the JSON.
fn parseJsonFile(allocator: std.mem.Allocator, filename: []const u8) !std.json.Parsed([]*OpTest) {
    // The handle is only read and closed, never mutated, so it is `const`.
    const file = try std.fs.cwd().openFile(filename, .{});
    defer file.close();

    const contents = try file.readToEndAlloc(allocator, std.math.maxInt(usize));
    // The raw bytes are released as soon as this function returns ...
    defer allocator.free(contents);

    // ... so parsed strings must be copied rather than borrowed from
    // `contents`. `.alloc_always` guarantees the copy; with `.alloc_if_needed`
    // a string such as `OpTest.name` may point into the freed buffer.
    return std.json.parseFromSlice([]*OpTest, allocator, contents, .{
        .ignore_unknown_fields = true,
        .allocate = .alloc_always,
    });
}

// Smoke test: parses the sample file for opcode 0x00 and prints the first
// test case. `std.testing.allocator` makes the test fail if any allocation
// made during parsing is leaked.
test "Open sample file (NOP)" {
    const allocator = std.testing.allocator;
    const result = try parseJsonFile(allocator, test_path ++ "/00.json");
    defer result.deinit();

    const opsuite = result.value;

    // Guard the `opsuite[0]` accesses below: an empty JSON array should fail
    // the test with an error instead of triggering an out-of-bounds panic.
    try std.testing.expect(opsuite.len > 0);

    std.debug.print("opsuite: {any}\n", .{opsuite});
    std.debug.print("name: {s}\n", .{opsuite[0].name});
    std.debug.print("initial: {any}\n", .{opsuite[0].initial});
    std.debug.print("final: {any}\n", .{opsuite[0].final});
}

After a long series of tests and tweaks, I can finally run the test without errors.

But I have serious doubts about the .allocate = .alloc_always parsing option.

When removing it, of course some of the arrays are not correctly initialized.
But with this option enabled, can I be sure that the testing allocator is used for all allocations made during the construction of the OpTest and CpuState objects?

Looking at the source, I’m not really sure, but I’m also a bit lost.
If this is the case, the testing allocator should report an error when a memory leak occurs, correct?

Thanks in advance for your help.

dimdin · December 22, 2024, 12:44am

.alloc_always means allocate both strings for the key and the value of an object.
.alloc_if_needed means allocate string for the value of an object (the key is resolved to a struct field and there is no need for a key string allocation).

You can safely use .alloc_if_needed since the keys are OpTest fields.

An idea is to use ArenaAllocator and parseFromSliceLeaky.

ikarius · December 22, 2024, 10:18am

Thanks a lot for your reply, I’ll investigate the use of ArenaAllocator further.

However, if I switch to .alloc_if_needed, the test crashes when trying to access the .name field of a result OpTest:

name: thread 2155014 panic: reached unreachable code
/var/home/ikarius/.local/share/zig/lib/std/posix.zig:1223:23: 0x109666e in write (test)
            .FAULT => unreachable,
                      ^
/var/home/ikarius/.local/share/zig/lib/std/fs/File.zig:1281:23: 0x1055c7a in write (test)
    return posix.write(self.handle, bytes);
                      ^
/var/home/ikarius/.local/share/zig/lib/std/io.zig:360:27: 0x105064a in typeErasedWriteFn (test)
            return writeFn(ptr.*, bytes);
                          ^
/var/home/ikarius/.local/share/zig/lib/std/io/Writer.zig:13:24: 0x10aabab in write (test)
    return self.writeFn(self.context, bytes);
                       ^
/var/home/ikarius/.local/share/zig/lib/std/io/Writer.zig:19:32: 0x107fe83 in writeAll (test)
        index += try self.write(bytes[index..]);
                               ^
/var/home/ikarius/.local/share/zig/lib/std/fmt.zig:1043:28: 0x10af7dd in formatBuf__anon_9676 (test)
        try writer.writeAll(buf);
                           ^
/var/home/ikarius/.local/share/zig/lib/std/fmt.zig:637:37: 0x10968f9 in formatType__anon_9086 (test)
                    return formatBuf(value, options, writer);
                                    ^
/var/home/ikarius/.local/share/zig/lib/std/fmt.zig:185:23: 0x1055d7e in format__anon_5508 (test)
        try formatType(
                      ^
/var/home/ikarius/.local/share/zig/lib/std/io/Writer.zig:24:26: 0x10506d0 in print__anon_4087 (test)
    return std.fmt.format(self, format, args);
                         ^
/var/home/ikarius/.local/share/zig/lib/std/io.zig:324:47: 0x104ad50 in print__anon_3946 (test)
            return @errorCast(self.any().print(format, args));
                                              ^
/var/home/ikarius/Projects/zig/sm83/src/tests.zig:55:20: 0x1048e07 in test.Open sample file (NOP) (test)
    std.debug.print("name: {s}\n", .{opsuite[0].name});
                   ^
/var/home/ikarius/.local/share/zig/lib/compiler/test_runner.zig:157:25: 0x105c760 in mainTerminal (test)
        if (test_fn.func()) |_| {
                        ^
/var/home/ikarius/.local/share/zig/lib/compiler/test_runner.zig:37:28: 0x1050feb in main (test)
        return mainTerminal();
                           ^
/var/home/ikarius/.local/share/zig/lib/std/start.zig:514:22: 0x104b759 in posixCallMainAndExit (test)
            root.main();
                     ^
/var/home/ikarius/.local/share/zig/lib/std/start.zig:266:5: 0x104b2c1 in _start (test)
    asm volatile (switch (native_arch) {
    ^
???:?:?: 0x0 in ??? (???)
error: the following test command crashed:
/var/home/ikarius/Projects/zig/sm83/.zig-cache/o/c4a0e1ef5be27bc84713a2583dc897ac/test

dimdin · December 22, 2024, 10:49am

Oh! .alloc_if_needed reuses the json input buffer to hold data. But the buffer (contents) is freed before exiting parseJsonFile.
Your options are to either extend the buffer lifetime or keep the .alloc_always option.

ikarius · December 22, 2024, 11:03am

Thanks so much.
Didn’t pay much attention to contents (should have).
I’ve got a better understanding of what is going on now!