Code review: what am I leaking?

Hey y’all, I’d much appreciate anyone that could help me get sorted;

I’m trying to build out a “lookahead” iterator, which can take a child iterator and buffer some number of tokens ahead in order to allow lookahead checks. It’s currently built on the graphemes.GraphemeIterator from the zg project

currently, my tests pass, but I’m getting a “gpa memory leak” failure, which I don’t understand; I’m deinit’ing Self, which is iterator through the linked list and freeing the nodes, which are the only things being allocated by the testing allocator.

Any insights or other code review would be very much appreciated.

Thank you!

// LookaheadIterator.zig
// wrap a grapheme.GraphemeIterator, and buffer graphemes when requesting lookahead

const std = @import("std");
const grapheme = @import("grapheme");
const GraphemeBuffer = std.DoublyLinkedList(grapheme.Grapheme);

allocator: std.mem.Allocator,
buffer: GraphemeBuffer,
tapped: bool,
iterator: grapheme.Iterator,
grapheme_data: *const grapheme.GraphemeData,

const Self = @This();

pub fn init(input: []const u8, grapheme_data: *const grapheme.GraphemeData, alloc: std.mem.Allocator) Self {
    return .{
        .allocator = alloc,
        .buffer = GraphemeBuffer{},
        .tapped = false,
        .iterator = grapheme.Iterator.init(input, grapheme_data),
        .grapheme_data = grapheme_data,
    };
}

pub fn deinit(self: *Self) void {
    while (self.buffer.pop()) |node| {
        self.allocator.destroy(node);
    }
}

pub fn next(self: *Self) !?grapheme.Grapheme {
    if (self.buffer.pop()) |prev|
        return prev.data;

    return self.iterator.next();
}

pub fn peek(self: *Self) !?grapheme.Grapheme {
    return self.lookahead(0);
}

pub fn lookahead(self: *Self, index: usize) !?grapheme.Grapheme {
    try self.fill_upto(index);
    if (self.buffer.len < (index + 1))
        return null;

    var cursor: usize = 0;
    var node = self.buffer.first;
    while (node != null and cursor < index) : (cursor += 1) {
        node = node.?.next;
    }

    return node.?.data;
}

pub fn has_remaining(self: *Self, index: usize) !bool {
    return (try self.lookahead(index)) != null;
}

fn fill_upto(self: *Self, index: usize) !void {
    while (self.buffer.len < (index + 1)) {
        if (self.iterator.next()) |g| {
            const node = try self.allocator.create(GraphemeBuffer.Node);
            node.*.data = g;
            self.buffer.prepend(node);

            std.debug.print("added a node: {}\n", .{node.data});
            continue;
        }

        std.debug.print("iterator tapped\n", .{});
        self.tapped = true;
        break;
    }
}

test "basic lookahead tests" {
    const allocator = std.testing.allocator;

    const text = "Hoe\u{301}"; // Hoé

    const gd = try grapheme.GraphemeData.init(allocator);
    defer gd.deinit();

    var iterator = Self.init(text, &gd, allocator);
    defer iterator.deinit();

    var res = try iterator.next();
    var out = res.?.bytes(text);
    try std.testing.expect(std.mem.eql(u8, out, "H"));

    try std.testing.expect(try iterator.has_remaining(1));
    try std.testing.expect(!try iterator.has_remaining(2));

    res = try iterator.next();
    try std.testing.expect(res != null);

    out = res.?.bytes(text);
    try std.testing.expect(std.mem.eql(u8, out, "o"));

    res = try iterator.next();
    try std.testing.expect(res != null);

    out = res.?.bytes(text);
    try std.testing.expect(std.mem.eql(u8, out, "e\u{301}"));

    res = try iterator.next();
    try std.testing.expect(res == null);
}

The error trace, though I doubt it’s super useful:

 > zig build test                                                                                                                                                                          main code/ezra-zig
test
└─ run test 3/3 passed, 1 leaked
error: 'ezra.comp.LookaheadIterator.test.basic lookahead tests' leaked: added a node: grapheme.Grapheme{ .len = 1, .offset = 1 }
added a node: grapheme.Grapheme{ .len = 3, .offset = 2 }
iterator tapped
[gpa] (err): memory address 0x102b2c020 leaked:
/Users/jonathan/local/code/ezra-zig/src/ezra/comp/LookaheadIterator.zig:61:51: 0x1028f84e3 in fill_upto (test)
            const node = try self.allocator.create(GraphemeBuffer.Node);
                                                  ^
/Users/jonathan/local/code/ezra-zig/src/ezra/comp/LookaheadIterator.zig:41:23: 0x1028f8b53 in lookahead (test)
    try self.fill_upto(index);
                      ^
/Users/jonathan/local/code/ezra-zig/src/ezra/comp/LookaheadIterator.zig:55:31: 0x1028f8def in has_remaining (test)
    return (try self.lookahead(index)) != null;
                              ^
/Users/jonathan/local/code/ezra-zig/src/ezra/comp/LookaheadIterator.zig:90:54: 0x1028f9097 in test.basic lookahead tests (test)
    try std.testing.expect(try iterator.has_remaining(1));
                                                     ^
/Users/jonathan/.asdf/installs/zig/0.13.0/lib/compiler/test_runner.zig:95:29: 0x1028f2547 in mainServer (test)
                test_fn.func() catch |err| switch (err) {
                            ^
/Users/jonathan/.asdf/installs/zig/0.13.0/lib/compiler/test_runner.zig:35:26: 0x1028ecddf in main (test)
        return mainServer() catch @panic("internal test runner failure");
                         ^
/Users/jonathan/.asdf/installs/zig/0.13.0/lib/std/start.zig:514:22: 0x1028ec9bb in main (test)
            root.main();
                     ^
???:?:?: 0x1839760df in ??? (???)
???:?:?: 0x3706ffffffffffff in ??? (???)

[gpa] (err): memory address 0x102b2c040 leaked:
/Users/jonathan/local/code/ezra-zig/src/ezra/comp/LookaheadIterator.zig:61:51: 0x1028f84e3 in fill_upto (test)
            const node = try self.allocator.create(GraphemeBuffer.Node);
                                                  ^
/Users/jonathan/local/code/ezra-zig/src/ezra/comp/LookaheadIterator.zig:41:23: 0x1028f8b53 in lookahead (test)
    try self.fill_upto(index);
                      ^
/Users/jonathan/local/code/ezra-zig/src/ezra/comp/LookaheadIterator.zig:55:31: 0x1028f8def in has_remaining (test)
    return (try self.lookahead(index)) != null;
                              ^
/Users/jonathan/local/code/ezra-zig/src/ezra/comp/LookaheadIterator.zig:90:54: 0x1028f9097 in test.basic lookahead tests (test)
    try std.testing.expect(try iterator.has_remaining(1));
                                                     ^
/Users/jonathan/.asdf/installs/zig/0.13.0/lib/compiler/test_runner.zig:95:29: 0x1028f2547 in mainServer (test)
                test_fn.func() catch |err| switch (err) {
                            ^
/Users/jonathan/.asdf/installs/zig/0.13.0/lib/compiler/test_runner.zig:35:26: 0x1028ecddf in main (test)
        return mainServer() catch @panic("internal test runner failure");
                         ^
/Users/jonathan/.asdf/installs/zig/0.13.0/lib/std/start.zig:514:22: 0x1028ec9bb in main (test)
            root.main();
                     ^
???:?:?: 0x1839760df in ??? (???)
???:?:?: 0x3706ffffffffffff in ??? (???)
error: while executing test 'ezra.comp.tokenizer.test_0', the following test command failed:
/Users/jonathan/local/code/ezra-zig/.zig-cache/o/831f844133ce8e8d58ba1b48bd663619/test --listen=-
Build Summary: 9/11 steps succeeded; 1 failed; 3/3 tests passed; 1 leaked (disable with --summary none)
test transitive failure
└─ run test 3/3 passed, 1 leaked
error: the following build command failed with exit code 1:
/Users/jonathan/local/code/ezra-zig/.zig-cache/o/ea88fbcbbec5961fae170f960a1212c0/build /Users/jonathan/.asdf/installs/zig/0.13.0/zig /Users/jonathan/local/code/ezra-zig /Users/jonathan/local/code/ezra-zig/.zig-cache /Users/jonathan/.cache/zig --seed 0xd5105711 -Zd1edc97e9aeef3aa test

Hey @lygaret, cool project :slight_smile:

I haven’t run your project myself yet, but there’s a potential conflict of interest between these two functions:

pub fn next(self: *Self) !?grapheme.Grapheme {
    if (self.buffer.pop()) |prev|
        return prev.data;

    return self.iterator.next();
}

deinit and next both can pop a value off the buffer. You could lose a node when you call next. You may need a “free list” that catches the nodes you throw away that you can use later and free if you end up not using them.


Edit: Let me loosely sketch something here…

pub fn next(self: *Self) !?grapheme.Grapheme {

    if (self.buffer.pop()) |prev| <-- we pop the node off the list
        return prev.data; <-- and then drop it on the floor

    return self.iterator.next();
}

Remember… these nodes were created here:

const node = try self.allocator.create(GraphemeBuffer.Node);

You can catch the node and cache it for later (probably what you want to do) or you can free the node. If you free the node, make sure you use a defer free and don’t accidentally free the data you’re trying to return first.


If the items are stable in memory (which they most likely are, we’re using an iterator), then I wouldn’t be afraid to just store pointers to them in a contiguous buffer. You’ll probably allocate less frequently by doing that and I think you’ll get the same stability because the items have to continue to exist to iterate to them.

That said, you can get similar behaviour by using a stack-based allocator (there’s a few in the standard to look through). I’d probably go with a fallback allocator if you’re going to take the linked-list approach.

5 Likes

Ack, it’s always staring me in the face :man_facepalming:, thank you so much for your eyes.

Re: stack based allocator, my original plan was to use the std.RingBuffer but it is u8 only afaict; I’ll keep digging, thank you!

2 Likes

If you’re looking for a more flexible stack based ring buffer, you can take a look at this one.

1 Like

Thank you, @Calder-Ty, I used that as a kick to realize that there’s not that much special about the std structures, and it’s probably likely we’ll add little utilities like this from time to time.

This is the ring queue (not really a buffer, it’s two sided) I landed on, written under study of that link and my problem; thank you!

const std = @import("std");

// _ _ _ _ _ _ _ _
// x _ _ _ _ _ _ _ push x
// y x _ _ _ _ _ _ push y
// z y x _ _ _ _ _ push z
// z y x _ _ _ _ _ lookahead_pop(0) = z, lookahead_pop(1) = y, lookahead_pop(2) = x
// z y x _ _ _ _ _ lookahead_pull(0) = x, lookahead_pull(1) = y, lookahead_pull(2) = z
// z y _ _ _ _ _ _ pull -> x
// y _ _ _ _ _ _ _ pop -> z
// _ _ _ _ _ _ _ _ pull -> y, pop -> y

pub fn RingQueue(comptime T: type, comptime capacity: usize) type {
    return struct {
        const Self = @This();

        len: usize = 0,
        buffer: [capacity]?T,
        read_head: usize = 0,
        write_head: usize = 0,

        pub fn init() Self {
            const buffer = [_]?T{null} ** capacity;
            return Self{
                .buffer = buffer,
            };
        }

        pub fn at_capacity(self: *Self) bool {
            return self.len == capacity;
        }

        fn advance_head(head: usize, amount: usize) usize {
            return @mod(head + amount, capacity);
        }

        fn rewind_head(head: usize, amount: usize) usize {
            std.debug.assert(amount <= capacity);

            // 0 1 2
            // x + n - k % n; adding n ensures we don't have negative mod
            // _ . . -> . . _ (0 + 3 - 1) % 3 = 2
            // . _ . -> _ . . (1 + 3 - 1) % 3 = 0
            // . . _ -> . _ . (2 + 3 - 1) % 3 = 1

            return @mod((head + capacity) - amount, capacity);
        }

        // put a new value at the write head, and advance it forward
        pub fn push(self: *Self, data: T) !void {
            if (self.len >= capacity)
                return error.at_capacity;

            self.buffer[self.write_head] = data;

            self.write_head = advance_head(self.write_head, 1);
            self.len += 1;
        }

        // return the value at the read head, clear the space, and advance forward
        pub fn pull(self: *Self) ?T {
            if (self.len == 0)
                return null;

            const data = self.buffer[self.read_head];

            self.buffer[self.read_head] = null;
            self.read_head = advance_head(self.read_head, 1);
            self.len -= 1;

            return data;
        }

        // return the value at the read head + index, assuming it's within size
        pub fn pull_lookahead(self: *Self, amount: usize) ?T {
            if (amount > self.len)
                return null;

            const index = advance_head(self.read_head, amount);
            return self.buffer[index];
        }

        // rewind the write head, return that value, and clear it from the write_head
        pub fn pop(self: *Self) ?T {
            if (self.len == 0)
                return null;

            self.write_head = rewind_head(self.write_head, 1);
            const data = self.buffer[self.write_head];

            self.buffer[self.write_head] = null;
            self.len -= 1;

            return data;
        }

        // return the value just written at the write head - index, assuming it's within size
        pub fn pop_lookahead(self: *Self, amount: usize) ?T {
            if (amount > self.len)
                return null;

            const index = rewind_head(self.write_head, amount + 1);
            return self.buffer[index];
        }
    };
}

test "can push/pull" {
    const RQU32 = RingQueue(u8, 6);
    var buffer = RQU32.init();

    try buffer.push('1');
    try buffer.push('2');
    try buffer.push('3');
    try buffer.push('4');

    try std.testing.expect(buffer.pull() == '1');
    try std.testing.expect(buffer.pull() == '2');
    try std.testing.expect(buffer.pull() == '3');
    try std.testing.expect(buffer.pull() == '4');
    try std.testing.expect(buffer.pull() == null);
}

test "can push/pop" {
    const RQU32 = RingQueue(u8, 6);
    var buffer = RQU32.init();

    try buffer.push('1');
    try buffer.push('2');
    try buffer.push('3');
    try buffer.push('4');

    try std.testing.expect(buffer.pop() == '4');
    try std.testing.expect(buffer.pop() == '3');
    try std.testing.expect(buffer.pop() == '2');
    try std.testing.expect(buffer.pop() == '1');
    try std.testing.expect(buffer.pop() == null);
}

test "can't push past end" {
    const RQU32 = RingQueue(u8, 3);
    var buffer = RQU32.init();

    try buffer.push('1');
    try buffer.push('2');
    try buffer.push('3');
    if (buffer.push('4')) {
        try std.testing.expect(false); // should have thrown
    } else |err| {
        try std.testing.expect(err == error.at_capacity);
    }
}

test "can initialize" {
    const RQU32 = RingQueue(u8, 6);
    var buffer = RQU32.init();

    // x _ _ _ _ _ push x
    // x y _ _ _ _ push y
    // x y z _ _ _ push z
    // x y z a _ _ push a
    // x y z a b _ push b
    // x y z a _ _ pop b
    // x y z a b _ push b
    // x y z a b c push c, at_capacity() -> true
    // x y z a b c push _, error -> at_capacity
    // _ y z a b c pull -> x
    // _ _ z a b c pull -> y
    // q _ z a b c push q
    // q v z a b c push v
    // q v z a b c pull -> z

    try buffer.push('x');
    try buffer.push('y');

    try std.testing.expect(buffer.pull_lookahead(0) == 'x');
    try std.testing.expect(buffer.pull_lookahead(1) == 'y');
    try std.testing.expect(buffer.pull_lookahead(2) == null);

    try std.testing.expect(buffer.pop_lookahead(0) == 'y');
    try std.testing.expect(buffer.pop_lookahead(1) == 'x');
    try std.testing.expect(buffer.pop_lookahead(2) == null);

    try buffer.push('z');
    try buffer.push('a');
    try buffer.push('b');

    try std.testing.expect(buffer.at_capacity() == false);
    try std.testing.expect(buffer.len == 5);

    try std.testing.expect(buffer.pop() == 'b');
    try std.testing.expect(buffer.len == 4);

    try std.testing.expect(buffer.pull() == 'x');
    try std.testing.expect(buffer.len == 3);

    try buffer.push('a');
    try buffer.push('b');
    try buffer.push('c');

    try std.testing.expect(buffer.at_capacity());

    if (buffer.push('_')) {
        try std.testing.expect(false);
    } else |err| {
        try std.testing.expect(err == error.at_capacity);
    }

    try std.testing.expect(buffer.pull() == 'y');
    try std.testing.expect(buffer.pull() == 'z');

    try std.testing.expect(buffer.at_capacity() == false);
    try std.testing.expect(buffer.len == 4);

    try buffer.push('q');
    try std.testing.expect(buffer.at_capacity() == false);
    try std.testing.expect(buffer.len == 5);

    try buffer.push('v');
    try std.testing.expect(buffer.at_capacity() == true);
    try std.testing.expect(buffer.len == 6);

    try std.testing.expect(buffer.pull() == 'a');
    try std.testing.expect(buffer.pull() == 'a');
    try std.testing.expect(buffer.pull() == 'b');
    try std.testing.expect(buffer.pull() == 'c');
    try std.testing.expect(buffer.pull() == 'q');
    try std.testing.expect(buffer.pull() == 'v');

    try std.testing.expect(buffer.len == 0);
    try std.testing.expect(buffer.pull() == null);
    try std.testing.expect(buffer.pop() == null);
}
2 Likes