I want to assign a stack-allocated [num][size]u8 to a [][]u8, but ran into a memory problem; please help

system:
zig version 0.13.0 on Ubuntu 20.04.6 LTS

So, I want to have a variable of type [][]u8 that acts as a slice of strings allocated on the stack, just to store some arbitrary strings.

Here is my code:

const std = @import("std");

/// Returns a struct type that bundles a byte buffer with a `[][]u8` view
/// (`data`) carved out of that buffer through a `FixedBufferAllocator`.
/// `num` is the number of strings and `size` is the byte length of each one.
fn makeSliceOfStrings(comptime num: usize, comptime size: usize) type {
    return struct {
        const Self = @This();

        /// Backing storage: room for `num` slice headers plus `num * size` payload bytes.
        buffer: [num * size + num * @sizeOf([]u8)]u8,
        /// Slice of slices; allocated from `buffer` inside `new`.
        data: [][]u8 = undefined,
        /// Bump allocator over `buffer`; set up inside `new`.
        fba: std.heap.FixedBufferAllocator = undefined,
        /// Allocator interface obtained from `fba`; set up inside `new`.
        allocator: std.mem.Allocator = undefined,

        /// Builds a `Self` in a local variable, allocates `data` and every
        /// `data[idx]` from that local's `buffer`, prints diagnostics along
        /// the way, and returns the local by value.
        /// NOTE(review): `fba` is initialised with `&self.buffer` of the local
        /// `self`, so every pointer stored here refers to this call's local
        /// variable rather than to the value the caller ends up holding.
        pub fn new() !Self {
            var self = Self{ .buffer = .{0} ** (num * size + num * @sizeOf([]u8)) };
            std.debug.print("buffer addr: {*}\n", .{&self.buffer});
            std.debug.print("buffer: {any}\n", .{self.buffer});

            // The allocator hands out memory from the local `self.buffer`.
            self.fba = std.heap.FixedBufferAllocator.init(&self.buffer);
            self.allocator = self.fba.allocator();

            // First allocation: the `num` slice headers.
            self.data = try self.allocator.alloc([]u8, num);
            std.debug.print("data addr: {*}\n", .{self.data.ptr});

            // Then one `size`-byte payload per header.
            for (self.data, 0..) |_, idx| {
                const datatmp = try self.allocator.alloc(u8, size);

                // std.debug.print("data[{d}]  data addr: {*}\n", .{ idx, datatmp.ptr });
                self.data[idx] = datatmp;
                std.debug.print("sos data[{d}] fatptr addr:{*} addr: {*} len:{d}\n", .{ idx, &(self.data[idx]), self.data[idx].ptr, self.data[idx].len });
            }

            std.debug.print("buffer 2: {any}\n", .{self.buffer});

            // Returned by value; the pointers set up above are not adjusted.
            return self;
        }

        /// Prints the address and length of every entry in `data`, then the
        /// whole `buffer`. The actual zeroing is commented out in this version.
        pub fn clear(self: *Self) void {
            std.debug.print("clear data addr: {*}\n", .{self.data.ptr});
            for (self.data, 0..) |_, idx| {
                std.debug.print("clear data[{d}] fatptr addr:{*} addr: {*} len:{d}\n", .{ idx, &(self.data[idx]), self.data[idx].ptr, self.data[idx].len });
                // @memset(self.data[idx], 0);
            }
            std.debug.print("buffer 3: {any}\n", .{self.buffer});
        }
    };
}

/// Empty stub that accepts a `[][]u8` and ignores it.
fn doSomething(_: [][]u8) void {}

/// Reproduction driver: builds a 3 x 32 instance with `new`, calls `clear`,
/// prints the outer slice and every inner slice, then passes `data` to
/// `doSomething`.
pub fn main() !void {
    var sos = try makeSliceOfStrings(3, 32).new();
    sos.clear();

    std.debug.print("data len:{d} ptr:{*}\n", .{ sos.data.len, sos.data.ptr });

    // Each entry is printed twice: once through the loop capture `s` and once
    // through `sos.data[idx]`.
    for (sos.data, 0..) |s, idx| {
        std.debug.print("idx:{d}, addr: {*} len:{d}\n", .{ idx, s.ptr, s.len });
        std.debug.print("print buffer[{d}] fatptr addr:{*} addr: {*} len:{d}\n", .{ idx, &(sos.data[idx]), sos.data[idx].ptr, sos.data[idx].len });
        // std.debug.print("data:{s}\n", .{s});
    }

    doSomething(sos.data);
}

Here is the print:

buffer addr: [144]u8@7ffff60ef678
buffer: { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
data addr: []u8@7ffff60ef678
sos data[0] fatptr addr:[]u8@7ffff60ef678 addr: u8@7ffff60ef6a8 len:32
sos data[1] fatptr addr:[]u8@7ffff60ef688 addr: u8@7ffff60ef6c8 len:32
sos data[2] fatptr addr:[]u8@7ffff60ef698 addr: u8@7ffff60ef6e8 len:32
buffer 2: { 168, 246, 14, 246, 255, 127, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 200, 246, 14, 246, 255, 127, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 232, 246, 14, 246, 255, 127, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170 }
clear data addr: []u8@7ffff60ef678
clear data[0] fatptr addr:[]u8@7ffff60ef678 addr: u8@7ffff60ef9cc len:1
clear data[1] fatptr addr:[]u8@7ffff60ef688 addr: u8@0 len:140737321563984
clear data[2] fatptr addr:[]u8@7ffff60ef698 addr: u8@105c134 len:1
buffer 3: { 168, 246, 14, 246, 255, 127, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 200, 246, 14, 246, 255, 127, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 232, 246, 14, 246, 255, 127, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170 }
data len:3 ptr:[]u8@7ffff60ef678
idx:0, addr: u8@20109cc3a len:140737321564872
print buffer[0] fatptr addr:[]u8@7ffff60ef678 addr: u8@1 len:16907863
idx:1, addr: u8@1 len:0
print buffer[1] fatptr addr:[]u8@7ffff60ef688 addr: u8@200000000 len:16907863
idx:2, addr: u8@3b len:57
print buffer[2] fatptr addr:[]u8@7ffff60ef698 addr: u8@1 len:1

In `new` everything looks fine.
In `clear` and `main`, `data[idx].len` and `data[idx].ptr` contain garbage.
Any idea why?

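Once `new` returns, you have dangling pointers: `data` and the allocator still point into the `self` that was local to `new`, while the caller only receives a copy of it.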

1 Like

I see! thanks.

So, I have to make it a ‘two stage operation’ to actually put memory in caller’s stack. Is there any other way?

const std = @import("std");

/// Returns a struct type holding `num` strings of `size` bytes each, exposed
/// as a `[][]u8` (`data`) that is carved out of the struct's own `buffer`.
///
/// After `init` the instance is self-referential: `data`, `fba` and
/// `allocator` all point into the instance itself. Call `init` on the value
/// at its final address and do not copy or move it afterwards.
pub fn makeSliceOfStrings(comptime num: usize, comptime size: usize) type {
    return struct {
        const Self = @This();

        /// Exact byte count needed: `num` slice headers plus `num * size` payload bytes.
        const buffer_len = num * size + num * @sizeOf([]u8);

        /// Backing storage. It is aligned for `[]u8` because the slice headers
        /// are allocated first; the buffer is sized exactly, so any alignment
        /// padding at the front would make the last allocation fail.
        buffer: [buffer_len]u8 align(@alignOf([]u8)),
        /// Slice of slices allocated from `buffer`; valid only after `init`.
        data: [][]u8 = undefined,
        /// Bump allocator over `buffer`; valid only after `init`.
        fba: std.heap.FixedBufferAllocator = undefined,
        /// Allocator interface backed by `fba`; valid only after `init`.
        allocator: std.mem.Allocator = undefined,

        /// Stage one: returns a value with a zeroed `buffer` and no pointers
        /// set up yet, so it is safe to copy into its final location.
        pub fn new() Self {
            return Self{ .buffer = [_]u8{0} ** buffer_len };
        }

        /// Stage two: wires `fba`, `allocator` and `data` to this instance's
        /// `buffer` and prints diagnostics along the way.
        /// Returns an allocation error if `buffer` cannot hold all slices.
        pub fn init(self: *Self) !void {
            std.debug.print("buffer addr: {*}\n", .{&self.buffer});
            std.debug.print("buffer: {any}\n", .{self.buffer});

            self.fba = std.heap.FixedBufferAllocator.init(&self.buffer);
            self.allocator = self.fba.allocator();

            // Headers first, so they sit at the aligned start of `buffer`.
            self.data = try self.allocator.alloc([]u8, num);
            std.debug.print("data addr: {*}\n", .{self.data.ptr});

            // One `size`-byte payload per header.
            for (self.data, 0..) |*slot, idx| {
                slot.* = try self.allocator.alloc(u8, size);
                std.debug.print("sos data[{d}] fatptr addr:{*} addr: {*} len:{d}\n", .{ idx, slot, slot.*.ptr, slot.*.len });
            }

            std.debug.print("buffer 2: {any}\n", .{self.buffer});
        }

        /// Zeroes the contents of every string in `data`, printing each
        /// entry's address and length first and the whole `buffer` afterwards.
        pub fn clear(self: *Self) void {
            std.debug.print("clear data addr: {*}\n", .{self.data.ptr});
            for (self.data, 0..) |*slot, idx| {
                std.debug.print("clear data[{d}] fatptr addr:{*} addr: {*} len:{d}\n", .{ idx, slot, slot.*.ptr, slot.*.len });
                @memset(slot.*, 0);
            }
            std.debug.print("buffer 3: {any}\n", .{self.buffer});
        }
    };
}

/// Empty stub that accepts a `[][]u8` and ignores it.
fn doSomething(_: [][]u8) void {}

/// Two-stage setup: `new` creates the value in this function's frame, then
/// `init` is called on it in place. Afterwards the outer slice and every
/// inner slice are printed and `data` is passed to `doSomething`.
pub fn main() !void {
    const Strings = makeSliceOfStrings(3, 32);

    var sos = Strings.new();
    try sos.init();
    sos.clear();

    const outer = sos.data;
    std.debug.print("data len:{d} ptr:{*}\n", .{ outer.len, outer.ptr });

    for (0..outer.len) |idx| {
        // Same entry printed twice: once from a copy, once through the slice.
        const s = outer[idx];
        std.debug.print("idx:{d}, addr: {*} len:{d}\n", .{ idx, s.ptr, s.len });
        std.debug.print("print buffer[{d}] fatptr addr:{*} addr: {*} len:{d}\n", .{ idx, &outer[idx], outer[idx].ptr, outer[idx].len });
    }

    doSomething(outer);
}

Sometimes I wish Zig had a way of doing something like this:


/// Fixed-capacity byte buffer with a separately stored length.
const Foo = struct {
    const capacity: usize = 256;

    /// Storage of `capacity` bytes.
    buffer: [capacity]u8,
    /// Presumably the number of bytes of `buffer` in use; nothing here reads it yet.
    len: usize,

    /// Puts `self` into a fully defined state (zeroed `buffer`, `len` of 0)
    /// and returns `self` so the call can be chained. Intended to be callable
    /// on an otherwise uninitialized value, so no field is left undefined.
    pub fn init(self: *@This()) *@This() {
        @memset(&self.buffer, 0);
        self.len = 0;
        return self;
    }
};

// Hypothetical usage, not valid Zig today: `Foo{}` leaves out fields that have
// no default values, and `init` is expected to fill them in afterwards.
pub fn main() void {
    const buf = Foo{}.init();
}

E.g., making it easy to run init() on an uninitialized self. You can do this if all the fields have a default value, but setting defaults just for being able to initialize with a method feels wrong.

Unfortunately no, that is the only way. I mean, you could allocate self on the heap, which is what a lot of C libraries do, but that’s a huge cost to pay for something that should be trivial.
There’s a proposal for pinned structs, which should help detect errors like this.

I don’t understand why you store the FixedBufferAllocator and std.mem.Allocator as fields when they are only used in the new function, have you elided other code?
Otherwise I see no reason to keep those around.

Further, I don’t think that it is helpful to use the allocator interface here (unless you need it for some other reason not shown), I would just directly create the slices from the buffer manually.

To avoid the awkwardness of dangling pointers / two step initialization, you can avoid creating self-referential pointers by instead using indices, because those are relative they can’t become invalid.

Then you can add a function that converts the indices to an array of slices (if/when you really need it):

const std = @import("std");

/// Builds a struct type that owns one contiguous `[num * size]u8` buffer and
/// describes `num` sub-ranges of it by offset and length instead of by
/// pointer, so the struct holds no pointers into itself.
fn ManyBuffer(comptime num: u32, comptime size: u32) type {
    return struct {
        const Self = @This();

        /// One sub-range of `buffer`, stored as an offset and a length.
        const Part = struct {
            start: u32,
            len: u32,
        };

        buffer: [num * size]u8,
        parts: [num]Part,

        /// Returns an instance in the state that `reset` produces.
        pub fn init() Self {
            var fresh: Self = undefined;
            fresh.reset();
            return fresh;
        }

        /// Zeroes `buffer` and makes part `i` cover bytes
        /// `i * size .. (i + 1) * size`.
        pub fn reset(self: *Self) void {
            @memset(&self.buffer, 0);

            for (&self.parts, 0..) |*part, i| {
                const offset = @as(u32, @intCast(i)) * size;
                part.* = .{ .start = offset, .len = size };
            }
        }

        /// Materialises the parts as an array of slices into `buffer`.
        /// Call it when and where slices are actually needed and keep the
        /// result on the caller's stack; this avoids unnecessary work when no
        /// slices are needed and avoids self-referential pointers in a struct
        /// that may be moved.
        pub fn slices(self: *Self) [num][]u8 {
            var res: [num][]u8 = undefined;
            for (&res, self.parts) |*dest, part| {
                dest.* = self.buffer[part.start .. part.start + part.len];
            }
            return res;
        }

        /// Sets the length of part `index` to `len`; `len` must not exceed `size`.
        pub fn resize(self: *Self, index: usize, len: usize) void {
            std.debug.assert(len <= size);
            const part = &self.parts[index];
            part.len = @intCast(len);
        }
    };
}

/// Empty stub that accepts a `[]const []u8` and ignores it.
fn doSomething(_: []const []u8) void {}

/// Demo: writes a short string into each part, records the used lengths,
/// prints the shortened slices, then resets and prints again.
pub fn main() !void {
    const line_fmt = "idx:{d}, addr: {*} len:{d} contents: {s}\n";

    var mb = ManyBuffer(3, 32).init();

    // First pass works on full-capacity slices.
    const full = mb.slices();
    for (full, 0..) |dest, idx| {
        const written = try std.fmt.bufPrint(dest, "{d} element", .{idx});
        std.debug.print("res.len: {}\n", .{written.len});
        std.debug.print(line_fmt, .{ idx, dest.ptr, dest.len, dest });

        // Store the true length of the used string in the part.
        mb.resize(idx, written.len);
    }

    // Ask for the slices again to get the used string sizes; if the used
    // length were of no interest, the result of the first call would do.
    const used = mb.slices();
    std.debug.print("used length\n", .{});
    for (used, 0..) |s, idx| {
        std.debug.print(line_fmt, .{ idx, s.ptr, s.len, s });
    }
    doSomething(&used);

    mb.reset();
    std.debug.print("reset\n", .{});
    const after_reset = mb.slices();
    for (after_reset, 0..) |s, idx| {
        std.debug.print(line_fmt, .{ idx, s.ptr, s.len, s });
    }
}

So with such a stack data-structure I wouldn’t keep around the slices within fields, but just create them on the stack where I need them, so that they can be passed to functions that expect slices.

This has the additional benefit of allowing you to track either the capacity of the individual slices or their used size.

2 Likes

Using &res to avoid the "local variable is never mutated" error, I didn’t know that.

Conversion between pointers, slices and arrays is confusing to me. Is there a good read about this?

Thanks for your code, very refreshing to me.

It avoids that, but I use &res so that the res-array can be used in a for loop together with the pointer-capture *dest so that I can mutate the array element.

I am not sure about learning resources about this specifically, it took me a while to feel comfortable with the different pointer types and I learned bits and pieces from different sources and now find it hard to remember where I learned it exactly.

What specifically is confusing?
That may help with finding something that explains it.

If you haven’t done Ziglings yet, I would recommend that, it helped me get up to speed with the language quickly and learn lots of the details better.

Yes, I’ll read more code to wrap my mind around it.

Thanks for your help :smiley:

1 Like