Proper usage of explicit pointers

gonzo · March 2, 2024, 11:24am

Please, I need some sanity checks here. This Zig behaviour surprised me, after using zig for a couple of years. I tried to distil it into a simple test:

const std = @import("std");
const testing = std.testing;

/// A small record that can optionally point at another `Foo`.
const Foo = struct {
    id: usize,
    value: usize,
    other: ?*Foo,

    /// Create a `Foo` with the given `id`, a zero `value` and no `other`,
    /// printing a CREATE line on the way.
    pub fn init(id: usize) Foo {
        const created: Foo = .{ .id = id, .value = 0, .other = null };
        std.debug.print("CREATE Foo id {} (other: {*})\n", .{ created.id, created.other });
        return created;
    }

    /// Print a DESTROY line describing the current state; nothing is freed.
    pub fn deinit(self: *Foo) void {
        std.debug.print(
            "DESTROY Foo id {} (other: {*}), value={}\n",
            .{ self.id, self.other, self.value },
        );
    }

    /// Print the id, the `other` pointer and the value of this `Foo`.
    pub fn show(self: Foo) void {
        std.debug.print(
            "Foo id {} (other: {*}), value={}\n",
            .{ self.id, self.other, self.value },
        );
    }

    /// Overwrite `value`.
    pub fn setValue(self: *Foo, value: usize) void {
        self.*.value = value;
    }

    /// Overwrite `other` with the given pointer.
    pub fn setOther(self: *Foo, other: *Foo) void {
        self.*.other = other;
    }
};

/// A fixed-size group of `Foo`s whose `other` pointers are meant to link them into a ring.
const Bar = struct {
    /// Number of `Foo` elements stored in `foos`.
    const SIZE = 3;

    id: usize,
    foos: [SIZE]Foo,

    /// Build a `Bar`: initialise every `Foo`, then point each one's `other`
    /// at the next element (wrapping around at the end).
    ///
    /// NOTE: the pointers stored below are addresses inside the local `self`.
    /// `self` is returned by value, so the caller's `Bar` may live at a
    /// different address, and its `other` pointers then do not refer to its
    /// own `foos`.
    pub fn init(id: usize) Bar {
        var self = Bar{ .id = id, .foos = undefined };
        for (&self.foos, 0..) |_, p| {
            self.foos[p] = Foo.init(p);
        }
        for (&self.foos, 0..) |_, p| {
            // Index of the neighbour this element should point at.
            const o = (p + 1) % SIZE;
            std.debug.print("OTHER for {} at {*} is {} at {*}\n", .{ p, &self.foos[p], o, &self.foos[o] });
            // Takes the address of an element of the local `self` (see NOTE above).
            self.foos[p].setOther(&self.foos[o]);
        }
        return self;
    }

    /// Call `deinit` on every `Foo` in order.
    pub fn deinit(self: *Bar) void {
        for (&self.foos, 0..) |_, p| {
            self.foos[p].deinit();
        }
    }

    /// Print the id, then one line per `Foo` with its index and address.
    /// `self` is taken by value, so the addresses printed are those of the
    /// elements of this parameter.
    pub fn show(self: Bar) void {
        std.debug.print("Bar id {}\n", .{self.id});
        for (&self.foos, 0..) |_, p| {
            std.debug.print("  {} {*}: ", .{ p, &self.foos[p] });
            self.foos[p].show();
        }
    }

    /// Set each `Foo`'s value to ten times its index, writing directly into `self.foos`.
    pub fn populate(self: *Bar) void {
        for (&self.foos, 0..) |_, p| {
            self.foos[p].setValue(p * 10);
        }
    }

    /// Set a value of `p * 100` through each `Foo`'s `other` pointer.
    /// The write lands wherever `other` points, which need not be inside `self.foos`.
    pub fn cycle(self: *Bar) void {
        for (&self.foos, 0..) |_, p| {
            // `.?` unwraps the optional pointer and asserts it is non-null.
            self.foos[p].other.?.*.setValue(p * 100);
        }
    }
};

// Creates a single `Foo`, checks its id, and prints it.
test "Foo" {
    std.debug.print("\n", .{});
    // Typed explicitly so it can be passed as the expected value below.
    const id: usize = 11;
    var foo = Foo.init(id);
    defer foo.deinit();
    // `expectEqual` takes the expected value first, then the actual one,
    // so that a failure message labels the two values correctly.
    try testing.expectEqual(id, foo.id);
    foo.show();
}

// Creates a `Bar`, then prints it after each step: creation, `populate`, and `cycle`.
test "Bar" {
    std.debug.print("\n", .{});
    // Typed explicitly so it can be passed as the expected value below.
    const id: usize = 11;
    var bar = Bar.init(id);
    defer bar.deinit();
    // `expectEqual` takes the expected value first, then the actual one,
    // so that a failure message labels the two values correctly.
    try testing.expectEqual(id, bar.id);
    bar.show();
    bar.populate();
    bar.show();
    bar.cycle();
    bar.show();
}

This is the output I get:

$ zig test memory.zig   
Test [1/2] test.Foo... 
CREATE Foo id 11 (other: *memory.Foo@0)
Foo id 11 (other: *memory.Foo@0), value=0
DESTROY Foo id 11 (other: *memory.Foo@0), value=0
Test [2/2] test.Bar... 
CREATE Foo id 0 (other: *memory.Foo@0)
CREATE Foo id 1 (other: *memory.Foo@0)
CREATE Foo id 2 (other: *memory.Foo@0)
OTHER for 0 at memory.Foo@16ba65fd8 is 1 at memory.Foo@16ba65ff0
OTHER for 1 at memory.Foo@16ba65ff0 is 2 at memory.Foo@16ba66008
OTHER for 2 at memory.Foo@16ba66008 is 0 at memory.Foo@16ba65fd8
Bar id 11
  0 memory.Foo@16ba66010: Foo id 0 (other: *memory.Foo@16ba65ff0), value=0
  1 memory.Foo@16ba66028: Foo id 1 (other: *memory.Foo@16ba66008), value=0
  2 memory.Foo@16ba66040: Foo id 2 (other: *memory.Foo@16ba65fd8), value=0
Bar id 11
  0 memory.Foo@16ba66010: Foo id 0 (other: *memory.Foo@16ba65ff0), value=0
  1 memory.Foo@16ba66028: Foo id 1 (other: *memory.Foo@16ba66008), value=10
  2 memory.Foo@16ba66040: Foo id 2 (other: *memory.Foo@16ba65fd8), value=20
Bar id 11
  0 memory.Foo@16ba66010: Foo id 0 (other: *memory.Foo@16ba65ff0), value=0
  1 memory.Foo@16ba66028: Foo id 1 (other: *memory.Foo@16ba66008), value=10
  2 memory.Foo@16ba66040: Foo id 2 (other: *memory.Foo@16ba65fd8), value=20
DESTROY Foo id 0 (other: *memory.Foo@16ba65ff0), value=0
DESTROY Foo id 1 (other: *memory.Foo@16ba66008), value=10
DESTROY Foo id 2 (other: *memory.Foo@16ba65fd8), value=20
All 2 tests passed.

These are the things that surprised me:

The addresses for other don’t seem to have a stable value.
The changes made through other when calling bar.cycle() don’t seem to have any effect, or happened in a place I am not looking at…

This all seems quite basic, but I will confess I am flabbergasted… Please help restore my sanity.

dude_the_builder · March 2, 2024, 11:49am

Not 100% sure but I think this is a bug. Since Bar.foos is an array, it’s created on the stack and any references to it or its items will be invalid once Bar.init returns.

dee0xeed · March 2, 2024, 12:05pm

Looks like yet another example for this doc.

gonzo · March 2, 2024, 12:12pm

I think you probably are onto something @dude_the_builder – if I move the code to set the other pointers to a separate function:

/// Point each foo's `other` at the next foo in `self.foos`, wrapping around
/// at the end, and print one OTHER line per link.
pub fn fixup(self: *Bar) void {
    for (&self.foos, 0..) |*foo, index| {
        const next = (index + 1) % SIZE;
        const target = &self.foos[next];
        std.debug.print("OTHER for {} at {*} is {} at {*}\n", .{ index, foo, next, target });
        foo.setOther(target);
    }
}

and call it separately, after the call to Bar.init(), it all works.

gonzo · March 2, 2024, 12:13pm

I agree. I think the idea of zig’s arrays being value objects is still not well settled in my brain – too much C in there…

dee0xeed · March 2, 2024, 12:22pm

It may be useful to look at a value of a pointer.
Entities on the stack have “large” addresses, while entities on the heap have “small” ones:

const std = @import("std");
const log = std.debug.print;

/// Two-field struct with zero defaults, used here only to have something to take the address of.
const Foo = struct {
    a: usize = 0,
    b: usize = 0,
};

/// Print the address of a `Foo` held in a local variable next to the address
/// of a `Foo` obtained from the C allocator.
///
/// Returns an error if the allocation fails.
pub fn main() !void {
    const allocator = std.heap.c_allocator;

    // Kept as `var` so that this is an ordinary mutable local whose address is taken below.
    var foo_on_stack = Foo{};

    // The pointer is never reassigned, so it is `const`; release it on scope exit.
    const foo_on_heap = try allocator.create(Foo);
    defer allocator.destroy(foo_on_heap);

    log("addr of a foo on stak is {*}\n", .{&foo_on_stack});
    log("addr of a foo on heap is {*}\n", .{foo_on_heap});
}

addr of a foo on stak is stack-heap-adresses.Foo@7fff9c4f8b98
// definitely on stack
addr of a foo on heap is stack-heap-adresses.Foo@205c2a0
// definitely not on stack

dee0xeed · March 2, 2024, 12:36pm

Well…

#include <stdio.h>

/* Returns the address of the local array `arr`; this is what gcc flags with -Wreturn-local-addr. */
char *get_arr(void) {
    char arr[8];
    return arr;
}

/* Prints the pointer value returned by get_arr(). */
int main(void) {
    char *a = get_arr();
    printf("%p\n", a);
}

gcc warns about it:

$ gcc a.c 
a.c: In function ‘get_arr’:
a.c:6:12: warning: function returns address of local variable [-Wreturn-local-addr]
    6 |     return arr;
      |            ^~~

$ ./a.out 
(nil) // oops

A Zig GitHub issue about this and similar things has already been mentioned somewhere, but I do not remember exactly where.

dee0xeed · March 2, 2024, 12:58pm

A bit off-topic, sorry… Hmmm, why (nil)?!? I expected some stack address (a “large” value) here. OK, here is a maximally simplified example.

#include <stdio.h>

/* Prints the address of the local `x`, then returns that same address to the caller. */
int *get_ptr(void) {
    int x;
    printf("inside: &x = %p\n", &x);
    return &x;
}

/* Prints the pointer returned by get_ptr() next to the address of main's own local `i`. */
int main(void) {
    int *p = get_ptr();
    int i;
    printf("outside: p = %p, &i = %p\n", p, &i);
}

$ ./a.out 
inside: &x = 0x7fff6ee27044
outside: p = (nil), &i = 0x7fff6ee2706c

It seems that gcc not only warns about the footgun, but also sets that pointer to zero behind the scenes – so instead of doing strange things when dereferencing the pointer, the program will 100% crash.

gonzo · March 2, 2024, 1:02pm

Yeah, that’s not the case I was thinking of… But we are getting too off-topic here.

dee0xeed · March 2, 2024, 7:52pm

I think we are not; this footgun, in its various forms, has already made people sick and tired.

We all here (a bit of ∀ quantifier demagogue, sorry ) believe Zig to be a good C competitor (currently) and a C replacement (in some distant future), right?

I just do not want people to repeat mindlessly “zig is better than c 'cause it’s <subst your favorite zig’s feature>”.

@dude_the_builder and gcc are able to recognize a (not so trivial) case of using pointers to local vars outside of a function, so why can’t Zig?

AndrewCodeDev · March 2, 2024, 8:01pm

C was first introduced between 1969-1973. GCC was first released in 1987. Zig first appeared in 2016.

They’ve had compilers for C longer than Zig has been around. I imagine they have some features we don’t at this point. There’s an open issue for this, too: Add Compiler Error when returning pointer to stack variable · Issue #2646 · ziglang/zig · GitHub

I’m not really sure what to say beyond that. It’s a work in progress by a small team of people.

gonzo · March 2, 2024, 8:49pm

I personally believe zig is already better than C. Is it perfect? Sure, not (yet?).

In my mind I was mapping assigning the result of Foo.init() to an array element, to assigning the result of malloc() to an array member in C. This mental model is wrong, as was proven in this thread. That’s what I meant by “too much C in my mind”.

dee0xeed · March 4, 2024, 9:01am

I am not laying into Zig nor am I talking about which one is better, I am just comparing capabilities of compilers. As to heap allocated things, it’s clear that (C/Zig) compilers can not “know/understand” that something is on heap, because there is no special syntax/keywords for heap allocation (like new in Pascal/Ada), everything is handled by std library calls.

But what about stack allocations?
Do compilers “understand” that something is on stack?
If they do (and I guess they really do), then they can look (recursively, I guess)
at what is being returned by a function, and if they find some local addresses there,
that is a compilation warning (or, better, an error).

slonik-az · March 4, 2024, 10:28am

At least codegen can know, since it is what lays out locals on the stack.