Surprising behavior when consuming payload of tagged union passed to switch

I came across this curious behavior:

const std = @import("std");

const Datum = struct {
    pub fn init() @This() {
        return @This(){};
    }
    pub fn deinit(self: *@This()) void {
        self.* = undefined;
    }
};

const Tag = enum { poor, rich };

const Choice = union(Tag) {
    poor: void,
    rich: Datum,
};

pub fn generate() Choice {
    return Choice { .rich = Datum.init() };
}

pub fn main() void {
    switch (generate()) {
        .rich => |*d| d.deinit(),
        .poor => {},
    }
}

Resulting in:

demo.zig:25:24: error: expected type '*demo.Datum', found '*const demo.Datum'
        .rich => |*d| d.deinit(),
                      ~^~~~~~~

(Zig version 0.15.0-dev.847+850655f06)

However, if I store the result of generate() in a temporary variable, then it works:

 pub fn main() void {
-    switch (generate()) {
+    var x = generate();
+    switch (x) {
         .rich => |*d| d.deinit(),
         .poor => {},
     }
 }

Is that intended? How to rewrite main without using a temporary variable in a reasonable way?

Yes. |*d| is a const pointer because values are const by default in Zig, so unless you explicitly declare the result of generate() as var (which is not possible if you directly evaluate it inside the switch condition) all references to it will also be const references.

You could have deinit() take a const pointer self: *const @This() or a value self: @This(). I don’t know if that’s possible with your real Datum type which I suspect looks different than the one in the example but if it is that would work. Note that the self.* = undefined is kind of useless here anyway (at least if you defer d.deinit()) because the generated datum goes out of scope as soon as the switch expression finishes.

I would just use a temporary variable, if you’re worried about polluting your scope you can always put a block around it.

2 Likes

A bit related:

1 Like

Also I think the usage of @This() is a bit overkill here.

1 Like

I read, for exampe in this article, that it is a common idiom to invalidate the memory of a structure at the end of a deinit function by setting self.* = undefined. For that, I need self to be a pointer to a non-const struct. In my case it is something like this:

pub const ConnectFail = struct {
    code: DbConnError,
    pg_conn: ?*c.PGconn,
    pub fn message(self: ConnectFail) [*:0]const u8 {
        return c.PQerrorMessage(self.pg_conn orelse return "libpq could not allocate memory");
    }
    pub fn deinit(self: *ConnectFail) void {
        if (self.pg_conn) |conn| {
            c.PQfinish(conn);
        }
        self.* = undefined;   
    }
};

(Having experimentally modified my approach here to refrain from demanding a message handler but returning a “to-be-deinit(ed)” error struct on failure.)

I then have to write something like that when using this code:

pub fn main() !void {
    const params = mylib.ConnectParams{ .string = "dbname=jbe" };
    var attempt = mylib.ConnectAttempt.init(params);
    switch (attempt) {
        .ok => |*conn| {
            defer conn.deinit();
            // use conn here
        },
        .fail => |*fail| {
            defer fail.deinit();
            std.debug.print("{s}\n", .{fail.message()});
        },
    }
}

So what I attempted was basically to have a switch statement that differentiates between the two outcomes of the connection attempt (successful connect and non-succeessful connection with a to-be-free’d error status).

Yeah, it’s a common idiom for sure, and if you use a temporary variable that is still potentially accessable after its deinitialization I would definitely use it.

I would suggest something like this:

const ConnectAttempt = union {
    ok: Connection,
    fail: ConnectFail,

    pub fn init(params: ConnectParams) ConnectAttempt { ... }

    pub fn deinit(attempt: *ConnectAttempt) void {
        switch (attempt) {
            .ok => |*conn| => conn.deinit(),
            .fail => |*fail| => fail.deinit(),
        }
    }
};

pub fn main() !void {
    const params = mylib.ConnectParams{ .string = "dbname=jbe" };
    var attempt = mylib.ConnectAttempt.init(params);
    defer attempt.deinit();
    switch (attempt) {
        .ok => |*conn| {
            // use conn here
        },
        .fail => |*fail| {
            std.debug.print("{s}\n", .{fail.message()});
        },
    }
}

This way initialization and deinitialization happen in the same scope right next to each other which helps avoid bugs and confusion IMO especially if you plan to add a lot of code to the switch prongs or more cases in the future.

2 Likes

My idea was that maybe the error has a different lifetime than the connection handle. But right now I’m just experimenting and this isn’t a real-world problem yet. Still trying to get accustomed to the language.

Maybe I’ll just return a database handle both on success and failure (like the underlaying libpq), which is similar to your last proposal where the same deinit is used for the success and failure case.