Idiomatic workaround for RLS (result location semantics)

Hi everyone, I find myself writing self-referencing structures quite often and, as far as I know, you have to initialize an instance in two steps in order to keep the internal pointers valid.

For example, I want to create a serializer that caches a writer pointing at its own buffer, so that each method can access it without putting var writer = self.buffer.writer() at the beginning of each method.

To achieve this, you can’t do this:

const Serializer = struct {
    allocator: Allocator,
    buffer: ArrayListUnmanaged(u8) = .{},
    writer: ArrayListUnmanaged(u8).Writer,

    // INTENTIONALLY BROKEN: shown only to illustrate the problem.
    // `self` is a local of `init`, and `self.buffer.writer(allocator)` is built
    // from that local's `buffer` field. `return self` hands the caller a copy of
    // the struct, but the copied `writer` still refers to the local `buffer`,
    // which no longer exists once `init` returns. Do not use `writer` on the
    // returned value.
    pub fn init(allocator: Allocator) Serializer {
        var self = Serializer{ .allocator = allocator, .writer = undefined };
        self.writer = self.buffer.writer(allocator);

        return self;
    }
};

What I resort to all the time is defining two methods (but you have to know/document that you need to call both before using the instance), like:

const Serializer = struct {
    allocator: Allocator,
    buffer: ArrayListUnmanaged(u8) = .{},
    writer: ArrayListUnmanaged(u8).Writer,

    /// Step 1 of 2: build the value. `writer` is left `undefined` here and must
    /// not be used until `init` has been called on the stored instance.
    pub fn new(allocator: Allocator) Serializer {
        return Serializer{
            .writer = undefined,
            .allocator = allocator,
        };
    }

    /// Step 2 of 2: call this on the instance where it is actually stored, so
    /// that `writer` is created from `self.buffer` at that location.
    pub fn init(self: *Serializer) void {
        const bound_writer = self.buffer.writer(self.allocator);
        self.writer = bound_writer;
    }
};

I know that we can also do it like so:

// Declare the instance first, then initialize it in place through a pointer.
// NOTE(review): assumes an `init(self: *Serializer, allocator: Allocator) void`
// variant, which is not shown in this snippet.
var serializer: Serializer = undefined;
serializer.init(allocator); // Here, init creates both `buffer` and `writer`

But is there a better way to do this? How do you do it?

In std the second variant exists e.g. in std.Thread.Pool.init.

Yes, it feels like the less error prone of the two options

I run into that “problem” all the time inside init functions.
If some field initialization is more than 1 or 2 lines, I write an extra (private) function to handle it.
But it is kinda inconvenient having to do that. No clean init.

BTW: I also run into the “problem” that I need a

1) init() Self {}
2) init_from(other: *const Self) Self {}
3) copy_from(self: *Self, other: *const Self) {}
1 Like

A couple of alternative approaches you may consider are:

  • allocating the serializer instead of relying on RLS (which tends to be fine if you’re allocating anyway and you already have a deinit). This is a pretty common pattern for these situations.
  • pass in the writer (or buffer) where needed instead of having it as a field

Something like:

const std = @import("std");

// Allocating version
/// Heap-allocated serializer that keeps a writer for its own `buffer`.
///
/// The instance is created with `allocator.create`, so `init` returns a pointer
/// to it rather than a copy, and `writer` is created from the `buffer` field of
/// that same allocation.
pub const Serializer = struct {
    /// Allocator used for the instance itself and for `buffer`.
    allocator: std.mem.Allocator,
    /// Bytes written so far.
    buffer: std.ArrayListUnmanaged(u8),
    /// Writer obtained from `buffer.writer(allocator)` in `init`.
    writer: std.ArrayListUnmanaged(u8).Writer,

    /// Allocates and initializes a `Serializer`.
    ///
    /// Returns the error from `allocator.create` if the allocation fails.
    /// The caller owns the result and must release it with `deinit`.
    pub fn init(allocator: std.mem.Allocator) !*Serializer {
        // `const`: the pointer is never reassigned, only the pointee is written.
        const serializer = try allocator.create(Serializer);
        serializer.* = .{
            .allocator = allocator,
            .buffer = .{},
            .writer = undefined,
        };
        // Create the writer only after the struct is in its heap allocation.
        serializer.writer = serializer.buffer.writer(allocator);
        return serializer;
    }

    /// Frees `buffer` and then the instance itself; `self` is invalid afterwards.
    pub fn deinit(self: *Serializer) void {
        self.buffer.deinit(self.allocator);
        self.allocator.destroy(self);
    }

    /// Writes `data` through the stored writer, propagating any write error.
    pub fn writeSome(self: *Serializer, data: []const u8) !void {
        try self.writer.writeAll(data);
    }
};

// Version where you pass in what's needed
/// Serializer with no fields: the destination writer is passed to each call
/// instead of being stored.
pub const SerializerUnmanaged = struct {
    /// Returns an empty `SerializerUnmanaged`.
    /// (The return type previously named `Serializer`, a different type that
    /// `.{}` cannot be coerced to.)
    pub fn init() SerializerUnmanaged {
        return .{};
    }

    /// Writes `data` to `writer`, which must provide `writeAll([]const u8)`.
    /// Propagates any error returned by `writer.writeAll`.
    pub fn writeSome(_: *SerializerUnmanaged, writer: anytype, data: []const u8) !void {
        try writer.writeAll(data);
    }
};

test "Serializer allocating" {
    // `init` returns a `*Serializer`; the binding itself is never reassigned,
    // so it is declared `const` (an unmutated `var` is a compile error).
    const serializer = try Serializer.init(std.testing.allocator);
    defer serializer.deinit();

    try serializer.writeSome("Hello, World!");
    // Check that the bytes actually reached the serializer's own buffer.
    try std.testing.expectEqualStrings("Hello, World!", serializer.buffer.items);
}

test "Serializer unmanaged" {
    var buffer = std.ArrayList(u8).init(std.testing.allocator);
    defer buffer.deinit();

    var serializer: SerializerUnmanaged = .{};
    try serializer.writeSome(buffer.writer(), "Hello, World!");
    // Check that the data was written to the caller-supplied buffer.
    try std.testing.expectEqualStrings("Hello, World!", buffer.items);
}