Composing generic types with enum sets?

Zig error sets can be merged with the || operator. I want to do the same for regular enums:

// Two independent enums that should act like mergeable "sets" of tags.
const Arithmetic = enum { plus, minus, star, slash };
const Literal = enum { string, number, keyword };
// Hypothetical syntax: `||` merges error sets only, so this line does not compile for enums.
const TokenKind = Arithmetic || Literal;

Then I would define a generic type allowing me to use any of the enum sets:

/// Returns a token struct type whose `type` tag is drawn from the enum `T`.
/// Each token also stores a `lexeme` byte slice and an `offset`.
fn Token(comptime T: type) type {
    return struct {
        type: T,
        lexeme: []const u8,
        offset: usize,
    };
}

// List of tokens that may carry any tag from the merged `TokenKind` set.
const TokenList = std.ArrayList(Token(TokenKind));

// Binary expression node whose operator token is restricted to `Arithmetic` tags.
const BinaryExpr = struct {
    op: Token(Arithmetic),
    // ...
};

The goal here is limiting the kind of Token that BinaryExpr uses for its op field to the Arithmetic enum set {plus, minus, star, slash}, while being able to use any enums from the Arithmetic or Literal sets in TokenList.

I understand that this doesn’t work in Zig. Is there a way to write something similar that achieves the same result? Or another approach entirely I should consider?

Rather than constructing Arithmetic and Literal first and then merging them into a TokenKind, I think a more feasible solution is to construct a TokenKind first, and then use @Type to split its different parts into Arithmetic and Literal. This ensures that the integers corresponding to the tags in Arithmetic and Literal correspond one-to-one with those in TokenKind and there are no conflicts.

Maybe related:

2 Likes

I wouldn’t use a generic type for Token because then you can’t have declarations or methods on that type, making it more inconvenient to work with that type.

Instead, I would make Token a type that gives you access to the different views you are interested in. You can do that by making sure the enums have separate value ranges and then using a packed union to access either the flat enum that contains all values or one of the sub-ranges.

Technically, Token.Kind isn’t needed, because it could be computed by accessing flat and figuring out which sub-range the value belongs to. However, I find it much easier to keep the single bit that tells you what category the token belongs to. It is redundant information, but it is also useful, because that way you can switch on kind and then access the right sub-range.

So here is what I would do:

/// Demo: builds one token and prints it through the sub-range view, the flat
/// view, and the `format` implementation, then does the same starting from
/// plain `Arithmetic` and `Literal` tags.
pub fn main() !void {
    const tk = Token.arithmetic(.plus);

    // `tk.kind` (`.arithmetic` or `.literal`) could be switched on first to
    // decide which sub-range view is the right one to read.

    // Sub-range view: only arithmetic tags are visible here.
    if (tk.view.arithmetic == .plus) {
        std.debug.print("plus\n", .{});
    }

    // Flat view: tags from every sub-range are visible at once.
    switch (tk.view.flat) {
        .plus => std.debug.print("plus\n", .{}),
        .string => std.debug.print("string\n", .{}),
        .keyword => std.debug.print("keyword\n", .{}),
        else => {},
    }
    std.debug.print("tk: {f}\n", .{tk});

    // Sub-range tags can be lifted into a full token on demand.
    const a = Arithmetic.slash;
    std.debug.print("a: {t}\n", .{a});
    std.debug.print("a.token(): {f}\n", .{a.token()});

    const b = Literal.keyword;
    std.debug.print("b: {t}\n", .{b});
    std.debug.print("b.token(): {f}\n", .{b.token()});
}

// Integer type backing every sub-range enum and the flat enum; its 7 bits plus
// the 1-bit `Token.Kind` make the packed `Token` 8 bits wide.
const BackingInt = u7;
/// Arithmetic operator tags; they keep the default values starting at 0.
const Arithmetic = enum(BackingInt) {
    plus,
    minus,
    star,
    slash,

    /// Wraps this tag in a `Token` of kind `.arithmetic`.
    pub fn token(self: Arithmetic) Token {
        return Token.arithmetic(self);
    }
};
/// Literal tags, numbered directly after the highest `Arithmetic` value so the
/// two enums never share an integer.
const Literal = enum(BackingInt) {
    string = 1 + getHighestValue(Arithmetic), // first value past the Arithmetic range
    number,
    keyword,

    /// Wraps this tag in a `Token` of kind `.literal`.
    pub fn token(self: Literal) Token {
        return Token.literal(self);
    }
};
/// A token tag stored as a 1-bit category (`kind`) followed by a
/// `BackingInt`-sized `view` that can be read as the flat enum or as either
/// sub-range enum.
const Token = packed struct {
    /// Category bit telling which sub-range view of `view` is meaningful.
    pub const Kind = enum(u1) { arithmetic, literal };
    /// Enum containing the tags of every sub-range.
    pub const Flat = ConcatEnums(BackingInt, &.{ Arithmetic, Literal });
    /// The same bits seen as the flat enum or as one of the sub-ranges.
    pub const View = packed union {
        flat: Flat,
        arithmetic: Arithmetic,
        literal: Literal,
    };

    // Field order is part of the packed layout: `kind` first, then `view`.
    kind: Kind,
    view: View,

    /// Builds a token of kind `.arithmetic` holding `token`.
    pub fn arithmetic(token: Arithmetic) Token {
        return .{ .kind = .arithmetic, .view = .{ .arithmetic = token } };
    }

    /// Builds a token of kind `.literal` holding `token`.
    pub fn literal(token: Literal) Token {
        return .{ .kind = .literal, .view = .{ .literal = token } };
    }

    /// Writes the category name followed by the tag of the matching sub-range.
    pub fn format(self: Token, writer: *std.Io.Writer) std.Io.Writer.Error!void {
        return switch (self.kind) {
            .arithmetic => writer.print("arithmetic{}", .{self.view.arithmetic}),
            .literal => writer.print("literal{}", .{self.view.literal}),
        };
    }
};

/// Returns the largest integer value assigned to any tag of the enum `T`.
///
/// The result is a `comptime_int`, so calls are evaluated at compile time.
/// Emits a compile error when `T` is not an enum or when it has no tags.
pub fn getHighestValue(comptime T: type) comptime_int {
    const info = switch (@typeInfo(T)) {
        .@"enum" => |e| e,
        else => @compileError("not supported"),
    };
    // Without this guard an empty enum would fail on `fields[0]` with an
    // index-out-of-bounds error that does not mention the cause.
    if (info.fields.len == 0) {
        @compileError("getHighestValue: enum " ++ @typeName(T) ++ " has no tags");
    }

    var highest = info.fields[0].value;
    for (info.fields[1..]) |f| {
        highest = @max(highest, f.value);
    }
    return highest;
}

/// Builds an exhaustive enum with tag type `Tag` that contains every field of
/// every enum in `Enums`, keeping each field's original name and value.
///
/// Values are copied unchanged, so the source enums are expected to use
/// non-overlapping names and values. A compile error is emitted when a copied
/// value cannot be represented by `Tag`.
pub fn ConcatEnums(comptime Tag: type, comptime Enums: []const type) type {
    comptime var len = 0;
    for (Enums) |E| len += std.meta.fields(E).len;

    var i: usize = 0;
    var fields: [len]std.builtin.Type.EnumField = undefined;
    for (Enums) |E| {
        for (std.meta.fields(E)) |f| {
            // The values are reused verbatim, so each value itself must fit
            // into `Tag`; counting the fields alone would not guarantee that.
            if (f.value < std.math.minInt(Tag) or f.value > std.math.maxInt(Tag)) {
                @compileError(std.fmt.comptimePrint(
                    "ConcatEnums: value {d} of {s}.{s} does not fit into tag type {s}",
                    .{ f.value, @typeName(E), f.name, @typeName(Tag) },
                ));
            }
            fields[i] = .{
                .name = f.name,
                .value = f.value,
            };
            i += 1;
        }
    }
    return @Type(.{ .@"enum" = .{
        .fields = &fields,
        .decls = &.{},
        .tag_type = Tag,
        .is_exhaustive = true,
    } });
}

const std = @import("std");
5 Likes

It is also possible to create this fancy version that doesn’t have redundant information (that has its own downsides, which is why I probably would prefer the first version I posted).

With this version, the kind of the sub-range/subset is directly embedded in the bit pattern of every enum value. This is also what makes it much more difficult to construct without accidentally creating a wrong bit pattern somewhere.

The whole idea is that with packed unions we can see the same value in different ways so if we align the values in useful ways we can make it so that different views are accessible. We then can use Token.view.active.kind to access the subset kind.

Technically, we could make Token itself the packed union, but then you would get name conflicts between the arithmetic and literal functions and the fields of the same names. Keeping the union in .view is also helpful if you want to add other fields to Token that are common to all tokens.

Or you could even have another .data field which is a union with fields .literal and .arithmetic so that way the different kinds could have common data.

/// Demo: builds one token and prints it through the sub-range view, the flat
/// view, and the `format` implementation, then does the same starting from
/// plain `Arithmetic` and `Literal` tags.
pub fn main() !void {
    const tk = Token.arithmetic(.plus);

    // `tk.view.active.kind` (`.arithmetic` or `.literal`) could be switched on
    // first to decide which sub-range view is the right one to read.

    // Sub-range view: only arithmetic tags are visible here.
    if (tk.view.arithmetic == .plus) {
        std.debug.print("plus\n", .{});
    }

    // Flat view: tags from every sub-range are visible at once.
    switch (tk.view.flat) {
        .plus => std.debug.print("plus\n", .{}),
        .string => std.debug.print("string\n", .{}),
        .keyword => std.debug.print("keyword\n", .{}),
        else => {},
    }
    std.debug.print("tk: {f}\n", .{tk});

    // Sub-range tags can be lifted into a full token on demand.
    const a = Arithmetic.slash;
    std.debug.print("a: {t}\n", .{a});
    std.debug.print("a.token(): {f}\n", .{a.token()});

    const b = Literal.keyword;
    std.debug.print("b: {t}\n", .{b});
    std.debug.print("b.token(): {f}\n", .{b.token()});
}

// Integer type backing every enum view: 3 bits, matching the 2 + 1 bits of `Active`.
const BackingInt = u3;
// Bit-level view of a tag. `subrange` is declared first, so it occupies the two
// low bits and `kind` the high bit, matching the `0bK_SS` literals used by the
// `Arithmetic` and `Literal` enums.
const Active = packed struct {
    subrange: u2,
    kind: enum(u1) { arithmetic, literal },
};
/// Arithmetic operator tags; every value has its high (kind) bit set to 0.
const Arithmetic = enum(BackingInt) {
    plus = 0b0_00,
    minus = 0b0_01,
    star = 0b0_10,
    slash = 0b0_11,

    /// Wraps this tag in a `Token` through its arithmetic view.
    pub fn token(self: Arithmetic) Token {
        return Token.arithmetic(self);
    }
};
/// Literal tags; every value has its high (kind) bit set to 1.
const Literal = enum(BackingInt) {
    string = 0b1_00,
    number = 0b1_01,
    keyword = 0b1_10,

    /// Wraps this tag in a `Token` through its literal view.
    pub fn token(self: Literal) Token {
        return Token.literal(self);
    }
};
/// Token tag whose category is encoded in the tag's own bit pattern.
///
/// `view` exposes the same bits as the flat enum, as either sub-range enum, or
/// as `Active`, whose `kind` field names the sub-range the tag belongs to.
pub const Token = packed struct {
    view: View,

    pub const View = packed union {
        pub const Flat = ConcatEnums(BackingInt, &.{ Arithmetic, Literal });
        active: Active,
        flat: Flat,
        arithmetic: Arithmetic,
        literal: Literal,
    };

    // Compile-time guard: every tag of a sub-range enum must carry that
    // sub-range's kind bit, otherwise `view.active.kind` would misreport the
    // tag and `format` would read it through the wrong view.
    comptime {
        for (std.meta.fields(Arithmetic)) |field| {
            const bits: Active = @bitCast(@as(BackingInt, field.value));
            if (bits.kind != .arithmetic) {
                @compileError("Arithmetic." ++ field.name ++ " does not encode the arithmetic kind");
            }
        }
        for (std.meta.fields(Literal)) |field| {
            const bits: Active = @bitCast(@as(BackingInt, field.value));
            if (bits.kind != .literal) {
                @compileError("Literal." ++ field.name ++ " does not encode the literal kind");
            }
        }
    }

    /// Builds a token from an arithmetic tag.
    pub fn arithmetic(token: Arithmetic) Token {
        return .{ .view = .{
            .arithmetic = token,
        } };
    }

    /// Builds a token from a literal tag.
    pub fn literal(token: Literal) Token {
        return .{ .view = .{
            .literal = token,
        } };
    }

    /// Writes the category name followed by the tag of the matching sub-range.
    pub fn format(
        self: Token,
        writer: *std.Io.Writer,
    ) std.Io.Writer.Error!void {
        switch (self.view.active.kind) {
            .arithmetic => try writer.print("arithmetic{}", .{self.view.arithmetic}),
            .literal => try writer.print("literal{}", .{self.view.literal}),
        }
    }
};

/// Builds an exhaustive enum with tag type `Tag` that contains every field of
/// every enum in `Enums`, keeping each field's original name and value.
///
/// Values are copied unchanged, so the source enums are expected to use
/// non-overlapping names and values. A compile error is emitted when a copied
/// value cannot be represented by `Tag`.
pub fn ConcatEnums(comptime Tag: type, comptime Enums: []const type) type {
    comptime var len = 0;
    for (Enums) |E| len += std.meta.fields(E).len;

    var i: usize = 0;
    var fields: [len]std.builtin.Type.EnumField = undefined;
    for (Enums) |E| {
        for (std.meta.fields(E)) |f| {
            // The values are reused verbatim, so each value itself must fit
            // into `Tag`; counting the fields alone would not guarantee that.
            if (f.value < std.math.minInt(Tag) or f.value > std.math.maxInt(Tag)) {
                @compileError(std.fmt.comptimePrint(
                    "ConcatEnums: value {d} of {s}.{s} does not fit into tag type {s}",
                    .{ f.value, @typeName(E), f.name, @typeName(Tag) },
                ));
            }
            fields[i] = .{
                .name = f.name,
                .value = f.value,
            };
            i += 1;
        }
    }
    return @Type(.{ .@"enum" = .{
        .fields = &fields,
        .decls = &.{},
        .tag_type = Tag,
        .is_exhaustive = true,
    } });
}

const std = @import("std");

If you use this version I suggest you add a bit of comptime code that asserts that all of the enums contain only values with a kind of their own enum.