Composing generic types with enum sets?

ghostdance · October 26, 2025, 5:45pm

Zig error sets can be merged with the || operator. I want to do the same for regular enums:

// Operator token kinds.
const Arithmetic = enum { plus, minus, star, slash };
// Literal-like token kinds.
const Literal = enum { string, number, keyword };
// Wished-for syntax, NOT valid Zig: `||` only merges error sets. This line
// illustrates the desired "union of both enums" and does not compile.
const TokenKind = Arithmetic || Literal;

Then I would define a generic type allowing me to use any of the enum sets:

/// Returns a token struct type whose `type` field has the caller-chosen
/// type `T`, so each instantiation only admits the kinds in `T`.
fn Token(comptime T: type) type {
    return struct {
        // Kind of this token; a value of `T`.
        type: T,
        // presumably the source text of the token — confirm against the lexer
        lexeme: []const u8,
        // presumably the position of the token in the input — confirm
        offset: usize,
    };
}

// List of tokens whose kind may be any member of the combined `TokenKind`.
const TokenList = std.ArrayList(Token(TokenKind));

// Binary expression node; `op` is a token restricted to `Arithmetic` kinds.
const BinaryExpr = struct {
    op: Token(Arithmetic),
    // ...
};

The goal here is limiting the kind of Token that BinaryExpr uses for its op field to the Arithmetic enum set {plus, minus, star, slash}, while being able to use any enums from the Arithmetic or Literal sets in TokenList.

I understand that this doesn’t work in Zig. Is there a way to write something similar that achieves the same result? Or another approach entirely I should consider?

npc1054657282 · October 26, 2025, 6:06pm

Rather than constructing Arithmetic and Literal first and then merging them into a TokenKind, I think a more feasible solution is to construct a TokenKind first, and then use @Type to split its different parts into Arithmetic and Literal. This ensures that the integers corresponding to the tags in Arithmetic and Literal correspond one-to-one with those in TokenKind and there are no conflicts.

Maybe related:

github.com/ziglang/zig

allow ranges when switching on enums

opened 09:35PM - 02 May 23 UTC

andrewrk

proposal accepted

Use case: ```zig pub const Index = enum(u32) { pub const first_type: Index = .u1_type; u1_type, u8_type, i8_type, u16_type, i16_type, u29_type, u32_type, i32_type, u64_type, i64_type, u80_type, u128_type, i128_type, usize_type, isize_type, c_char_type, c_short_type, c_ushort_type, c_int_type, c_uint_type, c_long_type, c_ulong_type, c_longlong_type, c_ulonglong_type, c_longdouble_type, f16_type, f32_type, f64_type, f80_type, f128_type, anyopaque_type, bool_type, void_type, type_type, anyerror_type, comptime_int_type, comptime_float_type, noreturn_type, anyframe_type, null_type, undefined_type, enum_literal_type, atomic_order_type, atomic_rmw_op_type, calling_convention_type, address_space_type, float_mode_type, reduce_op_type, call_modifier_type, prefetch_options_type, export_options_type, extern_options_type, type_info_type, manyptr_u8_type, manyptr_const_u8_type, single_const_pointer_to_comptime_int_type, const_slice_u8_type, anyerror_void_error_union_type, generic_poison_type, empty_struct_type, pub const last_type: Index = .empty_struct_type; pub const first_value: Index = .undef; /// `undefined` (untyped) undef, /// `0` (comptime_int) zero, /// `0` (usize) zero_usize, /// `1` (comptime_int) one, /// `1` (usize) one_usize, /// `std.builtin.CallingConvention.C` calling_convention_c, /// `std.builtin.CallingConvention.Inline` calling_convention_inline, /// `{}` void_value, /// `unreachable` (noreturn type) unreachable_value, /// `null` (untyped) null_value, /// `true` bool_true, /// `false` bool_false, /// `.{}` (untyped) empty_struct, pub const last_value: Index = .empty_struct; /// Used for generic parameters where the type and value /// is not known until generic function instantiation. 
generic_poison, none = std.math.maxInt(u32), _, ``` ```zig pub fn isNoReturn(ty: Type) bool { switch (ty.ip_index) { InternPool.Index.first_type...@intToEnum(InternPool.Index, @enumToInt(InternPool.Index.noreturn_type) - 1) => return false, .noreturn_type => return true, @intToEnum(InternPool.Index, @enumToInt(InternPool.Index.noreturn_type) + 1)...InternPool.Index.last_type => return false, InternPool.Index.first_value...InternPool.Index.last_value => unreachable, .generic_poison => unreachable, // TODO add empty error sets here // TODO add enums with no fields here _ => return false, .none => switch (ty.tag()) { .noreturn => return true, .error_set => { const err_set_obj = ty.castTag(.error_set).?.data; const names = err_set_obj.names.keys(); return names.len == 0; }, .error_set_merged => { const name_map = ty.castTag(.error_set_merged).?.data; const names = name_map.keys(); return names.len == 0; }, else => return false, }, } } ``` Currently this gives an error for having declarations in the enum. If those are worked around, then it gives this error: ``` /home/andy/Downloads/zig/src/type.zig:2847:19: error: ranges not allowed when switching on type 'InternPool.Index' switch (ty.ip_index) { ~~^~~~~~~~~ /home/andy/Downloads/zig/src/type.zig:2848:40: note: range here InternPool.Index.first_type...@intToEnum(InternPool.Index, @enumToInt(InternPool.Index.noreturn_type) - 1) => return false, ~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``` I think this should be allowed. 
It makes sense what should be generated in machine code, and this is more type-safe than the alternative, which does compile: ```zig pub fn isNoReturn(ty: Type) bool { switch (@enumToInt(ty.ip_index)) { @enumToInt(InternPool.Index.first_type)...@enumToInt(InternPool.Index.noreturn_type) - 1 => return false, @enumToInt(InternPool.Index.noreturn_type) => return true, @enumToInt(InternPool.Index.noreturn_type) + 1...@enumToInt(InternPool.Index.last_type) => return false, @enumToInt(InternPool.Index.first_value)...@enumToInt(InternPool.Index.last_value) => unreachable, @enumToInt(InternPool.Index.generic_poison) => unreachable, // TODO add empty error sets here // TODO add enums with no fields here else => return false, @enumToInt(InternPool.Index.none) => switch (ty.tag()) { .noreturn => return true, .error_set => { const err_set_obj = ty.castTag(.error_set).?.data; const names = err_set_obj.names.keys(); return names.len == 0; }, .error_set_merged => { const name_map = ty.castTag(.error_set_merged).?.data; const names = name_map.keys(); return names.len == 0; }, else => return false, }, } } } ``` This has two problems compared to the original example: * A wrong type could be used inside the `@enumToInt` calls * It uses `else` instead of `_` so newly added enum tags will not be noticed at this site by the compiler

Sze · October 26, 2025, 8:16pm

I wouldn’t use a generic type for Token because then you can’t have declarations or methods on that type, making it more inconvenient to work with that type.

Instead, I would make Token a type that gives you access to the different views you are interested in. You can do that by making sure the enums have separate value ranges and then using a packed union to access either the flat enum that contains all values or one of the sub-ranges.

Technically, Token.Kind isn’t needed, because it could be computed by reading flat and working out which sub-range the value belongs to. However, I find it much easier to keep the single bit that tells you which category the token belongs to. It is redundant information, but it is also useful, because you can switch on kind and then access the matching sub-range.

So here is what I would do:

/// Demo entry point: builds one token and prints it through each of its
/// views, then prints stand-alone sub-enum values and their token form.
pub fn main() !void {
    const tok = Token.arithmetic(.plus);

    // The category tag can be dispatched on directly as well:
    //     switch (tok.kind) { .arithmetic => ..., .literal => ... }

    // Sub-range view: only `Arithmetic` tags can be named here.
    if (tok.view.arithmetic == .plus) std.debug.print("plus\n", .{});

    // Flat view: tags from every sub-range can be named.
    switch (tok.view.flat) {
        .plus => std.debug.print("plus\n", .{}),
        .string => std.debug.print("string\n", .{}),
        .keyword => std.debug.print("keyword\n", .{}),
        else => {},
    }
    std.debug.print("tk: {f}\n", .{tok});

    const slash_op = Arithmetic.slash;
    const keyword_lit = Literal.keyword;

    std.debug.print("a: {t}\n", .{slash_op});
    std.debug.print("a.token(): {f}\n", .{slash_op.token()});
    std.debug.print("b: {t}\n", .{keyword_lit});
    std.debug.print("b.token(): {f}\n", .{keyword_lit.token()});
}

// Integer type shared by both sub-enums and the flat enum. Together with the
// 1-bit `Token.Kind` it makes the packed `Token` 8 bits wide.
const BackingInt = u7;
/// Arithmetic operator tags. No explicit values are given, so the tags are
/// numbered from 0 in declaration order.
const Arithmetic = enum(BackingInt) {
    plus,
    minus,
    star,
    slash,

    /// Wraps this tag in a `Token` by calling `Token.arithmetic`.
    pub fn token(self: Arithmetic) Token {
        return .arithmetic(self);
    }
};
/// Literal tags. Numbering starts right after the largest `Arithmetic` value,
/// so the two enums never share an integer value.
const Literal = enum(BackingInt) {
    string = getHighestValue(Arithmetic) + 1, // make enums non-overlapping
    number,
    keyword,

    /// Wraps this tag in a `Token` by calling `Token.literal`.
    pub fn token(self: Literal) Token {
        return .literal(self);
    }
};
/// A token stored as a 1-bit category (`kind`) plus a `BackingInt`-sized value
/// (`view`) that can be read either as the flat enum or as one sub-enum.
const Token = packed struct {
    kind: Kind,
    view: View,

    /// Category of the value held in `view`.
    pub const Kind = enum(u1) { arithmetic, literal };
    /// Enum containing every tag of `Arithmetic` and `Literal`.
    pub const Flat = ConcatEnums(BackingInt, &.{ Arithmetic, Literal });
    /// Overlapping interpretations of the same stored value.
    pub const View = packed union {
        flat: Flat,
        arithmetic: Arithmetic,
        literal: Literal,
    };

    /// Builds a token of kind `.arithmetic` holding `token`.
    pub fn arithmetic(token: Arithmetic) Token {
        return .{ .kind = .arithmetic, .view = .{ .arithmetic = token } };
    }

    /// Builds a token of kind `.literal` holding `token`.
    pub fn literal(token: Literal) Token {
        return .{ .kind = .literal, .view = .{ .literal = token } };
    }

    /// Formatter used by `{f}`: writes the category name followed by the
    /// sub-enum value, chosen according to `kind`.
    pub fn format(self: Token, writer: *std.Io.Writer) std.Io.Writer.Error!void {
        return switch (self.kind) {
            .arithmetic => writer.print("arithmetic{}", .{self.view.arithmetic}),
            .literal => writer.print("literal{}", .{self.view.literal}),
        };
    }
};

/// Returns the largest integer value among the fields of enum `T`.
/// Any non-enum `T` is rejected with a compile error.
pub fn getHighestValue(comptime T: type) comptime_int {
    const info = @typeInfo(T);
    if (info != .@"enum") @compileError("not supported");

    const fields = info.@"enum".fields;
    // Seed with the first field, then keep the running maximum.
    var highest = fields[0].value;
    for (fields[1..]) |field| {
        if (field.value > highest) highest = field.value;
    }
    return highest;
}

/// Builds an exhaustive enum with tag type `Tag` that contains every field of
/// each enum in `Enums`, keeping each field's name and integer value and
/// following the order in which the enums are listed.
///
/// Emits a compile error when `Enums` contributes no fields, or when `Tag`
/// has fewer bits than the largest field value requires. The input enums are
/// expected to use distinct names and distinct values; this function does not
/// check for duplicates itself.
pub fn ConcatEnums(comptime Tag: type, comptime Enums: []const type) type {
    comptime var len = 0;
    comptime var highest = 0;
    for (Enums) |E| {
        for (std.meta.fields(E)) |f| {
            len += 1;
            highest = @max(highest, f.value);
        }
    }
    if (len == 0) {
        @compileError("ConcatEnums needs at least one enum field to concat");
    }

    // Size the check by the largest value rather than by the field count:
    // enums with gaps in their values can need more bits than the count
    // alone suggests.
    const needed_bits = std.math.log2_int_ceil(usize, highest + 1);
    if (@bitSizeOf(Tag) < needed_bits) {
        @compileError(std.fmt.comptimePrint("ConcatEnums needs at least an u{} to concat all given enums", .{needed_bits}));
    }

    var i: usize = 0;
    var fields: [len]std.builtin.Type.EnumField = undefined;
    for (Enums) |E| {
        for (std.meta.fields(E)) |f| {
            fields[i] = .{
                .name = f.name,
                .value = f.value,
            };
            i += 1;
        }
    }
    return @Type(.{ .@"enum" = .{
        .fields = &fields,
        .decls = &.{},
        .tag_type = Tag,
        .is_exhaustive = true,
    } });
}

const std = @import("std");

Sze · October 26, 2025, 9:15pm

It is also possible to create this fancier version that doesn’t store redundant information. It has its own downsides, which is why I would probably still prefer the first version I posted.

With this version the kind of the subrange/subset is directly embedded in the bit pattern of every enum value. This is also what makes it much more difficult to construct without accidentally creating a wrong bit pattern somewhere.

The whole idea is that with packed unions we can see the same value in different ways so if we align the values in useful ways we can make it so that different views are accessible. We then can use Token.view.active.kind to access the subset kind.

Technically we could make Token itself the packed union, but then you would get name conflicts between the arithmetic and literal functions and the fields of the same names. Keeping the union in .view is also helpful if you want to add other fields to Token that are common to all tokens.

Or you could even have another .data field which is a union with fields .literal and .arithmetic so that way the different kinds could have common data.

/// Demo entry point: builds one token and prints it through each of its
/// views, then prints stand-alone sub-enum values and their token form.
pub fn main() !void {
    const tok = Token.arithmetic(.plus);

    // The category bit can be dispatched on directly as well:
    //     switch (tok.view.active.kind) { .arithmetic => ..., .literal => ... }

    // Sub-range view: only `Arithmetic` tags can be named here.
    if (tok.view.arithmetic == .plus) std.debug.print("plus\n", .{});

    // Flat view: tags from every sub-range can be named.
    switch (tok.view.flat) {
        .plus => std.debug.print("plus\n", .{}),
        .string => std.debug.print("string\n", .{}),
        .keyword => std.debug.print("keyword\n", .{}),
        else => {},
    }
    std.debug.print("tk: {f}\n", .{tok});

    const slash_op = Arithmetic.slash;
    const keyword_lit = Literal.keyword;

    std.debug.print("a: {t}\n", .{slash_op});
    std.debug.print("a.token(): {f}\n", .{slash_op.token()});
    std.debug.print("b: {t}\n", .{keyword_lit});
    std.debug.print("b.token(): {f}\n", .{keyword_lit.token()});
}

// Three bits per token value: `Active` splits them into a 2-bit `subrange`
// and a 1-bit `kind`.
const BackingInt = u3;
/// Bit-level view of a token value. Packed struct fields are laid out from
/// the least significant bit, which matches the `0bK_SS` literals used by
/// `Arithmetic` and `Literal`.
const Active = packed struct {
    // Low two bits: which tag inside the sub-enum.
    subrange: u2,
    // Top bit: which sub-enum the value belongs to.
    kind: enum(u1) { arithmetic, literal },
};
/// Arithmetic operator tags. Every value has its top bit clear (`0b0_xx`),
/// which `Active.kind` reads as `.arithmetic`.
const Arithmetic = enum(BackingInt) {
    plus = 0b0_00,
    minus = 0b0_01,
    star = 0b0_10,
    slash = 0b0_11,

    /// Wraps this tag in a `Token` by calling `Token.arithmetic`.
    pub fn token(self: Arithmetic) Token {
        return .arithmetic(self);
    }
};
/// Literal tags. Every value has its top bit set (`0b1_xx`), which
/// `Active.kind` reads as `.literal`.
const Literal = enum(BackingInt) {
    string = 0b1_00,
    number = 0b1_01,
    keyword = 0b1_10,

    /// Wraps this tag in a `Token` by calling `Token.literal`.
    pub fn token(self: Literal) Token {
        return .literal(self);
    }
};
/// Token whose category is encoded in the top bit of the stored value, so no
/// separate tag field is needed. `view` exposes the same bits as the flat
/// enum, as one of the sub-enums, or as the raw `Active` bit layout.
pub const Token = packed struct {
    view: View,

    /// Overlapping interpretations of the same `BackingInt`-sized value.
    pub const View = packed union {
        pub const Flat = ConcatEnums(BackingInt, &.{ Arithmetic, Literal });
        active: Active,
        flat: Flat,
        arithmetic: Arithmetic,
        literal: Literal,
    };

    // The bit patterns of the sub-enums are written by hand, and `format`
    // trusts `view.active.kind` to pick the right view. Verify at compile
    // time that every tag carries the kind bit of its own enum.
    comptime {
        for (std.meta.fields(Arithmetic)) |f| {
            const bits: Active = @bitCast(@as(BackingInt, f.value));
            if (bits.kind != .arithmetic) {
                @compileError("Arithmetic." ++ f.name ++ " does not have the arithmetic kind bit");
            }
        }
        for (std.meta.fields(Literal)) |f| {
            const bits: Active = @bitCast(@as(BackingInt, f.value));
            if (bits.kind != .literal) {
                @compileError("Literal." ++ f.name ++ " does not have the literal kind bit");
            }
        }
    }

    /// Builds a token holding the given arithmetic tag.
    pub fn arithmetic(token: Arithmetic) Token {
        return .{ .view = .{
            .arithmetic = token,
        } };
    }

    /// Builds a token holding the given literal tag.
    pub fn literal(token: Literal) Token {
        return .{ .view = .{
            .literal = token,
        } };
    }

    /// Formatter used by `{f}`: writes the category name followed by the
    /// sub-enum value, chosen according to the kind bit.
    pub fn format(
        self: Token,
        writer: *std.Io.Writer,
    ) std.Io.Writer.Error!void {
        switch (self.view.active.kind) {
            .arithmetic => try writer.print("arithmetic{}", .{self.view.arithmetic}),
            .literal => try writer.print("literal{}", .{self.view.literal}),
        }
    }
};

/// Builds an exhaustive enum with tag type `Tag` that contains every field of
/// each enum in `Enums`, keeping each field's name and integer value and
/// following the order in which the enums are listed.
///
/// Emits a compile error when `Enums` contributes no fields, or when `Tag`
/// has fewer bits than the largest field value requires. The input enums are
/// expected to use distinct names and distinct values; this function does not
/// check for duplicates itself.
pub fn ConcatEnums(comptime Tag: type, comptime Enums: []const type) type {
    comptime var len = 0;
    comptime var highest = 0;
    for (Enums) |E| {
        for (std.meta.fields(E)) |f| {
            len += 1;
            highest = @max(highest, f.value);
        }
    }
    if (len == 0) {
        @compileError("ConcatEnums needs at least one enum field to concat");
    }

    // Size the check by the largest value rather than by the field count:
    // enums with gaps in their values can need more bits than the count
    // alone suggests.
    const needed_bits = std.math.log2_int_ceil(usize, highest + 1);
    if (@bitSizeOf(Tag) < needed_bits) {
        @compileError(std.fmt.comptimePrint("ConcatEnums needs at least an u{} to concat all given enums", .{needed_bits}));
    }

    var i: usize = 0;
    var fields: [len]std.builtin.Type.EnumField = undefined;
    for (Enums) |E| {
        for (std.meta.fields(E)) |f| {
            fields[i] = .{
                .name = f.name,
                .value = f.value,
            };
            i += 1;
        }
    }
    return @Type(.{ .@"enum" = .{
        .fields = &fields,
        .decls = &.{},
        .tag_type = Tag,
        .is_exhaustive = true,
    } });
}

const std = @import("std");

If you use this version, I suggest adding a bit of comptime code that asserts that every value of each enum carries the kind bit of its own enum.