I have a runtime-known active union tag and a type-erased runtime-known payload. How can I efficiently (in terms of speed and code size) initialize a union from them?
After a couple of hours of trial and error, the closest I’ve come to an ideal generic solution is this monstrosity:
/// Builds a value of the tagged union `Union` whose active field is selected by
/// the runtime-known `active_tag` and whose payload bytes are copied from the
/// type-erased `payload` pointer.
///
/// `payload` must point to at least `@sizeOf` of the selected field's type
/// readable bytes. The copy is done byte-wise, so `payload` is only ever read
/// through a `[*]const u8`. For zero-sized fields nothing is copied.
pub fn runtimeUnionInit(
    comptime Union: type,
    active_tag: @typeInfo(Union).@"union".tag_type.?,
    payload: *const anyopaque,
) Union {
    const src_bytes: [*]const u8 = @ptrCast(payload);

    // The three lookups below are deliberately kept as independent switches
    // (rather than one switch with a multi-statement prong) so that each one
    // yields a single value per tag.

    // 1) Activate the requested field, leaving its payload undefined for now.
    var out: Union = switch (active_tag) {
        inline else => |t| @unionInit(Union, @tagName(t), undefined),
    };

    // 2) Address of the active field's storage inside `out`, viewed as bytes.
    const dst_bytes: [*]u8 = switch (active_tag) {
        inline else => |t| @ptrCast(&@field(out, @tagName(t))),
    };

    // 3) Number of payload bytes belonging to the active field.
    const byte_count: usize = switch (active_tag) {
        inline else => |t| @sizeOf(@FieldType(Union, @tagName(t))),
    };

    @memcpy(dst_bytes[0..byte_count], src_bytes);
    return out;
}
The separate switches are necessary in order to coerce the compiler/optimizer into constructing SoA lookup tables. Looking at the disassembly using Godbolt (https://zig.godbolt.org/z/W1chY6cnj) this looks pretty good and scales well for unions with many disorderly fields:
# example.getU: returns the address of the global `example.u` in eax/rax.
example.getU:
push rbp
mov rbp, rsp
# Load the address of the global union as the return value.
mov eax, offset example.u
pop rbp
ret
# example.setU: compiled form of the Zig `setU` wrapper. No call to
# runtimeUnionInit appears below, so it has presumably been inlined.
# NOTE(review): register roles assume the System V x86-64 calling convention
# (edi = tag, rsi = payload) -- confirm against the Godbolt target.
example.setU:
push rbp
mov rbp, rsp
# Reserve 32 bytes of stack for a temporary copy of the union.
sub rsp, 32
# Table byte offset = ((low byte of tag ^ 16) & 31) * 8.
xor dil, 16
movzx eax, dil
and eax, 31
shl eax, 3
# rcx = pointer to a pre-built union image (one of the __anon_* blobs).
mov rcx, qword ptr [rax + .Lswitch.table.example.setU]
# rdx = entry from the second table (per-tag payload size); rdx is not
# touched again before the memcpy call.
mov rdx, qword ptr [rax + .Lswitch.table.example.setU.1]
# Copy the 24-byte image (8 bytes at offset 16, 16 bytes at offset 0) into
# the stack temporary at [rbp - 32].
mov rax, qword ptr [rcx + 16]
mov qword ptr [rbp - 16], rax
movups xmm0, xmmword ptr [rcx]
movaps xmmword ptr [rbp - 32], xmm0
# memcpy(rdi = stack temporary, rsi = unchanged since entry, rdx = size).
lea rdi, [rbp - 32]
call memcpy@PLT
# Store the 24-byte temporary into the global example.u.
movaps xmm0, xmmword ptr [rbp - 32]
movups xmmword ptr [rip + example.u], xmm0
mov rax, qword ptr [rbp - 16]
mov qword ptr [rip + example.u+16], rax
add rsp, 32
pop rbp
ret
__anon_650:
.zero 16
.byte 0
.zero 7
__anon_657:
.zero 16
.byte 1
.zero 7
__anon_664:
.zero 16
.byte 2
.zero 7
__anon_671:
.zero 16
.byte 3
.zero 7
__anon_678:
.zero 16
.byte 4
.zero 7
__anon_685:
.zero 16
.byte 5
.zero 7
__anon_692:
.zero 16
.byte 6
.zero 7
__anon_699:
.zero 16
.byte 7
.zero 7
__anon_706:
.zero 16
.byte 8
.zero 7
__anon_713:
.zero 16
.byte 9
.zero 7
__anon_722:
.zero 16
.byte 10
.zero 7
__anon_729:
.zero 16
.byte 11
.zero 7
__anon_736:
.zero 16
.byte 12
.zero 7
__anon_743:
.zero 16
.byte 13
.zero 7
__anon_750:
.zero 16
.byte 14
.zero 7
__anon_757:
.zero 16
.byte 15
.zero 7
__anon_764:
.zero 16
.byte 16
.zero 7
__anon_771:
.zero 16
.byte 17
.zero 7
__anon_778:
.zero 16
.byte 18
.zero 7
__anon_785:
.zero 16
.byte 19
.zero 7
__anon_798:
.zero 16
.byte 20
.zero 7
__anon_805:
.zero 16
.byte 21
.zero 7
__anon_812:
.zero 16
.byte 22
.zero 7
__anon_819:
.zero 16
.byte 23
.zero 7
__anon_826:
.zero 16
.byte 24
.zero 7
__anon_834:
.zero 16
.byte 25
.zero 7
__anon_842:
.zero 16
.byte 26
.zero 7
__anon_850:
.zero 16
.byte 27
.zero 7
__anon_858:
.zero 16
.byte 28
.zero 7
__anon_866:
.zero 16
.byte 29
.zero 7
.Lswitch.table.example.setU:
.quad __anon_764
.quad __anon_771
.quad __anon_778
.quad __anon_785
.quad __anon_798
.quad __anon_805
.quad __anon_812
.quad __anon_819
.quad __anon_826
.quad __anon_834
.quad __anon_842
.quad __anon_850
.quad __anon_858
.quad __anon_866
.zero 8
.zero 8
.quad __anon_650
.quad __anon_657
.quad __anon_664
.quad __anon_671
.quad __anon_678
.quad __anon_685
.quad __anon_692
.quad __anon_699
.quad __anon_706
.quad __anon_713
.quad __anon_722
.quad __anon_729
.quad __anon_736
.quad __anon_743
.quad __anon_750
.quad __anon_757
.Lswitch.table.example.setU.1:
.quad 4
.quad 4
.quad 4
.quad 4
.quad 16
.quad 16
.quad 16
.quad 16
.quad 16
.quad 16
.quad 16
.quad 16
.quad 16
.quad 16
.zero 8
.zero 8
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 4
.quad 4
.quad 4
.quad 4
.quad 4
.quad 4
getU = example.getU
setU = example.setU
(Zig source code)
/// Builds a value of the tagged union `Union` whose active field is selected by
/// the runtime-known `active_tag` and whose payload bytes are copied from the
/// type-erased `payload` pointer.
///
/// `payload` must point to at least `@sizeOf` of the selected field's type
/// readable bytes. The copy is done byte-wise, so `payload` is only ever read
/// through a `[*]const u8`. For zero-sized fields nothing is copied.
pub fn runtimeUnionInit(
    comptime Union: type,
    active_tag: @typeInfo(Union).@"union".tag_type.?,
    payload: *const anyopaque,
) Union {
    const src_bytes: [*]const u8 = @ptrCast(payload);

    // The three lookups below are deliberately kept as independent switches
    // (rather than one switch with a multi-statement prong) so that each one
    // yields a single value per tag.

    // 1) Activate the requested field, leaving its payload undefined for now.
    var out: Union = switch (active_tag) {
        inline else => |t| @unionInit(Union, @tagName(t), undefined),
    };

    // 2) Address of the active field's storage inside `out`, viewed as bytes.
    const dst_bytes: [*]u8 = switch (active_tag) {
        inline else => |t| @ptrCast(&@field(out, @tagName(t))),
    };

    // 3) Number of payload bytes belonging to the active field.
    const byte_count: usize = switch (active_tag) {
        inline else => |t| @sizeOf(@FieldType(Union, @tagName(t))),
    };

    @memcpy(dst_bytes[0..byte_count], src_bytes);
    return out;
}
/// Test union for the codegen experiment: 30 fields in three groups of ten,
/// each group sharing one payload type. The tag enum is inferred
/// (`union(enum)`), and `setU` converts a raw integer to that tag with
/// `@enumFromInt`, so the declaration order below must not be changed.
const U = union(enum) {
// Ten fields with a `void` payload.
void0: void,
void1: void,
void2: void,
void3: void,
void4: void,
void5: void,
void6: void,
void7: void,
void8: void,
void9: void,
// Ten fields with an `i32` payload.
int0: i32,
int1: i32,
int2: i32,
int3: i32,
int4: i32,
int5: i32,
int6: i32,
int7: i32,
int8: i32,
int9: i32,
// Ten fields with a `[]const u8` slice payload.
string0: []const u8,
string1: []const u8,
string2: []const u8,
string3: []const u8,
string4: []const u8,
string5: []const u8,
string6: []const u8,
string7: []const u8,
string8: []const u8,
string9: []const u8,
};
// Global instance written by `setU` and exposed by address through `getU`;
// left undefined until the first `setU` call.
var u: U = undefined;
/// Exported entry point: overwrites the global `u` with a union whose active
/// field is chosen by the raw integer `tag` and whose payload is copied from
/// `payload`.
///
/// `tag` is converted with `@enumFromInt` and is not validated here, so the
/// caller must pass an integer that corresponds to a field of `U`.
export fn setU(tag: u32, payload: *const anyopaque) void {
    const Tag = @typeInfo(U).@"union".tag_type.?;
    const active: Tag = @enumFromInt(tag);
    u = runtimeUnionInit(U, active, payload);
}
/// Exported entry point: returns the address of the global `u` as a
/// type-erased pointer.
export fn getU() *const anyopaque {
    const erased: *const anyopaque = &u;
    return erased;
}
However, one pretty significant mistake the compiler makes is that it constructs a lookup table of all possible uninitialized payloads, roughly equivalent to this:
const unions = [_]Union{
.{ .field0 = undefined },
.{ .field1 = undefined },
.{ .field2 = undefined },
// ...
.{ .fieldn = undefined },
};
var result: Union = unions[i];
This results in a lot of unnecessary binary bloat, especially if the union has one or a few fields that have a significantly larger size than the rest (try adding padding: [256]u8 to the Godbolt repro to see for yourself). See also this issue: Inefficient handling of initialization to undefined with structs, unions, optionals and error unions · Issue #24313 · ziglang/zig · GitHub
What I really want is for the compiler to just do this:
var result: Union = undefined;
result.__tag = active_tag;
const result_payload_bytes: [*]u8 = @ptrCast(&result.__payload);
const payload_size = lookUpPayloadSize(active_tag);
@memcpy(result_payload_bytes[0..payload_size], @as([*]const u8, @ptrCast(payload)));
return result;
But I can’t find any way to express this so that the compiler takes the hint, without explicitly reinterpreting memory and making assumptions about the layout of tagged unions (which I would prefer to avoid due to risk of breakage in the future). I’ve been searching threads and issues and grepping the compiler codebase for @unionInit to look for similar patterns but I’m coming up empty.
Does anyone have any ideas, or is this the best we can do without compiler improvements and/or language changes?