Most of us have run into this situation before. You have a switch statement. All but one cases make use of a set of variables. You want to keep the initiation logic outside the switch to avoid duplication, but that means that the null case would end up doing unnecessary work. Or would it? That was my question: is the compiler smart enough to optimize away the waste?
Another occasion for Godbolt. Here’s my sample code:
const std = @import("std");
extern fn bar([*]u32, i32) void;
export fn foo(a: i32) void {
var array = std.mem.zeroes([4096]u32);
switch (a) {
0 => {
@branchHint(.likely);
},
1 => bar(&array, 13),
2 => bar(&array, 23),
3 => bar(&array, 33),
else => {},
}
}
And the resulting assembly:
foo:
push rbp
mov rbp, rsp
sub rsp, 16384
mov edx, edi
lea rdi, [rbp - 16384]
mov ecx, 4096
xor eax, eax
rep stosd es:[rdi], eax
dec edx
cmp edx, 2
ja .LBB0_2
imul esi, edx, 10
add esi, 13
lea rdi, [rbp - 16384]
call bar@PLT
.LBB0_2:
add rsp, 16384
pop rbp
ret
The answer turns out to be “no”. As you can see, the rep stosd instruction comes before the cmp-ja combo. The compiler did manage to cleverly optimize away the jump table though. It must be pretty good at recognizing regular patterns when it comes to switch cases.
Out of curiosity I wanted to check whether these two functions are identical:
const std = @import("std");
extern fn bar([*]u32, i32) void;
export const ptr1: *const anyopaque = &foo1;
export const ptr2: *const anyopaque = &foo2;
fn foo1(a: i32) void {
var array = std.mem.zeroes([4096]u32);
switch (a) {
1 => bar(&array, 13),
2 => bar(&array, 23),
3 => bar(&array, 33),
else => unreachable,
}
}
fn foo2(a: i32) void {
var array = std.mem.zeroes([4096]u32);
bar(&array, switch (a) {
1 => 13,
2 => 23,
3 => 33,
else => unreachable,
});
}
And they are:
example.foo1:
push rbp
mov rbp, rsp
sub rsp, 16384
mov edx, edi
lea r8, [rbp - 16384]
mov ecx, 4096
xor eax, eax
mov rdi, r8
rep stosd es:[rdi], eax
imul esi, edx, 10
add esi, 3
mov rdi, r8
call bar@PLT
add rsp, 16384
pop rbp
ret
ptr1:
.quad example.foo1
ptr2:
.quad example.foo1