I have some code that waits for a certain action to occur in another thread:
const initial_value = 123456789;
// ...
@call(.never_inline, std.Thread.Futex.wait, .{ &futex.value, initial_value });
result = @enumFromInt(futex.value.load(.acquire));
It works in Debug but not in any of the release modes. The assembly shows the following:
00331d: 41 95 9a ef 3a | i32.const 123456789
003322: 10 8d 02 | call 269
003325: 20 03 | local.get 3
003327: fe 10 02 08 | i32.atomic.load 2 8
The code for function 269:
0037a1 func[269]:
0037a2: 0b | end
Without the use of @call(.never_inline, ...),
no code gets generated at all.
Has anyone run into the same issue? Is there a workaround?
1 Like
Making the inline assembly statements in WasmImpl
volatile seems to fix the issue:
/// Futex backend for WebAssembly, built directly on the
/// `memory.atomic.wait32` and `memory.atomic.notify` instructions.
///
/// Both inline-assembly statements are marked `volatile`. Their effect
/// (blocking the current thread, waking other threads) is not visible from
/// the operands, so without `volatile` an optimizing build is free to drop
/// them, leaving an empty function body.
const WasmImpl = struct {
    /// Executes `memory.atomic.wait32` on `ptr` with the expected value
    /// `expect`.
    ///
    /// `timeout` is forwarded to the instruction as a signed 64-bit value;
    /// `null` is encoded as -1. Returns `error.Timeout` when the instruction
    /// reports result code 2. Result codes 0 (woken) and 1 (the stored value
    /// did not match `expect`) both return normally.
    fn wait(ptr: *const atomic.Value(u32), expect: u32, timeout: ?u64) error{Timeout}!void {
        if (!comptime std.Target.wasm.featureSetHas(builtin.target.cpu.features, .atomics)) {
            @compileError("WASI target missing cpu feature 'atomics'");
        }
        // Saturate rather than `@intCast`: a timeout above maxInt(i64) would
        // otherwise be illegal behavior, and a wrapped (negative) value would
        // be indistinguishable from the -1 used for "no timeout".
        const to: i64 = if (timeout) |requested|
            (std.math.cast(i64, requested) orelse std.math.maxInt(i64))
        else
            -1;
        // `volatile` is required: the result is only inspected for the
        // timeout case, and the wait itself must never be optimized away.
        const result = asm volatile (
            \\local.get %[ptr]
            \\local.get %[expected]
            \\local.get %[timeout]
            \\memory.atomic.wait32 0
            \\local.set %[ret]
            : [ret] "=r" (-> u32),
            : [ptr] "r" (&ptr.raw),
              [expected] "r" (@as(i32, @bitCast(expect))),
              [timeout] "r" (to),
        );
        switch (result) {
            0 => {}, // ok
            1 => {}, // expected != loaded
            2 => return error.Timeout,
            else => unreachable,
        }
    }

    /// Executes `memory.atomic.notify` on `ptr`, passing `max_waiters` as the
    /// waiter count operand. `max_waiters` must be non-zero.
    fn wake(ptr: *const atomic.Value(u32), max_waiters: u32) void {
        if (!comptime std.Target.wasm.featureSetHas(builtin.target.cpu.features, .atomics)) {
            @compileError("WASI target missing cpu feature 'atomics'");
        }
        assert(max_waiters != 0);
        // `volatile` is required: the returned count is discarded below, so a
        // non-volatile asm statement would have no observable use and could
        // be removed by the optimizer.
        const woken_count = asm volatile (
            \\local.get %[ptr]
            \\local.get %[waiters]
            \\memory.atomic.notify 0
            \\local.set %[ret]
            : [ret] "=r" (-> u32),
            : [ptr] "r" (&ptr.raw),
              [waiters] "r" (max_waiters),
        );
        _ = woken_count; // can be 0 when linker flag 'shared-memory' is not enabled
    }
};
Function 269 now looks like this:
0037d0 func[269]:
0037d1: 01 7e | local[0] type=i64
0037d3: 01 7f | local[1] type=i32
0037d5: 42 7f | i64.const -1
0037d7: 21 02 | local.set 2
0037d9: 20 00 | local.get 0
0037db: 20 01 | local.get 1
0037dd: 20 02 | local.get 2
0037df: fe 01 02 00 | memory.atomic.wait32 2 0
0037e3: 21 03 | local.set 3
0037e5: 0b | end
2 Likes