std.Thread.Futex.wait() gets optimized to nothing in WebAssembly

I have some code that waits for a certain action to occur in another thread:

const initial_value = 123456789;
// ...
@call(.never_inline, std.Thread.Futex.wait, .{ &futex.value, initial_value });
result = @enumFromInt(futex.value.load(.acquire));

It works in Debug but not in any of the release modes. The assembly shows the following:

00331d: 41 95 9a ef 3a             |   i32.const 123456789
003322: 10 8d 02                   |   call 269
003325: 20 03                      |   local.get 3
003327: fe 10 02 08                |   i32.atomic.load 2 8

The code for function 269:

0037a1 func[269]:
0037a2: 0b                         | end

Without @call(.never_inline, ...), no code gets generated at all.

Has anyone run into the same issue? Is there a workaround?

1 Like

Making the inline assembly statements in WasmImpl volatile seems to fix the issue:

/// Futex backend for WebAssembly, implemented with the `memory.atomic.wait32`
/// and `memory.atomic.notify` instructions through inline assembly.
/// Both functions fail to compile unless the target CPU has the `atomics` feature.
const WasmImpl = struct {
    /// Executes `memory.atomic.wait32` on the 32-bit word behind `ptr`.
    ///
    /// `expect` is handed to the instruction as the expected value, and `timeout`
    /// as a signed 64-bit operand (`null` is encoded as -1).
    /// Returns `error.Timeout` when the instruction reports result code 2;
    /// result codes 0 and 1 both return normally.
    fn wait(ptr: *const atomic.Value(u32), expect: u32, timeout: ?u64) error{Timeout}!void {
        // Compile-time guard: the instruction below only exists with the `atomics` feature.
        if (!comptime std.Target.wasm.featureSetHas(builtin.target.cpu.features, .atomics)) {
            @compileError("WASI target missing cpu feature 'atomics'");
        }
        // The instruction takes a signed timeout, so `null` is mapped to -1
        // (presumably "wait indefinitely" per the threads proposal -- TODO confirm).
        // NOTE(review): `@intCast` assumes the u64 timeout fits in an i64.
        const to: i64 = if (timeout) |to| @intCast(to) else -1;
        // `volatile` marks the asm as having side effects. Without it, the whole
        // wait was observed to be optimized away in release builds, leaving an
        // empty function body.
        // Operands are pushed in the order address, expected value, timeout; the
        // instruction's result is stored into `ret`.
        const result = asm volatile (
            \\local.get %[ptr]
            \\local.get %[expected]
            \\local.get %[timeout]
            \\memory.atomic.wait32 0
            \\local.set %[ret]
            : [ret] "=r" (-> u32),
            : [ptr] "r" (&ptr.raw),
              [expected] "r" (@as(i32, @bitCast(expect))),
              [timeout] "r" (to),
        );
        switch (result) {
            0 => {}, // ok
            1 => {}, // expected != loaded
            2 => return error.Timeout, // reported to the caller as a timeout
            else => unreachable, // no other result codes are handled
        }
    }

    /// Executes `memory.atomic.notify` on the 32-bit word behind `ptr`, passing
    /// `max_waiters` as the count operand.
    ///
    /// Asserts that `max_waiters` is non-zero. The count returned by the
    /// instruction is discarded.
    fn wake(ptr: *const atomic.Value(u32), max_waiters: u32) void {
        // Compile-time guard: the instruction below only exists with the `atomics` feature.
        if (!comptime std.Target.wasm.featureSetHas(builtin.target.cpu.features, .atomics)) {
            @compileError("WASI target missing cpu feature 'atomics'");
        }
        assert(max_waiters != 0);
        // `volatile` is needed here because the result is discarded below; it
        // tells the compiler the asm has side effects and must be kept.
        const woken_count = asm volatile (
            \\local.get %[ptr]
            \\local.get %[waiters]
            \\memory.atomic.notify 0
            \\local.set %[ret]
            : [ret] "=r" (-> u32),
            : [ptr] "r" (&ptr.raw),
              [waiters] "r" (max_waiters),
        );
        _ = woken_count; // can be 0 when linker flag 'shared-memory' is not enabled
    }
};

Function 269 now looks like this:

0037d0 func[269]:
0037d1: 01 7e                      | local[0] type=i64
0037d3: 01 7f                      | local[1] type=i32
0037d5: 42 7f                      | i64.const -1
0037d7: 21 02                      | local.set 2
0037d9: 20 00                      | local.get 0
0037db: 20 01                      | local.get 1
0037dd: 20 02                      | local.get 2
0037df: fe 01 02 00                | memory.atomic.wait32 2 0
0037e3: 21 03                      | local.set 3
0037e5: 0b                         | end
2 Likes