Am I canceling my std.Io.Group incorrectly?

I am using 0.16.0-dev.1912+0cbaaa5eb, and I have this main loop in my code:

pub fn start(server: *Server, io: Io, gpa: Allocator) !void {
    var tcp_server = try IpAddress.listen(try IpAddress.parse(
        server.info.host,
        server.info.port,
    ), io, .{});
    defer tcp_server.deinit(io);
    log.debug("Server headers: {s}", .{if (server.info.headers) "true" else "false"});
    log.debug("Server max payload: {d}", .{server.info.max_payload});
    log.info("Server ID: {s}", .{server.info.server_id});
    log.info("Server name: {s}", .{server.info.server_name});
    log.info("Server listening on {s}:{d}", .{ server.info.host, server.info.port });

    var client_group: Group = .init;
    defer {
        log.debug("Canceling client group", .{});
        client_group.cancel(io); // the panic seems to happen when I call this.
    }

    const read_buffer_size, const write_buffer_size = getBufferSizes(io);
    log.debug("read buf: {d} write buf: {d}", .{ read_buffer_size, write_buffer_size });

    var id: usize = 0;
    while (true) : (id +%= 1) {
        if (server.clients.contains(id)) continue;
        log.debug("Accepting next client", .{});
        const stream = tcp_server.accept(io) catch |err| {
            log.debug("error: {s}", .{@errorName(err)}); // prints error: Canceled
            return err;
        };
        log.debug("Accepted connection {d}", .{id});
        _ = client_group.concurrent(io, handleConnectionInfallible, .{
            server,
            gpa,
            io,
            id,
            stream,
            read_buffer_size,
            write_buffer_size,
        }) catch |err| {
            log.err("Could not start concurrent handler for {d}", .{id});
            stream.close(io);
            return err;
        };
    }
}

I’m running into this issue when I connect a client to my server, and then cancel this task while the client is connected:

debug: Server headers: true
debug: Server max payload: 1048576
info: Server ID: server-id-123
info: Server name: Zits Server
info: Server listening on 0.0.0.0:5224
debug: read buf: 212992 write buf: 212992
debug: Accepting next client
debug: Accepted connection 0
debug: Accepting next client
^C
info: Shutting down...
debug: error: Canceled
debug: Canceling client group
debug: Client 0 disconnected
thread 13559 panic: reached unreachable code

robby@lambda ~/src/zits$ /home/robby/downloads/zig-x86_64-linux-0.16.0-dev.1912+0cbaaa5eb/lib/std/debug.zig:419:14: 0x1068389 in assert (std.zig)
    if (!ok) unreachable; // assertion failure
             ^
/home/robby/downloads/zig-x86_64-linux-0.16.0-dev.1912+0cbaaa5eb/lib/std/Io/Threaded.zig:260:23: 0x10ca181 in start (std.zig)
                assert(!cancel_acknowledged); // group task acknowledged cancelation but did not return `error.Canceled`
                      ^
/home/robby/downloads/zig-x86_64-linux-0.16.0-dev.1912+0cbaaa5eb/lib/std/Io/Threaded.zig:1317:29: 0x111ac8d in worker (std.zig)
            runnable.startFn(runnable, &thread, t);
                            ^
/home/robby/downloads/zig-x86_64-linux-0.16.0-dev.1912+0cbaaa5eb/lib/std/Thread.zig:558:13: 0x10f8a50 in callFn__anon_15769 (std.zig)
            @call(.auto, f, args);
            ^
/home/robby/downloads/zig-x86_64-linux-0.16.0-dev.1912+0cbaaa5eb/lib/std/Thread.zig:1499:30: 0x10c8e50 in entryFn (std.zig)
                return callFn(f, self.fn_args);
                             ^
/home/robby/downloads/zig-x86_64-linux-0.16.0-dev.1912+0cbaaa5eb/lib/std/os/linux/x86_64.zig:105:5: 0x10f8b05 in clone (std.zig)
    asm volatile (
    ^

The assertion comment “group task acknowledged cancelation but did not return error.Canceled” makes me feel like this is a bug in the std.Io.Threaded implementation, since I cannot return error.Canceled any more than I already am. But given how new this all is, I’m not confident that I’m using it right. I also tried fiddling with calling std.Io.recancel, but it didn’t seem to resolve the issue. Am I doing things wrong or did I run into a bug?

I figured it out. I didn’t realize that functions started with std.Io.Group.concurrent could actually return error.Canceled, I thought they had to return void. This basically solved my problem:

diff --git a/src/server/Server.zig b/src/server/Server.zig
index e5f199f643..18214ae45c 100644
--- a/src/server/Server.zig
+++ b/src/server/Server.zig
@@ -126,9 +126,10 @@
     stream: Stream,
     r_buf_size: usize,
     w_buf_size: usize,
-) void {
-    handleConnection(server, server_allocator, io, id, stream, r_buf_size, w_buf_size) catch |err| {
-        log.err("Failed processing client {d}: {any}", .{ id, err });
+) !void {
+    handleConnection(server, server_allocator, io, id, stream, r_buf_size, w_buf_size) catch |err| switch (err) {
+        error.Canceled => return error.Canceled,
+        else => log.err("Failed processing client {d}: {any}", .{ id, err }),
     };
 }
 
@@ -229,8 +230,9 @@
             },
         }
     } else |err| switch (err) {
-        error.EndOfStream => {
+        error.EndOfStream, error.ReadFailed => {
             log.debug("Client {d} disconnected", .{id});
+            return error.Canceled;
         },
         else => {
             return err;

That’s not quite right - you can’t materialize an error.Canceled out of thin air, or it will cause an assertion failure in that same place.

I think the reason this appears to work is that you are receiving error.ReadFailed and the underlying reader err is set to error.Canceled which means you’re not materializing it, you’re actually just forwarding it. But the way error.EndOfStream is handled is now incorrect, as well as the case when a different read failure occurs. Suggested change:

    } else |err| switch (err) {
        error.EndOfStream => return error.ClientDisconnected,
        error.ReadFailed => return reader.err.?, // could be `error.Canceled` or something else

If you don’t have access to reader.err in this context, then you should return err directly and let the parent decode the error.ReadFailed into the specific error that occurred.

In general, on the API boundary where you construct a *Io.Reader, you should expect to receive error.ReadFailed and then unwrap that into the more specific error from the concrete reader instance that you created.

Edit: everything I said above still applies but also there was a recent bug in the std lib regarding this assertion FYI: #30723 - Io.Threaded: fix bad assertion - ziglang/zig - Codeberg.org

2 Likes

Thanks :slight_smile: I was unaware that the underlying std.Io.net.Stream.Reader exposed its errors like that. That makes a lot of sense though. Here is my new diff:

diff --git a/src/server/Server.zig b/src/Server.zig
rename from src/server/Server.zig
rename to src/Server.zig
index e5f199f643..02c539ac16 100644
--- a/src/server/Server.zig
+++ b/src/Server.zig
@@ -126,9 +128,11 @@
     stream: Stream,
     r_buf_size: usize,
     w_buf_size: usize,
-) void {
-    handleConnection(server, server_allocator, io, id, stream, r_buf_size, w_buf_size) catch |err| {
-        log.err("Failed processing client {d}: {any}", .{ id, err });
+) !void {
+    handleConnection(server, server_allocator, io, id, stream, r_buf_size, w_buf_size) catch |err| switch (err) {
+        error.Canceled => return error.Canceled,
+        error.ClientDisconnected => log.debug("Client {d} disconnected", .{id}),
+        else => log.err("Failed processing client {d}: {any}", .{ id, err }),
     };
 }
 
@@ -229,12 +236,8 @@
             },
         }
     } else |err| switch (err) {
-        error.EndOfStream => {
-            log.debug("Client {d} disconnected", .{id});
-        },
-        else => {
-            return err;
-        },
+        error.EndOfStream => return error.ClientDisconnected,
+        else => return reader.err.?,
     }
 }
 

Some more tips:

  1. "{t}", .{err} if you just want the “Foo” part of error.Foo. (also works for enums)
  2. the error handling is still a bit unsafe. I recommend to only assume reader.err is non-null in the case you see that exact error code:
error.ReadFailed => return reader.err.?,
else => |e| return e,

You could also handle error.EndOfStream with the else prong if that error code unambiguously tells you a client disconnected.

Hope that helps!

6 Likes

Very helpful indeed, thanks again!