Sanity check on my wrapping of a std.Io.Writer

With Zig 0.14 I used a struct to wrap around an existing writer to treat consecutive newlines as only one newline and perform a few other minor transformations. I’m porting this into Zig 0.15 and I’ve come up with the following code. It works, but I’m wondering if this is a good pattern or if others have better ideas for accomplishing the same thing:

// Wraps an existing `std.Io.Writer`, exposing an `interface` Writer whose
// vtable callbacks recover the wrapper via @fieldParentPtr and forward to
// the wrapped writer's vtable.
//
// NOTE(review): `interface` copies the wrapped writer's `buffer` and `end`
// in `init`. If the underlying writer ever reallocates or moves its buffer
// (e.g. `Writer.Allocating` may do so during drain/sendFile/rebase), this
// copy becomes stale — confirm the wrapped writer's buffer is fixed before
// using this pattern.
const NewlineFilterWriterWrapper = struct {
    // The underlying writer all operations are forwarded to.
    writer: *std.Io.Writer,
    // The Writer handed to callers; vtable entries point back at this struct.
    interface: std.Io.Writer,

    const Self = @This();

    // NOTE(review): placeholder — as written there is no return statement and
    // `splat` is unused, so this will not compile until the filtering logic
    // is filled in.
    fn drain(w: *std.Io.Writer, data: []const []const u8, splat: usize) std.Io.Writer.Error!usize {
        const self: *NewlineFilterWriterWrapper = @fieldParentPtr("interface", w);
        // Add custom logic here, update data if necessary
        // use self.writer.vtable.drain(...) to forward the writes
    }

    // Forwards sendFile unchanged to the wrapped writer's vtable.
    fn sendFile(w: *std.Io.Writer, reader: *std.Io.Reader, limit: std.Io.Limit) !usize {
        const self: *NewlineFilterWriterWrapper = @fieldParentPtr("interface", w);
        return self.writer.vtable.sendFile(self.writer, reader, limit);
    }

    // Forwards flush unchanged to the wrapped writer's vtable.
    fn flush(w: *std.Io.Writer) error{WriteFailed}!void {
        const self: *NewlineFilterWriterWrapper = @fieldParentPtr("interface", w);
        return self.writer.vtable.flush(self.writer);
    }

    // Forwards rebase unchanged to the wrapped writer's vtable.
    fn rebase(w: *std.Io.Writer, preserve: usize, capacity: usize) error{WriteFailed}!void {
        const self: *NewlineFilterWriterWrapper = @fieldParentPtr("interface", w);
        return self.writer.vtable.rebase(self.writer, preserve, capacity);
    }

    // Builds the wrapper around `writer`, snapshotting its buffer and end
    // position into the exposed interface (see staleness note above).
    fn init(writer: *std.Io.Writer) NewlineFilterWriterWrapper {
        return .{
            .writer = writer,
            .interface = .{
                .buffer = writer.buffer,
                .end = writer.end,
                .vtable = &.{
                    .drain = drain,
                    .sendFile = sendFile,
                    .rebase = rebase,
                    .flush = flush,
                },
            },
        };
    }
};
2 Likes

This won’t work for every Writer implementation. For example, Writer.Allocating may resize its buffer during any of drain/sendFile/rebase, so the buffer in your wrapper will become stale after that happens.

In my experience, this style of “wrapping” doesn’t really work with the new API (for example, see the deletion of CountingWriter).

I believe the most foolproof solution would be to have your implementation take its own buffer and implement only drain. Your drain function would then take data from your buffer (and maybe data as well if possible) and write the modified version of that data to the underlying writer.

3 Likes

To give something more concrete, here’s what I came up with for just the newline consolidation part:

const std = @import("std");

/// A `std.Io.Writer` that collapses every run of consecutive '\n' bytes
/// into a single newline before forwarding the data to `out`.
const NewLineConsolidatingWriter = struct {
    out: *std.Io.Writer,
    interface: std.Io.Writer,
    // Whether the most recently forwarded byte was '\n'. Persisted across
    // `drain` calls so newline runs split between calls are still collapsed.
    last_was_newline: bool,

    /// `buffer` can be any length, including zero.
    pub fn init(out: *std.Io.Writer, buffer: []u8) NewLineConsolidatingWriter {
        return .{
            .out = out,
            .last_was_newline = false,
            .interface = .{
                .vtable = &.{
                    .drain = drain,
                },
                .buffer = buffer,
                .end = 0,
            },
        };
    }

    fn drain(io_w: *std.Io.Writer, data: []const []const u8, splat: usize) std.Io.Writer.Error!usize {
        const self: *NewLineConsolidatingWriter = @fieldParentPtr("interface", io_w);

        // Bytes already sitting in our buffer logically precede `data`,
        // so filter and forward them first, then reset the buffer.
        try self.process(io_w.buffered());
        io_w.end = 0;

        // Per the drain contract: every slice except the last is written
        // once, and the final slice is repeated `splat` times.
        const last = data.len - 1;
        for (data[0..last]) |slice| {
            try self.process(slice);
        }
        for (0..splat) |_| {
            try self.process(data[last]);
        }

        // Everything in `data` was consumed on success.
        return std.Io.Writer.countSplat(data, splat);
    }

    fn process(self: *NewLineConsolidatingWriter, bytes: []const u8) std.Io.Writer.Error!void {
        // Byte-at-a-time filtering: drop a '\n' whenever the previously
        // emitted byte was also '\n'. (Naive but correct; could be vectorized.)
        for (bytes) |c| {
            const is_newline = c == '\n';
            if (is_newline and self.last_was_newline) continue;
            self.last_was_newline = is_newline;
            try self.out.writeByte(c);
        }
    }
};

test NewLineConsolidatingWriter {
    var sink: std.Io.Writer.Allocating = .init(std.testing.allocator);
    defer sink.deinit();

    var scratch: [3]u8 = undefined;
    var filter: NewLineConsolidatingWriter = .init(&sink.writer, &scratch);

    // testing.Reader splits the input into fixed chunks so that runs of
    // consecutive newlines cross drain-call boundaries.
    var source: std.testing.Reader = .init(&.{}, &.{
        .{ .buffer = "a\nb" },
        .{ .buffer = "\n" },
        .{ .buffer = "\n\n\n" },
        .{ .buffer = "\n\n\n\n\n\nc\n" },
        .{ .buffer = "\nd\n\n" },
    });

    _ = try source.interface.streamRemaining(&filter.interface);

    try std.testing.expectEqualSlices(u8, "a\nb\nc\nd\n", sink.written());
}

The testing.Reader is used here to artificially break up the data to ensure that the implementation handles edge cases correctly (consecutive newlines split across drain calls).

With some added debug printing (printing of buffered length in drain and a call to std.debug.dumpHex in process), the test will pass with this output:

buffered: 3
00007ffeb31bf672  61 0A 62                                          a␊b
0000000001226032  0A                                                ␊
buffered: 3
00007ffeb31bf672  0A 0A 0A                                          ␊␊␊
0000000001226039  0A 0A 0A 0A 0A 0A 63 0A                           ␊␊␊␊␊␊c␊
buffered: 0
0000000001226045  0A 64 0A 0A                                       ␊d␊␊

Note that with a zero length buffer, the test will pass with this output:

buffered: 0
000000000122602d  61 0A 62                                          a␊b
buffered: 0
0000000001226032  0A                                                ␊
buffered: 0
0000000001226034  0A 0A 0A                                          ␊␊␊
buffered: 0
0000000001226039  0A 0A 0A 0A 0A 0A 63 0A                           ␊␊␊␊␊␊c␊
buffered: 0
0000000001226045  0A 64 0A 0A                                       ␊d␊␊

In a real example, when using a zero-length buffer, the length of the data in each drain call will depend on the Reader implementation and its buffer size. When streaming from a .fixed Reader, for example, there would only be a single drain call with the full data.

4 Likes

Thanks for the code! Very helpful! I saw your reply but didn’t read it at first — I wanted to write my own version and then compare. My version is very similar to yours, but I learned about std.Io.Writer.countSplat, which is pretty nice because I had hand-rolled my own counter. I also didn’t use a buffer, because I figured whatever underlying writer the newline filter receives already has its buffer configured by its consumer; calling write would feed that out-writer’s buffer. Your version makes that a choice.

1 Like