Writers for vcard

I have been writing vcard writer and wanted to share with you guys and here your thoughts. Did I make it more complex than it seems? is the interface good (I tried matching bufferedReader from std.io)

fn lineWriter(writer: anytype, eol: []const u8) LineWriter(1024, @TypeOf(writer)) {
    return .{ .free_writer = writer, .end_of_line = eol };
}

fn LineWriter(comptime g_line_length: usize, comptime WriterType: type) type {
    return struct {
        //! lineWriter takes a writer and an end of line delimiter and ensures that each line is
        //! at most `g_line_length` length including the end of line delimiter.
        //! Used in writing vcard specification because it requires each line to be max 1024 in length
        //! including the \r\n
        free_writer: WriterType,
        end: usize = 0,
        end_of_line: []const u8,

        // line_length includes the end_of_line text
        pub const line_length = g_line_length;

        pub const Error = WriterType.Error;
        pub const Writer = std.io.Writer(*Self, Error, write);

        const Self = @This();

        pub fn writer(self: *Self) Writer {
            return .{ .context = self };
        }

        pub fn write(self: *Self, bytes: []const u8) Error!usize {
            var remainder_start: isize = 0;
            var index_eol: isize = -1;
            var has_eol: bool = false;
            var eol_written: usize = 0;
            while (remainder_start < bytes.len) {
                if (index_eol < remainder_start) {
                    if (std.mem.indexOf(u8, bytes[@intCast(remainder_start)..], self.end_of_line)) |index| {
                        index_eol = remainder_start + @as(isize, @intCast(index));
                        has_eol = true;
                    } else {
                        index_eol = @as(isize, @intCast(bytes.len));
                        has_eol = false;
                    }
                }
                const available_bytes = Self.line_length - self.end_of_line.len - self.end;

                if (index_eol - remainder_start <= available_bytes) {
                    try self.free_writer.writeAll(bytes[@intCast(remainder_start)..@as(usize, @intCast(index_eol))]);

                    if (!has_eol) {
                        self.end += @as(usize, @intCast(index_eol - remainder_start));
                    } else {
                        try self.free_writer.writeAll(self.end_of_line);
                        eol_written += 1;

                        self.end = 0;
                    }
                    remainder_start = index_eol + @as(isize, @intCast(self.end_of_line.len));
                } else {
                    try self.free_writer.writeAll(bytes[@intCast(remainder_start) .. @as(usize, @intCast(remainder_start)) + available_bytes]);
                    try self.free_writer.writeAll(self.end_of_line);
                    eol_written += 1;

                    remainder_start += @as(isize, @intCast(available_bytes));
                    self.end = 0;
                }
            }
            return bytes.len;
        }
    };
}

I also wrote a small writer that behaves like the tee command so I have been thinking of collecting these functions (takes a writer and returns a modified one) into a repo. what do you think?

fn teeWriters(writer1: anytype, writer2: antype) TeeWriters(@TypeOf(writer1), @TypeOf(writer2)) {
    return .{.writer1 = writer1, .writer2 = writer2};
}

fn TeeWriters(writer1_type: type, writer2_type: type) type {
    return struct {
        //! just like the tee command. Takes two writers and returns a structure with Writer interface
        //! that redirects to both
        writer1: writer1_type,
        writer2: writer2_type,

        const Self = @This();
        pub const Error = writer1_type.Error || writer2_type.Error;
        pub const Writer = std.io.Writer(*Self, Error, write);

        pub fn write(self: *Self, bytes: []const u8) Error!usize {
            const w1 = try self.writer1.write(bytes);
            const m = @min(w1, bytes.len);
            const w2 = try self.writer2.write(bytes[0..m]);
            return w2;
        }

        pub fn writer(self: *Self) Writer {
            return .{ .context = self };
        }
    };
}

Are you using -1 as a sort of null? An explicit optional would be better.
Also, what’s with the signed sizes? Even if you want to reserve a bit pattern as a null, you could set it to maxInt(usize).

This is so common that the compiler has a shortcut:

self.free_writer.writeAll(bytes[remainder_start ..][0..available_bytes]);

As to the logic, it was kind of complicated. How about this?

fn lineWriter(writer: anytype, comptime eol: []const u8) LineWriter(1024, eol, @TypeOf(writer)) {
    return .{ .free_writer = writer };
}

fn LineWriter(comptime g_line_length: usize,
  // I can't think of a reason for eol being a runtime argument. 
  // You should always know your eol at comptime.
  // This leads to better optimizations.
  comptime eol: []const u8,
  comptime WriterType: type) type {
    return struct {
       const initial_remaining = g_line_length - eol.len;
       const Remaining = std.math.IntFittingRange(0, initial_remaining);
       const EolStage = std.math.IntFittingRange(0, eol.len - 1);

        //! lineWriter takes a writer and an end of line delimiter and ensures that each line is
        //! at most `g_line_length` length including the end of line delimiter.
        //! Used in writing vcard specification because it requires each line to be max 1024 in length
        //! including the \r\n
        free_writer: WriterType,
        remaining: Remaining = initial_remaining,
        eol_stage: EolStage = 0

        pub const Error = WriterType.Error;
        pub const Writer = std.io.Writer(*Self, Error, write);

        const Self = @This();

        pub fn writer(self: *Self) Writer {
            return .{ .context = self };
        }

        pub fn write(self: *Self, bytes: []const u8) Error!usize {
           if(bytes[0] == eol[self.eol_stage]) {
             // It seems like the caller is writing the eol manually
             if(self.eol_stage == eol.len - 1) {
               // Full eol has been written
               self.eol_stage = 0;
               self.remaining = initial_remaining;
             } else {
               self.eol_stage += 1;
             }
           } else {
            // This is not a eol
            self.eol_stage = 0;
            if(self.remaining == 0) {
              try writer.writeAll(eol);
              self.remaining = initial_remaining;
            }
          }
           self.remaining -= 1;
           try self.free_writer.writeByte(bytes[0]);
           return 1;
        }
    };
}
2 Likes

Thanks for the detailed response. I iterated on my solution a couple of times and it is great to see a better version. I just think there is a bug (and correct me if I am wrong). Let us assume initial_remaining=3 and eol="\r\n".
If bytes="abc\r\n" then when reaching the ‘\r’ character remaining=0 and the lineself.remaining -= 1 will underflow.

1 Like

Yes, you’re right.
So, lets suppose the user is writing "abc\rd". In the conditions you described, this should be transformed into "abc\r\n\rd". The caller is allowed to write one byte a time, so we can’t count on peeking into the next bytes. When we receive the \r, we don’t know if this will be an eol or just a random character.
The solution therefore, is to assume it is an eol. When the assumption is broken, i.e., when we see the character d, we finish the eol that was written halfway, and then we resend the part of the eol that had been written. That is:

  1. We write abc as normal. remaining is now 0.
  2. We see \r. Assume it is an eol. eol_stage is now 1.
  3. We see d. Assumption is broken. However, we have eol_stage == 1, so we know that the previous write was \r, since that is the first character of our eol. In other words, we know that the user was trying to write \rd.
  4. Finish the eol that was unfinished by sending \n.
  5. Rewrite what the user was trying to write, i.e, send the \r again and the finish by sending the d.

I’ll see if I can figure this out.

I think I got it. Godbolt.

const std = @import("std");

fn lineWriter(
    writer: anytype,
    comptime line_length: usize,
    comptime eol: []const u8,
) LineWriter(line_length, eol, @TypeOf(writer)) {
    return .{ .free_writer = writer };
}

fn LineWriter(
    comptime line_length: usize,
    // I can't think of a reason for eol being a runtime argument.
    // You should always know your eol at comptime.
    // This leads to better optimizations.
    comptime eol: []const u8,
    comptime WriterType: type,
) type {
    return struct {
        const Remaining = std.math.IntFittingRange(0, line_length);
        const EolStage = std.math.IntFittingRange(0, eol.len - 1);

        /// lineWriter takes a writer and an end of line delimiter and ensures that each line is
        /// at most `g_line_length` length including the end of line delimiter.
        /// Used in writing vcard specification because it requires each line to be max 1024 in length
        /// including the \r\n
        free_writer: WriterType,

        /// How many bytes can still be written to the line, discounting the eol
        remaining: Remaining = line_length,

        /// How many bytes of the eol were written
        eol_stage: EolStage = 0,

        pub const Error = WriterType.Error;
        pub const Writer = std.io.Writer(*Self, Error, write);

        const Self = @This();

        pub fn writer(self: *Self) Writer {
            return .{ .context = self };
        }

        pub fn write(self: *Self, bytes: []const u8) Error!usize {
            if (bytes[0] == eol[self.eol_stage]) {
                // It seems like the caller is writing the eol manually

                if (self.eol_stage == eol.len - 1) {
                    // Full eol has been written
                    self.eol_stage = 0;
                    self.remaining = line_length;
                    try self.free_writer.writeByte(bytes[0]);
                    return 1;
                } else {
                    self.eol_stage += 1;
                }
            } else {
                // Not a eol

                if (self.remaining <= eol.len) {
                    const missing_part = eol.len - self.remaining;
                    if (eol[missing_part] == bytes[0]) {
                        self.eol_stage = @intCast(missing_part + 1);
                    } else {
                        try self.free_writer.writeAll(eol[missing_part..]);
                        try self.free_writer.writeAll(eol[0..missing_part]);
                        self.remaining = line_length;
                        self.eol_stage = 0;
                    }
                } else {
                    self.eol_stage = 0;
                }
            }

            self.remaining -= 1;
            try self.free_writer.writeByte(bytes[0]);
            return 1;
        }
    };
}

const test_namespace = struct {
    const line_length = 5;
    const eol = "\r\n";

    fn helper(src: []const u8, comptime expected: []const u8) !void {
        var actual: [expected.len]u8 = undefined;
        var stream = std.io.fixedBufferStream(&actual);
        var lw = lineWriter(stream.writer(), line_length, eol);
        try lw.writer().writeAll(src);
        try std.testing.expectEqualSlices(u8, expected, &actual);
    }
    test "normal use" {
        try helper("a", "a");
        try helper("abc", "abc");
        try helper("abcd", "abc\r\nd");
        try helper("abcdef", "abc\r\ndef");
        try helper("abcdefg", "abc\r\ndef\r\ng");
    }
    test "manual eol" {
        try helper("\r\n", "\r\n");
        try helper("a\r\n", "a\r\n");
        try helper("ab\r\n", "ab\r\n");
        try helper("ab\r\nc", "ab\r\nc");
        try helper("abc\r\n", "abc\r\n");
        try helper("abc\r\nd", "abc\r\nd");
        try helper("abc\r\ndef", "abc\r\ndef");
        try helper("abc\r\ndef\r\n", "abc\r\ndef\r\n");
        try helper("abc\r\ndef\r\ng", "abc\r\ndef\r\ng");
    }
    test "fake eol" {
        try helper("\r", "\r");
        try helper("\ra", "\ra");
        try helper("ab\r", "ab\r");
        try helper("ab\rc", "ab\r\r\nc");
        try helper("abc\rd", "abc\r\n\rd");
        try helper("ab\r\r\ncd", "ab\r\r\ncd");
    }
};

test {
    std.testing.refAllDeclsRecursive(test_namespace);
}
1 Like