I have been writing vcard writer and wanted to share with you guys and here your thoughts. Did I make it more complex than it seems? is the interface good (I tried matching bufferedReader from std.io)
fn lineWriter(writer: anytype, eol: []const u8) LineWriter(1024, @TypeOf(writer)) {
return .{ .free_writer = writer, .end_of_line = eol };
}
fn LineWriter(comptime g_line_length: usize, comptime WriterType: type) type {
return struct {
//! lineWriter takes a writer and an end of line delimiter and ensures that each line is
//! at most `g_line_length` length including the end of line delimiter.
//! Used in writing vcard specification because it requires each line to be max 1024 in length
//! including the \r\n
free_writer: WriterType,
end: usize = 0,
end_of_line: []const u8,
// line_length includes the end_of_line text
pub const line_length = g_line_length;
pub const Error = WriterType.Error;
pub const Writer = std.io.Writer(*Self, Error, write);
const Self = @This();
pub fn writer(self: *Self) Writer {
return .{ .context = self };
}
pub fn write(self: *Self, bytes: []const u8) Error!usize {
var remainder_start: isize = 0;
var index_eol: isize = -1;
var has_eol: bool = false;
var eol_written: usize = 0;
while (remainder_start < bytes.len) {
if (index_eol < remainder_start) {
if (std.mem.indexOf(u8, bytes[@intCast(remainder_start)..], self.end_of_line)) |index| {
index_eol = remainder_start + @as(isize, @intCast(index));
has_eol = true;
} else {
index_eol = @as(isize, @intCast(bytes.len));
has_eol = false;
}
}
const available_bytes = Self.line_length - self.end_of_line.len - self.end;
if (index_eol - remainder_start <= available_bytes) {
try self.free_writer.writeAll(bytes[@intCast(remainder_start)..@as(usize, @intCast(index_eol))]);
if (!has_eol) {
self.end += @as(usize, @intCast(index_eol - remainder_start));
} else {
try self.free_writer.writeAll(self.end_of_line);
eol_written += 1;
self.end = 0;
}
remainder_start = index_eol + @as(isize, @intCast(self.end_of_line.len));
} else {
try self.free_writer.writeAll(bytes[@intCast(remainder_start) .. @as(usize, @intCast(remainder_start)) + available_bytes]);
try self.free_writer.writeAll(self.end_of_line);
eol_written += 1;
remainder_start += @as(isize, @intCast(available_bytes));
self.end = 0;
}
}
return bytes.len;
}
};
}
I also wrote a small writer that behaves like the tee command so I have been thinking of collecting these functions (takes a writer and returns a modified one) into a repo. what do you think?
fn teeWriters(writer1: anytype, writer2: antype) TeeWriters(@TypeOf(writer1), @TypeOf(writer2)) {
return .{.writer1 = writer1, .writer2 = writer2};
}
fn TeeWriters(writer1_type: type, writer2_type: type) type {
return struct {
//! just like the tee command. Takes two writers and returns a structure with Writer interface
//! that redirects to both
writer1: writer1_type,
writer2: writer2_type,
const Self = @This();
pub const Error = writer1_type.Error || writer2_type.Error;
pub const Writer = std.io.Writer(*Self, Error, write);
pub fn write(self: *Self, bytes: []const u8) Error!usize {
const w1 = try self.writer1.write(bytes);
const m = @min(w1, bytes.len);
const w2 = try self.writer2.write(bytes[0..m]);
return w2;
}
pub fn writer(self: *Self) Writer {
return .{ .context = self };
}
};
}
Are you using -1
as a sort of null? An explicit optional would be better.
Also, what’s with the signed sizes? Even if you want to reserve a bit pattern as a null, you could set it to maxInt(usize)
.
This is so common that the compiler has a shortcut:
self.free_writer.writeAll(bytes[remainder_start ..][0..available_bytes]);
As to the logic, it was kind of complicated. How about this?
fn lineWriter(writer: anytype, comptime eol: []const u8) LineWriter(1024, eol, @TypeOf(writer)) {
return .{ .free_writer = writer };
}
fn LineWriter(comptime g_line_length: usize,
// I can't think of a reason for eol being a runtime argument.
// You should always know your eol at comptime.
// This leads to better optimizations.
comptime eol: []const u8,
comptime WriterType: type) type {
return struct {
const initial_remaining = g_line_length - eol.len;
const Remaining = std.math.IntFittingRange(0, initial_remaining);
const EolStage = std.math.IntFittingRange(0, eol.len - 1);
//! lineWriter takes a writer and an end of line delimiter and ensures that each line is
//! at most `g_line_length` length including the end of line delimiter.
//! Used in writing vcard specification because it requires each line to be max 1024 in length
//! including the \r\n
free_writer: WriterType,
remaining: Remaining = initial_remaining,
eol_stage: EolStage = 0
pub const Error = WriterType.Error;
pub const Writer = std.io.Writer(*Self, Error, write);
const Self = @This();
pub fn writer(self: *Self) Writer {
return .{ .context = self };
}
pub fn write(self: *Self, bytes: []const u8) Error!usize {
if(bytes[0] == eol[self.eol_stage]) {
// It seems like the caller is writing the eol manually
if(self.eol_stage == eol.len - 1) {
// Full eol has been written
self.eol_stage = 0;
self.remaining = initial_remaining;
} else {
self.eol_stage += 1;
}
} else {
// This is not a eol
self.eol_stage = 0;
if(self.remaining == 0) {
try writer.writeAll(eol);
self.remaining = initial_remaining;
}
}
self.remaining -= 1;
try self.free_writer.writeByte(bytes[0]);
return 1;
}
};
}
2 Likes
Thanks for the detailed response. I iterated on my solution a couple of times and it is great to see a better version. I just think there is a bug (and correct me if I am wrong). Let us assume initial_remaining=3
and eol="\r\n"
.
If bytes="abc\r\n"
then when reaching the ‘\r’ character remaining=0
and the lineself.remaining -= 1
will underflow.
1 Like
Yes, you’re right.
So, lets suppose the user is writing "abc\rd"
. In the conditions you described, this should be transformed into "abc\r\n\rd"
. The caller is allowed to write one byte a time, so we can’t count on peeking into the next bytes. When we receive the \r
, we don’t know if this will be an eol or just a random character.
The solution therefore, is to assume it is an eol. When the assumption is broken, i.e., when we see the character d
, we finish the eol that was written halfway, and then we resend the part of the eol that had been written. That is:
- We write
abc
as normal. remaining
is now 0.
- We see
\r
. Assume it is an eol. eol_stage
is now 1.
- We see
d
. Assumption is broken. However, we have eol_stage == 1
, so we know that the previous write was \r
, since that is the first character of our eol. In other words, we know that the user was trying to write \rd
.
- Finish the eol that was unfinished by sending
\n
.
- Rewrite what the user was trying to write, i.e, send the
\r
again and the finish by sending the d
.
I’ll see if I can figure this out.
I think I got it. Godbolt.
const std = @import("std");
fn lineWriter(
writer: anytype,
comptime line_length: usize,
comptime eol: []const u8,
) LineWriter(line_length, eol, @TypeOf(writer)) {
return .{ .free_writer = writer };
}
fn LineWriter(
comptime line_length: usize,
// I can't think of a reason for eol being a runtime argument.
// You should always know your eol at comptime.
// This leads to better optimizations.
comptime eol: []const u8,
comptime WriterType: type,
) type {
return struct {
const Remaining = std.math.IntFittingRange(0, line_length);
const EolStage = std.math.IntFittingRange(0, eol.len - 1);
/// lineWriter takes a writer and an end of line delimiter and ensures that each line is
/// at most `g_line_length` length including the end of line delimiter.
/// Used in writing vcard specification because it requires each line to be max 1024 in length
/// including the \r\n
free_writer: WriterType,
/// How many bytes can still be written to the line, discounting the eol
remaining: Remaining = line_length,
/// How many bytes of the eol were written
eol_stage: EolStage = 0,
pub const Error = WriterType.Error;
pub const Writer = std.io.Writer(*Self, Error, write);
const Self = @This();
pub fn writer(self: *Self) Writer {
return .{ .context = self };
}
pub fn write(self: *Self, bytes: []const u8) Error!usize {
if (bytes[0] == eol[self.eol_stage]) {
// It seems like the caller is writing the eol manually
if (self.eol_stage == eol.len - 1) {
// Full eol has been written
self.eol_stage = 0;
self.remaining = line_length;
try self.free_writer.writeByte(bytes[0]);
return 1;
} else {
self.eol_stage += 1;
}
} else {
// Not a eol
if (self.remaining <= eol.len) {
const missing_part = eol.len - self.remaining;
if (eol[missing_part] == bytes[0]) {
self.eol_stage = @intCast(missing_part + 1);
} else {
try self.free_writer.writeAll(eol[missing_part..]);
try self.free_writer.writeAll(eol[0..missing_part]);
self.remaining = line_length;
self.eol_stage = 0;
}
} else {
self.eol_stage = 0;
}
}
self.remaining -= 1;
try self.free_writer.writeByte(bytes[0]);
return 1;
}
};
}
const test_namespace = struct {
const line_length = 5;
const eol = "\r\n";
fn helper(src: []const u8, comptime expected: []const u8) !void {
var actual: [expected.len]u8 = undefined;
var stream = std.io.fixedBufferStream(&actual);
var lw = lineWriter(stream.writer(), line_length, eol);
try lw.writer().writeAll(src);
try std.testing.expectEqualSlices(u8, expected, &actual);
}
test "normal use" {
try helper("a", "a");
try helper("abc", "abc");
try helper("abcd", "abc\r\nd");
try helper("abcdef", "abc\r\ndef");
try helper("abcdefg", "abc\r\ndef\r\ng");
}
test "manual eol" {
try helper("\r\n", "\r\n");
try helper("a\r\n", "a\r\n");
try helper("ab\r\n", "ab\r\n");
try helper("ab\r\nc", "ab\r\nc");
try helper("abc\r\n", "abc\r\n");
try helper("abc\r\nd", "abc\r\nd");
try helper("abc\r\ndef", "abc\r\ndef");
try helper("abc\r\ndef\r\n", "abc\r\ndef\r\n");
try helper("abc\r\ndef\r\ng", "abc\r\ndef\r\ng");
}
test "fake eol" {
try helper("\r", "\r");
try helper("\ra", "\ra");
try helper("ab\r", "ab\r");
try helper("ab\rc", "ab\r\r\nc");
try helper("abc\rd", "abc\r\n\rd");
try helper("ab\r\r\ncd", "ab\r\r\ncd");
}
};
test {
std.testing.refAllDeclsRecursive(test_namespace);
}
1 Like