String formatting in a bit more complex cases

Hi, I've been trying to get some formatting done where a writer.print() isn't enough. I am extremely frustrated with this.

// Four authors should be joined with commas and a final "and".
test authorPrint {
    const authors = [_][]const u8{ "a", "b", "c", "d" };
    const expected = "Written by a, b, c and d.\n";
    const actual = try authorPrint(&authors);
    try testing.expectEqualStrings(expected, actual);
}

(If you can get the function working, the test may still fail because of punctuation marks, but that's not relevant.)

I've tried, among a lot of other things,
a naive array approach

/// Attempt 1: build the sentence by concatenating with `++` into a
/// fixed-size array. Does not compile as written; the inline notes mark
/// the places where it goes wrong.
fn authorPrint(comptime authors: []const []const u8) ![]const u8 {
    if (authors.len == 0) {
        return "";
    }

    // The literal is a pointer to an 11-byte array; it does not coerce to
    // the 1000-byte array type declared here.
    var str: [1000:0]u8 = "Written by ";
    // `i` counts from 1, so the comparisons below use 1-based positions.
    for (authors, 1..) |author, i| {
        if (i < 10) {
            // `++` concatenates comptime-known arrays into a new, longer
            // array, so the result cannot have the same type as `str`,
            // and `str` is a runtime `var`.
            str = str ++ author;
            if (i < authors.len - 1) {
                str = str ++ ", ";
            } else if (i == authors.len - 1) {
                str = str ++ " and ";
            }
        } else {
            // NOTE(review): this branch runs once for every author from the
            // tenth on, so " and others" would be appended repeatedly.
            str = str ++ " and others";
        }
    }
    str = str ++ ".\n";

    std.debug.print("{s}", .{str});
    // `str` is an array local to this call, while the return type is a slice.
    return str;
}

and array buffer print

/// Attempt 2: format each piece with `std.fmt.bufPrint` into a stack buffer.
/// Every call below is handed the whole of `str`, so each piece is written
/// at the start of the buffer rather than after the previous piece.
fn authorPrint(comptime authors: []const []const u8) ![]const u8 {
    if (authors.len == 0) {
        return "";
    }

    var str: [1000:0]u8 = undefined;
    var a: []u8 = try std.fmt.bufPrint(&str, "Written by ", .{});
    // `i` counts from 1, so the comparisons below use 1-based positions.
    for (authors, 1..) |author, i| {
        if (i < 10) {
            // "{s} " leaves a trailing space after every author name.
            a = try std.fmt.bufPrint(&str, "{s} ", .{author});
            if (i < authors.len - 1) {
                a = try std.fmt.bufPrint(&str, ", ", .{});
            } else if (i == authors.len - 1) {
                a = try std.fmt.bufPrint(&str, " and ", .{});
            }
        } else {
            a = try std.fmt.bufPrint(&str, " and others ", .{});
        }
    }
    // After this call `a` is only the slice returned for ".\n".
    a = try std.fmt.bufPrint(&str, ".\n", .{});
    std.debug.print("{s}", .{a});
    // `a` points into `str`, which is local to this call.
    return a;
}

and a buffer with allocation

/// Attempt 3: accumulate the sentence in an arena-backed `ArrayList`.
/// The text is built, but both `defer`s run when the function returns, so
/// the returned `string.items` refers to memory that has been released.
fn authorPrint(comptime authors: []const []const u8) ![]const u8 {
    if (authors.len == 0) {
        return "";
    }

    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    // Runs on every exit from this function, including the final `return`.
    defer arena.deinit();
    const allocator = arena.allocator();

    var string = std.ArrayList(u8).init(allocator);
    // Also runs on the final `return`, before the caller sees the slice.
    defer string.deinit();

    try string.appendSlice("Written by ");
    // `i` counts from 1, so the comparisons below use 1-based positions.
    for (authors, 1..) |author, i| {
        if (i < 10) {
            try string.appendSlice(author);
            if (i < authors.len - 1) {
                try string.appendSlice(", ");
            } else if (i == authors.len - 1) {
                try string.appendSlice(" and ");
            }
        } else {
            // NOTE(review): this branch runs once for every author from the
            // tenth on, so " and others" is appended repeatedly.
            try string.appendSlice(" and others");
        }
    }
    try string.appendSlice(".\n");
    std.debug.print("{s}", .{string.items});
    // The slice is backed by the list/arena that the defers above tear down.
    return string.items;
}

and switch

/// Attempt 4: pick a format string by author count, then try to fill it in
/// with `std.fmt.comptimePrint`.
fn authorPrint(comptime authors: []const []const u8) ![]const u8 {
    // Builds a format string with one `{s}` placeholder per author to be
    // listed (ten placeholders in the `else` case).
    const str = switch (authors.len) {
        0 => "",
        1 => "Written by {s}.\n",
        2...9 => "Written by " ++ "{s}, " ** (authors.len - 1) ++ "and {s}.\n",
        else => "Written by " ++ "{s}, " ** 10 ++ "and others.\n",
    };
    std.debug.print("{s}", .{str});
    // Passes the whole `authors` slice as a single tuple element, while
    // `str` can contain several `{s}` placeholders.
    return std.fmt.comptimePrint(str, .{authors});
    // Never reached: the function has already returned on the line above.
    return "";
}

and comptimePrint, inline for loop…
:sob:
No wonder the C code I'm using as a reference literally just goes and switches over the 10 cases.

This would have worked if you used inline 2...9 =>.
I think what you want is this:

/// Writes a "Written by ..." credit line for `authors` to `writer`.
///
/// Nothing is written for an empty list. Names are separated by ", " with
/// " and " before the last one. When there are more than ten authors, the
/// first nine are listed and the line ends with " and others".
/// Any error returned by the writer is propagated.
fn authorPrint(writer: anytype, authors: []const []const u8) !void {
    if (authors.len == 0) return;

    // At most this many names follow the first author before the closing item.
    const max_index = 9;

    try std.fmt.format(writer, "Written by {s}", .{authors[0]});

    if (authors.len >= 2) {
        // Index of the author that closes the list when nobody is elided.
        const last_index: usize = @min(authors.len - 1, max_index);

        var index: usize = 1;
        while (index < last_index) : (index += 1) {
            try std.fmt.format(writer, ", {s}", .{authors[index]});
        }

        const closing: []const u8 = if (authors.len > max_index + 1)
            "others"
        else
            authors[last_index];
        try std.fmt.format(writer, " and {s}", .{closing});
    }

    try std.fmt.format(writer, ".\n", .{});
}

Example

2 Likes

I’ve tried creating a custom formatter.

const std = @import("std");

/// Wraps a list of author names so that formatting the value prints a
/// "Written by ..." credit line.
const AuthorsFmt = struct {
    authors: []const []const u8,

    /// Writes the credit line to `writer`.
    ///
    /// Nothing is written for an empty list. Up to ten names are listed;
    /// with more than ten, the first ten are followed by " and others".
    /// The format string and options are ignored.
    pub fn format(self: AuthorsFmt, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
        const names = self.authors;
        if (names.len == 0) return;

        const limit = 10;

        try writer.writeAll("Written by ");
        try writer.writeAll(names[0]);

        if (names.len >= 2) {
            // Names strictly between the first one and the closing one.
            const shown: usize = @min(names.len, limit);
            for (names[1 .. shown - 1]) |name| {
                try writer.writeAll(", ");
                try writer.writeAll(name);
            }

            if (names.len > limit) {
                try writer.writeAll(", ");
                try writer.writeAll(names[limit - 1]);
                try writer.writeAll(" and others");
            } else {
                try writer.writeAll(" and ");
                try writer.writeAll(names[names.len - 1]);
            }
        }

        try writer.writeAll(".\n");
    }
};

/// Prints the credit line for four sample author lists (one, two, exactly
/// ten, and more than ten authors) via `std.debug.print`.
pub fn main() !void {
    const cases = [_][]const []const u8{
        // single
        &.{"a"},
        // pair
        &.{ "a", "b" },
        // just_10
        &.{ "a", "b", "c", "d", "e", "f", "g", "h", "i", "j" },
        // over_10
        &.{ "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k" },
    };

    for (cases) |authors| {
        std.debug.print("{}", .{AuthorsFmt{ .authors = authors }});
    }
}

Result:

$ zig build run
Written by a.
Written by a and b.
Written by a, b, c, d, e, f, g, h, i and j.
Written by a, b, c, d, e, f, g, h, i, j and others.

Note:
I didn’t use Writer.print, in order to avoid its overhead.
And if you were to build a state machine, you could write it more concisely.

2 Likes

Sadly, no. That particular attempt perfectly creates the formatting string even without that inline, but it can't fill it with the array items.

And the other code you posted does look quite good, but I'm not able to make it work because I don't know how to use writers yet and my attempts are utterly failing.
But also, is there no way to generate the string in place? Especially since all of it is comptime-known.
Having to pass a writer from outside the function into it when it's only needed inside seems to add unnecessary(?) complexity.

I don't want to be ungrateful, but isn't this a bit overengineered?

The good thing about using a writer is flexibility and not having to either pre-compute the final length of the string, or repeatedly reallocate the string that is being accumulated in something like an ArrayList.
If you are printing to stdout or writing to a file the append to the output can be done directly without all the temporary additional allocations that would happen if you turn it into one big string first and print it then.

When such a format function is defined on a type and you try to print that instance, then that function is automatically invoked with a writer, you don’t have to manually provide it.

Also take a look at this post:

2 Likes

The writer interface works at comptime. In general, in Zig, you want to write your functions to be runtime-compatible, then, at the call site, you just put the comptime keyword in front of it. You can make a comptime var buffer, create a writer from it, and write to it.
Like @Sze mentioned, using the writer adds flexibility, as it separates the act of writing from the destination of the write. The same code can write to a memory buffer just as well as socket.

2 Likes

I see.

And would it then be reasonable to have a wrapper function around it, so that you don't have to create the writer in the function that you want to call the format from?
The post that @Sze linked gave me the impression that that's not good practice.

(Also, I somehow missed the linked example in your first post and only just saw it; that does solve the problem I was having with figuring out how to work with a writer, haha, thanks.)

1 Like

Sure:

/// Renders the credit line for `authors` entirely at compile time and
/// returns it as a string.
///
/// The text is produced by calling `authorPrint` with a writer over a
/// 1000-byte scratch buffer. If that call fails (for example because the
/// output does not fit in the buffer), compilation stops with a message
/// naming the error instead of a bare "reached unreachable code".
fn authorPrint2(comptime authors: []const []const u8) []const u8 {
    comptime {
        var buffer: [1000]u8 = undefined;
        var stream = std.io.fixedBufferStream(&buffer);
        authorPrint(stream.writer(), authors) catch |err| {
            @compileError("authorPrint2: formatting failed with error." ++ @errorName(err));
        };
        const written = stream.getWritten();
        // Copy the written bytes into a constant array of exactly that
        // length, so the result does not point into the scratch buffer.
        const final = written[0..written.len].*;
        return &final;
    }
}

Example

The standard library has a bunch of ready-made writers. FixedBufferStream is for turning a chunk of memory into a stream, but you can also get a writer from an array list, a file, and others. The reader/writer interface is pretty cool. Once you learn how to use it, you’re gonna want to use it everywhere.

1 Like

Great! Thanks so much

I think i just have one last question then

would that mean that this

/// Writes the version text to `writer`, propagating any write error.
fn version(writer: anytype) !void {
    const banner = "imagine this as a complexly formatted str thats comptime known\n";
    return writer.writeAll(banner);
}

/// Entry point: prints the version text when `--version` is requested.
/// Output goes through a buffered stdout writer that is flushed before
/// returning.
pub fn main() !void {
    // argument handling
    // (elided in this snippet; `arg` is expected to be defined here)

    const stdout_writer = std.io.getStdOut().writer();
    var buffered_writer = std.io.bufferedWriter(stdout_writer);
    const buffered_stdout = buffered_writer.writer();

    // Compare the bytes: `==` does not compare the contents of two strings.
    if (std.mem.eql(u8, arg, "--version")) {
        try version(buffered_stdout);
    }
    try buffered_writer.flush();
}

has the final string already calculated before you actually call the stdout buffered writer?
I would imagine that the call to the stdout writer happens at runtime, so I would need to get the string into a comptime var and then print that. But maybe that's not the case.

You've been very helpful :slight_smile:

Yes, in the case you provided the string is definitely comptime-known, because you are providing it as a string literal. Therefore, that string will live in the constant data section of your executable, and, at runtime, you’re only providing a pointer to that string. If you do more complex stuff with your string, you need to be careful to properly separate runtime code from comptime code. The compiler will do that for you to some degree, but if it fails, some of the formatting might happen at runtime without you realizing it, which is a waste of performance. Remember to use the comptime keyword to force the formatting to happen at comptime. This way, if you mix runtime with comptime code, the compiler will complain.

1 Like

Thanks a ton :smiley: