Reading file formats enforced by comptime ordering

Inspired by this excellent post How to enforce Function Call Ordering, I wanted to try to update some of my personal libraries that focus on reading certain file formats like PNG and KTX2, using comptime to enforce that all elements are read or discarded in the correct order. I have a generic OrderedReader type that basically enforces that all reads or discards are done in the order of the struct fields given to the OrderedReader, the idea being that the given type should represent the file format you are trying to read.

See here the implementation, some types are not yet supported and in order to parse PNG files for example I would need to support slices with runtime length.

const std = @import("std");
const debug = std.debug;
const Allocator = std.mem.Allocator;
const Io = std.Io;

/// Generates a comptime state machine that forces the fields of `StructT`
/// to be read (or discarded) in declaration order. Calling `field` with the
/// wrong field produces a compile error naming the expected field.
pub fn OrderedReader(comptime StructT: type) type {
    return struct {
        const Self = @This();
        pub const FieldName = std.meta.FieldEnum(StructT);
        // Index of the next field that may be read. Being `comptime_int`, this
        // makes the whole struct comptime-only; ordering is tracked entirely
        // during compilation and costs nothing at runtime.
        next_field_index: comptime_int = 0,

        /// Returns a namespace of reader functions specialized for `name`.
        /// Compile error when `name` is not the next expected field.
        pub fn field(comptime self: *Self, comptime name: FieldName) type {
            if (comptime @intFromEnum(name) != self.next_field_index) {
                const expected: FieldName = @enumFromInt(self.next_field_index);
                @compileError("." ++ @tagName(expected) ++ " is expected, but received ." ++ @tagName(name));
            }

            self.next_field_index += 1;
            const FT = comptime FieldType(name);

            // Captured by value so the returned type does not hold a reference
            // to the comptime-mutable `self` (see the thread's capture error).
            const next_index = self.next_field_index;

            return switch (@typeInfo(FT)) {
                .bool => struct {
                    pub const discardUntil = ReaderImpl(name, next_index).discardUntil;
                    pub const discard = ReaderImpl(name, next_index).discard;
                    pub const read = ReaderImpl(name, next_index).read;
                    pub const take = ReaderImpl(name, next_index).takeBool;
                },
                .int => |i| struct {
                    pub const discardUntil = ReaderImpl(name, next_index).discardUntil;
                    pub const discard = ReaderImpl(name, next_index).discard;
                    pub const read = ReaderImpl(name, next_index).read;
                    // Single-byte integers need no endianness parameter.
                    // Fix: `i.bits == 8` instead of `@divExact(i.bits, 8) == 1`;
                    // @divExact is a compile error for widths like u4.
                    pub const take = if (i.bits == 8) ReaderImpl(name, next_index).takeByte else ReaderImpl(name, next_index).takeInt;
                },
                .@"enum" => struct {
                    pub const discardUntil = ReaderImpl(name, next_index).discardUntil;
                    pub const discard = ReaderImpl(name, next_index).discard;
                    pub const read = ReaderImpl(name, next_index).read;
                    pub const take = ReaderImpl(name, next_index).takeEnum;
                },
                .@"struct" => struct {
                    pub const discardUntil = ReaderImpl(name, next_index).discardUntil;
                    pub const discard = ReaderImpl(name, next_index).discard;
                    pub const read = ReaderImpl(name, next_index).read;
                    pub const take = ReaderImpl(name, next_index).takeStruct;
                },
                .array => struct {
                    pub const discardUntil = ReaderImpl(name, next_index).discardUntil;
                    pub const discard = ReaderImpl(name, next_index).discard;
                    pub const read = ReaderImpl(name, next_index).read;
                    pub const take = ReaderImpl(name, next_index).takeArray;
                },
                else => @compileError("Type cannot be read"),
            };
        }

        fn FieldType(comptime name: FieldName) type {
            return @FieldType(StructT, @tagName(name));
        }

        /// Implementation namespace for a single field `f`; `next_index` is
        /// the index of the field that follows it.
        pub fn ReaderImpl(comptime f: FieldName, comptime next_index: comptime_int) type {
            const T = FieldType(f);

            return struct {
                /// Skip over this field without materializing it.
                inline fn discard(reader: *Io.Reader) !void {
                    try reader.discardAll(@sizeOf(T));
                }

                // TODO: How to handle slice fields?
                // NOTE(review): @offsetOf relies on field layout; for plain
                // (non-extern/packed) structs the layout is unspecified and may
                // not match the on-disk format — confirm StructT is extern.
                // Also note this does not advance next_field_index.
                inline fn discardUntil(reader: *Io.Reader, name: FieldName) !void {
                    const next_field: FieldName = @enumFromInt(next_index);
                    comptime debug.assert(@intFromEnum(name) > @intFromEnum(next_field));
                    const n = @offsetOf(StructT, @tagName(name)) - @offsetOf(StructT, @tagName(f));
                    try reader.discardAll(n);
                }

                /// Read the raw bytes of this field into `buffer`.
                /// Fix: destination must be mutable (`[]u8`); the original
                /// `[]const u8` cannot be passed to `readSliceAll`.
                inline fn read(reader: *Io.Reader, buffer: []u8) !void {
                    debug.assert(buffer.len >= @sizeOf(T));
                    try reader.readSliceAll(buffer[0..@sizeOf(T)]);
                }

                inline fn takeInt(reader: *Io.Reader, endian: std.builtin.Endian) !T {
                    return reader.takeInt(T, endian);
                }

                inline fn takeEnum(reader: *Io.Reader, endian: std.builtin.Endian) !T {
                    return reader.takeEnum(T, endian);
                }

                /// A stored bool is one byte; exactly 1 maps to true.
                inline fn takeBool(reader: *Io.Reader) !bool {
                    return (try reader.takeByte()) == 1;
                }

                /// Fix: returns `!T` (the field's single-byte integer type);
                /// the original declared `!bool`, which cannot compile for u8
                /// fields. @bitCast also covers i8.
                inline fn takeByte(reader: *Io.Reader) !T {
                    return @bitCast(try reader.takeByte());
                }

                inline fn takeStruct(reader: *Io.Reader, endian: std.builtin.Endian) !T {
                    return reader.takeStruct(T, endian);
                }

                /// Reads @sizeOf(T) bytes and reinterprets them as the array
                /// type; no endianness handling (element-wise byte order is
                /// the file's byte order).
                inline fn takeArray(reader: *Io.Reader) !T {
                    const array_ptr = try reader.takeArray(@sizeOf(T));
                    return std.mem.bytesAsValue(T, array_ptr).*;
                }
            };
        }
    };
}

test "OrderedReader enforces read's to be in order" {
    const builtin = @import("builtin");
    const testing = std.testing;
    const native_endian = builtin.cpu.arch.endian();

    const data: TestStruct = .{
        .signature = .{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 },
        .header = .{
            .size = @sizeOf(TestBody),
        },
        .body = .{
            .i = 12,
            .j = 13,
            .k = .i,
            .l = .j,
            .m = true,
        },
    };

    const source = std.mem.asBytes(&data);
    var reader: Io.Reader = .fixed(source);
    comptime var ordered_reader: OrderedReader(TestStruct) = .{};

    // If swapped with header a compile time error will occur. The ordered reader enforces
    // the order of reads to be the same as the order of struct fields passed to OrderedReader.
    const signature = try ordered_reader.field(.signature).take(&reader);
    try testing.expectEqualSlices(u8, &data.signature, &signature);

    const header = ordered_reader.field(.header).take(&reader, native_endian);
    try testing.expectEqualDeep(data.header, header);

    const body = ordered_reader.field(.body).take(&reader, native_endian);
    try testing.expectEqualDeep(data.body, body);
}

On its own it works well and does what I would expect it to do. But now I want to try to integrate this type into a more specialized reader, and here I'm running into some trouble. Here is one variation I tried:

// In-memory description of the KTX2 container layout, in on-disk order:
// 12-byte identifier, then the header, then the index.
const Ktx2 = struct {
    // Supercompression schemes defined by the KTX2 container; non-exhaustive
    // (`_`) so values added by future spec revisions still parse.
    pub const SuperCompression = enum(u32) { none, basislz, zstandard, zlib, _ };

    // Fixed-size header that immediately follows the 12-byte identifier.
    pub const Header = extern struct {
        format: u32,
        type_size: u32,
        width: u32,
        height: u32,
        depth: u32,
        layers: u32,
        faces: u32,
        levels: u32,
        supercompression: SuperCompression,
    };

    // Byte offsets/lengths of the metadata blocks that follow the header.
    // NOTE(review): the KTX2 spec names the first field dfdByteOffset —
    // `fd_byte_offset` looks like a typo for `dfd_byte_offset`; confirm.
    pub const Index = extern struct {
        fd_byte_offset: u32,
        dfd_byte_length: u32,
        kvd_byte_offset: u32,
        kvd_byte_length: u32,
        sgd_byte_offset: u64,
        sgd_byte_length: u64,
    };

    // The 12-byte KTX2 file magic.
    // NOTE(review): this is an untyped tuple literal; it relies on coercion
    // at the use site — consider declaring it as `[12]u8` explicitly.
    pub const signature = .{ '«', 'K', 'T', 'X', ' ', '2', '0', '»', '\r', '\n', '\x1A', '\n' };

    identifier: [12]u8,
    header: Header,
    index: Index,
};

/// Specialized KTX2 reader driven by an OrderedReader.
/// NOTE(review): the returned type still captures `order`, a pointer to a
/// comptime var, so instantiating it triggers "captured value contains
/// reference to comptime var" — that design issue is separate from the
/// local fixes below.
pub fn Ktx2Reader(comptime order: *OrderedReader(Ktx2)) type {
    return struct {
        const Self = @This();
        pub const TakeHeaderError = error{ InvalidSignature, InvalidLevelCount } || Io.Reader.Error;

        reader: *Io.Reader,

        /// Fix: return `Self` (the generated struct); the original returned
        /// `Ktx2Reader`, which names the outer type function, not a type.
        pub fn init(reader: *Io.Reader) Self {
            return .{
                .reader = reader,
            };
        }

        /// Validates the 12-byte signature, then reads the header.
        pub fn takeHeader(self: *Self) TakeHeaderError!Ktx2.Header {
            // Fix: `take` returns an error union — unwrap with `try` before
            // handing the result to mem.eql.
            const sig = try order.field(.identifier).take(self.reader);

            if (!std.mem.eql(u8, &sig, &Ktx2.signature)) return error.InvalidSignature;

            // Fix: struct reads require an endianness argument; KTX2 payloads
            // are little-endian per spec — TODO confirm against takeStruct.
            return order.field(.header).take(self.reader, .little);
        }

        pub fn takeIndex(self: *Self) !Ktx2.Index {
            // Fix: same missing endianness argument as takeHeader.
            return order.field(.index).take(self.reader, .little);
        }
    };
}

// Exercises the generic-reader variation against fixture files.
// `test_ktx2_files` is a fixture table defined elsewhere in the file
// (not visible in this excerpt).
test "Can read header" {
    const testing = std.testing;
    const io = testing.io;

    const cwd = Io.Dir.cwd();

    for (test_ktx2_files) |f| {
        const file = try cwd.openFile(io, f.path, .{});
        var buffer: [128]u8 = undefined;
        var fr = file.reader(io, &buffer);
        const reader = &fr.interface;

        comptime var order: OrderedReader(Ktx2) = .{};
        // NOTE(review): this is the line the compiler rejects — the type
        // returned by Ktx2Reader captures a pointer to the comptime var
        // `order` declared just above.
        var ktx2_reader: Ktx2Reader(&order) = .init(reader);
        const header = try ktx2_reader.takeHeader();

        try testing.expectEqual(f.format, header.format);
        try testing.expectEqual(f.type_size, header.type_size);
        try testing.expectEqual(f.width, header.width);
        try testing.expectEqual(f.height, header.height);
        try testing.expectEqual(f.depth, header.depth);
        try testing.expectEqual(f.layers, header.layers);
        try testing.expectEqual(f.faces, header.faces);
        try testing.expectEqual(f.levels, header.levels);
        try testing.expectEqual(f.scomp, header.supercompression);
    }
}

Unfortunately this results in the following error:

src/root.zig:40:12: error: captured value contains reference to comptime var
    return struct {
           ^~~~~~
src/root.zig:83:18: note: 'order' points to comptime var declared here
        comptime var order: OrderedReader(Ktx2) = .{};
        ~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
src/root.zig:84:36: note: called at comptime here
        var ktx2_reader: Ktx2Reader(&order) = .init(reader);
                         ~~~~~~~~~~^~~~~~~~

Some other variations I tried are

  • Embedding the ordered reader into the Ktx2Reader
  • Passing the ordered reader as parameter to takeHeader/takeIndex functions
    this resulted in a similar error.

Is there any way to work around this issue?

1 Like

You are trying to mix a runtime variable with a comptime variable, how is that supposed to work? (the lifetimes of comptime vars and normal vars don’t overlap, the former exist while the compiler runs the latter when the compiler is done and your program runs)
You need to separate the runtime and comptime parts so that they don’t depend on another.

With OrderedReader you have a clear separation between the field method that runs at comptime which just returns a type and the ReaderImpl which then receives the runtime reader.

Ktx2Reader tries to mix them both so it doesn’t work.
It shouldn’t take/store the reader, be a comptime var itself and then generate something which has a function that takes a reader at runtime, similar to what you have done in OrderedReader.


I don’t really understand what your intention is with this OrderedReader, wouldn’t it be easier to describe the order in a declarative way in some way and then use that description and generate reading/writing code for it in one step?

Thank you for your reply!
I think I have some misunderstanding of zig’s comptime; I have a difficult time seeing where I’m mixing runtime and comptime variables here, as the parameter order is defined to be comptime and the variable is comptime as well?

I don’t really understand what your intention is with this OrderedReader, wouldn’t it be easier to describe the order in a declarative way in some way and then use that description and generate reading/writing code for it in one step?

The main reason was to enforce reading in a specified comptime enforced order. For example with KTX2 you always would need to first read the identifier, then the header, then the index etc.; if you would try to read the header first you will get garbage values. I saw the mentioned post and thought it might be interesting to experiment with. What do you mean exactly with declarative here? Is the ordered reader not exactly that? Because you pass the exact format using the StructT?

Maybe for some extra context. Here the Ktx2Reader defined as OrderedReader(Ktx2)

// Specialize the generic ordered reader directly for the KTX2 layout below.
const Ktx2Reader = OrderedReader(Ktx2);

// KTX2 container layout in on-disk order: identifier, header, index.
const Ktx2 = struct {
    // Supercompression schemes; non-exhaustive (`_`) so unknown values parse.
    pub const SuperCompression = enum(u32) { none, basislz, zstandard, zlib, _ };

    // Fixed-size header that follows the 12-byte identifier.
    pub const Header = extern struct {
        format: u32,
        type_size: u32,
        width: u32,
        height: u32,
        depth: u32,
        layers: u32,
        faces: u32,
        levels: u32,
        supercompression: SuperCompression,
    };

    // Byte offsets/lengths of the metadata blocks that follow the header.
    // NOTE(review): spec names the first field dfdByteOffset; `fd_byte_offset`
    // looks like a typo for `dfd_byte_offset` — confirm.
    pub const Index = extern struct {
        fd_byte_offset: u32,
        dfd_byte_length: u32,
        kvd_byte_offset: u32,
        kvd_byte_length: u32,
        sgd_byte_offset: u64,
        sgd_byte_length: u64,
    };

    // The 12-byte KTX2 file magic (untyped tuple; coerces at the use site).
    pub const signature = .{ '«', 'K', 'T', 'X', ' ', '2', '0', '»', '\r', '\n', '\x1A', '\n' };

    identifier: [12]u8,
    header: Header,
    index: Index,
};

And here it’s usage:

// Uses the OrderedReader directly (no wrapper type), which compiles because
// every use of the comptime var happens at the call sites, not inside a
// captured type. `test_ktx2_files` is a fixture table defined elsewhere.
test "Can read header" {
    const testing = std.testing;
    const io = testing.io;

    const cwd = Io.Dir.cwd();

    for (test_ktx2_files) |f| {
        const file = try cwd.openFile(io, f.path, .{});
        var buffer: [128]u8 = undefined;
        var fr = file.reader(io, &buffer);
        const reader = &fr.interface;

        comptime var ktx2_reader: Ktx2Reader = .{};

        // Order is enforced: discarding the identifier must come first.
        try ktx2_reader.field(.identifier).discard(reader);
        const header = try ktx2_reader.field(.header).take(reader, .little);

        try testing.expectEqual(f.format, header.format);
        try testing.expectEqual(f.type_size, header.type_size);
        try testing.expectEqual(f.width, header.width);
        try testing.expectEqual(f.height, header.height);
        try testing.expectEqual(f.depth, header.depth);
        try testing.expectEqual(f.layers, header.layers);
        try testing.expectEqual(f.faces, header.faces);
        try testing.expectEqual(f.levels, header.levels);
        try testing.expectEqual(f.scomp, header.supercompression);
    }
}

This works but it misses the validation of the signature and forces the user to handle that, I could build a more specialized OrderedReader that focusses only on Ktx2 but I wanted to try the generic approach so I could use it for other file formats as well.

In case you mean providing a generic way to build a reader and then read the file in one go: the reason for not doing that is because KTX2 is a file format focused on streaming, allowing you to read each mipmap level and display it while moving on to the next.

I mean coming up with a way to describe it in one batch instead of piecemeal throughout many different functions, if you define it incrementally by accumulating the order than you need to be more careful about avoiding a situation where your comptime and runtime parts become too coupled to another.

Instead you could have something like:

pub const order = .{ .identifier, .header, .index };

Or just reflect on the field order and enforce that that has to be in the expected order (or if there is no order declaration on the struct).

You are passing a pointer to the comptime mutable

into:

pub fn Ktx2Reader(comptime order: *OrderedReader(Ktx2)) type {

here:

Ktx2Reader(&order)

That returns a type, which now captures that reference to the comptime var order.


If however you write your Ktx2Reader in such a way that all the uses of order happen outside of the returned type, then it works (because then the resulting type doesn’t have a dependency on order itself):

/// KTX2 reader that avoids the comptime-capture error: every use of `order`
/// happens here, outside the returned type, so the generated struct carries
/// no reference to the comptime var.
pub fn Ktx2Reader(comptime order: *OrderedReader(Ktx2)) type {
    const Identifier = order.field(.identifier);
    const Header = order.field(.header);
    const Index = order.field(.index);
    return struct {
        const Self = @This();
        pub const TakeHeaderError = error{ InvalidSignature, InvalidLevelCount } || Io.Reader.Error;

        reader: *Io.Reader,

        pub fn init(reader: *Io.Reader) Self {
            return .{
                .reader = reader,
            };
        }

        /// Validates the 12-byte signature, then reads the header.
        pub fn takeHeader(self: *Self) TakeHeaderError!Ktx2.Header {
            const sig = try Identifier.take(self.reader);

            if (!std.mem.eql(u8, &sig, &Ktx2.signature)) return error.InvalidSignature;

            const builtin = @import("builtin");
            const native_endian = builtin.cpu.arch.endian();
            // NOTE(review): KTX2 payloads are little-endian per spec; native
            // endianness only matches on little-endian hosts — confirm.
            return Header.take(self.reader, native_endian);
        }

        pub fn takeIndex(self: *Self) !Ktx2.Index {
            const builtin = @import("builtin");
            const native_endian = builtin.cpu.arch.endian();
            // Fix: struct reads require an endianness argument; the original
            // call passed none, which cannot compile against takeStruct.
            return Index.take(self.reader, native_endian);
        }
    };
}

I can’t test completely because I don’t want to come up with my own test data.

1 Like

Ah this clears the comptime issue up for me. So as long as I don’t reference the comptime variable inside the returned type I should be ok, Thnx! Your solution looks valid with some minor tweaks, will try it and let you know!

1 Like

Tried it; it does work, but this won’t have the required effect. Now I’m able to call takeIndex before takeHeader. This is because you already have called the order.field() functions before the user actually calls them.

This is essentially achieved by

pub const FieldName = std.meta.FieldEnum(StructT);

You could pass order to the functions directly (but I find the excessive use of inline potentially troubling):

/// Variation that passes the comptime ordering state to each call instead of
/// capturing it, so ordering between takeHeader/takeIndex is still enforced.
pub const Ktx2Reader = struct {
    const Self = @This();
    pub const TakeHeaderError = error{ InvalidSignature, InvalidLevelCount } || Io.Reader.Error;

    reader: *Io.Reader,

    pub fn init(reader: *Io.Reader) Self {
        return .{
            .reader = reader,
        };
    }

    /// Validates the 12-byte signature, then reads the header.
    /// `inline` is required so `order` can stay comptime-mutable per call site.
    pub inline fn takeHeader(self: *Self, comptime order: *OrderedReader(Ktx2)) TakeHeaderError!Ktx2.Header {
        const sig = try order.field(.identifier).take(self.reader);

        if (!std.mem.eql(u8, &sig, &Ktx2.signature)) return error.InvalidSignature;

        const builtin = @import("builtin");
        const native_endian = builtin.cpu.arch.endian();
        return order.field(.header).take(self.reader, native_endian);
    }

    pub inline fn takeIndex(self: *Self, comptime order: *OrderedReader(Ktx2)) !Ktx2.Index {
        const builtin = @import("builtin");
        const native_endian = builtin.cpu.arch.endian();
        // Fix: struct reads require an endianness argument; the original call
        // passed none, which cannot compile against takeStruct.
        return order.field(.index).take(self.reader, native_endian);
    }
};

Hmm, I don’t like it either. Plus the API would require indeed the order to be passed each time, that doesn’t really feel ergonomic. I’m leaning more and more to a non generic version of OrderedReader where instead of switching on type to return the available functions, to switch on field index and return specific functions based on that.

No I mean creating a static stateless description about the data that completely describes everything about the fields, their order what is allowed to be read when etc., and then get that description and generate a type with methods from it, that do the right thing according to the description. Basically it would be similar to a more complex format function spitting out a custom piece of code.

But I see that you are pretty invested in this idea of comptime enforced piecemeal construction of function call order. While that makes everything statically enforceable which might be a good thing in certain situations, it also could make it so that you inline more stuff than necessary.

Basically sometimes it may make more sense to keep some runtime index that is used for asserting ordering checks and avoid the inline based explosion of code size. That said I think it is something you would have to investigate in detail, it is difficult to predict what code would result in duplication or what would be magically optimized away by the compiler.

Here a non generic version for example

// Layout used by the non-generic reader below. Note: no `identifier` field —
// the signature check is folded into the .header read instead.
const Ktx2 = struct {
    // Supercompression schemes; non-exhaustive (`_`) so unknown values parse.
    pub const SuperCompression = enum(u32) { none, basislz, zstandard, zlib, _ };

    // Fixed-size header that follows the 12-byte identifier.
    pub const Header = extern struct {
        format: u32,
        type_size: u32,
        width: u32,
        height: u32,
        depth: u32,
        layers: u32,
        faces: u32,
        levels: u32,
        supercompression: SuperCompression,
    };

    // Byte offsets/lengths of the metadata blocks that follow the header.
    // NOTE(review): spec names the first field dfdByteOffset; `fd_byte_offset`
    // looks like a typo for `dfd_byte_offset` — confirm.
    pub const Index = extern struct {
        fd_byte_offset: u32,
        dfd_byte_length: u32,
        kvd_byte_offset: u32,
        kvd_byte_length: u32,
        sgd_byte_offset: u64,
        sgd_byte_length: u64,
    };

    // The 12-byte KTX2 file magic (untyped tuple; coerces at the use site).
    pub const signature = .{ '«', 'K', 'T', 'X', ' ', '2', '0', '»', '\r', '\n', '\x1A', '\n' };

    header: Header,
    index: Index,
};

// Non-generic ordered reader: switches on the field index instead of the
// field type, which lets .header bundle the signature check but gives up
// the re-usability of the generic OrderedReader.
pub const Ktx2Reader = struct {
    const Self = @This();
    // Field order of Ktx2 defines the required read order.
    pub const FieldName = std.meta.FieldEnum(Ktx2);
    // Comptime-only ordering state (comptime_int field => comptime-only struct).
    next_field_index: comptime_int = 0,

    /// Returns the reader namespace for `name`; compile error when called
    /// out of declaration order.
    pub fn field(comptime self: *Self, comptime name: FieldName) type {
        if (comptime @intFromEnum(name) != self.next_field_index) {
            const expected: FieldName = @enumFromInt(self.next_field_index);
            @compileError("." ++ @tagName(expected) ++ " is expected, but received ." ++ @tagName(name));
        }

        self.next_field_index += 1;

        return switch (name) {
            .header => struct {
                // Consumes the 12-byte identifier, validates it, then reads
                // the little-endian header.
                pub fn take(reader: *Io.Reader) !Ktx2.Header {
                    const sig = try reader.takeArray(12);

                    // NOTE(review): `sig` is already a *[12]u8, so `&sig.*`
                    // is a pointer round-trip; also relies on the tuple
                    // `Ktx2.signature` coercing to [12]u8 — confirm.
                    if (!std.mem.eql(u8, &sig.*, &Ktx2.signature)) return error.InvalidSignature;

                    return reader.takeStruct(Ktx2.Header, .little);
                }
            },
            .index => struct {
                pub fn take(reader: *Io.Reader) !Ktx2.Index {
                    return reader.takeStruct(Ktx2.Index, .little);
                }
            },
        };
    }
};

This will force .header to be called before .index and allow .header to do the signature check. Unfortunately it loses the re-usability of the OrderedReader.

I dunno, call me old school if you like, but what about just calling those functions in the right order and telling / documenting that it needs to be done that way?

I think I am not completely sold on this idea of enforcing function call order, it seems like a neat gimmick, but I am not sure whether it is really worth it in so many cases.

1 Like

Hmm I’m not too invested, just experimenting, wanted to see how far I could take this. :slight_smile:
It sounds interesting and I might try it that way as well, as I’m not seeing too many benefits of using the non-generic ordered reader. It will only enforce call order at comptime, but you lose quite a bit of flexibility etc. And for a lot of formats, KTX2 included, you will at some point run into the issue that the size of the data to be read is defined inside the file; this will also make it difficult, for example, to discard until a certain field, because you don’t know how much data is in between, only the order.

I agree, I have a feeling it might be better fitted in cases where you would read small non variable chunks of bytes for example. for complex formats you run quickly into difficult challenges that might be able to be overcome but most likely result in difficult to read code.

1 Like

If I find some other way to achieve this I’ll post it here. Thanks anyway, I at least learned some new stuff about comptime :smiley:

1 Like