Deserializing tagged JSON objects with metaprogramming

TL;DR: I want to get around that Type.Structs with decls can’t be reified, or figure out some other way to express my ideas below.

I’m trying to deserialize JSON objects, where some objects are part of a union that is discriminated by one of their fields. For example, an integer object will look like this {"type": "integer", ...} and a boolean will look like this {"type": "boolean", ...}, and there is some other type which contains a field that is the union of these. My first attempt at doing this looked something like this:

const std = @import("std");
/// Integer variant. Every field has a default because `std.json` reports
/// `error.MissingField` for any struct field (optional or not) that is absent
/// from the JSON object and has no default value.
const Int = struct { ty: []const u8 = "integer", max: ?i32 = null, min: ?i32 = null };
/// Boolean variant; see `Int` for why the optional field carries a default.
const Bool = struct { ty: []const u8 = "boolean", default: ?bool = null };
/// Union over the two variants. It declares no custom `jsonParse`, so parsing
/// is left entirely to `std.json`'s built-in handling of tagged unions.
const Field = union(enum) {
    int: Int,
    bool: Bool,
};
// Demonstrates the problem: the input carries the discriminator as a member
// ("ty") of the object itself, which the default union parser does not accept,
// so this test is expected to fail with a parse error.
test "parse" {
    const input = \\
        \\ { "ty": "integer" }
    ;
    // `try` instead of discarding the error union, and release the result so a
    // successful parse would not be reported as a leak by the testing allocator.
    const parsed = try std.json.parseFromSlice(Field, std.testing.allocator, input, .{});
    defer parsed.deinit();
}

This doesn’t work because, afaiu, the JSON parser expects the JSON object corresponding to Field to look like this { "int": { "ty": "integer" } }. I.e., the current parser expects the discriminator to be a key that wraps the object, not a field inside the object.

I have created a version of field with a custom jsonParse and jsonParseFromValue functions, but this is a pattern that will be replicated for more unions, so I wanted to create some helper functions that allow me to do two things:

  1. Create structs with a const declaration of the discriminant, like this { const ty = "integer"; }, the other fields, and adds custom jsonParse and jsonParseFromValue to take into account that one of the fields of the json object is now a declaration and not a field.
  2. Create a helper to create unions that will deserialize the unions as explained above (i.e. based on the discriminant field), taking the decl’s name and adds custom jsonParse and jsonParseFromValue functions.

Here I ran into an issue with not being able to reify Type.Structs with decls in them, and I don’t see any way to create such helpers without adding decls to Type.Structs, which is forbidden by design.

Here's some WIP code that I've tested my idea with.
const std = @import("std");
const testing = std.testing;

/// Integer schema object, recognised by `"type": "integer"` in the JSON.
const Integer = struct {
    /// Discriminator value that the JSON object's "type" member must equal.
    pub const @"type": []const u8 = "integer";
    min: ?i64 = null,
    max: ?i64 = null,
    default: ?i64 = null,

    /// `std.json` hook for token sources: reads the next value as a dynamic
    /// `std.json.Value` and hands it to `jsonParseFromValue`.
    /// Returns `error.UnexpectedToken` when the value is not a JSON object.
    pub fn jsonParse(allocator: std.mem.Allocator, source: anytype, options: std.json.ParseOptions) !@This() {
        const parsed = try std.json.innerParse(std.json.Value, allocator, source, options);
        return jsonParseFromValue(allocator, parsed, options);
    }

    /// `std.json` hook for already-parsed values.
    ///
    /// Errors:
    /// - `error.UnexpectedToken`: `source` is not an object, or "type" is not a string.
    /// - `error.MissingField`: "type" is absent or does not equal `Integer.type`.
    /// - anything `std.json.innerParseFromValue` reports for a member value.
    ///
    /// Members that do not correspond to a struct field are ignored.
    pub fn jsonParseFromValue(allocator: std.mem.Allocator, source: std.json.Value, options: std.json.ParseOptions) !@This() {
        // This hook can be reached directly through `std.json.parseFromValue`,
        // so the shape of `source` must be validated here as well.
        const object = switch (source) {
            .object => |map| map,
            else => return error.UnexpectedToken,
        };
        const tag = object.get("type") orelse return error.MissingField;
        if (tag != .string) return error.UnexpectedToken;
        if (!std.mem.eql(u8, @This().type, tag.string)) return error.MissingField;

        // Start from the declared field defaults so no field is ever left undefined.
        var result: @This() = .{};
        inline for (std.meta.fields(@This())) |field| {
            if (object.get(field.name)) |member| {
                // innerParseFromValue allocates from `allocator` directly
                // instead of creating a nested arena for every field.
                @field(result, field.name) = try std.json.innerParseFromValue(field.type, allocator, member, options);
            }
        }
        return result;
    }
};

/// WIP experiment: attempts to return a copy of struct type `s` whose
/// declaration list additionally contains the `jsonParse` and
/// `jsonParseFromValue` functions defined in `MethodContainer` below.
///
/// NOTE(review): as the surrounding discussion points out, struct types with
/// declarations cannot be reified, so the final `@Type` call is the step this
/// experiment is stuck on.
pub fn TaggedJsonStruct(comptime s: type, comptime tag: []const u8) type {
    // Holder for the functions that are meant to be grafted onto `s`.
    // NOTE(review): inside this struct `@This()` is MethodContainer itself, not
    // `s`, so the return types and the `@This().type` lookup refer to the holder.
    const MethodContainer = struct {
        pub fn jsonParse(allocator: std.mem.Allocator, source: anytype, options: std.json.ParseOptions) !@This() {
            // Parse into a dynamic value first so members can be looked up by name.
            const parsed = try std.json.innerParse(std.json.Value, allocator, source, options);

            if (parsed != .object) {
                return error.UnexpectedToken;
            }

            return jsonParseFromValue(allocator, parsed, options);
        }
        pub fn jsonParseFromValue(allocator: std.mem.Allocator, source: std.json.Value, options: std.json.ParseOptions) !@This() {
            // The "type" member must be present and be a string: `.?` and
            // `.string` are unchecked unwraps here.
            if (!std.mem.eql(u8, @This().type, source.object.get("type").?.string)) {
                return error.MissingField;
            }
            var v: @This() = undefined;
            inline for (@typeInfo(@This()).Struct.fields) |field| {
                if (source.object.get(field.name)) |field_name| {
                    const p_ = try std.json.parseFromValue(field.type, allocator, field_name, options);
                    @field(v, field.name) = p_.value;
                } else if (@typeInfo(field.type) == .Optional) {
                    // Absent optional members become null; absent non-optional
                    // members are left undefined.
                    @field(v, field.name) = null;
                }
            }
            return v;
        }
    };
    // `tag` is accepted but not used yet.
    _ = tag;
    var S_Type = @typeInfo(s);
    // Compile-time debugging output listing the declarations of `s`.
    inline for (S_Type.Struct.decls) |decl| {
        @compileLog("struct ?= {}", .{decl});
    }
    // Append the holder's declarations to those of `s` ...
    S_Type.Struct.decls = S_Type.Struct.decls ++ @typeInfo(MethodContainer).Struct.decls;
    @compileLog("??? = {}", .{S_Type});
    // ... and try to turn the modified type info back into a type.
    return @Type(S_Type);
}

// Instantiates TaggedJsonStruct once so that its comptime body is analysed.
test "TaggedJsonStruct" {
    const Input = struct {
        const ty = "abc";
    };
    _ = TaggedJsonStruct(Input, "ty");
}

/// Boolean schema object, recognised by `"type": "boolean"` in the JSON.
const Boolean = struct {
    /// Discriminator value; `pub` to match `Integer.type`, so generic helpers
    /// can read it the same way for every variant.
    pub const @"type": []const u8 = "boolean";
    default: ?bool = null,
};

/// Union of the schema variants. No custom parsing hooks are attached to it in
/// this WIP snippet.
const Field = union(enum) {
    integer: Integer,
    boolean: Boolean,
};

// Parses a tagged integer object directly as `Integer` and checks that an
// absent member is null while a present one is read.
test "integer.json" {
    const json_text =
        \\{ "type": "integer", "default": 0}
    ;
    const result = try std.json.parseFromSlice(Integer, std.testing.allocator, json_text, .{});
    defer result.deinit();

    const integer = result.value;
    try std.testing.expectEqual(null, integer.max);
    try std.testing.expectEqual(0, integer.default);
}

What I need help with is

  1. Is there any way to salvage my idea using metaprogramming to generate these functions or do I have to build them by hand?
  2. Is there some part of the json library that supports these kinds of discriminated tags that I’ve missed?
2 Likes

If you can provide a tag field ("int"/"bool") in the json string, the parsing will succeed.
Additionally, you need to specify defaults for the optional fields too.

For instance:

const std = @import("std");

/// Integer variant; every field has a default value.
const Int = struct { ty: []const u8 = "integer", max: ?i32 = null, min: ?i32 = null };
/// Boolean variant; every field has a default value.
const Bool = struct { ty: []const u8 = "boolean", default: ?bool = null };

/// Union of the variants, parsed by `std.json` without custom hooks: the JSON
/// must wrap the payload in an object keyed by the variant name ("int" / "bool").
const Field = union(enum) {
    int: Int,
    bool: Bool,
};

// Parses the externally tagged form `{ "int": { ... } }` and compares the
// result with the expected union value.
test "parse" {
    const expect: Field = .{
        .int = .{ .ty = "integer" },
    };

    const input = \\
        \\ { "int": { "ty": "integer" } }
    ;
    const result = try std.json.parseFromSlice(Field, std.testing.allocator, input, .{});
    // Must be deferred: `result.value` (including the `ty` string) lives in the
    // arena owned by `result` and is still read by the comparison below.
    defer result.deinit();
    try std.testing.expectEqualDeep(expect, result.value);
}

I am aware that this is a solution, but I do not control the format of the json objects.

If you add a jsonParse() to Field, you can do the json parsing yourself. I’m not sure of the json format so this is incomplete, but compiles with zig 0.14-dev:

const std = @import("std");

/// Payload of the "integer" variant. Defaults are required so members may be
/// omitted from the JSON object.
const Int = struct { max: ?i32 = null, min: ?i32 = null };
/// Payload of the "boolean" variant.
const Bool = struct { default: ?bool = null };
/// Union whose variant is selected by the "ty" member of the JSON object.
const Field = union(enum) {
    integer: Int,
    boolean: Bool,
    const Ty = std.meta.Tag(Field);

    /// `std.json` hook. Reads the whole object as a dynamic value, looks up
    /// "ty" (at any position in the object), and parses the same object into
    /// the payload type of the variant named by it.
    ///
    /// Errors:
    /// - `error.UnexpectedToken`: the value is not an object, "ty" is not a
    ///   string, or "ty" names no variant.
    /// - `error.MissingField`: the object has no "ty" member.
    pub fn jsonParse(alloc: std.mem.Allocator, source: *std.json.Scanner, options: std.json.ParseOptions) !Field {
        // Going through std.json.Value avoids hand-written token handling:
        // the discriminator no longer has to be the first key, and the payload
        // parser sees a complete object instead of a half-consumed one.
        const value = try std.json.innerParse(std.json.Value, alloc, source, options);
        const object = switch (value) {
            .object => |map| map,
            else => return error.UnexpectedToken,
        };
        const discriminator = object.get("ty") orelse return error.MissingField;
        if (discriminator != .string) return error.UnexpectedToken;
        const ty = std.meta.stringToEnum(Ty, discriminator.string) orelse return error.UnexpectedToken;

        // "ty" is not a field of the payload structs, so it must be tolerated.
        var payload_options = options;
        payload_options.ignore_unknown_fields = true;
        switch (ty) {
            .integer => return .{ .integer = try std.json.innerParseFromValue(Int, alloc, value, payload_options) },
            .boolean => return .{ .boolean = try std.json.innerParseFromValue(Bool, alloc, value, payload_options) },
        }
    }
};

// Smoke test: an object tagged through its "ty" member parses as a Field.
test "parse" {
    const input =
        \\
        \\ { "ty": "integer" }
    ;
    // Keep the result so its arena can be released; discarding it would be
    // reported as a leak by std.testing.allocator.
    const parsed = try std.json.parseFromSlice(Field, std.testing.allocator, input, .{});
    defer parsed.deinit();
}

Sorry I should have read instead of skimmed. I see now that your question is more about the meta programming than json parsing.

To get around the ‘no decls’ issue when reify-ing types, you might just wrap the reified types so that you can add decls. Otherwise, you’ll likely have to write those out by hand.

So maybe something like this?:

pub fn TaggedJsonStruct(comptime s: type, comptime tag: []const u8) type {
    return struct {
        s: s,
        pub fn jsonParse(allocator: std.mem.Allocator, source: anytype, options: std.json.ParseOptions) !@This() {
// ...

I tried to simplify the code a bit and ended up removing some of the meta programming. What do you think about this solution? This code compiles and tests are passing.

const std = @import("std");
const testing = std.testing;

/// Integer variant of the schema; every field is optional and defaults to null.
const Integer = struct {
    min: ?i64 = null,
    max: ?i64 = null,
    default: ?i64 = null,

    /// Value of the JSON "type" member that selects this variant
    /// (compared against in `Field.jsonParseFromValue`).
    pub const @"type": []const u8 = "integer";
};

/// Boolean variant of the schema; its only field is optional and defaults to null.
const Boolean = struct {
    default: ?bool = null,

    /// Value of the JSON "type" member that selects this variant.
    /// `pub` for consistency with `Integer.type`.
    pub const @"type": []const u8 = "boolean";
};

/// Schema field whose variant is chosen by the "type" member of the JSON
/// object itself rather than by a wrapping key.
const Field = union(enum) {
    integer: Integer,
    boolean: Boolean,

    /// `std.json` hook for scanners: reads the next value as a dynamic
    /// `std.json.Value` and dispatches through `jsonParseFromValue`.
    /// Returns `error.UnexpectedToken` when the value is not a JSON object.
    pub fn jsonParse(allocator: std.mem.Allocator, source: *std.json.Scanner, options: std.json.ParseOptions) !@This() {
        const parsed = try std.json.innerParse(std.json.Value, allocator, source, options);
        return jsonParseFromValue(allocator, parsed, options);
    }

    /// `std.json` hook for already-parsed values.
    ///
    /// Errors:
    /// - `error.UnexpectedToken`: `source` is not an object, or "type" is not a string.
    /// - `error.MissingField`: "type" is absent or matches no variant.
    /// - anything the payload parser reports.
    pub fn jsonParseFromValue(allocator: std.mem.Allocator, source: std.json.Value, options: std.json.ParseOptions) !@This() {
        // Reachable directly via std.json.parseFromValue, so validate the
        // active tags here instead of relying on unchecked union field access.
        const object = switch (source) {
            .object => |map| map,
            else => return error.UnexpectedToken,
        };
        const ty = object.get("type") orelse return error.MissingField;
        if (ty != .string) return error.UnexpectedToken;

        // "type" is a declaration, not a field, of the payload structs.
        var opts = options;
        opts.ignore_unknown_fields = true;
        if (std.mem.eql(u8, ty.string, Boolean.type)) {
            return .{ .boolean = try std.json.parseFromValueLeaky(Boolean, allocator, source, opts) };
        } else if (std.mem.eql(u8, ty.string, Integer.type)) {
            return .{ .integer = try std.json.parseFromValueLeaky(Integer, allocator, source, opts) };
        }
        return error.MissingField;
    }
};

// A "type": "boolean" object must select the boolean variant and keep its
// "default" member.
test "boolean.json" {
    const json_text =
        \\{ "type": "boolean", "default": false}
    ;
    const result = try std.json.parseFromSlice(Field, std.testing.allocator, json_text, .{});
    defer result.deinit();

    const field = result.value;
    try std.testing.expectEqual(.boolean, std.meta.activeTag(field));
    try std.testing.expectEqual(false, field.boolean.default);
}

// A "type": "integer" object must select the integer variant; members that
// are absent stay null and present ones are read.
test "integer.json" {
    const input =
        \\{ "type": "integer", "default": 0}
    ;
    const parsed = try std.json.parseFromSlice(Field, std.testing.allocator, input, .{});
    defer parsed.deinit();
    // (Leftover debug print removed: tests should be silent on success.)
    try std.testing.expectEqual(.integer, std.meta.activeTag(parsed.value));
    try std.testing.expectEqual(null, parsed.value.integer.max);
    try std.testing.expectEqual(0, parsed.value.integer.default);
}

2 Likes

Sorry for confusing you, I wanted to describe my desired outcome to make sure I wasn’t skipping some obvious stdlib solution. Thanks for taking your time.

This is very close to what I have in another project. This works for small unions but I have multiple of these unions and I don’t want to have to change the function when adding or removing fields.

I think I know what I want to do now; I got a bit frustrated that my initial approach to this problem (using my Python metaprogramming brain) didn’t work. I think that instead of creating a function that takes a struct and returns a struct, I’ll make functions that create the specific functions I want and put them inside structs that I declare normally. Something like

const Field = union(enum) {
    ...
    const jsonParse = makeJsonParse(Field, ...);
};

I don’t see any reason that shouldn’t work but I thought my other solution would work as well :sweat_smile:

I’ll come back when I’ve tried this!

I think next time you should start with showing the non-generic version and then say in which ways you want to make it generic, that way people have a way clearer picture of what is going on.

So basically the combination of @Travis’s answer and yours, if you have all that information it is good when the answering person also has that info so they don’t suggest things that reverse engineer what you have already gone through and also it makes it easier for everyone to come up with a good generic version, when the non-generic one is already there outlining the goal, I find it much easier to work with than half working generic solutions where the intent is unclear.
Basically the non-generic version serves as the specification for the expected output, when the generic parameters are applied.

That said, I think your idea of writing the union manually and then adding the result of makeJsonParse to it looks good to me, it would basically turn @Travis’s example into the generic equivalent.

4 Likes

Ok, I have a working solution now, thank you all for your help!
The functions look like this:

/// Builds a `jsonParse` hook for `T` that works on a `std.json.Scanner`.
///
/// The returned function reads the next JSON value as a `std.json.Value`,
/// rejects anything that is not an object with `error.UnexpectedToken`, and
/// otherwise forwards the value to `T.jsonParseFromValue`.
fn makeJsonParse(comptime T: type) fn (std.mem.Allocator, *std.json.Scanner, std.json.ParseOptions) std.json.ParseError(std.json.Scanner)!T {
    const Hook = struct {
        fn parse(gpa: std.mem.Allocator, scanner: *std.json.Scanner, opts: std.json.ParseOptions) std.json.ParseError(std.json.Scanner)!T {
            const value = try std.json.innerParse(std.json.Value, gpa, scanner, opts);
            if (value == .object) {
                return T.jsonParseFromValue(gpa, value, opts);
            }
            return error.UnexpectedToken;
        }
    };
    return Hook.parse;
}

/// Builds a `jsonParseFromValue` hook for the tagged union `T`, whose variant
/// is selected by the string stored under the `discriminator` key of the JSON
/// object.
///
/// A variant matches when the discriminator string equals
/// - the payload type's declaration named `discriminator`
///   (e.g. `pub const @"type": []const u8 = "integer";`), if it has one, or
/// - the union field name otherwise.
///
/// The matching payload type is then parsed from the same object, with
/// unknown members ignored (the discriminator itself is not a payload field).
///
/// Errors returned by the generated function:
/// - `error.UnexpectedToken`: the value is not an object, or the discriminator
///   is not a string.
/// - `error.MissingField`: the discriminator is absent or matches no variant.
/// - anything `std.json.innerParseFromValue` reports for the payload.
fn makeJsonParseFromValue(comptime T: type, comptime discriminator: []const u8) fn (std.mem.Allocator, std.json.Value, std.json.ParseOptions) std.json.ParseFromValueError!T {
    switch (@typeInfo(T)) {
        .Union => {},
        else => @compileError("Type must be a union"),
    }
    return struct {
        /// Comptime lookup of the string that identifies one variant.
        fn tagFor(comptime Payload: type, comptime field_name: []const u8) []const u8 {
            switch (@typeInfo(Payload)) {
                // @hasDecl is only valid on container types.
                .Struct, .Union, .Enum, .Opaque => {
                    if (@hasDecl(Payload, discriminator)) return @field(Payload, discriminator);
                },
                else => {},
            }
            return field_name;
        }

        pub fn jsonParseFromValue(allocator: std.mem.Allocator, source: std.json.Value, options: std.json.ParseOptions) std.json.ParseFromValueError!T {
            // Reachable directly via std.json.parseFromValue, so check the
            // active tags instead of relying on unchecked union field access.
            const object = switch (source) {
                .object => |map| map,
                else => return error.UnexpectedToken,
            };
            const tag = object.get(discriminator) orelse return error.MissingField;
            if (tag != .string) return error.UnexpectedToken;

            var opts = options;
            opts.ignore_unknown_fields = true;
            inline for (@typeInfo(T).Union.fields) |u_field| {
                if (std.mem.eql(u8, tag.string, comptime tagFor(u_field.type, u_field.name))) {
                    // innerParseFromValue allocates from `allocator` directly;
                    // parseFromValue would create a nested arena that is never
                    // deinitialised.
                    const payload = try std.json.innerParseFromValue(u_field.type, allocator, source, opts);
                    return @unionInit(T, u_field.name, payload);
                }
            }
            return error.MissingField;
        }
    }.jsonParseFromValue;
}

// Example usage
/// Example union: the parsing hooks are generated by the helper functions and
/// attached as ordinary declarations, with "type" as the discriminator key.
const Field = union(enum) {
    boolean: Boolean,
    integer: Integer,

    pub const jsonParse = makeJsonParse(@This());
    pub const jsonParseFromValue = makeJsonParseFromValue(@This(), "type");
};

/// Boolean schema object; every field is optional and defaults to null.
const Boolean = struct {
    /// Discriminator string associated with this variant.
    pub const @"type": []const u8 = "boolean";
    description: ?[]const u8 = null,
    default: ?bool = null,
    // `const` is a Zig keyword, hence the @"..." identifier syntax.
    @"const": ?bool = null,
};

/// Integer schema object; every field is optional and defaults to null.
const Integer = struct {
    /// Discriminator string associated with this variant.
    pub const @"type": []const u8 = "integer";
    description: ?[]const u8 = null,
    minimum: ?i64 = null,
    maximum: ?i64 = null,
    // `enum` and `const` are Zig keywords, hence the @"..." identifier syntax.
    @"enum": ?[]i64 = null,
    default: ?i64 = null,
    @"const": ?i64 = null,
};

Which is more or less a generic version of @Travis’s solution. I learned a lot about how Zig works!

I think possible improvements would be to add a compile time check to make sure the union field types actually have the field as a declaration but I’ll leave that for later. If anybody has any other suggestions please tell me.

2 Likes