Exporting data to the build system

I've had an idea for a while: being able to programmatically export data from source code to the build system, so that it can be used by tools that are less limited in functionality and performance than comptime.

I think it would be quite useful for:

  • validating database queries with a test db
  • updating snapshot tests
  • anything that requires more performance and/or knowledge about a codebase than is practical with build options or hardcoded assumptions

It does get around comptime's sandboxing, but I think using it would require enough wiring in build.zig to prevent any shenanigans, maybe even going so far as to artificially restrict the API to reinforce that point.

1 Like

Does this need to be a language feature? Retrieving data from source files is very doable with the build system as-is.

For example, Polystate needs the comptime-known EnterFsmState at build time in order to generate a transition graph. It does this by simply compiling a code snippet that accesses EnterFsmState:

/// Creates a build-time generator executable for `module` and returns the
/// generator's captured stdout as a `LazyPath`.
///
/// The generator source is written into an options step. It imports `module`
/// under `module_name`, builds a `ps.Graph` from the module's `EnterFsmState`
/// declaration, and writes it to stdout with the `ps.Graph` method selected
/// by `graph_mode`.
pub fn addGraphFile(
    b: *std.Build,
    module_name: []const u8,
    module: *std.Build.Module,
    graph_mode: GraphMode,
    polystate: *std.Build.Module,
    target: std.Build.ResolvedTarget,
) std.Build.LazyPath {
    // Name of the `ps.Graph` method the generated program calls.
    const render_method: []const u8 = switch (graph_mode) {
        .graphviz => "generateDot",
        .mermaid => "generateMermaid",
        .json => "generateJson",
    };

    // Format template for the generator program. Doubled braces are format
    // escapes; the two `{s}` slots take the import name and the render method.
    const generator_template =
        \\const std = @import("std");
        \\const ps = @import("polystate");
        \\const Target = @import("{s}");
        \\pub fn main() !void {{
        \\  var gpa_instance = std.heap.GeneralPurposeAllocator(.{{}}){{}};
        \\  const gpa = gpa_instance.allocator();
        \\  var graph = try ps.Graph.initWithFsm(gpa, Target.EnterFsmState);
        \\  defer graph.deinit();
        \\  const writer = std.io.getStdOut().writer();
        \\  try graph.{s}(writer);
        \\}}
    ;

    const source_step = b.addOptions();
    source_step.contents.writer().print(
        generator_template,
        .{ module_name, render_method },
    ) catch @panic("OOM");

    // The generator sees polystate and the inspected module as named imports.
    const generator_module = b.createModule(.{
        .root_source_file = source_step.getOutput(),
        .target = target,
        .imports = &.{
            .{ .name = "polystate", .module = polystate },
            .{ .name = b.dupe(module_name), .module = module },
        },
    });

    const generator_name = std.mem.concat(
        b.allocator,
        u8,
        &.{ "_generate_graph_for_", module_name },
    ) catch @panic("OOM");

    const generator_exe = b.addExecutable(.{
        .name = generator_name,
        .root_module = generator_module,
    });

    return b.addRunArtifact(generator_exe).captureStdOut();
}

Now, if you wanted something with a more tightly integrated back-and-forth, where the compilation would be suspended while the build system acted on the exported data, I could see why a language feature may be necessary:

const std = @import("std");

// Comptime-known inputs that are handed to the build function below.
const num1 = 2 + 2;
const num2 = 3 + 3;

// Resolve num1 and num2 and give them to the build.zig,
// suspend compilation of anything depending on num3 while
// the build.zig runs numberComputer, then resume compilation.
// NOTE: `@buildFn` is a proposed builtin sketched for this discussion,
// not an existing one.
const num3 = @buildFn(.numberComputer, .{ num1, num2 });

// Prints the value the build function produced for num3.
pub fn main() void {
    std.debug.print("{}\n", .{num3});
} 

I’ve thought about a builtin like this before, but I feel that it could make things a little too easy. You don’t want everyone using opaque build system code for everything just because they couldn’t be bothered to get it to work in comptime.

Plus, it’s not even that hard to implement this kind of “inline build system” logic manually with @embedFile:

src/main.zig:

const std = @import("std");

/// First operand; public so that a module importing this file can read it.
pub const num1 = 2 + 2;
/// Second operand; public for the same reason as `num1`.
pub const num2 = 3 + 3;

/// Contents of the file behind the "num3" import, embedded at compile time.
const num3 = @embedFile("num3");

/// Prints the embedded result followed by a newline.
pub fn main() void {
    const embedded_result: []const u8 = num3;
    std.debug.print("{s}\n", .{embedded_result});
}

build.zig:

const std = @import("std");

/// Declares the "demo" executable, exposes the helper's captured output to it
/// as the "num3" import, installs it, and registers a "run" step.
pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // Separate module for src/main.zig that is handed to the helper
    // executable, which reads its declarations.
    const main_for_helper = b.createModule(.{
        .root_source_file = b.path("src/main.zig"),
        .target = target,
        .optimize = optimize,
    });

    // Captured stdout of the helper, wrapped in a module so main.zig can
    // reach it by the name "num3".
    const num3_file = addNumberComputerResultFile(b, main_for_helper, target);
    const num3_module = b.createModule(.{
        .root_source_file = num3_file,
    });

    const demo_module = b.createModule(.{
        .root_source_file = b.path("src/main.zig"),
        .target = target,
        .optimize = optimize,
        .imports = &.{
            .{ .name = "num3", .module = num3_module },
        },
    });

    const exe = b.addExecutable(.{
        .name = "demo",
        .root_module = demo_module,
    });
    b.installArtifact(exe);

    const run_cmd = b.addRunArtifact(exe);
    run_cmd.step.dependOn(b.getInstallStep());

    // Forward any arguments given to the build invocation to the program.
    if (b.args) |forwarded_args| run_cmd.addArgs(forwarded_args);

    b.step("run", "Run the app").dependOn(&run_cmd.step);
}

/// Builds and runs a helper executable that prints `mod.num1 + mod.num2` to
/// stdout, and returns that captured stdout as a `LazyPath`.
///
/// `module` is made available to the helper under the import name "mod"; the
/// generated program reads its `num1` and `num2` declarations. The helper is
/// compiled for `target`.
fn addNumberComputerResultFile(
    b: *std.Build,
    module: *std.Build.Module,
    target: std.Build.ResolvedTarget,
) std.Build.LazyPath {
    const options = b.addOptions();

    // `writeAll` copies these bytes verbatim (no format processing), so the
    // braces of the generated program are written unescaped. Escaping them as
    // `{{` / `}}` would emit a redundant nested block in the helper's `main`.
    options.contents.writer().writeAll(
        \\const std = @import("std");
        \\const mod = @import("mod");
        \\
        \\const num1 = mod.num1;
        \\const num2 = mod.num2;
        \\
        \\pub fn main() !void {
        \\    try std.io.getStdOut().writer().print("{}", .{num1 + num2});
        \\}
    ) catch @panic("OOM");

    const opt_exe = b.addExecutable(.{
        .name = "number_computer",
        .root_module = b.createModule(.{
            .root_source_file = options.getOutput(),
            .target = target,
            .imports = &.{
                .{ .name = "mod", .module = module },
            },
        }),
    });

    // The helper's stdout becomes the file that callers embed.
    const run = b.addRunArtifact(opt_exe);
    return run.captureStdOut();
}

You could go even further with this if you wanted, creating a general purpose abstraction for build-provided functions:

src/main.zig:

const std = @import("std");

// Inputs for the first build-function call. They are public and are referred
// to by name (as enum literals) in the `buildFn` call below.
pub const res1_num1 = 2 + 2;
pub const res1_num2 = 3 + 3;

// Inputs for the second build-function call, this time as f64 values.
pub const res2_num1: f64 = 100;
pub const res2_num2: f64 = 23.4;

// Request "numberComputer" with the two res1 inputs; `res1_num3` reads the
// `data` variant of the resulting payload.
pub const res1_num3_build_result: BuildResult = .buildFn("numberComputer", &.{ .res1_num1, .res1_num2 });
const res1_num3 = res1_num3_build_result.payload.data;

// Same request for the two res2 inputs.
pub const res2_num3_build_result: BuildResult = .buildFn("numberComputer", &.{ .res2_num1, .res2_num2 });
const res2_num3 = res2_num3_build_result.payload.data;

/// Prints both build-provided results, each followed by a newline.
pub fn main() void {
    inline for (.{ res1_num3, res2_num3 }) |result| {
        std.debug.print("{s}\n", .{result});
    }
}

/// `BuildResultGeneric` instantiated with this file's own container type (`@This()`).
pub const BuildResult = BuildResultGeneric(@This());

/// Returns a struct type describing one call to a build-provided function
/// whose arguments are named by declarations of `Container`.
pub fn BuildResultGeneric(comptime Container: type) type {
    return struct {
        /// Name of the build function, as passed to `buildFn`.
        fn_name: []const u8,
        /// Tag names of the `Container` declarations passed as arguments.
        arg_names: []const []const u8,
        /// Lookup key produced by `makeIdentifier` from the container type
        /// name, `fn_name` and `arg_names`.
        identifier: []const u8,
        /// `.data` when `build_result_map` has an entry for `identifier`,
        /// `.uninitialized` otherwise.
        payload: Payload,

        const Self = @This();

        /// Either the result bytes or a marker that no result is available.
        pub const Payload = union(enum) {
            data: []const u8,
            uninitialized,
        };

        /// Identifier -> result map decoded at comptime from `build_results`.
        /// The embedded bytes are read as consecutive pairs of strings, each
        /// string preceded by its length as a little-endian u64.
        const build_result_map: std.StaticStringMap([]const u8) = .initComptime(blk: {
            @setEvalBranchQuota(2000000);

            var pairs: []const struct { []const u8, []const u8 } = &.{};
            var idx: usize = 0;

            // Consume the embedded bytes one (key, value) pair at a time.
            while (idx < build_results.len) {
                var pair: struct { []const u8, []const u8 } = undefined;

                // Both halves of the pair use the same encoding:
                // an 8-byte length followed by that many bytes.
                for (&pair) |*string| {
                    const string_len = std.mem.readInt(u64, build_results[idx..][0..8], .little);
                    idx += 8;

                    string.* = build_results[idx..][0..string_len];
                    idx += string_len;
                }

                pairs = pairs ++ &[_]struct { []const u8, []const u8 }{pair};
            }

            break :blk pairs;
        });

        /// Raw bytes of the file behind the "build_results" import.
        const build_results = @embedFile("build_results");

        /// Builds a `Self` at comptime for calling `fn_name` with `args`.
        /// `args` are tags of `std.meta.DeclEnum(Container)`, i.e. they name
        /// declarations of `Container`. The payload is looked up in
        /// `build_result_map` by the computed identifier.
        pub fn buildFn(comptime fn_name: []const u8, comptime args: []const std.meta.DeclEnum(Container)) Self {
            comptime {
                @setEvalBranchQuota(2000000);
                var res: Self = undefined;
                res.fn_name = fn_name;
                res.arg_names = argNames(args);
                res.identifier = makeIdentifier(fn_name, res.arg_names);
                res.payload = if (build_result_map.get(res.identifier)) |data| .{ .data = data } else .uninitialized;
                return res;
            }
        }

        /// Converts each enum tag in `args` to its tag name.
        fn argNames(comptime args: []const std.meta.DeclEnum(Container)) []const []const u8 {
            comptime {
                var res: [args.len][]const u8 = undefined;

                for (&res, args) |*string, arg| {
                    string.* = @tagName(arg);
                }

                // Copy into a const before taking the address; presumably so
                // the returned slice does not point at a comptime `var`.
                const res_const = res;
                return &res_const;
            }
        }

        /// Concatenates the container type name, `fn_name` and every entry of
        /// `arg_names`, separated by a single 0 byte. Each part is first
        /// passed through `doubleZeros`, presumably so that a 0 byte inside a
        /// part cannot be mistaken for a separator.
        fn makeIdentifier(comptime fn_name: []const u8, comptime arg_names: []const []const u8) []const u8 {
            comptime {
                var res: []const u8 = doubleZeros(@typeName(Container)) ++ &[_]u8{0} ++ doubleZeros(fn_name);

                for (arg_names) |arg_name| {
                    res = res ++ &[_]u8{0} ++ doubleZeros(arg_name);
                }

                return res;
            }
        }

        /// Returns `string` with every 0 byte replaced by two 0 bytes.
        fn doubleZeros(comptime string: []const u8) []const u8 {
            comptime {
                // Size the output buffer for the expanded string up front.
                const res_size = std.mem.replacementSize(u8, string, &.{0}, &.{ 0, 0 });
                var res: [res_size]u8 = undefined;
                _ = std.mem.replace(u8, string, &.{0}, &.{ 0, 0 }, &res);

                // Same const-copy step as in `argNames` before returning a pointer.
                const res_const = res;
                return &res_const;
            }
        }
    };
}

build.zig:

const std = @import("std");

/// Declares the "demo" executable, exposes the function runner's captured
/// output to it as the "build_results" import, installs it, and registers a
/// "run" step.
pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // "build_results" as seen by the runner's copy of src/main.zig: the
    // module of a fresh options step to which no options have been added.
    const placeholder_results = b.addOptions().createModule();

    // Separate module for src/main.zig that the function runner imports.
    const main_for_runner = b.createModule(.{
        .root_source_file = b.path("src/main.zig"),
        .target = target,
        .optimize = optimize,
        .imports = &.{
            .{ .name = "build_results", .module = placeholder_results },
        },
    });

    // Captured stdout of the runner, wrapped in a module so main.zig can
    // reach it by the name "build_results".
    const results_file = addFnRunnerResultFile(b, main_for_runner, target);
    const results_module = b.createModule(.{
        .root_source_file = results_file,
    });

    const demo_module = b.createModule(.{
        .root_source_file = b.path("src/main.zig"),
        .target = target,
        .optimize = optimize,
        .imports = &.{
            .{ .name = "build_results", .module = results_module },
        },
    });

    const exe = b.addExecutable(.{
        .name = "demo",
        .root_module = demo_module,
    });
    b.installArtifact(exe);

    const run_cmd = b.addRunArtifact(exe);
    run_cmd.step.dependOn(b.getInstallStep());

    // Forward any arguments given to the build invocation to the program.
    if (b.args) |forwarded_args| run_cmd.addArgs(forwarded_args);

    b.step("run", "Run the app").dependOn(&run_cmd.step);
}

/// Builds and runs the "fn_runner" helper executable and returns its
/// captured stdout as a `LazyPath`.
///
/// `module` is made available to the helper under the import name "mod". The
/// generated program walks the declarations of `mod`; for every declaration
/// of type `mod.BuildResult` whose payload is `.uninitialized` it writes the
/// identifier and the computed result, each prefixed with its length as a
/// little-endian u64.
fn addFnRunnerResultFile(
    b: *std.Build,
    module: *std.Build.Module,
    target: std.Build.ResolvedTarget,
) std.Build.LazyPath {
    // Source of the helper program; written out verbatim below.
    const runner_source =
        \\const std = @import("std");
        \\const mod = @import("mod");
        \\pub fn main() !void {
        \\    const mod_decls = comptime std.meta.declarations(mod);
        \\
        \\    const writer = std.io.getStdOut().writer();
        \\
        \\    inline for (mod_decls) |decl_info| {
        \\        if (@TypeOf(@field(mod, decl_info.name)) == mod.BuildResult) {
        \\            const build_result: mod.BuildResult = @field(mod, decl_info.name);
        \\            if (build_result.payload == .uninitialized) {
        \\                try runFn(writer, build_result);
        \\            }
        \\        }
        \\    }
        \\}
        \\
        \\fn runFn(writer: anytype, comptime build_result: mod.BuildResult) !void {
        \\    const func = @field(fns, build_result.fn_name);
        \\
        \\    try writer.writeInt(u64, build_result.identifier.len, .little);
        \\    try writer.writeAll(build_result.identifier);
        \\
        \\    if (func == fns.numberComputer) {
        \\        var buf: [1024]u8 = undefined;
        \\
        \\        const number_computer_res = try fns.numberComputer(@field(mod, build_result.arg_names[0]), @field(mod, build_result.arg_names[1]), &buf);
        \\
        \\        try writer.writeInt(u64, number_computer_res.len, .little);
        \\        try writer.writeAll(number_computer_res);
        \\    } else {
        \\        @compileError("function '" ++ build_result.fn_name ++ "' not implemented");
        \\    }
        \\}
        \\
        \\const fns = struct {
        \\    pub fn numberComputer(a: anytype, b: anytype, buf: []u8) ![]const u8 {
        \\        return try std.fmt.bufPrint(buf, "{d}", .{a + b});
        \\    }
        \\};
    ;

    const source_step = b.addOptions();
    source_step.contents.writer().writeAll(runner_source) catch @panic("OOM");

    const runner_module = b.createModule(.{
        .root_source_file = source_step.getOutput(),
        .target = target,
        .imports = &.{
            .{ .name = "mod", .module = module },
        },
    });

    const runner_exe = b.addExecutable(.{
        .name = "fn_runner",
        .root_module = runner_module,
    });

    return b.addRunArtifact(runner_exe).captureStdOut();
}