Slow compile time

Hi,

I have a simple zig file for generating chess moves. I have pasted it below, but it is not that interesting.

const std = @import("std");

pub fn main() !void {
    const attacks = Attacks{};
    _ = attacks;
}

const chess = struct {
    const Piece = enum { P, N, B, R, Q, K, p, n, b, r, q, k, none };
    pub const Color = enum(u1) { white, black };
    const Square = enum(u6) {
        // zig fmt: off
    a8, b8, c8, d8, e8, f8, g8, h8,
    a7, b7, c7, d7, e7, f7, g7, h7,
    a6, b6, c6, d6, e6, f6, g6, h6,
    a5, b5, c5, d5, e5, f5, g5, h5,
    a4, b4, c4, d4, e4, f4, g4, h4,
    a3, b3, c3, d3, e3, f3, g3, h3,
    a2, b2, c2, d2, e2, f2, g2, h2,
    a1, b1, c1, d1, e1, f1, g1, h1,
    // zig fmt: on
    };
};

const Attacks = struct {
    // leaper pieces
    pawn_attacks: [2][64]u64 = genPawnAttacks(),
    king_attacks: [64]u64 = genKingAttacks(),
    knight_attacks: [64]u64 = genKnightAttacks(),
    bishop_masks: [64]u64 = genBishopMasks(),
    rook_masks: [64]u64 = genRookMasks(),
    bishop_attacks: [64][512]u64 = genBishopAttacks(),
    rook_attacks: [64][4096]u64 = genRookAttacks(),

    // rider pieces
    const bishop_magic: [64]u64 = .{
        0x0002020202020200, 0x0002020202020000, 0x0004010202000000, 0x0004040080000000,
        0x0001104000000000, 0x0000821040000000, 0x0000410410400000, 0x0000104104104000,
        0x0000040404040400, 0x0000020202020200, 0x0000040102020000, 0x0000040400800000,
        0x0000011040000000, 0x0000008210400000, 0x0000004104104000, 0x0000002082082000,
        0x0004000808080800, 0x0002000404040400, 0x0001000202020200, 0x0000800802004000,
        0x0000800400A00000, 0x0000200100884000, 0x0000400082082000, 0x0000200041041000,
        0x0002080010101000, 0x0001040008080800, 0x0000208004010400, 0x0000404004010200,
        0x0000840000802000, 0x0000404002011000, 0x0000808001041000, 0x0000404000820800,
        0x0001041000202000, 0x0000820800101000, 0x0000104400080800, 0x0000020080080080,
        0x0000404040040100, 0x0000808100020100, 0x0001010100020800, 0x0000808080010400,
        0x0000820820004000, 0x0000410410002000, 0x0000082088001000, 0x0000002011000800,
        0x0000080100400400, 0x0001010101000200, 0x0002020202000400, 0x0001010101000200,
        0x0000410410400000, 0x0000208208200000, 0x0000002084100000, 0x0000000020880000,
        0x0000001002020000, 0x0000040408020000, 0x0004040404040000, 0x0002020202020000,
        0x0000104104104000, 0x0000002082082000, 0x0000000020841000, 0x0000000000208800,
        0x0000000010020200, 0x0000000404080200, 0x0000040404040400, 0x0002020202020200,
    };

    const rook_magic: [64]u64 = .{
        0x0080001020400080, 0x0040001000200040, 0x0080081000200080, 0x0080040800100080,
        0x0080020400080080, 0x0080010200040080, 0x0080008001000200, 0x0080002040800100,
        0x0000800020400080, 0x0000400020005000, 0x0000801000200080, 0x0000800800100080,
        0x0000800400080080, 0x0000800200040080, 0x0000800100020080, 0x0000800040800100,
        0x0000208000400080, 0x0000404000201000, 0x0000808010002000, 0x0000808008001000,
        0x0000808004000800, 0x0000808002000400, 0x0000010100020004, 0x0000020000408104,
        0x0000208080004000, 0x0000200040005000, 0x0000100080200080, 0x0000080080100080,
        0x0000040080080080, 0x0000020080040080, 0x0000010080800200, 0x0000800080004100,
        0x0000204000800080, 0x0000200040401000, 0x0000100080802000, 0x0000080080801000,
        0x0000040080800800, 0x0000020080800400, 0x0000020001010004, 0x0000800040800100,
        0x0000204000808000, 0x0000200040008080, 0x0000100020008080, 0x0000080010008080,
        0x0000040008008080, 0x0000020004008080, 0x0000010002008080, 0x0000004081020004,
        0x0000204000800080, 0x0000200040008080, 0x0000100020008080, 0x0000080010008080,
        0x0000040008008080, 0x0000020004008080, 0x0000800100020080, 0x0000800041000080,
        0x00FFFCDDFCED714A, 0x007FFCDDFCED714A, 0x003FFFCDFFD88096, 0x0000040810002101,
        0x0001000204080011, 0x0001000204000801, 0x0001000082000401, 0x0001FFFAABFAD1A2,
    };

    const bishop_relevant_bits = genRelevantOccupancy(.B);
    const rook_relevant_bits = genRelevantOccupancy(.R);

    fn genBishopAttacks() [64][512]u64 {
        var ret: [64][512]u64 = undefined;

        for (std.enums.values(chess.Square)) |square| {
            const square_int = @enumToInt(square);
            const attack_mask = genSteps(square, .B);

            // init relevant occupancy bit count
            const relevant_bits = @popCount(attack_mask);

            const occupancy_idx = @as(u64, 1) << @intCast(u6, relevant_bits);

            var idx: usize = 0;
            while (idx < occupancy_idx) : (idx += 1) {
                const occupancy = setOccupancy(idx, relevant_bits, attack_mask);
                const res = @mulWithOverflow(occupancy, bishop_magic[square_int])[0];
                const magic_index = res >>
                    @intCast(u6, @as(u7, 64) - bishop_relevant_bits[square_int]);

                ret[square_int][magic_index] =
                    attackWithBlocker(occupancy, square, .B);
            }
        }

        return ret;
    }

    fn genRookAttacks() [64][4096]u64 {
        var ret: [64][4096]u64 = undefined;

        for (std.enums.values(chess.Square)) |square| {
            const square_int = @enumToInt(square);
            const attack_mask = genSteps(square, .R);

            // init relevant occupancy bit count
            const relevant_bits = @popCount(attack_mask);

            const occupancy_idx = @as(u64, 1) << @intCast(u6, relevant_bits);

            var idx: usize = 0;
            while (idx < occupancy_idx) : (idx += 1) {
                const occupancy = setOccupancy(idx, relevant_bits, attack_mask);
                const res = @mulWithOverflow(occupancy, rook_magic[@enumToInt(square)])[0];
                const magic_index = res >>
                    @intCast(u6, @as(u7, 64) - rook_relevant_bits[square_int]);

                ret[square_int][magic_index] =
                    attackWithBlocker(occupancy, square, .R);
            }
        }

        return ret;
    }

    pub fn getBishopAttacks(self: *const @This(), square: u6, occupancy: u64) u64 {
        var mut_occupancy = occupancy;

        mut_occupancy &= self.bishop_masks[square];
        mut_occupancy = @mulWithOverflow(mut_occupancy, bishop_magic[square])[0];
        mut_occupancy >>= @intCast(u6, @as(u7, 64) - bishop_relevant_bits[square]);

        return self.bishop_attacks[square][mut_occupancy];
    }

    pub fn getRookAttacks(self: *const @This(), square: u6, occupancy: u64) u64 {
        var mut_occupancy = occupancy;

        mut_occupancy &= self.rook_masks[square];
        mut_occupancy = @mulWithOverflow(mut_occupancy, rook_magic[square])[0];
        mut_occupancy >>= @intCast(u6, @as(u7, 64) - rook_relevant_bits[square]);

        return self.rook_attacks[square][mut_occupancy];
    }

    pub fn getQueenAttacks(self: *const @This(), square: u6, occupancy: u64) u64 {
        return getBishopAttacks(self, square, occupancy) |
            getRookAttacks(self, square, occupancy);
    }

    pub fn rookXray(self: *const @This(), square: u6, occupancy: u64) u64 {
        const attacks = self.getRookAttacks(square, occupancy);
        return attacks ^ self.getRookAttacks(square, occupancy ^ (attacks & occupancy));
    }

    pub fn bishopXray(self: *const @This(), square: u6, occupancy: u64) u64 {
        const attacks = self.getBishopAttacks(square, occupancy);
        return attacks ^ self.getBishopAttacks(square, occupancy ^ (attacks & occupancy));
    }
};

fn genPawnAttacks() [@typeInfo(chess.Color).Enum.fields.len][@typeInfo(chess.Square).Enum.fields.len]u64 {
    var ret: [2][64]u64 = [_][64]u64{[_]u64{0} ** 64} ** 2;

    for (std.enums.values(chess.Square)) |square| {
        ret[@enumToInt(chess.Color.white)][@enumToInt(square)] = genSteps(square, .P);
        ret[@enumToInt(chess.Color.black)][@enumToInt(square)] = genSteps(square, .p);
    }

    return ret;
}

fn genKnightAttacks() [@typeInfo(chess.Square).Enum.fields.len]u64 {
    var ret: [64]u64 = [_]u64{0} ** 64;

    for (std.enums.values(chess.Square)) |square| {
        ret[@enumToInt(square)] = genSteps(square, .N);
    }

    return ret;
}

fn genKingAttacks() [@typeInfo(chess.Square).Enum.fields.len]u64 {
    var ret: [64]u64 = [_]u64{0} ** 64;

    for (std.enums.values(chess.Square)) |square| {
        ret[@enumToInt(square)] = genSteps(square, .K);
    }

    return ret;
}

fn genBishopMasks() [@typeInfo(chess.Square).Enum.fields.len]u64 {
    var ret: [64]u64 = [_]u64{0} ** 64;

    for (std.enums.values(chess.Square)) |square| {
        ret[@enumToInt(square)] = genSteps(square, .B);
    }

    return ret;
}

fn genRookMasks() [@typeInfo(chess.Square).Enum.fields.len]u64 {
    var ret: [64]u64 = [_]u64{0} ** 64;

    for (std.enums.values(chess.Square)) |square| {
        ret[@enumToInt(square)] = genSteps(square, .R);
    }

    return ret;
}

fn genSteps(square: chess.Square, piece: chess.Piece) u64 {
    const rank = @intCast(isize, @enumToInt(square) / 8);
    const file = @intCast(isize, @enumToInt(square) % 8);

    const rider = switch (piece) {
        .P, .p => false,
        .N, .n => false,
        .B, .b => true,
        .R, .r => true,
        .K, .k => false,
        .Q, .q, .none => @panic("unexpected piece"),
    };

    // TODO: I repeat some moves here to make zig happy, but ugly.
    const moves = switch (piece) {
        .P => [_][2]i3{ .{ -1, -1 }, .{ -1, 1 }, .{ -1, -1 }, .{ -1, 1 }, .{ -1, -1 }, .{ -1, 1 }, .{ -1, -1 }, .{ -1, 1 } },
        .p => [_][2]i3{ .{ 1, -1 }, .{ 1, 1 }, .{ 1, -1 }, .{ 1, 1 }, .{ 1, -1 }, .{ 1, 1 }, .{ 1, -1 }, .{ 1, 1 } },
        .N, .n => [_][2]i3{ .{ -2, -1 }, .{ -2, 1 }, .{ 2, -1 }, .{ 2, 1 }, .{ -1, -2 }, .{ 1, 2 }, .{ -1, 2 }, .{ 1, -2 } },
        .B, .b => [_][2]i3{ .{ -1, -1 }, .{ -1, 1 }, .{ 1, -1 }, .{ 1, 1 }, .{ -1, -1 }, .{ -1, 1 }, .{ 1, -1 }, .{ 1, 1 } },
        .K, .k => [_][2]i3{ .{ -1, -1 }, .{ -1, 1 }, .{ 1, -1 }, .{ 1, 1 }, .{ -1, 0 }, .{ 0, 1 }, .{ 1, 0 }, .{ 0, -1 } },
        .R, .r => [_][2]i3{ .{ -1, 0 }, .{ 1, 0 }, .{ 0, -1 }, .{ 0, 1 }, .{ -1, 0 }, .{ 1, 0 }, .{ 0, -1 }, .{ 0, 1 } },
        .Q, .q, .none => @panic("unexpected piece"),
    };

    var attacks: u64 = 0;

    @setEvalBranchQuota(10_000);

    var step: i5 = 1;
    while (step < 8 - 1) : (step += 1) {
        for (moves) |m| {
            const dr = rank + m[0] * step;
            const df = file + m[1] * step;

            switch (rider) {
                true => {
                    if (m[0] > 0 and dr > 6) continue;
                    if (m[0] < 0 and dr < 1) continue;
                    if (m[1] > 0 and df > 6) continue;
                    if (m[1] < 0 and df < 1) continue;
                },
                false => {
                    if (dr < 0 or dr > 7) continue;
                    if (df < 0 or df > 7) continue;
                },
            }

            const s = @intCast(u6, dr * 8 + df);

            attacks |= @as(u64, 1) << s;
        }
        if (!rider) break;
    }

    return attacks;
}

fn attackWithBlocker(blocks: u64, square: chess.Square, piece: chess.Piece) u64 {
    const rank = @intCast(isize, @enumToInt(square) / 8);
    const file = @intCast(isize, @enumToInt(square) % 8);

    const moves = switch (piece) {
        .B, .b => [_][2]i3{
            .{ -1, -1 },
            .{ -1, 1 },
            .{ 1, -1 },
            .{ 1, 1 },
        },
        .R, .r => [_][2]i3{
            .{ -1, 0 },
            .{ 1, 0 },
            .{ 0, -1 },
            .{ 0, 1 },
        },
        else => @panic("unexpected piece"),
    };

    var attacks: u64 = 0;

    @setEvalBranchQuota(10_000_000);

    for (moves) |m| {
        var step: i5 = 1;
        while (step < 8) : (step += 1) {
            const dr = rank + m[0] * step;
            const df = file + m[1] * step;

            if (m[0] > 0 and dr > 7) break;
            if (m[0] < 0 and dr < 0) break;
            if (m[1] > 0 and df > 7) break;
            if (m[1] < 0 and df < 0) break;

            const s = @intCast(u6, dr * 8 + df);
            const mask = @as(u64, 1) << s;

            attacks |= mask;

            // stop when we hit a blocker
            if (mask & blocks != 0) break;
        }
    }

    return attacks;
}

fn setOccupancy(index: usize, bits_in_mask: u64, attack_mask: u64) u64 {
    var occupancy: u64 = 0;

    var attack = attack_mask;

    var count: u6 = 0;
    while (count < bits_in_mask) : (count += 1) {
        const square = @intCast(u6, @ctz(attack));

        // pop square bit
        attack ^= @as(u64, 1) << square;

        if (index & @as(u64, 1) << count != 0) {
            occupancy |= @as(u64, 1) << square;
        }
    }

    return occupancy;
}

fn genRelevantOccupancy(piece: chess.Piece) [@typeInfo(chess.Square).Enum.fields.len]u4 {
    var ret: [64]u4 = [_]u4{0} ** 64;

    for (std.enums.values(chess.Square)) |square| {
        const count = @popCount(genSteps(square, piece));
        ret[@enumToInt(square)] = @intCast(u4, count);
    }

    return ret;
}

When I try to compile it on my rpi4 it takes 140 seconds. If I apply the following patch it drops down to 4 seconds.

--- main-comptime.zig
+++ main.zig
@@ -1,7 +1,7 @@
 const std = @import("std");

 pub fn main() !void {
-    const attacks = Attacks{};
+    const attacks = Attacks.init();
     _ = attacks;
 }

@@ -29,9 +29,10 @@
     knight_attacks: [64]u64 = genKnightAttacks(),
     bishop_masks: [64]u64 = genBishopMasks(),
     rook_masks: [64]u64 = genRookMasks(),
-    bishop_attacks: [64][512]u64 = genBishopAttacks(),
-    rook_attacks: [64][4096]u64 = genRookAttacks(),

+    var bishop_attacks: [64][512]u64 = undefined;
+    var rook_attacks: [64][4096]u64 = undefined;
+
     // rider pieces
     const bishop_magic: [64]u64 = .{
         0x0002020202020200, 0x0002020202020000, 0x0004010202000000, 0x0004040080000000,
@@ -74,6 +75,13 @@
     const bishop_relevant_bits = genRelevantOccupancy(.B);
     const rook_relevant_bits = genRelevantOccupancy(.R);

+    pub fn init() @This() {
+        var attacks: @This() = .{};
+        bishop_attacks = genBishopAttacks();
+        rook_attacks = genRookAttacks();
+        return attacks;
+    }
+
     fn genBishopAttacks() [64][512]u64 {
         var ret: [64][512]u64 = undefined;

@@ -135,7 +143,7 @@
         mut_occupancy = @mulWithOverflow(mut_occupancy, bishop_magic[square])[0];
         mut_occupancy >>= @intCast(u6, @as(u7, 64) - bishop_relevant_bits[square]);

-        return self.bishop_attacks[square][mut_occupancy];
+        return bishop_attacks[square][mut_occupancy];
     }

     pub fn getRookAttacks(self: *const @This(), square: u6, occupancy: u64) u64 {
@@ -145,7 +153,7 @@
         mut_occupancy = @mulWithOverflow(mut_occupancy, rook_magic[square])[0];
         mut_occupancy >>= @intCast(u6, @as(u7, 64) - rook_relevant_bits[square]);

-        return self.rook_attacks[square][mut_occupancy];
+        return rook_attacks[square][mut_occupancy];
     }

     pub fn getQueenAttacks(self: *const @This(), square: u6, occupancy: u64) u64 {

As You can see I just move the bishop and rook move precalculations from comptime to runtime. Can You tell me if this difference is expected? Can I do something about it besides the patch?

Is it possible with the new build system to create some kind of module from it and just link it?

Thanks,

As another data point, on my Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz using zig 0.11.0-dev.3739+939e4d81e, your code example takes 38s to compile. I’m not sure how much of this is accounted for by better hardware, but there was also a big branch merged recently that is expected to especially affect compilation speed for comptime code. Another optimization PR will be merged soon.

There are several planned enhancements to address this use case, all of which will independently help in different ways:

  • incremental compilation, which will cache comptime function call results such as this. Then the 136 seconds will only happen when the relevant code is edited.
  • better progress reporting and time reporting of compilation, which could provide visibility into why compile times are long in cases of larger codebases
  • improve the performance of compile-time code execution to be similar to CPython speed. This is achievable but will require some significant enhancements to several parts of the compiler pipeline.

Yes, this use case is now handled smoothly by the build system API. You can create files at build time, and then refer to them via a FileSource object, which can be given as a module to be imported.

This is probably the best workaround until some of these larger issues in Zig are addressed.

If you just want a separate compilation unit / static library for some code, that’s even easier.

1 Like

Hi,

I forgot to mention that the measurements were made with dev.3332, now I have updated to dev.3737 which contains the internPool changes, but not jacobly0’s optimizations, but it is even slower, around 200 seconds.

So these results are expected at this state of the compiler. I was not sure if I am doing something stupid or not. I will try to figure out the separate compilation.

Thanks,

That’s unfortunate, but is consistent with the results that we have seen while working on it. I expect jacobly0’s optimizations to cut that regression in half. For me, it reduced compilation time of your example from 38s to 32s.

The upshot of this is that it is a big step towards my first bullet point above - incremental compilation. And while it is an unfortunate regression, it does not compromise the strategy that I have in mind for the third bullet point.