A few days ago, I wrote a tool for checking whether duck types satisfy contracts, which is similar in functionality to C++ 's concept/requries. I have attempted to apply this mechanism to Zig and have achieved certain results. Currently, I have written two predefined traits for vftrait, but they have not yet been merged into the main branch.
I have implemented a static allocator interface, and in addition to the definition of the allocator interface itself, the packaging implementation of the allocator directly reuses the allocator code from the Zig standard library. I want to compare the efficiency difference between the two, so I wrote the following code:
const vftrait = @import("vftrait");
const std = @import("std");
fn static(io: std.Io, comptime times: usize, cost: *i96) void {
const start = std.Io.Clock.now(.awake, io);
{
var buffer: [64]u8 = undefined;
var allocator: vftrait.traits.Allocator(std.heap.FixedBufferAllocator) = .from(.init(&buffer));
for (0..times) |_| {
const p = allocator.create(u32) catch unreachable;
defer allocator.destroy(p);
p.* = 0;
const s = allocator.alloc(u32, 10) catch unreachable;
defer allocator.free(s);
for (s) |*e| e.* = 0;
}
}
const end = std.Io.Clock.now(.awake, io);
const ns = start.durationTo(end).toNanoseconds();
cost.* = ns;
}
fn dynamic(io: std.Io, comptime times: usize, cost: *i96) void {
const start = std.Io.Clock.now(.awake, io);
{
var buffer: [64]u8 = undefined;
var gpa: std.heap.FixedBufferAllocator = .init(&buffer);
const allocator = gpa.allocator();
for (0..times) |_| {
const p = allocator.create(u32) catch unreachable;
defer allocator.destroy(p);
p.* = 0;
const s = allocator.alloc(u32, 10) catch unreachable;
defer allocator.free(s);
for (s) |*e| e.* = 0;
}
}
const end = std.Io.Clock.now(.awake, io);
const ns = start.durationTo(end).toNanoseconds();
cost.* = ns;
}
pub fn main(init: std.process.Init) !void {
@setEvalBranchQuota(2000);
const io = init.io;
const times: usize = 2000;
const count: usize = 50;
var static_cost: [count]i96 = undefined;
var dynamic_cost: [count]i96 = undefined;
for (0..count) |i| {
static(io, times, &static_cost[i]);
dynamic(io, times, &dynamic_cost[i]);
}
for (0..count) |i| {
std.debug.print(
"static_cost[{}] = {} \t dynamic_cost[{}] = {} \t diff = {}\n",
.{ i, static_cost[i], i, dynamic_cost[i], dynamic_cost[i] - static_cost[i] },
);
}
}
In Debug mode, I received the following output:
static_cost[0] = 203400 dynamic_cost[0] = 260600 diff = 57200
static_cost[1] = 198500 dynamic_cost[1] = 304300 diff = 105800
static_cost[2] = 197200 dynamic_cost[2] = 239200 diff = 42000
static_cost[3] = 196000 dynamic_cost[3] = 239300 diff = 43300
static_cost[4] = 195500 dynamic_cost[4] = 239100 diff = 43600
static_cost[5] = 195100 dynamic_cost[5] = 244200 diff = 49100
static_cost[6] = 195900 dynamic_cost[6] = 238900 diff = 43000
static_cost[7] = 211200 dynamic_cost[7] = 240000 diff = 28800
static_cost[8] = 193700 dynamic_cost[8] = 238700 diff = 45000
static_cost[9] = 193500 dynamic_cost[9] = 239000 diff = 45500
static_cost[10] = 199800 dynamic_cost[10] = 241600 diff = 41800
static_cost[11] = 196300 dynamic_cost[11] = 244700 diff = 48400
static_cost[12] = 204300 dynamic_cost[12] = 363700 diff = 159400
static_cost[13] = 292500 dynamic_cost[13] = 341400 diff = 48900
static_cost[14] = 250100 dynamic_cost[14] = 239500 diff = -10600
static_cost[15] = 195000 dynamic_cost[15] = 323500 diff = 128500
static_cost[16] = 194100 dynamic_cost[16] = 238700 diff = 44600
static_cost[17] = 206600 dynamic_cost[17] = 244400 diff = 37800
static_cost[18] = 193100 dynamic_cost[18] = 242600 diff = 49500
static_cost[19] = 200800 dynamic_cost[19] = 244100 diff = 43300
static_cost[20] = 195000 dynamic_cost[20] = 241200 diff = 46200
static_cost[21] = 195400 dynamic_cost[21] = 238400 diff = 43000
static_cost[22] = 202800 dynamic_cost[22] = 239100 diff = 36300
static_cost[23] = 193500 dynamic_cost[23] = 239100 diff = 45600
static_cost[24] = 197800 dynamic_cost[24] = 239800 diff = 42000
static_cost[25] = 197500 dynamic_cost[25] = 239400 diff = 41900
static_cost[26] = 197100 dynamic_cost[26] = 239700 diff = 42600
static_cost[27] = 193600 dynamic_cost[27] = 239200 diff = 45600
static_cost[28] = 218100 dynamic_cost[28] = 242300 diff = 24200
static_cost[29] = 199100 dynamic_cost[29] = 238800 diff = 39700
static_cost[30] = 199700 dynamic_cost[30] = 238400 diff = 38700
static_cost[31] = 201700 dynamic_cost[31] = 238500 diff = 36800
static_cost[32] = 198800 dynamic_cost[32] = 246500 diff = 47700
static_cost[33] = 198900 dynamic_cost[33] = 238800 diff = 39900
static_cost[34] = 201500 dynamic_cost[34] = 239300 diff = 37800
static_cost[35] = 194000 dynamic_cost[35] = 238700 diff = 44700
static_cost[36] = 195300 dynamic_cost[36] = 238600 diff = 43300
static_cost[37] = 198200 dynamic_cost[37] = 238800 diff = 40600
static_cost[38] = 194200 dynamic_cost[38] = 238500 diff = 44300
static_cost[39] = 222300 dynamic_cost[39] = 245300 diff = 23000
static_cost[40] = 194300 dynamic_cost[40] = 238200 diff = 43900
static_cost[41] = 198100 dynamic_cost[41] = 239100 diff = 41000
static_cost[42] = 193200 dynamic_cost[42] = 238500 diff = 45300
static_cost[43] = 196800 dynamic_cost[43] = 238700 diff = 41900
static_cost[44] = 197800 dynamic_cost[44] = 238900 diff = 41100
static_cost[45] = 197900 dynamic_cost[45] = 243600 diff = 45700
static_cost[46] = 220900 dynamic_cost[46] = 239800 diff = 18900
static_cost[47] = 195900 dynamic_cost[47] = 238600 diff = 42700
static_cost[48] = 196900 dynamic_cost[48] = 243100 diff = 46200
static_cost[49] = 193600 dynamic_cost[49] = 242700 diff = 49100
It’s similar in ReleaseFast mode, but the difference is more stable than in Debug mode:
static_cost[0] = 5900 dynamic_cost[0] = 10500 diff = 4600
static_cost[1] = 5900 dynamic_cost[1] = 10400 diff = 4500
static_cost[2] = 5800 dynamic_cost[2] = 10400 diff = 4600
static_cost[3] = 5900 dynamic_cost[3] = 10500 diff = 4600
static_cost[4] = 5900 dynamic_cost[4] = 10400 diff = 4500
static_cost[5] = 5800 dynamic_cost[5] = 10500 diff = 4700
static_cost[6] = 5900 dynamic_cost[6] = 10500 diff = 4600
static_cost[7] = 5800 dynamic_cost[7] = 10400 diff = 4600
static_cost[8] = 5900 dynamic_cost[8] = 10500 diff = 4600
static_cost[9] = 5800 dynamic_cost[9] = 10400 diff = 4600
static_cost[10] = 5900 dynamic_cost[10] = 10500 diff = 4600
static_cost[11] = 5900 dynamic_cost[11] = 10500 diff = 4600
static_cost[12] = 5900 dynamic_cost[12] = 10400 diff = 4500
static_cost[13] = 5800 dynamic_cost[13] = 10500 diff = 4700
static_cost[14] = 5900 dynamic_cost[14] = 10500 diff = 4600
static_cost[15] = 5800 dynamic_cost[15] = 10400 diff = 4600
static_cost[16] = 5900 dynamic_cost[16] = 10500 diff = 4600
static_cost[17] = 5900 dynamic_cost[17] = 10500 diff = 4600
static_cost[18] = 5900 dynamic_cost[18] = 10400 diff = 4500
static_cost[19] = 5900 dynamic_cost[19] = 10500 diff = 4600
static_cost[20] = 5900 dynamic_cost[20] = 10400 diff = 4500
static_cost[21] = 5800 dynamic_cost[21] = 10500 diff = 4700
static_cost[22] = 5900 dynamic_cost[22] = 10400 diff = 4500
static_cost[23] = 5800 dynamic_cost[23] = 10500 diff = 4700
static_cost[24] = 5900 dynamic_cost[24] = 10400 diff = 4500
static_cost[25] = 5900 dynamic_cost[25] = 10500 diff = 4600
static_cost[26] = 5900 dynamic_cost[26] = 10400 diff = 4500
static_cost[27] = 5800 dynamic_cost[27] = 10400 diff = 4600
static_cost[28] = 5900 dynamic_cost[28] = 10500 diff = 4600
static_cost[29] = 5900 dynamic_cost[29] = 10400 diff = 4500
static_cost[30] = 5800 dynamic_cost[30] = 10500 diff = 4700
static_cost[31] = 5900 dynamic_cost[31] = 10400 diff = 4500
static_cost[32] = 5800 dynamic_cost[32] = 10500 diff = 4700
static_cost[33] = 5900 dynamic_cost[33] = 10500 diff = 4600
static_cost[34] = 5800 dynamic_cost[34] = 10400 diff = 4600
static_cost[35] = 5900 dynamic_cost[35] = 10500 diff = 4600
static_cost[36] = 5800 dynamic_cost[36] = 10400 diff = 4600
static_cost[37] = 5900 dynamic_cost[37] = 10500 diff = 4600
static_cost[38] = 5900 dynamic_cost[38] = 10400 diff = 4500
static_cost[39] = 5800 dynamic_cost[39] = 10400 diff = 4600
static_cost[40] = 5900 dynamic_cost[40] = 10400 diff = 4500
static_cost[41] = 5800 dynamic_cost[41] = 10400 diff = 4600
static_cost[42] = 5900 dynamic_cost[42] = 10500 diff = 4600
static_cost[43] = 5900 dynamic_cost[43] = 10400 diff = 4500
static_cost[44] = 5900 dynamic_cost[44] = 10500 diff = 4600
static_cost[45] = 5900 dynamic_cost[45] = 10400 diff = 4500
static_cost[46] = 5900 dynamic_cost[46] = 10500 diff = 4600
static_cost[47] = 5900 dynamic_cost[47] = 10400 diff = 4500
static_cost[48] = 5900 dynamic_cost[48] = 10500 diff = 4600
static_cost[49] = 5900 dynamic_cost[49] = 10400 diff = 4500
The performance of the allocator interface for dynamic dispatch is lower than that for static dispatch. This is not surprising, I speculate that the reason is that dynamically dispatched virtual functions cannot inline the interface implementation to the call point like static dispatch, thus introducing additional overhead.
I wonder if Zig needs to introduce some static distribution facilities into language or standard libraries?