Is it Ok to use std.heap.page_allocator directly?

squeek502 · December 28, 2023, 3:38am

There are a few ways to interpret your question, so here’s some code with a few different examples:

const std = @import("std");

pub fn main() !void {
    // Fill a normal ArrayList with page_size bytes.
    // This depends on the 'better capacity' calculations that the ArrayList
    // does to reduce the number of allocations when growing.
    std.debug.print("Fill a normal ArrayList with page_size bytes\n", .{});
    {
        var counting_allocator = CountingAllocator.init(std.heap.page_allocator);
        const allocator = counting_allocator.allocator();

        var array_list = std.ArrayList(u8).init(allocator);
        defer array_list.deinit();

        var last_capacity: usize = 0;
        for (0..std.mem.page_size) |i| {
            try array_list.append('a');
            if (array_list.capacity != last_capacity) {
                std.debug.print(" append #{}: capacity grew to {}\n", .{ i + 1, array_list.capacity });
            }
            last_capacity = array_list.capacity;
        }

        counting_allocator.summary();
    }

    // initCapacity with one page of data
    // This doesn't depend on the behavior of page_allocator at all, the allocator
    // could be swapped out for any other allocator and the behavior would be the same.
    std.debug.print("\ninitCapacity with one page of data\n", .{});
    {
        var counting_allocator = CountingAllocator.init(std.heap.page_allocator);
        const allocator = counting_allocator.allocator();

        // This will only allocate once as long as the array doesn't need to resize
        // at any point.
        var array_list = try std.ArrayList(u8).initCapacity(
            allocator,
            std.mem.page_size,
        );
        defer array_list.deinit();

        for (0..std.mem.page_size) |_| {
            try array_list.append('a');
        }
        // Any more append calls would require the ArrayList to resize
        // and allocate.

        counting_allocator.summary();
    }

    // Allocate one page and then use it as the buffer for an ArrayList
    // This doesn't depend on the behavior of page_allocator at all, the allocator
    // could be swapped out for any other allocator and the behavior would be the same.
    std.debug.print("\nAllocate one page and then use it as the buffer for an ArrayList\n", .{});
    {
        var counting_allocator = CountingAllocator.init(std.heap.page_allocator);
        const allocator = counting_allocator.allocator();

        const one_page = try allocator.alloc(u8, std.mem.page_size);
        defer allocator.free(one_page);

        // initBuffer is new as of https://github.com/ziglang/zig/pull/18067
        // Calling any function that takes an allocator is a footgun from
        // here on out.
        var array_list = std.ArrayListUnmanaged(u8).initBuffer(one_page);

        for (0..std.mem.page_size) |_| {
            array_list.appendAssumeCapacity('a');
        }
        // Any more appendAssumeCapacity calls would invoke illegal behavior:
        // panic: reached unreachable code
        // lib/std/array_list.zig:1056:19: in addOneAssumeCapacity
        //    assert(self.items.len < self.capacity);

        counting_allocator.summary();
    }
}

// modified version of https://github.com/nektro/gimme/blob/master/src/CountingAllocator.zig
const CountingAllocator = struct {
    child_allocator: std.mem.Allocator,
    total_bytes: u64 = 0,
    allocs: u64 = 0,
    attempted_resizes: u64 = 0,
    in_place_resizes: u64 = 0,

    pub fn init(child_allocator: std.mem.Allocator) CountingAllocator {
        return .{ .child_allocator = child_allocator };
    }

    pub fn allocator(self: *CountingAllocator) std.mem.Allocator {
        return .{
            .ptr = self,
            .vtable = &.{ .alloc = alloc, .resize = resize, .free = free },
        };
    }

    fn summary(self: *CountingAllocator) void {
        std.debug.print(" {} total bytes requested\n", .{self.total_bytes});
        std.debug.print(" alloc was called {} times\n", .{self.allocs});
        std.debug.print(" {} out of {} attempted resizes were able to resize in place\n", .{ self.in_place_resizes, self.attempted_resizes });
    }

    fn alloc(ctx: *anyopaque, len: usize, ptr_align: u8, ret_addr: usize) ?[*]u8 {
        var self: *CountingAllocator = @ptrCast(@alignCast(ctx));
        self.allocs += 1;
        const ptr = self.child_allocator.rawAlloc(len, ptr_align, ret_addr) orelse return null;
        self.total_bytes += len;
        return ptr;
    }

    fn resize(ctx: *anyopaque, buf: []u8, buf_align: u8, new_len: usize, ret_addr: usize) bool {
        var self: *CountingAllocator = @ptrCast(@alignCast(ctx));
        self.attempted_resizes += 1;
        const old_len = buf.len;
        const stable = self.child_allocator.rawResize(buf, buf_align, new_len, ret_addr);
        if (stable) {
            self.in_place_resizes += 1;
            if (new_len > old_len) {
                self.total_bytes += new_len;
                self.total_bytes -= old_len;
            } else {
                self.total_bytes -= old_len;
                self.total_bytes += new_len;
            }
        }
        return stable;
    }

    fn free(ctx: *anyopaque, buf: []u8, buf_align: u8, ret_addr: usize) void {
        var self: *CountingAllocator = @ptrCast(@alignCast(ctx));
        return self.child_allocator.rawFree(buf, buf_align, ret_addr);
    }
};

Running this on Linux gives:

Fill a normal ArrayList with page_size bytes
 append #1: capacity grew to 8
 append #9: capacity grew to 20
 append #21: capacity grew to 38
 append #39: capacity grew to 65
 append #66: capacity grew to 105
 append #106: capacity grew to 165
 append #166: capacity grew to 255
 append #256: capacity grew to 390
 append #391: capacity grew to 593
 append #594: capacity grew to 897
 append #898: capacity grew to 1353
 append #1354: capacity grew to 2037
 append #2038: capacity grew to 3063
 append #3064: capacity grew to 4602
 7665 total bytes requested
 alloc was called 2 times
 12 out of 13 attempted resizes were able to resize in place

initCapacity with one page of data
 4096 total bytes requested
 alloc was called 1 times
 0 out of 0 attempted resizes were able to resize in place

Allocate one page and then use it as the buffer for an ArrayList
 4096 total bytes requested
 alloc was called 1 times
 0 out of 0 attempted resizes were able to resize in place

You can see what @LucasSantos91 was talking about in action with the first example, since most growth of the ArrayList was able to be handled via successful resize instead of needing a new allocation.

If/when std.array_list: Add *NoResize functions to ArrayListUnmanaged by notcancername · Pull Request #18361 · ziglang/zig · GitHub is merged, then the NoResize functions can be used to make the third example nicer (they will return an error if the ArrayList needs to resize, whereas the AssumeCapacity functions will invoke illegal behavior).

Also of interest is the growth algorithm of ArrayList:

github.com

ziglang/zig/blob/ff17b11692f6a87b55470b7fc03babd382460886/lib/std/array_list.zig#L1152-L1161


      
          /// Called when memory growth is necessary. Returns a capacity larger than
          /// minimum that grows super-linearly.
          fn growCapacity(current: usize, minimum: usize) usize {
              var new = current;
              while (true) {
                  new +|= new / 2 + 8;
                  if (new >= minimum)
                      return new;
              }
          }

Note that it depends on both the current capacity and the requested capacity. Note also that this calculation is not done during initCapacity–that will always allocate exactly the requested size.