StringHashMap reach unreachable code

Hello alls,

I do not know if I found a bug in StringHashMap or if I’m not using it correctly. When inserting data, the hashmap reaches unreachable code unless I’m using map.ensureTotalCapacity(). I had the understanding ensureTotalCapacity was optional.

Edit: zig 0.14.1

I’m looking for your insight before posting a bug report. Thank you !

const std = @import("std");

test "string hash map" {
	var map = std.StringHashMap([]const u8).init(std.testing.allocator);
	defer map.deinit();
	var list = std.ArrayList(u8).init(std.testing.allocator);
	defer list.deinit();
	var beg: usize = 0;
	var end: usize = 0;
	var buf: []const u8 = undefined;
	const num: usize = 100;
    // try map.ensureTotalCapacity(num);
	for (0..num) |i| {
		beg = list.items.len;
		try list.writer().print("SomeKey:{d:0>4}", .{i});
		end = list.items.len;
		buf = list.items[beg..end];
		std.debug.print("buf: {s}\n", .{buf});
		try map.put(buf, buf);
	}
}
buf: SomeKey:0000
buf: SomeKey:0001
buf: SomeKey:0002
buf: SomeKey:0003
buf: SomeKey:0004
buf: SomeKey:0005
buf: SomeKey:0006
buf: SomeKey:0007
buf: SomeKey:0008
buf: SomeKey:0009
buf: SomeKey:0010
buf: SomeKey:0011
buf: SomeKey:0012
thread 6131 panic: reached unreachable code
/usr/lib/zig/std/debug.zig:550:14: 0x104b9fd in assert (test)
    if (!ok) unreachable; // assertion failure
             ^
/usr/lib/zig/std/hash_map.zig:872:19: 0x10b61c2 in putAssumeCapacityNoClobberContext (test)
            assert(!self.containsContext(key, ctx));
                  ^
/usr/lib/zig/std/hash_map.zig:1448:58: 0x10a460c in grow (test)
                    map.putAssumeCapacityNoClobberContext(k, v, ctx);
                                                         ^
/usr/lib/zig/std/hash_map.zig:1295:30: 0x108ce0a in growIfNeeded (test)
                try self.grow(allocator, capacityForSize(self.load() + new_count), ctx);
                             ^
/usr/lib/zig/std/hash_map.zig:1114:34: 0x106bf09 in getOrPutContextAdapted__anon_8244 (test)
                self.growIfNeeded(allocator, 1, ctx) catch |err| {
                                 ^
/usr/lib/zig/std/hash_map.zig:1099:56: 0x10508ed in getOrPutContext (test)
            const gop = try self.getOrPutContextAdapted(allocator, key, ctx, ctx);
                                                       ^
/usr/lib/zig/std/hash_map.zig:1025:52: 0x104e680 in putContext (test)
            const result = try self.getOrPutContext(allocator, key, ctx);
                                                   ^
/usr/lib/zig/std/hash_map.zig:322:45: 0x104b745 in put (test)
            return self.unmanaged.putContext(self.allocator, key, value, self.ctx);
                                            ^
/home/pj/doc/src/zig/kvlog/src/test.zig:18:14: 0x10494cb in test.string hash map (test)
  try map.put(buf, buf);
             ^
/usr/lib/zig/compiler/test_runner.zig:214:25: 0x10fa699 in mainTerminal (test)
        if (test_fn.func()) |_| {
                        ^
/usr/lib/zig/compiler/test_runner.zig:62:28: 0x10f47bd in main (test)
        return mainTerminal();
                           ^
/usr/lib/zig/std/start.zig:651:22: 0x10f3c32 in posixCallMainAndExit (test)
            root.main();
                     ^
/usr/lib/zig/std/start.zig:271:5: 0x10f380d in _start (test)
    asm volatile (switch (native_arch) {
    ^
???:?:?: 0x0 in ??? (???)
error: the following test command crashed:
/home/pj/doc/src/zig/kvlog/.zig-cache/o/a9570226793c87fc4528ecdde1425acf/test --seed=0x6dff189d

Well, with ensureTotalCapacity, it hits a segfault:

buf: SomeKey:0043
Segmentation fault at address 0x7f89fb5c00d8
/usr/lib/zig/std/mem.zig:745:79: 0x106e1e3 in eqlBytes (test)
            x |= @as(u32, @bitCast(a[n..][0..4].*)) ^ @as(u32, @bitCast(b[n..][0..4].*));
                                                                              ^
/usr/lib/zig/std/mem.zig:700:24: 0x1052c3d in eql__anon_6595 (test)
        return eqlBytes(sliceAsBytes(a), sliceAsBytes(b));
                       ^
/usr/lib/zig/std/hash_map.zig:85:19: 0x10b6a5c in eqlString (test)
    return mem.eql(u8, a, b);
                  ^
/usr/lib/zig/std/hash_map.zig:80:25: 0x10a539c in eql (test)
        return eqlString(a, b);
                        ^
/usr/lib/zig/std/hash_map.zig:1160:32: 0x108e549 in getOrPutAssumeCapacityAdapted__anon_11857 (test)
                    if (ctx.eql(key, test_key.*)) {
                               ^
/usr/lib/zig/std/hash_map.zig:1126:54: 0x106c8bb in getOrPutContextAdapted__anon_8281 (test)
            return self.getOrPutAssumeCapacityAdapted(key, key_ctx);
                                                     ^
/usr/lib/zig/std/hash_map.zig:1099:56: 0x1050bbd in getOrPutContext (test)
            const gop = try self.getOrPutContextAdapted(allocator, key, ctx, ctx);
                                                       ^
/usr/lib/zig/std/hash_map.zig:1025:52: 0x104e850 in putContext (test)
            const result = try self.getOrPutContext(allocator, key, ctx);
                                                   ^
/usr/lib/zig/std/hash_map.zig:322:45: 0x104b845 in put (test)
            return self.unmanaged.putContext(self.allocator, key, value, self.ctx);
                                            ^
/home/pj/doc/src/zig/kvlog/src/test.zig:22:14: 0x1049582 in test.string hash map (test)
  try map.put(buf, buf);
             ^
/usr/lib/zig/compiler/test_runner.zig:214:25: 0x10fa869 in mainTerminal (test)
        if (test_fn.func()) |_| {
                        ^
/usr/lib/zig/compiler/test_runner.zig:62:28: 0x10f498d in main (test)
        return mainTerminal();
                           ^
/usr/lib/zig/std/start.zig:651:22: 0x10f3e02 in posixCallMainAndExit (test)
            root.main();
                     ^
/usr/lib/zig/std/start.zig:271:5: 0x10f39dd in _start (test)
    asm volatile (switch (native_arch) {
    ^
???:?:?: 0x0 in ??? (???)
error: the following test command crashed:
/home/pj/doc/src/zig/kvlog/.zig-cache/o/a9570226793c87fc4528ecdde1425acf/test --seed=0xadf65f39

I don’t think the map is the problem, the problem is that your ArrayList(u8) will eventually run out of space and at that time it will be either resized in place or relocated, if it is relocated that will make all the slices that you have created previously in this line, invalid:

buf = list.items[beg..end];

Why? because those slices still point to the old memory of ArrayList(u8), which was freed, when it grew to a new size. Then because keys become garbage memory, that screws up the internal invariants of the hash map (that keys should remain valid unchanged memory).

Instead of the ArrayList you could use an arena and then call:

try std.fmt.allocPrint(arena.allocator(), "SomeKey:{d:0>4}", .{i});

That way you get stable memory addresses, because the arena allocates chunks of memory that remain valid (and aren’t moved) until the arena is cleared/deinit-ed.

const std = @import("std");

test "string hash map" {
    var map = std.StringHashMap([]const u8).init(std.testing.allocator);
    defer map.deinit();

    var arena: std.heap.ArenaAllocator = .init(std.testing.allocator);
    defer arena.deinit();

    const num: usize = 100;
    for (0..num) |i| {
        const slice = try std.fmt.allocPrint(arena.allocator(), "SomeKey:{d:0>4}", .{i});
        std.debug.print("slice: {s}\n", .{slice});
        try map.put(slice, slice);
    }
}
6 Likes

I think is is exactly what @Sze said if you modify the code to observe what is happening :

const std = @import("std");

pub fn main() !void {
    var gpa_instance: std.heap.GeneralPurposeAllocator(.{
        .verbose_log = true,
        .never_unmap = true,
        .retain_metadata = true,
    }) = .init;
    defer _ = gpa_instance.deinit();
    const gpa = gpa_instance.allocator();

    var map: std.StringHashMap([]const u8) = .init(gpa);
    defer map.deinit();

    var list: std.ArrayList(u8) = .init(gpa);
    defer list.deinit();

    var beg: usize = 0;
    var end: usize = 0;
    var buf: []const u8 = undefined;
    const num: usize = 100;

    for (0..num) |i| {
        beg = list.items.len;
        try list.writer().print("SomeKey:{d:0>4}", .{i});
        std.debug.print("ptr = {*}\n", .{list.items.ptr});
        end = list.items.len;
        buf = list.items[beg..end];
        std.debug.print("buf: {s}\n", .{buf});
        try map.put(buf, buf);
    }
}

And you run it :

pollivie@laptop:~/workspace/temp/foo/src$ zbr
info(gpa): small alloc 128 bytes at 0x7f8beec80000
ptr = u8@7f8beec80000
buf: SomeKey:0000
info(gpa): small alloc 288 bytes at 0x7f8beec60000
ptr = u8@7f8beec80000
buf: SomeKey:0001
ptr = u8@7f8beec80000
buf: SomeKey:0002
ptr = u8@7f8beec80000
buf: SomeKey:0003
ptr = u8@7f8beec80000
buf: SomeKey:0004
ptr = u8@7f8beec80000
buf: SomeKey:0005
ptr = u8@7f8beec80000
buf: SomeKey:0006
info(gpa): small alloc 552 bytes at 0x7f8beec40000
info(gpa): small free 288 bytes at u8@7f8beec60000
ptr = u8@7f8beec80000
buf: SomeKey:0007
ptr = u8@7f8beec80000
buf: SomeKey:0008
ptr = u8@7f8beec80000
buf: SomeKey:0009
info(gpa): small alloc 320 bytes at 0x7f8beec20000
info(gpa): small free 128 bytes at u8@7f8beec80000
ptr = u8@7f8beec20000
buf: SomeKey:0010
ptr = u8@7f8beec20000
buf: SomeKey:0011
ptr = u8@7f8beec20000
buf: SomeKey:0012
info(gpa): small alloc 1080 bytes at 0x7f8beec00000
thread 96428 panic: reached unreachable code
/home/pollivie/.local/share/zigup/0.14.1/files/lib/std/debug.zig:550:14: 0x104cb0d in assert (foo)
    if (!ok) unreachable; // assertion failure

What you can observe is that as soon as the ptr changes the code panics. The reason it wasn’t panicking with the first few “allocations” is simply because it was probably extending the actual size and not reallocating it entirely. But as soon as it changes it panics.

2 Likes

I think this is a good way to investigate the details, but if you use two different gpa’s one with .verbose_log and one without it and swap them arround, you can see that the small alloc 1080 bytes is caused by the HashMap, I think that makes sense, when the HashMap needs to resize, it also has to go through the old keys and recalculate their hash to place them at their new position in the new bigger hash table and at that point the old slices that point to garbage cause the segfault.

1 Like

Indeed, I thought my arraylist would be resized in place unless I call a specific function, such as orderedRemove.

Thank you so much !

PS: I like this one:

    var gpa_instance: std.heap.GeneralPurposeAllocator(.{
        .verbose_log = true,
        .never_unmap = true,
        .retain_metadata = true,
    }) = .init;

If you need some clarification here in the source code, when you look for how the arraylist (and basically almost all dynamic data structure in the std implement the “resize”)

pub fn ensureTotalCapacityPrecise(self: *Self, new_capacity: usize) Allocator.Error!void {
    if (@sizeOf(T) == 0) {
        self.capacity = math.maxInt(usize);
        return;
    }

    if (self.capacity >= new_capacity) return;

    // Here we avoid copying allocated but unused bytes by
    // attempting a resize in place, and falling back to allocating
    // a new buffer and doing our own copy. With a realloc() call,
    // the allocator implementation would pointlessly copy our
    // extra capacity.
    const old_memory = self.allocatedSlice();
    if (self.allocator.remap(old_memory, new_capacity)) |new_memory| {
        self.items.ptr = new_memory.ptr;
        self.capacity = new_memory.len;
    } else {
        const new_memory = try self.allocator.alignedAlloc(T, alignment, new_capacity);
        @memcpy(new_memory[0..self.items.len], self.items);
        self.allocator.free(old_memory);
        self.items.ptr = new_memory.ptr;
        self.capacity = new_memory.len;
    }
}

As you can see it first tries to make it grow in place, but if it fails it falls back to the classic alloc + copy

Yeah that’s why it succeeds the first few times, as soon as the arraylist can’t be resized in place, and changes it’s address, and the hashmap needs to also resize (and therefore as you mentioned recompute the hash) that’s when it fails.

1 Like

Thank you again !

All things aside, I hope you’re having fun at 42’s school. I’m living in Angoulême, where we host one of its campus, and I’m quite curious about it. :slight_smile:

1 Like

My pleasure, Yes a lot of fun if you need any information you can DM me :slight_smile: