Beginner StringHashMaps data gets corrupted

I have a struct “Context” that has a StringHashMap as one of its fields. This map is supposed to hold a rule name as key and the corresponding rule as its value. (Rule is another struct). When I put the data in the map it works initially, but the data is somehow corrupted (I am assuming this has to do with strings being slices, so the pointers are somehow getting corrupted but I’m not an expert since Zig is my initiation into memory management), so retrieving the values is not possible since the map damaged the keys somehow. I will post the relevant code, or you can view the project at GitHub - Clinton-Nmereole/Zoq: Miniature proof helper similar to Coq and inspired by Noq but written in Zig

pub const Expression = union(enum) {
    symbol: struct { str: []const u8 },
    function: struct { name: []const u8, args: []const Expression },
}

pub const Rule = struct {
    expression: Expression,
    equivalent: Expression,
}

pub const TokenType = union(enum) {
    //keywords
    Quit,
    Shape,
    Apply,
    Done,
    Rule,

    //symbols
    identifier,
    number,

    //special characters
    comma,
    equals,
    open_paren,
    close_paren,
    colon,

    //terminators
    eof,
    err: Errer,

    pub const Errer = union(enum) {
        unexpected_byte: u8,
    };

    pub fn iseql(a: TokenType, b: TokenType) bool {
        return if (std.meta.activeTag(a) == std.meta.activeTag(b)) true else false;
    }
};

pub fn keyword(name: []const u8) ?TokenType {
    const case = enum {
        quit,
        shape,
        apply,
        done,
        rule,
    };
    const cmd = std.meta.stringToEnum(case, name) orelse return null;

    switch (cmd) {
        .quit => return TokenType{ .Quit = {} },
        .shape => return TokenType{ .Shape = {} },
        .apply => return TokenType{ .Apply = {} },
        .done => return TokenType{ .Done = {} },
        .rule => return TokenType{ .Rule = {} },
    }
}

pub const Token = struct {
    token_type: TokenType,
    value: []const u8,
};

pub const Lexer = struct {
    const Self = @This();
    buffer: []const u8,
    pos: usize,
    read_pos: usize,
    ch: u8,
};

pub const Context = struct {
    rules_table: std.StringHashMap(Rule),
    current_expr: ?Expression,
    alloctor: std.mem.Allocator,

    pub fn init(alloctor: std.mem.Allocator) Context {
        return .{ .rules_table = std.StringHashMap(Rule).init(alloctor), .current_expr = null, .alloctor = alloctor };
    }

    pub fn get_rules_table(self: Context) std.StringHashMap(Rule) {
        return self.rules_table;
    }

    pub fn put_rule(self: *Context, key: []const u8, value: Rule) !void {
        var dupe_key = try allocator.dupe(u8, key[0..]);
        try self.rules_table.put(dupe_key, value);
    }

    pub fn get_rule(self: Context, key: []const u8) ?Rule {
        return self.rules_table.get(key);
    }

    pub fn deinit(self: *Context) void {
        self.rules_table.deinit();
    }

    pub fn get_current_expr(self: *Context) ?Expression {
        return self.current_expr;
    }

    pub fn set_current_expr(self: *Context, expr: ?Expression) void {
        self.current_expr = expr;
    }

    pub fn show_rules(self: Context) void {
        var table = self.get_rules_table();
        var it = table.iterator();
        while (it.next()) |kv| {
            std.debug.print("{s}: {any} = {any}\n", .{ kv.key_ptr.*, kv.value_ptr.expression, kv.value_ptr.* });
        }
    }

    //FIX: This function has errors in both the rule and apply switches. The StringHashMap contaminates the data stored and fails to retrieve it. This is probably due to pointer magic.
    pub fn process_command(self: *Context, lexer: *Lexer) !void {
        var peeked = lexer.next();

        switch (peeked.token_type) {
            .Rule => {
                var rule_name = lexer.nextIf(.identifier);
                std.debug.print("rule?: {any}\n", .{rule_name});
                if (self.get_rule(rule_name.?.value) != null) {
                    return error.DuplicateRule;
                }
                //var rule = try parseRule(lexer);
                var head = try parseexpr(lexer);
                _ = lexer.nextIf(.equals);
                var body = try parseexpr(lexer);
                var rule = Rule{ .expression = head, .equivalent = body };
                std.debug.print("defined rule: {any}\n", .{&rule});
                try self.put_rule(rule_name.?.value, rule);
            },
            .Shape => {
                if (self.get_current_expr() != null) {
                    return error.AlreadyShapingExpression;
                }
                var expr = try parseexpr(lexer);
                std.debug.print("Shaping: {any}\n", .{expr});
                self.set_current_expr(expr);
            },
            .Apply => {
                var expr = &self.current_expr;
                if (expr.* == null) {
                    return error.NoShapingInProgress;
                }
                var name = lexer.nextIf(.identifier);
                var rule_name: []const u8 = name.?.value;
                std.debug.print("applying rule: {s}\n", .{rule_name});
                var rule = self.get_rule(rule_name);
                std.debug.print("got rule: {any}\n", .{rule});
                self.show_rules();
                if (rule == null) {
                    return error.RuleNotFound;
                }
                var new_expr = try rule.?.apply(expr.*.?);
                std.debug.print("new expression: {any}\n", .{new_expr});
                self.set_current_expr(new_expr);
            },
            .Done => {
                std.debug.print("current expression: {any}\n", .{self.current_expr});
                if (self.get_current_expr() != null) {
                    std.debug.print("done shaping: {any}\n", .{self.current_expr});
                    self.set_current_expr(null);
                } else {
                    return error.NoShapingInProgress;
                }
            },
            else => {
                std.debug.print("unexpected token: {any}, expected token in set: {any}\n", .{ peeked, keywordset });
            },
        }
    }
};
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    var prompt: []const u8 = "Zoq::> ";
    var r = std.io.getStdIn().reader();
    var w = std.io.getStdOut().writer();
    _ = w;
    var buf: [4096]u8 = undefined;
    var command: []const u8 = undefined;

    var context: Context = Context.init(allocator);
    defer context.deinit();
    while (true) {
        std.debug.print("{s}\n", .{prompt});
        var temp = try r.readUntilDelimiterOrEof(&buf, '\n');
        command = temp.?;
        var lexer = Lexer.init(command);
        var result = try context.process_command(&lexer);
        _ = result;
        //std.debug.print("{any}\n", .{context.rules_table});
    }
    context.show_rules();
}

The error is as follows:

Zoq::> 
rule add add(a,b) = add(b,a)
rule?: tokenizer.Token{ .token_type = tokenizer.TokenType{ .identifier = void }, .value = { 97, 100, 100 } }
defined rule: add(a, b) ≡ add(b, a)

Zoq::> 
shape add(x,y)
Shaping: add(x, y)
Zoq::> 
apply add
applying rule: add
got rule: 
x,(), b) ≡ add(b, a)

add: 
x,(), b) = 
x,(), b) ≡ add(b, a)

error: NO_MATCH
/home/clinton/Developer/Zig_projects/Zoq/src/expression.zig:305:9: 0x22908a in apply (main)
        return NoMatch.NO_MATCH;
        ^
/home/clinton/Developer/Zig_projects/Zoq/src/exprparser.zig:200:32: 0x22a1e5 in process_command (main)
                var new_expr = try rule.?.apply(expr.*.?);
                               ^
/home/clinton/Developer/Zig_projects/Zoq/src/main.zig:35:22: 0x22b0ad in main (main)
        var result = try context.process_command(&lexer);
                     ^

[Process exited 1]

Thanks in advance.

EDIT: The issue was with the Expression union that made up the Rule Struct. It contained strings. So after tokenization and parsing, those strings got damaged because they were not allocated on the heap. They didn’t persist and the data I was getting was just fragments of the reshaped memory. So the solution was when parsing an expression instead of creating it with the token value, make sure to use a duplicate to the token value. This way the string isn’t invalidated later. Thank you to all the kind people who helped me with this problem. I’ll keep working on my project and would return if I need any more guidance.

var table = self.get_rules_table();

This is creating a copy(!) of the table meta data(like its size and stuff). So when you add a new element your original table won’t be modified. You might still be able to see some changes correctly, but for example when the internal data structure gets resized the original table wouldn’t know about it.

Instead you should change the self.get_rulse_table(); to return a pointer of the table:

pub fn get_rules_table(self: *Context) *std.StringHashMap(Rule) {
    return &self.rules_table;
}

Thank you, I implemented your suggestion but it gives a new error:

src/exprparser.zig:152:25: error: expected type '*exprparser.Context', found '*const exprparser.Context'
        var table = self.get_rules_table();
                    ~~~~^~~~~~~~~~~~~~~~
src/exprparser.zig:152:25: note: cast discards const qualifier
src/exprparser.zig:126:34: note: parameter type declared here
    pub fn get_rules_table(self: *Context) *std.StringHashMap(Rule) {
                                 ^~~~~~~~
referenced by:
    process_command: src/exprparser.zig:196:21
    main: src/main.zig:35:33
    remaining reference traces hidden; use '-freference-trace' to see all reference traces

[Process exited 1]

So it’s now pointing to a ‘const Context’ instead of a ‘Context’

Oh, I think you are still in zig 0.11 right?
I’m getting confused by all those vars you use. Taking a closer look it seems like you are only reading from the table afterwards anyways, so that can’t be the issue, my bad.
You should probably try to get used to using const where possible. The compiler will treats unecessary vars like this as an error in 0.12.

I’m using zig 0.12. I’ll try to change the vars to const.

I think this is the problem. You need to change Context to *Context.

Unfortunately it is not. I just changed it and I get the exact same error as the initial post. The data is still somehow getting corrupted or fragmented.

Probably not the cause since I see in the file that it should be the same allocator, but here it should be try self.allocator.dupe.

Oh, yeah… this is a question about an “ownership”.
What parts of the code may/can modify some underlying data?..
What parts of the code should allocate on the heap and which one should/must free()?..

aside note: IMHO, rust-lang approach to these really (in general) hard issues to handle
is a bit “schizophrenic”:

  • at hardware level we have
    • physical memory cells in some data storage device
    • a state of that cells - it is a “value” of those memory cells

Note, that a physical memory cell can not exits apart of it’s content (“a value”)

  • at a programming language level we have
    • a variable name
    • and a “value”, assotiated with that exact name

Note again - one “variable” - one “value”, they can not exist apart, as some independent “entities”.

What rust-lang (the sooper-pooper, yeah) is doing with all that weird “ownership” conception?
It tries to separate a physical storage and it’s content (“who owns a value?”)
It is schizophrenic way of thinking, isn’t it?

My terms/words are probably was not the best in the post,
but let it be as is… :upside_down_face:

Using self.allocator.dupe leads to a memory leak

Zoq::> 
rule add add(a,b) = add(b,a)
rule?: tokenizer.Token{ .token_type = tokenizer.TokenType{ .identifier = void }, .value = { 97, 100, 100 } }
defined rule: add(a, b) ≡ add(b, a)

Zoq::> 
shape rule add(x,y)
error(gpa): memory address 0x7f607f58c000 leaked: 
/home/clinton/zig-linux-x86_64-0.12.0-dev.80+014d88ef6/lib/std/mem/Allocator.zig:319:40: 0x2271a7 in dupe__anon_4775 (main)
    const new_buf = try allocator.alloc(T, m.len);
                                       ^
/home/clinton/Developer/Zig_projects/Zoq/src/exprparser.zig:131:46: 0x227587 in put_rule (main)
        var dupe_key = try self.alloctor.dupe(u8, key[0..]);
                                             ^
/home/clinton/Developer/Zig_projects/Zoq/src/exprparser.zig:176:34: 0x229de2 in process_command (main)
                try self.put_rule(rule_name.?.value, rule);
                                 ^
/home/clinton/Developer/Zig_projects/Zoq/src/main.zig:35:49: 0x22b046 in main (main)
        var result = try context.process_command(&lexer);
                                                ^

error: NotAnExpression
/home/clinton/Developer/Zig_projects/Zoq/src/exprparser.zig:73:17: 0x22667d in parseexpr (main)
                return error.NotAnExpression;
                ^
/home/clinton/Developer/Zig_projects/Zoq/src/exprparser.zig:182:28: 0x229ebd in process_command (main)
                var expr = try parseexpr(lexer);
                           ^
/home/clinton/Developer/Zig_projects/Zoq/src/main.zig:35:22: 0x22b09d in main (main)
        var result = try context.process_command(&lexer);
                     ^

[Process exited 1]


I created a new topic with a link to this post so this thread can stay on topic helping with OP’s question.

1 Like

Sorry, but after reading your post I’m more confused than I was initially. I tried @IntegratedQuantum’s suggestion and it gave a different error. But I’m getting the idea that this is more a memory problem as opposed to a problem with StringHashMap specifically.

Yes! I think this is progress. From the output before the error, I think it’s now working, but yes, leaking memory. The thing is that you are allocating memory for each key when you call dupe on the Context’s allocator field. This means that when you finish with the Context you have to first iterate through the map and free all the keys with that same allocatorr, and then you can free the map itself with deinit. The map will not free keys or values that are allocated on the heap, you have to do that manually.

var iter = self.rules_table.iterator();
while (iter.next()) |kv| self.allocator.free(kv.key_ptr.*);
self.rules_table.deinit();

Also, it’s not recommended to have a global allocator in a file like the gpa you have there. It will cause this type of confusion. It’s more idiomatic to receive an allocator in the functions where you need to allocate.

I put the proposed code into the deinit function for context so now I have:

 pub fn deinit(self: *Context) void {
        var iter = self.rules_table.iterator();
        while (iter.next()) |kv| self.alloctor.free(kv.key_ptr.*);
        self.rules_table.deinit();
    }

in the main function I then have:

defer context.deinit();

It now gives an entirely different error:

Zoq::> 
rule add add(a,b) = add(b,a)
rule?: tokenizer.Token{ .token_type = tokenizer.TokenType{ .identifier = void }, .value = { 97, 100, 100 } }
defined rule: add(a, b) ≡ add(b, a)

Zoq::> 
shape add(x,y)
Shaping: add(x, y)
Zoq::> 
apply add
applying rule: add
got rule: 
x,(), b) ≡ add(b, a)

add: 
x,(), b) = 
x,(), b) ≡ add(b, a)

thread 1831787 panic: incorrect alignment
/home/clinton/zig-linux-x86_64-0.12.0-dev.80+014d88ef6/lib/std/hash_map.zig:944:44: 0x272210 in header (main)
            return @ptrCast(@as([*]Header, @ptrCast(@alignCast(self.metadata.?))) - 1);
                                           ^
/home/clinton/zig-linux-x86_64-0.12.0-dev.80+014d88ef6/lib/std/hash_map.zig:958:31: 0x25e838 in capacity (main)
            return self.header().capacity;
                              ^
/home/clinton/zig-linux-x86_64-0.12.0-dev.80+014d88ef6/lib/std/hash_map.zig:821:50: 0x22e256 in next (main)
                assert(it.index <= it.hm.capacity());
                                                 ^
/home/clinton/Developer/Zig_projects/Zoq/src/exprparser.zig:141:25: 0x22b346 in deinit (main)
        while (iter.next()) |kv| self.alloctor.free(kv.key_ptr.*);
                        ^
/home/clinton/Developer/Zig_projects/Zoq/src/main.zig:29:25: 0x22b0b5 in main (main)
    defer context.deinit();
                        ^
/home/clinton/zig-linux-x86_64-0.12.0-dev.80+014d88ef6/lib/std/start.zig:370:37: 0x22579e in posixCallMainAndExit (main)
            var i: usize = 0;
                                    ^
/home/clinton/zig-linux-x86_64-0.12.0-dev.80+014d88ef6/lib/std/start.zig:243:5: 0x225281 in _start (main)
    asm volatile (switch (native_arch) {
    ^
???:?:?: 0x0 in ??? (???)
Aborted: oops, process 'zig' core dumped

[Process exited 0]

But I notice that ‘got rule’ is still blank which means the get function in the Apply block is still returning null, so the data is still being corrupted in some form before the improper alignment error.

EDIT: Also, when you say that the gpa shouldn’t be in the file what do you mean? Should I remove it from the main function or do you mean that it should only be created inside a function that would need to allocate memory?

I was referring to the gpa at the top of the exprparser.zig file in the repo.

The code that was returning NO_MATCH is:

var entry = amap.get(s.str).?;
if (!entry.eql(other)) {
    if (amap.remove(s.str)) {
        return NoMatch.NO_MATCH;
    }
}

If I understand it correctly, you check if the stringmap already contains the expression. If it doesn’t, you add it. If it does, you check if it’s eql to the the new expression.
If it is eql, you do nothing.
But if it’s not eql, then it gets confusing. You remove the old expression from the stringmap, but you don’t put anything in its place. Also, if amap.remove returns true, it means that there was something already in there, but you are returning NO_MATCH. Shouldn’t it be the opposite?
I didn’t see any signs in the stack trace that memory got corrupted. How did you come to that conclusion? It seems more likely to be just a logic error.
As an aside, instead of checking if an entry exists and then getting it, it’s more efficient to use getOrPut.

TLDR: It is supposed to return NO_MATCH to account for functions that take in exactly 2 of the same parameter so that it doesn’t match them to functions that take 2 of different parameters.

so f(x,x) should return NO_MATCH when it encounters f(a,b). One takes in 2 arguments that are equal while the other takes in 2 completely different arguments.
f(x,x) => x^2. If I made it match with f(a,b) then this would mean f(a,b) => b^2 which we know is false.

The logic is this, the putinmap function is supposed to help pattern matching with expression. So lets say I have the expression f(x,x) and another expression f(a,b). If I used putinmap it would first match x => a, then I see the next x, I check if it is in the map, if it is in the map then that means that it is already matched to some other value in this case ‘a’. Is ‘a’ equal to the next variable I’m trying to match b, no so I know the expression is not a match and return NO_MATCH.
The reason for this is because something like a Rule called square with the initial expression “Mult(x,x)” which would have an equivalent of
“Square(x)”, should not match “Mult(a,b)” with “Square(b)”.
It should see the second x and go it’s already matched to something, so this function only accepts 2 of the exact same values so if in this case I see a value that is not equal to the value already stored I should return NO_MATCH.

Also the reason I think it is a memory issue, is that the error is happening because the Context is trying to read was altered.

Zoq::> 
rule add add(a,b) = add(b,a)
rule?: tokenizer.Token{ .token_type = tokenizer.TokenType{ .identifier = void }, .value = { 97, 100, 100 } }
defined rule: add(a, b) ≡ add(b, a)

Zoq::> 
shape add(x,y)
Shaping: add(x, y)
Zoq::> 
apply add
applying rule: add
got rule: 
x,(), b) ≡ add(b, a)

add: 
x,(), b) = 
x,(), b) ≡ add(b, a)

error: NO_MATCH
/home/clinton/Developer/Zig_projects/Zoq/src/expression.zig:306:9: 0x22911a in apply (main)
        return NoMatch.NO_MATCH;
        ^
/home/clinton/Developer/Zig_projects/Zoq/src/exprparser.zig:202:32: 0x22a275 in process_command (main)
                var new_expr = try rule.?.apply(expr.*.?);
                               ^
/home/clinton/Developer/Zig_projects/Zoq/src/main.zig:35:22: 0x22b11d in main (main)
        var result = try context.process_command(&lexer);
                     ^

[Process exited 1]

So as you can see the rule is correctly printed out in ‘defined rule’, but look at what it looks like when I try to retrieve it, it goes from “add(a, b) ≡ add(b, a)” to “x,(), b) ≡ add(b, a)”. So it’s gonna return NO_MATCH because it can’t match "x,(),b) with add(x,y).

1 Like

I see now. I think the best option is to put a watch in your debugger for the memory inside the stringmap, and see what statement corrupts it.

Zig has a debugger??? :eyes: :eyes:

Not Zig specific, but you can use any run-of-the-mill debugger with it.