Code review: arithmetic evaluator

Here’s a parser and evaluator capable of handling really basic arithmetic expressions consisting of literals, sums, and negations. This is one of my first Zig programs, and also my first time trying out a “handle-based” approach to representing recursive objects. What can I improve?

const std = @import("std");

const TokenKind = enum {
    lparen,
    rparen,
    plus,
    minus,
    int,
    eof,
};

const Token = struct {
    kind: TokenKind,
    text: []const u8,

    pub fn format(self: Token, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
        try writer.print("Token{{ .kind = {}, .text = \"{s}\" }}", .{ self.kind, self.text });
    }
};

const Lexer = struct {
    source: []const u8,
    pos: u32,
    buf: ?Token,

    pub fn init(source: []const u8) Lexer {
        return Lexer{
            .source = source,
            .pos = 0,
            .buf = null,
        };
    }

    pub fn peek(self: *Lexer) !Token {
        if (self.buf == null) {
            self.buf = try self.next();
        }
        return self.buf.?;
    }

    pub fn pop(self: *Lexer) !Token {
        if (self.buf) |peeked| {
            self.buf = null;
            return peeked;
        } else {
            return self.next();
        }
    }

    fn next(self: *Lexer) !Token {
        self.skipWhitespace();

        if (self.pos >= self.source.len) {
            return Token{ .kind = TokenKind.eof, .text = "" };
        }

        const start_pos = self.pos;
        const c = self.source[self.pos];
        self.pos += 1;

        const kind = try switch (c) {
            '(' => TokenKind.lparen,
            ')' => TokenKind.rparen,
            '+' => TokenKind.plus,
            '-' => TokenKind.minus,
            else => if (std.ascii.isDigit(c)) self.readInt() else error.UnrecognizedToken,
        };

        const text = self.source[start_pos..self.pos];
        return Token{ .kind = kind, .text = text };
    }

    fn skipWhitespace(self: *Lexer) void {
        self.skipWhile(std.ascii.isWhitespace);
    }

    fn readInt(self: *Lexer) TokenKind {
        self.skipWhile(std.ascii.isDigit);
        return TokenKind.int;
    }

    fn skipWhile(self: *Lexer, pred: fn (u8) bool) void {
        while (self.pos < self.source.len and pred(self.source[self.pos])) {
            self.pos += 1;
        }
    }
};

const SyntaxError = error{
    UnrecognizedToken,
    UnmatchedParen,
    InvalidExpr,
};

const ParseError = SyntaxError || std.mem.Allocator.Error || std.fmt.ParseIntError;

fn parseExpr(lexer: *Lexer, arena: *ExprArena) ParseError!ExprHandle {
    var expr = try parseAtom(lexer, arena);
    while ((try lexer.peek()).kind == TokenKind.plus) {
        _ = try lexer.pop();
        const rhs = try parseAtom(lexer, arena);
        expr = try arena.create(ExprNode{ .add = .{ .lhs = expr, .rhs = rhs } });
    }
    return expr;
}

fn parseAtom(lexer: *Lexer, arena: *ExprArena) ParseError!ExprHandle {
    const peeked = try lexer.peek();
    return switch (peeked.kind) {
        TokenKind.int => {
            const text = (try lexer.pop()).text;
            const value = try std.fmt.parseInt(i32, text, 10);
            return arena.create(ExprNode{ .literal = value });
        },
        TokenKind.lparen => {
            _ = try lexer.pop();
            const inner = try parseExpr(lexer, arena);
            if ((try lexer.pop()).kind != TokenKind.rparen) {
                return error.UnmatchedParen;
            }
            return inner;
        },
        TokenKind.minus => {
            _ = try lexer.pop();
            const rand = try parseAtom(lexer, arena);
            return arena.create(ExprNode{ .neg = .{ .rand = rand } });
        },
        else => error.InvalidExpr,
    };
}

const ExprHandle = struct {
    idx: u32,
};

const ExprNode = union(enum) {
    literal: i32,
    add: struct { lhs: ExprHandle, rhs: ExprHandle },
    neg: struct { rand: ExprHandle },
};

const ExprArena = struct {
    nodes: std.ArrayList(ExprNode),

    pub fn init(allocator: std.mem.Allocator) ExprArena {
        return ExprArena{
            .nodes = std.ArrayList(ExprNode).init(allocator),
        };
    }

    pub fn deinit(self: *ExprArena) void {
        self.nodes.deinit();
    }

    pub fn create(self: *ExprArena, node: ExprNode) !ExprHandle {
        try self.nodes.append(node);
        return .{ .idx = @intCast(self.nodes.items.len - 1) };
    }

    pub fn eval(self: *const ExprArena, handle: ExprHandle) i32 {
        const node = self.nodes.items[handle.idx];
        return switch (node) {
            .literal => |v| v,
            .add => |add| self.eval(add.lhs) + self.eval(add.rhs),
            .neg => |neg| -self.eval(neg.rand),
        };
    }
};

test "simple expression" {
    const source = "-(3 + 4)";
    var lexer = Lexer.init(source);
    var expr_arena = ExprArena.init(std.testing.allocator);
    defer expr_arena.deinit();

    const e = try parseExpr(&lexer, &expr_arena);
    const v = expr_arena.eval(e);
    try std.testing.expectEqual(v, -7);
}
1 Like

It looks like you got a solid start, with base framework in place that can easily be expanded.

The biggest difference I would have personally made is instead of using a TokenKind and a Token type, combined these into a single tagged union:

pub const Token = union(enum) {
    lparen: void,
    rparen: void,
    plus; void,
    minus: void,
    int: i32,
    eof: void,
};

The voids aren’t really necessary, I just added them for clarity. If you wished, you could give them a payload for easier printing, or just use a switch.

Thank you, I’ll give that a shot!