A Zig shared library called from Python segfaults as soon as the exported function calls an internal function.
Here’s the Python code:
# sample.py
# Minimal driver: loads the Zig shared library through ctypes and calls its
# exported `token_ranker` function once.
import ctypes

# Path is relative to the current working directory.
lib = ctypes.CDLL("zig-out/lib/libfasttokenizer.so")
# The export returns an opaque pointer; c_void_p keeps ctypes from treating
# the result as a C int.
lib.token_ranker.restype = ctypes.c_void_p


def encode(text: bytes):
    """Call the library's `token_ranker` export.

    `text` is accepted but not used yet, and nothing is returned: the
    function currently only obtains the opaque handle.
    """
    ranker_handle = lib.token_ranker()


if __name__ == "__main__":
    print(encode(b"Operations on vectors shorter than the target machine's native SIMD size will typically compile to single "))
Here’s the partial Zig code:
// src/asclib.zig
const std = @import("std");
const Rank = @import("./rank.zig");
/// C-ABI entry point: builds a `TokenRanker` from "scratchpad/gpt2tokens"
/// and returns it as an opaque handle.
///
/// The handle points at heap memory that stays valid after this function
/// returns. Nothing in this function frees it; the caller owns the handle
/// for as long as it needs it.
///
/// Panics with "Cannot initialize TokenRanker" if allocation or loading fails.
pub export fn token_ranker() *anyopaque {
    // The allocator state must outlive this call: the TokenRanker stores the
    // `std.mem.Allocator`, which refers back to this state. A container-level
    // `var` inside a local struct has static storage, unlike a plain local.
    const Persistent = struct {
        var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    };
    const allocator = Persistent.gpa.allocator();

    // Heap-allocate the result. Returning the address of a local (the previous
    // `@constCast(&rank)`) hands the caller a pointer into a dead stack frame.
    const rank = allocator.create(Rank.TokenRanker) catch @panic("Cannot initialize TokenRanker");
    rank.* = Rank.TokenRanker.from_file("scratchpad/gpt2tokens", "c100k_base", allocator) catch @panic("Cannot initialize TokenRanker");
    return rank;
}
// src/rank.zig
pub const TokenRanker = struct {
str_to_id: std.StringHashMap(usize),
id_to_str: std.HashMap(usize, []const u8, std.hash_map.AutoContext(usize), std.hash_map.default_max_load_percentage),
tokens: [100256][]const u8,
allocator: std.mem.Allocator,
regex: Regex,
const Self = @This();
/// Releases everything this ranker holds: deinitializes the regex and both
/// lookup maps, then frees every entry of `tokens` through the stored
/// allocator.
pub fn free(self: *Self) void {
    self.regex.deinit();
    self.str_to_id.deinit();
    self.id_to_str.deinit();

    // Walk the fixed-size token table by index and release each slice.
    var index: usize = 0;
    while (index < self.tokens.len) : (index += 1) {
        self.allocator.free(self.tokens[index]);
    }
}
/// Builds a ranker from the file at `file_path`, opened relative to the
/// current working directory.
///
/// The file is read into a temporary buffer (capped at 5 MiB) obtained from
/// `allocator`, and the buffer is passed to `from_string` together with
/// `model_type` and `allocator`. Errors from opening, reading, or
/// `from_string` are propagated.
pub fn from_file(comptime file_path: []const u8, comptime model_type: []const u8, allocator: std.mem.Allocator) !Self {
    const max_bytes: usize = 5 * 1024 * 1024;

    const handle = try fs.cwd().openFile(file_path, .{});
    defer handle.close();

    var buffered = io.bufferedReader(handle.reader());
    const text = try buffered.reader().readAllAlloc(allocator, max_bytes);
    // NOTE(review): `text` is freed when this function returns, so
    // `from_string` must not keep slices into it. Confirm against its body.
    defer allocator.free(text);

    return Self.from_string(text, model_type, allocator);
}
pub fn from_string(content: []const u8, comptime model_type: []const u8, allocator: std.mem.Allocator) !Self {
std.debug.print("{d}\n", .{123});
const Model = model.get_model(model_type);
var tokens: [Model.n_tokens][]const u8 = undefined;
...
...
Here’s a video of the application segfaulting: