I’m creating a program for Alpine Linux that counts the number of installed packages on the system. It does this by parsing the file /lib/apk/db/installed
. I first did it with buffered I/O, but now I’m trying to do it with memory mapped I/O to benchmark the two and see which one is faster. Here’s my main.zig
file with both implementations for reference:
const std = @import("std");
const page_size_min = std.heap.page_size_min;
const posix = std.posix;
const kib = 1 << 10;
pub fn main() !void {
std.debug.print("{}\n", .{try get_packages_mmap()});
}
fn get_packages_mmap() !u16 {
const key = "C:Q";
const cwd = std.fs.cwd();
const file = cwd.openFile("/lib/apk/db/installed", .{}) catch return 0;
const metadata = try file.metadata();
const size = metadata.size();
const map: posix.MAP = .{ .TYPE = .PRIVATE };
const array_ptr: [*]align(page_size_min) u8 = undefined;
var counter: u16 = 0;
var offset: u64 = 0;
while (true) {
const str = try posix.mmap(
array_ptr, page_size_min, posix.PROT.READ, map, file.handle, offset
);
var iter = std.mem.splitScalar(u8, str, '\n');
while (iter.next()) |entry| {
if (entry.len >= key.len
and std.mem.eql(u8, entry[0..key.len], key))
{
counter += 1;
}
}
offset += page_size_min;
if (offset >= size) {
break;
}
}
return counter;
}
fn get_packages() !u16 {
const key = "C:Q";
const file = std.fs.cwd().openFile("/lib/apk/db/installed", .{}) catch {
return 0;
};
var counter: u16 = 0;
{
var file_buf: [8 * kib]u8 = undefined;
var offset: u64 = 0;
while (true) {
const bytes_read = try file.preadAll(&file_buf, offset);
const str = file_buf[0..bytes_read];
var iter = std.mem.splitScalar(u8, str, '\n');
while (iter.next()) |entry| {
if (entry.len >= key.len
and std.mem.eql(u8, entry[0..key.len], key))
{
counter += 1;
}
}
if (bytes_read != file_buf.len) {
break;
}
offset += bytes_read - key.len + 1;
}
}
return counter;
}
Compiling with zig build-exe -target x86_64-linux main.zig
, the output on my machine when running ./main
is 1042
, which is the expected result. When I run it with Valgrind though, I get this output:
==13233== Memcheck, a memory error detector
==13233== Copyright (C) 2002-2024, and GNU GPL'd, by Julian Seward et al.
==13233== Using Valgrind-3.24.0 and LibVEX; rerun with -h for copyright info
==13233== Command: ./main
==13233==
thread 13233 panic: reached unreachable code
/usr/local/lib/zig/std/posix.zig:4790:19: 0x103b1a1 in mmap (main)
.INVAL => unreachable, // Invalid parameters to mmap()
^
/home/loremayer/source/main.zig:27:35: 0x103a7f6 in get_packages_mmap (main)
const str = try posix.mmap(
^
/home/loremayer/source/main.zig:9:52: 0x103b607 in main (main)
std.debug.print("{}\n", .{try get_packages_mmap()});
^
/usr/local/lib/zig/std/start.zig:656:37: 0x103a51a in posixCallMainAndExit (main)
const result = root.main() catch |err| {
^
/usr/local/lib/zig/std/start.zig:271:5: 0x103a0cd in _start (main)
asm volatile (switch (native_arch) {
^
???:?:?: 0x0 in ??? (???)
==13233==
==13233== Process terminating with default action of signal 6 (SIGABRT)
==13233== at 0x1069717: os.linux.x86_64.syscall4 (x86_64.zig:58)
==13233== by 0x1077A4C: os.linux.sigprocmask (linux.zig:1724)
==13233== by 0x106B0BB: posix.sigprocmask (posix.zig:5791)
==13233== by 0x106B033: posix.raise (posix.zig:733)
==13233== by 0x105890C: posix.abort (posix.zig:677)
==13233== by 0x103EF0C: debug.defaultPanic (debug.zig:672)
==13233== by 0x103D846: debug.FullPanic((function 'defaultPanic')).reachedUnreachable (debug.zig:59)
==13233== by 0x103B1A1: posix.mmap (posix.zig:4790)
==13233== by 0x103A7F6: main.get_packages_mmap (main.zig:27)
==13233== by 0x103B607: main.main (main.zig:9)
==13233== by 0x103A51A: callMain (start.zig:656)
==13233== by 0x103A51A: callMainWithArgs (start.zig:616)
==13233== by 0x103A51A: start.posixCallMainAndExit (start.zig:571)
==13233== by 0x103A0CD: (below main) (start.zig:271)
==13233==
==13233== HEAP SUMMARY:
==13233== in use at exit: 0 bytes in 0 blocks
==13233== total heap usage: 0 allocs, 0 frees, 0 bytes allocated
==13233==
==13233== All heap blocks were freed -- no leaks are possible
==13233==
==13233== For lists of detected and suppressed errors, rerun with: -s
==13233== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
Aborted
Also, if in the main
function I put a while loop that runs the function over and over again, I eventually get an OutOfMemory
error.