There are problems with your new code, but first here you go with a patch of my changes over your code and the revised sources:
Patch
$ diff -Naur original.zig main.zig
--- original.zig 2024-12-02 10:44:39.976921577 +0000
+++ main.zig 2024-12-02 10:44:13.243951405 +0000
@@ -8,55 +8,44 @@
);
defer file.close();
- var str_from_i: []const u8 = undefined;
- var buf: [1024]u8 = undefined;
- var bytes_written = @as(void, undefined);
for (1..4) |i| {
- bytes_written = try file.writeAll("Test line ");
- str_from_i = try std.fmt.bufPrint(&buf, "{}", .{i});
- bytes_written = try file.writeAll(str_from_i);
+ var buf: [1024]u8 = undefined;
+ const str = try std.fmt.bufPrint(&buf, "Test line {d}", .{i});
+ try file.writeAll(str);
if (i < 3) {
- bytes_written = try file.writeAll("\n");
+ try file.writeAll("\n");
}
}
//#1. Get file size (source: https://stackoverflow.com/a/72334950/18251410)
- file = try std.fs.cwd().openFile("junk_file.txt", .{ .mode = .read_only });
- const file_size = (try file.stat()).size;
+ const file_size = (try file.stat()).size; // <- incorrect
//#2. Generate a slice with an allocator to store the lines (source: https://www.reddit.com/r/Zig/comments/13pjtp3/comment/jl9w6ng/?utm_source=share&utm_medium=web3x&utm_name=web3xcss&utm_term=1&utm_content=share_button)
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
+ defer if (gpa.deinit() != .ok) std.debug.print("memory leak detected!", .{});
const allocator = gpa.allocator();
+
+ var lines = try allocator.alloc([]u8, file_size);
+ var allocated_lines: usize = 0;
defer {
- _ = gpa.deinit();
- }
- const dynamic_size: usize = file_size;
- var lines = try allocator.alloc([]u8, dynamic_size);
- defer {
- for (lines) |line| allocator.free(line);
+ for (0..allocated_lines) |i| allocator.free(lines[i]);
allocator.free(lines);
}
//#3. Read the file line by line (source: https://stackoverflow.com/a/68879352/18251410)
- file = try std.fs.cwd().openFile("junk_file.txt", .{});
+ try file.seekTo(0);
var buf_reader = std.io.bufferedReader(file.reader());
- var in_stream = buf_reader.reader();
+ const in_stream = buf_reader.reader();
var i: u8 = 0;
- var gpa2 = std.heap.GeneralPurposeAllocator(.{}){};
- const allocator2 = gpa2.allocator();
- defer {
- _ = gpa2.deinit();
- }
- const max_size: usize = 1024;
- while (try in_stream.readUntilDelimiterOrEofAlloc(allocator2, '\n', max_size)) |line| : (i += 1) {
+ while (try in_stream.readUntilDelimiterOrEofAlloc(allocator, '\n', 1024)) |line| : (i += 1) {
lines[i] = line;
std.log.debug("First loop: lines[{}] = `{s}'", .{ i, lines[i] });
}
+ allocated_lines = i;
//#4. Second print of slice
- const number_of_lines = i;
- for (0..number_of_lines) |index| {
+ for (0..allocated_lines) |index| {
std.log.debug("Second loop: lines[{}] = `{s}'", .{ index, lines[index] });
}
}
Revised source code
const std = @import("std");
pub fn main() !void {
//#0. Create a junk file (just for demonstration)
var file = try std.fs.cwd().createFile(
"junk_file.txt",
.{ .read = true },
);
defer file.close();
for (1..4) |i| {
var buf: [1024]u8 = undefined;
const str = try std.fmt.bufPrint(&buf, "Test line {d}", .{i});
try file.writeAll(str);
if (i < 3) {
try file.writeAll("\n");
}
}
//#1. Get file size (source: https://stackoverflow.com/a/72334950/18251410)
const file_size = (try file.stat()).size; // <- incorrect
//#2. Generate a slice with an allocator to store the lines (source: https://www.reddit.com/r/Zig/comments/13pjtp3/comment/jl9w6ng/?utm_source=share&utm_medium=web3x&utm_name=web3xcss&utm_term=1&utm_content=share_button)
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer if (gpa.deinit() != .ok) std.debug.print("memory leak detected!", .{});
const allocator = gpa.allocator();
var lines = try allocator.alloc([]u8, file_size);
var allocated_lines: usize = 0;
defer {
for (0..allocated_lines) |i| allocator.free(lines[i]);
allocator.free(lines);
}
//#3. Read the file line by line (source: https://stackoverflow.com/a/68879352/18251410)
try file.seekTo(0);
var buf_reader = std.io.bufferedReader(file.reader());
const in_stream = buf_reader.reader();
var i: u8 = 0;
while (try in_stream.readUntilDelimiterOrEofAlloc(allocator, '\n', 1024)) |line| : (i += 1) {
lines[i] = line;
std.log.debug("First loop: lines[{}] = `{s}'", .{ i, lines[i] });
}
allocated_lines = i;
//#4. Second print of slice
for (0..allocated_lines) |index| {
std.log.debug("Second loop: lines[{}] = `{s}'", .{ index, lines[index] });
}
}
- You don’t need to reopen the file every time to operate on it (if you’re still holding a reference to the original
std.fs.File
object, which you do here). I removed that.
- I’m not sure what you’re doing with
var bytes_written = @as(void, undefined);
but that is a useless variable (a variable of type void
cannot store any information). Just remove it.
- You do:
const file_size = (try file.stat()).size; // <- size in bytes
...
const dynamic_size: usize = file_size;
var lines = try allocator.alloc([]u8, dynamic_size); // <- allocates that many pointers to `[]u8`
which is allocating as many []u8
entries as there are bytes in the file; you want one per line, so this is going to allocate way more elements than needed.
4. Due to the above point, many of those []u8
s are going to be uninitialized, so the cleanup I suggested earlier doesn’t work there as you may be trying to free random memory locations:
defer {
for (lines) |line| allocator.free(line);
allocator.free(lines);
}
- Not sure why you initialize a second GPA.
- The
const number_of_lines = i;
does precisely what I was suggesting in my last post, i.e. keep track of how many lines were actually allocated. If you make this into a variable and declare it just before the defer
statement which deallocates memory, then you can use that to keep track of where you need to stop calling allocator.free()
:
var allocated_lines: usize = 0;
defer {
for (0..allocated_lines) |i| allocator.free(lines[i]);
allocator.free(lines);
}
...
while (try in_stream.readUntilDelimiterOrEofAlloc(allocator, '\n', 1024)) |line| : (i += 1) {
...
}
allocated_lines = i;
for (0..allocated_lines) |index| {
...
}
Yeah, again, since you’re effectively reading the entire file anyway, @AndrewKraevskii’s approach of just reading the whole contents in memory and then simply splitting on newlines is the easiest and more robust (and likely faster too?).
Additionally, as I pointed out above, you cannot know ahead of time how much memory you need to reserve because you don’t know how many lines are going to be in the file. (OK, you do know in this toy example, but that’s not going to be true in the general case!)
In order to allocate the right amount of memory with base allocator.alloc()
you may need to allocate room for some elements, keep track of the elements you’ve filled and the remaining capacity, and once capacity is exhausted reallocate memory with a larger capacity, etc… Basically, you’re going to be manually doing the work of std.ArrayList()
yourself. See @AndrewKraevskii’s post which also uses std.ArrayList()
to make this aspect simpler as well.