I’m trying to write a tool that copies files in a sorted order and with as little fragmentation as possible on the filesystem. Compared to GNU cp, I get ~44% less fragmentation, but the code is 15x slower. I think one of the issues is how I’m using readers and writers to copy file data. Checking the numbers, the bottlenecks are falloc, file writing, and syncing. Maybe something can be improved in the sorting function as well.
I’m also allocating two buffers per file, each sized to the whole file, which I know I should not do. I thought of using stream to pass the data directly from a reader to a writer, but is there a way to do it without going through userspace, or at least without a buffer? In case someone is interested in how I decided to write files sequentially and sorted: I sort them first by type, then with an ICU-style natural sort, but for ASCII only.
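One kernel-side option I haven’t tried yet is Linux’s copy_file_range(2), which std.posix wraps; the data is copied between the two descriptors inside the kernel without entering userspace. A rough, untested sketch:

const std = @import("std");

// Untested sketch: copy `size` bytes from src to dst kernel-side.
fn copyFileRangeAll(src: std.fs.File, dst: std.fs.File, size: u64) !void {
    var off: u64 = 0;
    while (off < size) {
        // Returns the number of bytes actually copied by this call.
        const n = try std.posix.copy_file_range(
            src.handle,
            off,
            dst.handle,
            off,
            @intCast(size - off),
            0,
        );
        if (n == 0) break; // source ended earlier than expected
        off += n;
    }
}

There also used to be std.fs.File.copyRangeAll wrapping roughly this loop; I haven’t checked whether it survived the 0.16-dev Io rework.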
Maybe I’m using the Io model wrongly as well; I just set it to init_single_threaded.
I’d be glad to hear ideas on whether I can make any of this parallel, or if I even need to.
I’m recursively populating a list of std.fs.Dir.Entry structs, sorting it, and then running a copy function for each item in the list.
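Roughly the shape of that collection step (a sketch; src_root and gpa are placeholder names, and I’m assuming std.fs.Dir.walk here):

var dir = try std.fs.cwd().openDir(src_root, .{ .iterate = true });
defer dir.close();
var walker = try dir.walk(gpa);
defer walker.deinit();
var entries: std.ArrayList(std.fs.Dir.Entry) = .empty;
while (try walker.next()) |e| {
    // The walker invalidates e.path on the next call, so duplicate it.
    try entries.append(gpa, .{ .name = try gpa.dupe(u8, e.path), .kind = e.kind });
}
std.mem.sortUnstable(std.fs.Dir.Entry, entries.items, {}, entryLess);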
Functions:
How I managed to get the GNU cp behavior of using the last argument as the destination:
/// Builds the destination path by swapping the first component of `path` for `first`.
pub fn newPath(
    gpa: std.mem.Allocator,
    path: []const u8,
    first: []const u8,
) ![]u8 {
    var it = std.fs.path.componentIterator(path);
    var parts = std.ArrayList([]const u8).empty;
    defer parts.deinit(gpa);
    var replaced = false;
    while (it.next()) |comp| {
        if (replaced) {
            try parts.append(gpa, comp.name);
        } else {
            // Drop the original first component and substitute `first`
            try parts.append(gpa, first);
            replaced = true;
        }
    }
    return std.fs.path.join(gpa, parts.items);
}
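For example, newPath(gpa, "src/sub/file.txt", "dest") should return "dest/sub/file.txt".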
This is the file-processing part I was referring to:
.file => {
    const new_path = try newPath(
        gpa,
        entry.name,
        dest,
    );
    std.log.debug("Copying: {s} to {s}", .{
        entry.name,
        new_path,
    });
    const file = try std.Io.Dir.cwd().statPath(
        io,
        entry.name,
        .{ .follow_symlinks = false },
    );
    // TODO: Swap to Io after writer implementation
    const open = try std.fs.cwd().openFile(
        entry.name,
        .{ .follow_symlinks = false },
    );
    defer open.close();
    // The per-file, file-sized allocation I mentioned above
    const reader_buf = try gpa.alloc(u8, file.size);
    defer gpa.free(reader_buf);
    var reader_init = open.reader(io, reader_buf);
    const reader = &reader_init.interface;
    // File writer
    var new_file = try std.fs.cwd().createFile(
        new_path,
        .{ .lock = .exclusive },
    );
    defer new_file.close();
    const writer_buf = try gpa.alloc(u8, file.size);
    defer gpa.free(writer_buf);
    var writer_init = new_file.writer(writer_buf);
    const writer = &writer_init.interface;
    if (falloc_switch.*) {
        fallocateLinux(new_file, file.size) catch |err| {
            std.log.warn("Falloc fail:\n{s}: {s}", .{
                @errorName(err),
                entry.name,
            });
            falloc_switch.* = false;
        };
    }
    _ = try reader.stream(writer, .unlimited);
    // Flush the buffered writer interface before syncing; otherwise
    // bytes can be left sitting in its buffer and never hit the file.
    try writer.flush();
    try new_file.chmod(file.mode);
    try new_file.sync();
},
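If the per-file allocations turn out to matter, one change I’m considering is hoisting two fixed-size buffers out of the loop and reusing them for every file; the size below is a guess that would need measuring:

// Sketch: allocate once, before the per-file loop.
const reader_buf = try gpa.alloc(u8, 64 * 1024); // assumed size
defer gpa.free(reader_buf);
const writer_buf = try gpa.alloc(u8, 64 * 1024); // assumed size
defer gpa.free(writer_buf);
// ...then inside the loop, pass the same buffers each time:
// var reader_init = open.reader(io, reader_buf);
// var writer_init = new_file.writer(writer_buf);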
falloc function:
fn fallocateLinux(file: std.fs.File, size: u64) !void {
    // 0.16-dev: std.os.linux.fallocate returns the raw syscall result
    const rc = std.os.linux.fallocate(
        file.handle,
        0, // mode 0: allocate space and update file size
        0, // offset
        @intCast(size),
    );
    // Decode the raw usize result: E.init extracts the errno value,
    // and unexpectedErrno converts it into a Zig error.
    switch (std.os.linux.E.init(rc)) {
        .SUCCESS => {},
        else => |err| return std.posix.unexpectedErrno(err),
    }
    // No need for setEndPos(size) here; fallocate already updates the size.
}
My sorting function, which goes into std.mem.sortUnstable():
pub fn entryLess(_: void, a: std.fs.Dir.Entry, b: std.fs.Dir.Entry) bool {
    // Group by type
    const ta = @intFromEnum(a.kind);
    const tb = @intFromEnum(b.kind);
    if (ta != tb) return ta < tb;
    // Same type; sort by name
    return naturalLess({}, a.name, b.name);
}
// Natural/alphanumeric ascending order, ICU-style natural sort (ASCII only)
pub fn naturalLess(_: void, a: []const u8, b: []const u8) bool {
    var i: usize = 0;
    var j: usize = 0;
    var a_zero_count: usize = 0;
    var b_zero_count: usize = 0;
    while (i < a.len and j < b.len) {
        const ca = a[i];
        const cb = b[j];
        // If both are digits, compare the full number
        if (std.ascii.isDigit(ca) and std.ascii.isDigit(cb)) {
            // Skip leading zeros in a
            var start_i = i;
            while (i < a.len and a[i] == '0') : (i += 1) {}
            a_zero_count += i - start_i;
            start_i = i;
            // Slice the digit run from a
            while (i < a.len and std.ascii.isDigit(a[i])) : (i += 1) {}
            const da = a[start_i..i];
            // Skip leading zeros in b
            var start_j = j;
            while (j < b.len and b[j] == '0') : (j += 1) {}
            b_zero_count += j - start_j;
            start_j = j;
            // Slice the digit run from b
            while (j < b.len and std.ascii.isDigit(b[j])) : (j += 1) {}
            const db = b[start_j..j];
            // Compare magnitude: with leading zeros stripped, more digits
            // means a larger number
            if (da.len != db.len)
                return da.len < db.len;
            switch (std.mem.order(u8, da, db)) {
                .lt => return true,
                .gt => return false,
                .eq => continue,
            }
        }
        // If characters differ, plain ASCII comparison
        if (ca != cb) {
            return ca < cb;
        }
        // Otherwise continue scanning one char at a time
        i += 1;
        j += 1;
    }
    // One side exhausted: the shorter string (ignoring leading zeros) sorts first
    return a.len - a_zero_count < b.len - b_zero_count;
}
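For reference, the ordering I expect from it, written as a test that could sit in the same file (so std is already imported):

test "naturalLess compares digit runs numerically" {
    try std.testing.expect(naturalLess({}, "file2", "file10"));
    try std.testing.expect(!naturalLess({}, "file10", "file2"));
    // Plain ASCII comparison when no digit runs are involved
    try std.testing.expect(naturalLess({}, "alpha", "beta"));
}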
I’ve shared most of the things that I think are impacting performance. In case this post is in the wrong category or too long, I apologize; please tell me so I can be more compliant in the future.