I’m currently doing some experiments to try some different control flow patterns in Zig and I came up with a simple example for string splitting.
Here’s what the calling code looks like:
var last: ?Split = null;
while (split("|", "a|b|c", last)) |s| : (last = s) {
std.debug.print("split: {s}\n", .{ s.slice });
}
This will print out each split in the loop (the delimiter can be any number of characters). The goal here is to make the most of the Zig loop capture syntax. I think of it as a “null-in-null-out” control flow pattern.
Here’s the naive splitting algorithm - I haven’t bothered to optimize it, it’s just a sketch to get the ball rolling and maybe talk about how we could achieve something similar but better!
const Split = struct {
const Self = @This();
slice: []const u8,
span: [2]usize = .{ 0, 0 },
pub fn empty(self: *const Self) bool {
return (self.span[0] == self.span[1]);
}
pub fn init(slice: ?[] const u8, i: usize, j: usize) Self {
return Split {
.slice = if (slice) |s| s else &[_]u8{ },
.span = .{ i, j }
};
}
};
pub fn split(delimiter: [] const u8, string: [] const u8, previous: ?Split) ?Split {
if ((delimiter.len == 0) or (string.len == 0)) {
return null;
}
var start: usize = 0;
if (previous) |*last| {
// we've reached the end of the string
if (last.span[1] >= string.len) {
// check if we ended our string on a delimiter
if (!last.empty() and (delimiter[0] == string[last.span[1] - delimiter.len])) {
// we have matched the first character, so
// we move up by one and continue matching
var a: usize = 1;
var b: usize = (last.span[1] - delimiter.len) + 1;
while (a < delimiter.len) : ({ a += 1; b += 1; }) {
if(delimiter[a] != string[b]) return null;
}
return Split.init(null, b, b);
}
return null; // final termination point
}
start = last.span[1]; // begin where we last left off
}
var i = start; while (i < string.len) : (i += 1) {
if (delimiter[0] == string[i]) {
// we have matched the first character, so
// we move up by one and continue matching
var a: usize = 1;
var b: usize = i + 1;
// check if we can fit another substring into the remainder
var match: bool = ((string.len - i) >= delimiter.len);
while (match and (a < delimiter.len)) : ({ a += 1; b += 1; }) {
match = delimiter[a] == string[b];
}
if (match) {
// Imagine that we had a string that started with a delimiter.
// In that case, we have "" + delimiter as our output.
if (delimiter.len == (b - start)) {
return Split.init(null, start, b);
}
// else, we found a substring where the delimiter is not first
return Split.init(string[start..i], start, b);
}
}
}
// if no splits occured, return the entire string
return Split.init(string[start..string.len], start, string.len);
}