Hobbyist, coding in Zig ver. 0.15.2, compiling in/for Windows 11. In this question I may be asking you to help me find a needle in a haystack (pun intended) – if it seems like a waste of your valuable time, please let me know and I’ll move on. As an exercise, I’m trying to write a CLI util for finding all instances of a byte sequence in a file and replacing them with a different byte sequence. I’m trying to accomplish this with std.mem.replaceOwned. To represent nonprintable bytes on the command line, I want to support the use of familiar escape sequences (\n for newline, etc.), including \0NNN for byte with octal value NNN, \dNNN for byte with decimal value NNN, and \xNN for byte with hex value NN. The code I’m posting below (change.zig) is a prototype using a tiny dataset (two lines of “ABCDE”) for illustration purposes.
My problem is this: If I command change.exe C x, ABCDE is changed to ABxDE, as expected. But if I write the same command with an escape code, for example change.exe \d67 x, ABCDE is not changed. This is so even though it appears that the escape code is correctly being converted to an uppercase C, via func escape2Bytes(), as shown by the printed display. On the other hand, if I put the escape sequence in the replace string, change.exe C \d120, once again the result is the expected ABxDE. The same func escape2Bytes() is used to convert the escape sequence to bytes. I expect and assume that there is a bug in escape2Bytes(), but if so it’s too subtle for me to detect. Any pointers as to what may be going wrong would be greatly appreciate it. (I tried using std.mem.replace, but it made no difference.) Many thanks in advance. Again, I hope this is not a wild goose chase.
// change.zig
// Find and Replace Across (One) File
// CLD rev. 2026-01-04
// Zig ver. 0.15.0-dev.77+aa8aa6625
const std = @import("std");
const print = std.debug.print;
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
const zalloc = arena.allocator();
pub fn main() !void {
defer arena.deinit();
const args = try std.process.argsAlloc(zalloc);
defer std.process.argsFree(zalloc, args);
if (args.len < 3) {
showHelp(args[0]);
return;
}
if (args[1].len >= 2) {
if (args[1][0] == '-' and (args[1][1] == '-' or args[1][1] == 'h')) {
showHelp(args[0]);
return;
}
}
const data = [_]u8{ 'A', 'B', 'C', 'D', 'E', '\n', 'A', 'B', 'C', 'D', 'E', '\n' };
const input = data[0..];
print("In:\n{s}\n", .{input});
const search_str: []u8 = try escape2Bytes(zalloc, args[1]);
const replace_str: []u8 = try escape2Bytes(zalloc, args[2]);
print("search : {s}\nreplace: {s}\n\n", .{ search_str, replace_str });
// pub fn replaceOwned(comptime T: type, allocator: Allocator,
// input: []const T, needle: []const T, replacement: []const T)
// Allocator.Error![]T
const output = try std.mem.replaceOwned(u8, zalloc, input, search_str[0..], replace_str[0..]);
print("Out:\n{s}", .{output});
}
pub fn isBaseN(ch: u8, base: u8) bool {
var yn: bool = false;
if (base > 16) return yn;
const chu: u8 = std.ascii.toUpper(ch);
const b_digits = [_]u8{ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
if (chu >= b_digits[0] and chu <= b_digits[base - 1]) yn = true;
return yn;
}
pub fn escape2Bytes(allocator: anytype, arg_string: []u8) ![]u8 {
// Convert escape sequences to bytes
var bytes_out = try std.mem.Allocator.alloc(allocator, u8, arg_string.len);
var i: usize = 0;
var j: usize = 0;
var k: usize = 0;
var base_in: u8 = 10;
var max_len: u2 = 0;
var testval: u16 = 0;
var tmp: [3]u8 = undefined;
var tmp2: []u8 = undefined;
while (i < arg_string.len) {
max_len = 0;
if (i == arg_string.len - 1) {
bytes_out[j] = arg_string[i];
i += 1;
j += 1;
break;
}
if (arg_string[i] != 92) {
bytes_out[j] = arg_string[i];
i += 1;
j += 1;
continue;
}
sw: switch (arg_string[i + 1]) {
'\\' => {
bytes_out[j] = 92;
i += 2;
j += 1;
},
'a' => {
bytes_out[j] = 7;
i += 2;
j += 1;
},
'b' => {
bytes_out[j] = 8;
i += 2;
j += 1;
},
't' => {
bytes_out[j] = 9;
i += 2;
j += 1;
},
'n' => {
bytes_out[j] = 10;
i += 2;
j += 1;
},
'v' => {
bytes_out[j] = 11;
i += 2;
j += 1;
},
'f' => {
bytes_out[j] = 12;
i += 2;
j += 1;
},
'r' => {
bytes_out[j] = 13;
i += 2;
j += 1;
},
'e' => {
bytes_out[j] = 27;
i += 2;
j += 1;
},
'0' => {
base_in = 8;
max_len = 3;
continue :sw 255;
},
'd' => {
base_in = 10;
max_len = 3;
continue :sw 255;
},
'x' => {
base_in = 16;
max_len = 2;
continue :sw 255;
},
255 => {
if (max_len < 1) {
bytes_out[j] = arg_string[i + 1];
i += 2;
j += 1;
continue;
}
i += 2;
k = 0;
while (i < arg_string.len and isBaseN(arg_string[i], base_in)) {
tmp[k] = arg_string[i];
i += 1;
k += 1;
}
tmp2 = tmp[0..k];
testval = try std.fmt.parseInt(u16, tmp2, base_in);
if (testval > 255) {
i -= 1;
k -= 1;
} else {
bytes_out[j] = @intCast(testval);
j += 1;
}
},
else => {
bytes_out[j] = arg_string[i + 1];
i += 2;
j += 1;
},
}
}
for (j..bytes_out.len) |c| {
bytes_out[c] = 0;
}
return bytes_out;
}
pub fn showHelp(progname: [:0]u8) void {
print("\nGlobal Find & Replace\nUsage: {s} [\"]FIND_STR[\"] [\"]REPLACE_STR[\"]\n\nThe following escape sequences are recognized in FIND_STR and\nREPLACE_STR:\n\n \\\\ backslash\n \\\" double quote\n \\a alert (BEL)\n \\b backspace\n \\e escape\n \\f form feed\n \\n newline\n \\r carriage return\n \\t horizontal tab\n \\v vertical tab\n \\0NNN byte with octal value NNN (1 to 3 digits)\n \\dNNN byte with decimal value NNN (1 to 3 digits)\n \\xNN byte with hexadecimal value NN (1 or 2 digits)\n", .{progname});
}