Initialization of slices/arrays in more complicated scenarios

Hey everyone,

I’d like to better understand the range syntax and slice/array initialization in Zig. Let’s say I’d want to have an array (perhaps comptime known) with all letters in the alphabet. This is the way that I came up with:

// All 52 ASCII letters, lower case first, then upper case.
const default_word_characters = init: {
    const lower_count: usize = 'z' - 'a' + 1;
    const upper_count: usize = 'Z' - 'A' + 1;
    var letters: [lower_count + upper_count]u8 = undefined;
    var next: usize = 0;
    // A `for` range excludes its upper bound, so this loop stops at 'y' ...
    for ('a'..'z') |code| {
        letters[next] = @intCast(code);
        next += 1;
    }
    // ... and 'z' is stored by hand.
    letters[next] = 'z';
    next += 1;
    // Same pattern for the upper-case half: the loop covers 'A' through 'Y'.
    for ('A'..'Z') |code| {
        letters[next] = @intCast(code);
        next += 1;
    }
    letters[next] = 'Z';
    break :init letters;
};

Another alternative that came to mind was doing this:

// All 52 ASCII letters, collected by scanning every byte value and keeping
// the ones that fall in a letter range. Because the scan is in ascending
// order, the upper-case letters come first.
const default_word_characters = init: {
    // One slot per letter of each case (the second term must span 'a'..'z').
    const length: usize = ('Z' - 'A' + 1) + ('z' - 'a' + 1);
    var word_characters: [length]u8 = undefined;
    var idx: usize = 0;
    // The range end is exclusive, so `+ 1` is needed to include 255 as well.
    for (0..std.math.maxInt(u8) + 1) |char| {
        switch (char) {
            'a'...'z', 'A'...'Z' => {
                word_characters[idx] = @intCast(char);
                idx += 1;
            },
            // A switch on an integer must be exhaustive; skip non-letters.
            else => {},
        }
    }
    break :init word_characters;
};

However, in my opinion, this gets quite messy. Is there any simpler option? I was thinking of something like

const default_word_characters = [_]u8{'a'...'z','A'...'Z'};

I’m curious how you deal with a similar situation in your projects, or any ideas that make the initialization a bit less verbose.

Hi! You can simplify a bit like this:

const std = @import("std");

test {
    const letters_per_case = 'z' - 'a' + 1;
    const default_word_characters = init: {
        var letters: [2 * letters_per_case]u8 = undefined;
        // Range ends are exclusive, hence the `+ 1`. Each range is paired
        // with the half of the array it fills: upper case first, then lower.
        for (letters[0..letters_per_case], 'A'..'Z' + 1) |*slot, code| {
            slot.* = @intCast(code);
        }
        for (letters[letters_per_case..], 'a'..'z' + 1) |*slot, code| {
            slot.* = @intCast(code);
        }
        break :init letters;
    };
    // Print one letter per line.
    for (default_word_characters) |letter| {
        std.debug.print("{c}\n", .{letter});
    }
}

But, to avoid storing constant ASCII character arrays see std.ascii, for instance, isAlphabetic.

3 Likes

Ah, okay, I see. My goal was to also understand the general idea of these kinds of scenarios, in these situations it would no longer be obvious how to use std.ascii, but thank you for the hint! So do I understand correctly that a syntax similar to the one I’d like to have does not exist?

Yeah, integer range array initialization like that doesn’t exist atm.

Here are 3 more possibilities that use comptime:

  1. creates arrays and then concatenates those with ++
  2. takes a tuple of 2 element-tuples which describe ranges and then creates one long tuple from those at comptime
  3. combines 1. and 2. for a simpler use syntax

I think 1. is better than 2. because it is simpler and doesn’t require manually specifying the length. I wasn’t able to get 2. to a point where it is able to infer the length.

The downside of 1. is that you need to specify the type of the target array (it would be nice if we could write comptime functions and get access to the inferred return type, similar to what builtins do), but I think the downside isn’t that big.

Because 2. is sadly not able to infer the length (Is there a way to get 2. to infer the size?) of the comptime generated tuple, it isn’t really helpful, so I created 3. which is a mix of 1. and 2.

const std = @import("std");

/// Returns an array holding every integer from `start` through `end`
/// (both inclusive), each cast to `T`.
fn literalRange(comptime T: type, comptime start: comptime_int, comptime end: comptime_int) [end - start + 1]T {
    const count = end - start + 1;
    var values: [count]T = undefined;
    // Walk the output slots alongside an open-ended counter beginning at `start`.
    for (&values, start..) |*slot, value| {
        slot.* = @intCast(value);
    }
    return values;
}

/// Sums the element counts of all `ranges`, where each entry is indexed as
/// `[0]` = first value and `[1]` = last value (inclusive).
fn rangesLength(comptime ranges: anytype) comptime_int {
    var total = 0;
    for (ranges) |bounds| {
        total += bounds[1] - bounds[0] + 1;
    }
    return total;
}

/// Builds a tuple type with one `comptime_int` field per value covered by
/// `ranges` (as counted by `rangesLength`).
fn TupleRanges(comptime ranges: anytype) type {
    const field_count = rangesLength(ranges);
    const field_types = [_]type{comptime_int} ** field_count;
    return std.meta.Tuple(&field_types);
}
/// Expands `ranges` into one flat tuple: the values of the first range
/// (bounds inclusive), followed by those of the second, and so on.
fn tupleRanges(comptime ranges: anytype) TupleRanges(ranges) {
    var flat: TupleRanges(ranges) = undefined;
    // Index of the next tuple field to fill; carries over from range to range.
    var filled = 0;
    inline for (ranges) |bounds| {
        inline for (bounds[0]..bounds[1] + 1) |value| {
            flat[filled] = value;
            filled += 1;
        }
    }
    return flat;
}

/// Array type of `T` with one slot per value covered by `ranges`.
fn LiteralRanges(comptime T: type, comptime ranges: anytype) type {
    const len = rangesLength(ranges);
    return [len]T;
}
/// Expands `ranges` into one flat array of `T`: the values of the first range
/// (bounds inclusive), followed by those of the second, and so on.
fn literalRanges(comptime T: type, comptime ranges: anytype) LiteralRanges(T, ranges) {
    comptime var values: LiteralRanges(T, ranges) = undefined;
    // Index of the next array slot to fill; carries over from range to range.
    comptime var filled = 0;
    inline for (ranges) |bounds| {
        inline for (bounds[0]..bounds[1] + 1) |value| {
            values[filled] = value;
            filled += 1;
        }
    }
    return values;
}

/// Builds the 52 ASCII letters ('A'..'Z' then 'a'..'z') four different ways
/// and prints each result one character per line, with a dashed separator
/// between the variants.
pub fn main() !void {
    const separator = "\n-------------------------\n";

    // Baseline: fill each half of a fixed-size array from an explicit range.
    const letters_per_case = 'z' - 'a' + 1;
    const default_word_characters = init: {
        var letters: [2 * letters_per_case]u8 = undefined;
        for (letters[0..letters_per_case], 'A'..'Z' + 1) |*slot, code| {
            slot.* = @intCast(code);
        }
        for (letters[letters_per_case..], 'a'..'z' + 1) |*slot, code| {
            slot.* = @intCast(code);
        }
        break :init letters;
    };
    for (default_word_characters) |letter| std.debug.print("{c}\n", .{letter});

    std.debug.print(separator, .{});

    // 1. One array per range, joined with `++`.
    const concatenated = literalRange(u8, 'A', 'Z') ++ literalRange(u8, 'a', 'z');
    for (concatenated) |letter| std.debug.print("{c}\n", .{letter});

    std.debug.print(separator, .{});

    // 2. A tuple of comptime integers; the array length has to be spelled
    // out from the tuple's length when coercing.
    const letter_tuple = tupleRanges(.{ .{ 'A', 'Z' }, .{ 'a', 'z' } });
    const from_tuple: [letter_tuple.len]u8 = letter_tuple;
    for (from_tuple) |letter| std.debug.print("{c}\n", .{letter});

    std.debug.print(separator, .{});

    // 3. The range list goes in, a correctly sized array comes out.
    const from_ranges = literalRanges(u8, .{ .{ 'A', 'Z' }, .{ 'a', 'z' } });
    for (from_ranges) |letter| std.debug.print("{c}\n", .{letter});
}
4 Likes

Maybe it would be nice if the standard library would provide some helper functions to make this a bit easier. I like the third idea in particular. Nevertheless, I kind of refuse to implement this as it feels like bloat of a codebase to me. Thank you for sharing the idea though!

1 Like