const std = @import("std");
fn Character(
comptime character_count: comptime_int,
) type {
return std.math.IntFittingRange(
0,
character_count + 1, // +1 for whitespace
);
}
fn maxIndex(
comptime word_length: comptime_int,
comptime character_count: comptime_int,
) comptime_int {
const a: comptime_int = std.math.powi(usize, character_count + 1, word_length) catch unreachable;
return a - 1;
}
fn Index(
comptime word_length: comptime_int,
comptime character_count: comptime_int,
) type {
return std.math.IntFittingRange(0, maxIndex(word_length, character_count));
}
fn hash(
comptime word_length: comptime_int,
comptime character_count: comptime_int,
word: [word_length]Character(character_count),
) Index(word_length, character_count) {
const adjusted_character_count = character_count + 1; // 1 for whitespace
var count: [adjusted_character_count]Character(character_count) = .{0} ** adjusted_character_count;
for (word) |char| count[char] += 1;
const Result = Index(word_length, character_count);
var result: Result = 0;
var base: Result = 1;
for (count[1..], 1..) |this_char_count, character| {
const first_term = character * base;
const pow = std.math.powi(Result, adjusted_character_count, this_char_count) catch unreachable;
base *= pow;
const multiplier = @divExact(pow - 1, character_count);
const to_add = first_term * multiplier;
result += @intCast(to_add);
}
return result;
}
const test_namespace = struct {
const test_character_count = 26;
const test_word_length = 7;
fn testHash(word: []const u8) void {
var word_adjusted: [test_word_length]std.math.IntFittingRange(0, character_count) = undefined;
for (word, word_adjusted[0..word.len]) |src, *dst| {
dst.* = @intCast(src - 'A' + 1);
}
@memset(word_adjusted[word.len..], 0);
const index = hash(test_word_length, test_character_count, word_adjusted);
std.debug.print("{}\n", .{index});
}
};
pub fn main() void {
const testHash = test_namespace.testHash;
testHash("");
testHash("A");
testHash("AABC");
testHash("BCAA");
testHash("ZZZZZZZ");
}
output:
0
1
60535
60535
10460353202
godbolt
Explanation:
Like @Retro_Dev mentioned, we can encode the index as a base-27 number:
A A B Z 0 0 0
=
[1] [1] [2] [26] [0] [0] [0]
Each one of the number in brackets is a single digit, in a base-27 number. The number is in little-endian for convenience. To convert to decimal:
[1] [1] [2] [26] [0] [0] [0]
=
1 * 27^0 + 1 * 27^1 + 2 * 27^2 + 26 * 27^3 + 0 * 27^4 + 0 * 27^5 + 0 * 27^6
Since order doesn’t matter, we can order the word lexicographically. Empty space counts as 0.
Note that:
A A A 0 0 0 0
=
1 * 27^0 + 1 * 27^1 + 1 * 27^2
Which is the sum of a geometric progression, with first element 1 * 27^0
, ratio 27
and number of elements 3
.
1 * 27^0 + 1 * 27^1 + 1 * 27^2
=
1 * 27^0 *(27^3 - 1)/(27 - 1)
In the implementation, we don’t actually waste time ordering, we just count the ocurrence of each digit.