I actually use @popCount a lot; it’s great when you are working with a bitfield:
Example 1: counting the number of valid entries. Let’s say I have a bitfield where each bit is 1 if the corresponding entry is valid and 0 otherwise.
/// Counts the total number of set (1) bits across every word of `bitfield`,
/// i.e. the number of valid entries when each set bit marks a valid entry.
fn countValidEntries(T: type, bitfield: []const T) usize {
    var total: usize = 0;
    for (bitfield) |word| total += @popCount(word);
    return total;
}
Also works with vectors.
Another use case: iterating over the 1 bits of a bitmask. You can do that with a plain while loop, but if you popcount first you can get a more predictable branch!
/// Visits each set bit of `mask` (lowest to highest), calling `foo` with the
/// bit's index. Loops until the mask is exhausted; the continuation expression
/// clears the lowest set bit after each iteration.
fn method1(T: type, mask: T) void {
    var remaining = mask;
    while (remaining != 0) : (remaining &= remaining - 1) {
        foo(@ctz(remaining));
    }
}
/// Same visit-each-set-bit as method1, but the trip count is fixed up front
/// via @popCount, giving the loop a more predictable bound.
fn method2(T: type, mask: T) void {
    const set_bit_count = @popCount(mask);
    var remaining = mask;
    for (0..set_bit_count) |_| {
        foo(@ctz(remaining));
        remaining &= remaining - 1; // drop the lowest set bit
    }
}
// The previous code was buggy, I just noticed...
// It's fixed now.
Here’s a snippet from an actual project:
/// Filters a bitfield of candidate entries against `needleMask`: for each word of
/// each 4-lane vector starting at `firstBitFieldIndex`, the corresponding per-entry
/// masks in `bitmasks` (one u32 per entry/bit — see the indexing by `totalOffset`)
/// are tested and failing bits are cleared in place.
/// Cooperatively cancellable via `atomicInterrupt`; progress is always recorded
/// into `stateAddr` on exit (including early interrupt return) by the `defer` below.
/// The comptime `settings.optimization` selects the strategy:
///   - .SIMDonly: always run the SIMD filter; the payload enum controls whether
///     already-all-zero vectors may be skipped (.RespectPreviousZeroes) or must be
///     processed unconditionally (.IgnorePreviousZeroes, which permits undefined
///     initial vector contents — see the comment at the .IgnorePreviousZeroes arm).
///   - .Dynamic: per-word choice between the sparse path and the SIMD path,
///     based on the word's popcount.
///   - .SparseCachedOnly: always use the sparse cached filter.
/// NOTE(review): assumes `bitsPerVector` == bits covered by one @Vector(4, usize)
/// (4 * @bitSizeOf(usize)) — confirm against its declaration elsewhere in the file.
fn bitMaskFilterPass(bitField: []@Vector(4, usize), bitmasks: []const u32, firstBitFieldIndex: usize, needleMask: u32,
atomicInterrupt: *const std.atomic.Value(bool),
stateAddr: *CachedStateCompletion,
comptime settings: struct {
optimization: union (enum) {
SIMDonly: enum {
IgnorePreviousZeroes,
RespectPreviousZeroes,
},
Dynamic: void,
SparseCachedOnly: void,
},
}
) void {
// std.debug.print("needleMask: {b}\nOther masks:\n", .{needleMask});
// for (bitmasks, 0..) |bm, i| {
// std.debug.print("{}: {b}\n", .{i, bm});
// }
// TODO find the best way to do this constant and switching thingy. You could even take comptime information.
// Popcount threshold for the .Dynamic path: at or below this many set bits the
// sparse cached filter is used; above it, the SIMD filter. Tuning value — see TODO above.
const oneBitVsManyCutoff = 20;
// Bit offset (into the conceptual flat bitfield) of the first lane of the current vector.
var currentOffset: usize = firstBitFieldIndex * bitsPerVector;
// Deliberately reads currentOffset's FINAL value at scope exit, so this records how
// far we actually got — the resume point — even on the early interrupt return.
defer stateAddr.* = .{ .bitMaskPass = @intCast(currentOffset / bitsPerVector) };
for (bitField[firstBitFieldIndex..]) |*currentVectorPtr| {
if (atomicInterrupt.load(.monotonic)) return; // Early return if we get interrupted
var vec = currentVectorPtr.*;
// Skip vectors that are already all-zero (nothing left to filter), EXCEPT in
// SIMDonly/.IgnorePreviousZeroes mode, which must process unconditionally because
// the vector's prior contents may be undefined. Both operands of the `or` are
// comptime-known for the settings tag, so the union payload access is safe.
if (@reduce(.Or, vec) != 0 or (settings.optimization == .SIMDonly and settings.optimization.SIMDonly == .IgnorePreviousZeroes)) {
for (0..4) |i| {
// TODO - could potentially use a pointer instead of totalOffset and save some CPU cycles per loop.
// Index of the first entry covered by lane i; used directly to index `bitmasks`
// (one u32 mask per entry), so lane i's filters start at bitmasks[totalOffset].
const totalOffset = currentOffset + i * @typeInfo(usize).int.bits;
vec[i] = switch (settings.optimization) {
.SIMDonly => |SIMDsettings| blk: {
// NOTE(review): @alignCast suggests SIMDfullFilter wants vector-aligned input —
// relies on bitmasks' allocation alignment; confirm at the call sites.
const result = SIMDfullFilter(@ptrCast(@alignCast(&bitmasks[totalOffset])), needleMask);
break :blk switch (SIMDsettings) {
.IgnorePreviousZeroes => result, // This allows us to pass in undefined memory for vec[i]
.RespectPreviousZeroes => vec[i] & result,
};
},
.Dynamic => switch (@popCount(vec[i])) {
0 => 0, // Do nothing if everything is zero
1...oneBitVsManyCutoff => sparseCachedFilter(vec[i], @ptrCast(&bitmasks[totalOffset]), needleMask),
// & because it's possible current vector has discarded entries for different reasons than bitmask, so we want to keep them discarded
else => vec[i] & SIMDfullFilter(@ptrCast(@alignCast(&bitmasks[totalOffset])), needleMask), //If we are above that, SIMD math
},
.SparseCachedOnly => sparseCachedFilter(vec[i], @ptrCast(&bitmasks[totalOffset]), needleMask),
};
}
// Write the filtered lanes back only when the vector was processed.
currentVectorPtr.* = vec;
}
currentOffset += bitsPerVector;
}
}