Find the nth set bit

If you want to find the nth set bit on a much larger scale, I describe how to do that in this video: (the first portion of the video is counting bits prior to the nth bit, the second part is finding the nth set bit and counting the 0’s below it and immediately above)

2 Likes

Was away for a while; hope I’m not too late to the party. I had the same need a while back for implementing “select” (basically finding the (n-1)th set bit). I ended up with several versions of the functions. I included the fastest versions below. The fastest is using PDEP when it’s available. For non-PDEP systems, divide-and-conquer approach is the fastest, which works on the bits in batches.

/// Finds the bit index of the (n-1)th set bit in a 64-bit word.
inline fn selectInWordSafe(comptime pdep: bool, word: u64, n: usize) ?usize {
    return if (n < @popCount(word)) selectInWord(pdep, word, n) else null;
}

/// Finds the bit index of the (n-1)th set bit in a 64-bit word.
/// Return 64 if not found.
inline fn selectInWord(comptime pdep: bool, word: u64, n: usize) usize {
    return if (pdep)
        selectInWordPDEP(word, n)
    else
        selectInWordDivConquer(word, n);
}

/// Finds the bit index of the (n-1)th 1-bit in a 64-bit word. PDEP variant.
/// Return 64 if not found.
pub inline fn selectInWordPDEP(word: u64, n: usize) usize {
    // TODO: Use built-in Zip operation when Zig adds support for PDEP.
    const mask: u64 = @as(u64, 1) << @intCast(n - 1);
    const scattered: u64 = asm volatile (
        "pdep %[word], %[mask], %[ret]"
        : [ret] "=r" (-> u64),
        : [word] "r" (word), [mask] "r" (mask)
    );
    return @ctz(scattered);                 // trailing zeros gives the bit index
}

/// Finds the bit index of the (n-1)th 1-bit in a 64-bit word. Divide-and-conquer.
inline fn selectInWordDivConquer(word: u64, n: usize) usize {
    var bits            = word;
    var target          = n;
    var bit_idx: usize  = 0;

    // Split 64 bits into 32-bit halves. Count bits in the lower half in one shot.
    const lower_bit32_count = @popCount(bits & 0xFFFFFFFF);
    if (lower_bit32_count < target) {
        target -= lower_bit32_count;    // discount the lower bits.
        bits >>= 32;                    // remove the lower bits.
        bit_idx += 32;
    }
    // Split the lower 32-bit part into 16-bit halves. Count bits in the lower half.
    const lower_bit16_count = @popCount(bits & 0xFFFF);
    if (lower_bit16_count < target) {
        target -= lower_bit16_count;    // discount the lower bits.
        bits >>= 16;                    // remove the lower bits.
        bit_idx += 16;
    }
    // Further dividing 16-bit to 8-bit or dividing 8-bit to 4-bit doesn't speed things up.

    // The target bit is in the lowest 16 bits. Use loop and clear to process them.
    var remaining   = bits & 0xFFFF;
    var i: usize    = 1;
    while (i < target) : (i += 1) {
        remaining &= (remaining - 1);       // Clear the lowest set bit
    }
    return bit_idx + @ctz(remaining);       // trailing zeros gives the bit index
}
1 Like