Are vectors flipped on big-endian machines?

Validark · October 31, 2023, 4:22pm

I have some vector code that I want to work on both little-endian and big-endian machines. My question is whether, on a big-endian machine, the bytes loaded into a vector end up in the opposite order when the vector is bitcast to an integer.

On a little-endian machine, when I read 64 bytes into a vector and movmask it into a 64-bit bitstring, the least significant bits correspond to the initial bytes at the source memory address.

/// Compares each of the 64 bytes at `source` with ' ' and packs the 64
/// boolean lanes into a single u64 bitmask (one bit per byte).
export fn beep(source: [*]align(64) u8) u64 {
    const Chunk = @Vector(64, u8);
    const bytes: Chunk = source[0..64].*;
    const spaces: Chunk = @splat(' ');
    // The comparison yields @Vector(64, bool), which is exactly 64 bits wide.
    const mask: u64 = @bitCast(bytes == spaces);
    return mask;
}

If I do @ctz(beep(source)), it will tell me how many spaces there are in a row, starting at source[0]. On a big-endian machine, would I use @clz instead?

Here’s another one. Let’s say I want to combine these bitstrings myself:

/// Builds a 64-bit "is space" mask from four 16-byte chunks. The mask of the
/// first chunk (source[0..16]) goes in the lowest 16 bits and the mask of the
/// last chunk (source[48..64]) in the highest 16 bits.
export fn beep2(source: [*]align(64) u8) u64 {
    const Lanes = @Vector(16, u8);
    const space: Lanes = @splat(' ');

    // One 16-bit mask per 16-byte chunk.
    const m0: u16 = @bitCast(@as(Lanes, source[0..16].*) == space);
    const m1: u16 = @bitCast(@as(Lanes, source[16..32].*) == space);
    const m2: u16 = @bitCast(@as(Lanes, source[32..48].*) == space);
    const m3: u16 = @bitCast(@as(Lanes, source[48..64].*) == space);

    return (@as(u64, m0) << 0) |
        (@as(u64, m1) << 16) |
        (@as(u64, m2) << 32) |
        (@as(u64, m3) << 48);
}

/// Same four 16-byte "is space" masks as a chunk-wise movmask, but combined
/// in the opposite order: the mask of the first chunk (source[0..16]) goes in
/// the highest 16 bits and the mask of the last chunk in the lowest 16 bits.
export fn beep3(source: [*]align(64) u8) u64 {
    const Lanes = @Vector(16, u8);
    const space: Lanes = @splat(' ');

    const first: u16 = @bitCast(@as(Lanes, source[0..16].*) == space);
    const second: u16 = @bitCast(@as(Lanes, source[16..32].*) == space);
    const third: u16 = @bitCast(@as(Lanes, source[32..48].*) == space);
    const fourth: u16 = @bitCast(@as(Lanes, source[48..64].*) == space);

    var combined: u64 = @as(u64, first) << 48;
    combined |= @as(u64, second) << 32;
    combined |= @as(u64, third) << 16;
    combined |= @as(u64, fourth) << 0;
    return combined;
}

On a little-endian machine, beep2 is equivalent to beep. Is beep3 equivalent on a big-endian machine?

Thank you.

Validark · November 3, 2023, 12:24pm

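I think I figured this out now. We can compile code like this for thumb (little-endian) vs. thumbeb (big-endian) for the Cortex-M23, a core that famously does not have vector instructions. Because the compiler has to lower the vector operations to plain scalar code, the emitted assembly shows directly which byte ends up in which bit.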

/// Two-byte version: compares source[0] and source[1] with ' ' and packs the
/// two boolean lanes into a 2-bit mask, returned widened to u8.
export fn beep(source: [*]align(2) u8) u8 {
    const Pair = @Vector(2, u8);
    const bytes: Pair = source[0..2].*;
    const is_space = bytes == @as(Pair, @splat(' '));
    // @Vector(2, bool) is 2 bits wide; the u2 coerces to the u8 return type.
    const mask: u2 = @bitCast(is_space);
    return mask;
}

Here is the godbolt link.

Here is the emit for little-endian:

@ Little-endian (thumb) output for the two-byte beep.
@ r0 = source on entry; on return r0 = 2-bit mask:
@   bit 0 = (source[0] == ' '), bit 1 = (source[1] == ' ').
beep:
        @ r1 = source[0]
        ldrb    r1, [r0]
        @ r1 -= 32 (' '); the result is zero iff the byte is a space
        subs    r1, #32
        @ r2 = 0 - r1; the carry flag is set only when r1 == 0 (no borrow)
        rsbs    r2, r1, #0
        @ r2 = r2 + r1 + carry = carry, i.e. 1 iff source[0] == ' '
        adcs    r2, r1
        @ r0 = source[1]; same compare-to-space sequence, flag ends up in r1
        ldrb    r0, [r0, #1]
        subs    r0, #32
        rsbs    r1, r0, #0
        adcs    r1, r0
        @ shift the source[1] flag up to bit 1
        lsls    r0, r1, #1
        @ combine: the source[0] flag (r2) stays in bit 0
        adds    r0, r2, r0
        bx      lr

And for big-endian:

@ Big-endian (thumbeb) output for the two-byte beep.
@ r0 = source on entry; on return r0 = 2-bit mask:
@   bit 0 = (source[1] == ' '), bit 1 = (source[0] == ' ').
beep:
        @ r1 = source[1] (the little-endian output loads source[0] here)
        ldrb    r1, [r0, #1]
        @ r1 -= 32 (' '); the result is zero iff the byte is a space
        subs    r1, #32
        @ r2 = 0 - r1; the carry flag is set only when r1 == 0 (no borrow)
        rsbs    r2, r1, #0
        @ r2 = r2 + r1 + carry = carry, i.e. 1 iff source[1] == ' '
        adcs    r2, r1
        @ r0 = source[0]; same compare-to-space sequence, flag ends up in r1
        ldrb    r0, [r0]
        subs    r0, #32
        rsbs    r1, r0, #0
        adcs    r1, r0
        @ shift the source[0] flag up to bit 1
        lsls    r0, r1, #1
        @ combine: the source[1] flag (r2) stays in bit 0
        adds    r0, r2, r0
        bx      lr

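It appears that the only difference is the order of the loads: the little-endian code reads source[0] first, whereas the big-endian code reads source[1] first. In both versions the flag for the byte loaded first ends up in bit 0 of the result, and the flag for the byte loaded second is shifted into bit 1. So on little-endian, bit 0 corresponds to source[0], while on big-endian, bit 0 corresponds to source[1]. So yes, it’s flipped.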