With function signature fixed:
const std = @import("std");
/// Fills the first `len` bytes starting at `dest` with the byte `c` and
/// returns `dest` unchanged.
///
/// Whole chunks of `n` bytes are written through a vector-typed pointer;
/// whatever is left over is written one byte at a time. `dest` is only
/// unwrapped inside the loops, so it is never unwrapped when `len` is 0.
export fn improved_memset(dest: ?[*]u8, c: u8, len: usize) callconv(.c) ?[*]u8 {
// Turns off runtime safety checks for this function body.
@setRuntimeSafety(false);
// Chunk width in bytes: the suggested vector length for u8, falling back to
// @sizeOf(usize) when no length is suggested.
const n = std.simd.suggestVectorLength(u8) orelse @sizeOf(usize);
var i: usize = 0;
// Bulk phase: keep going while a whole n-byte chunk still fits before `len`.
while (i + n <= len) : (i += n) {
// NOTE(review): this reinterprets a byte pointer as a pointer to a vector.
// That @ptrCast is illegal: it can miscompile under various conditions and
// may turn into a compile error when the language rules for vectors change.
// The safe spelling assigns the splatted vector through an array pointer:
// `dest.?[i..][0..n].* = splatted;`.
const p: *align(1) @Vector(n, u8) = @ptrCast(dest.?[i..]);
p.* = @splat(c);
}
// Tail phase: fewer than n bytes remain; write them individually.
while (i < len) : (i += 1) {
dest.?[i] = c;
}
return dest;
}
0000000000000000 <improved_memset>:
0: 55 push rbp
1: 48 89 e5 mov rbp,rsp
4: 53 push rbx
5: 50 push rax
6: 48 89 fb mov rbx,rdi
9: 48 83 fa 40 cmp rdx,0x40
d: 73 04 jae 13 <improved_memset+0x13>
f: 31 ff xor edi,edi
11: eb 24 jmp 37 <improved_memset+0x37>
13: 62 f2 7d 48 7a c6 vpbroadcastb zmm0,esi
19: 31 c0 xor eax,eax
1b: 0f 1f 44 00 00 nop DWORD PTR [rax+rax*1+0x0]
20: 62 f1 fe 48 7f 04 03 vmovdqu64 ZMMWORD PTR [rbx+rax*1],zmm0
27: 48 8d 78 40 lea rdi,[rax+0x40]
2b: 48 83 e8 80 sub rax,0xffffffffffffff80
2f: 48 39 d0 cmp rax,rdx
32: 48 89 f8 mov rax,rdi
35: 76 e9 jbe 20 <improved_memset+0x20>
37: 48 29 fa sub rdx,rdi
3a: 76 0f jbe 4b <improved_memset+0x4b>
3c: 48 01 df add rdi,rbx
3f: 40 0f b6 f6 movzx esi,sil
43: c5 f8 77 vzeroupper
46: e8 00 00 00 00 call 4b <improved_memset+0x4b>
4b: 48 89 d8 mov rax,rbx
4e: 48 83 c4 08 add rsp,0x8
52: 5b pop rbx
53: 5d pop rbp
54: c5 f8 77 vzeroupper
57: c3 ret
With the illegal @ptrCast fixed:
const std = @import("std");
/// Fills the first `len` bytes starting at `dest` with the byte `c` and
/// returns `dest` unchanged.
///
/// Whole chunks are stored by assigning a splatted vector through a
/// `*[chunk_len]u8` array pointer (no pointer cast involved); the leftover
/// bytes are written one at a time. `dest` is only unwrapped when a byte is
/// actually written, so a call with `len == 0` never unwraps it.
export fn improved_memset(dest: ?[*]u8, c: u8, len: usize) callconv(.c) ?[*]u8 {
    @setRuntimeSafety(false);

    // Chunk width in bytes: the suggested vector length for u8, falling back
    // to @sizeOf(usize) when no length is suggested.
    const chunk_len = std.simd.suggestVectorLength(u8) orelse @sizeOf(usize);
    // Built once up front; every full chunk receives this same value.
    const fill: @Vector(chunk_len, u8) = @splat(c);

    var offset: usize = 0;

    // Bulk phase: stop as soon as a whole chunk would run past `len`.
    while (true) {
        const next_offset = offset + chunk_len;
        if (next_offset > len) break;
        // Slicing with comptime-known bounds yields an array pointer, and the
        // vector coerces to the array on assignment.
        const chunk: *[chunk_len]u8 = dest.?[offset..][0..chunk_len];
        chunk.* = fill;
        offset = next_offset;
    }

    // Tail phase: fewer than `chunk_len` bytes remain.
    while (offset < len) {
        dest.?[offset] = c;
        offset += 1;
    }

    return dest;
}
(same machine code, except it won’t miscompile under various conditions, and it won’t become a compile error when we make language changes to vectors)