Is it possible to compile a function to two targets in one compilation unit? This would make it possible to detect which CPU features are allowed at runtime, and, depending on the result, use faster instructions.
When I use separate modules for each target, I get this error:
src/file1.zig:1:1: error: file exists in modules 'noavx2' and 'avx2'
src/file1.zig:1:1: note: files must belong to only one module
src/file1.zig:1:1: note: file is the root of module 'noavx2'
src/file1.zig:1:1: note: file is the root of module 'avx2'
But it seems to work if I copy the file.
build.zig:
const std = @import("std");
/// Build script entry point.
///
/// Registers three modules and one library artifact:
///   * "noavx2" — `src/file0.zig` compiled for the baseline x86_64 CPU model.
///   * "avx2"   — `src/file0_copy.zig` compiled for the `x86_64_v3` CPU model.
///   * "lib"    — `src/file1.zig`, which imports both modules above and is
///                the root module of the installed library.
pub fn build(b: *std.Build) void {
    const release_mode = b.standardOptimizeOption(.{});

    // Both targets are x86_64 / Linux / musl; only the CPU model differs.
    const baseline_target = b.resolveTargetQuery(.{
        .cpu_arch = .x86_64,
        .os_tag = .linux,
        .abi = .musl,
        .cpu_model = .baseline,
    });
    const v3_target = b.resolveTargetQuery(.{
        .cpu_arch = .x86_64,
        .os_tag = .linux,
        .abi = .musl,
        .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 },
    });

    // Kernel module built for the baseline CPU model.
    const baseline_kernels = b.addModule("noavx2", .{
        .root_source_file = b.path("src/file0.zig"),
        .target = baseline_target,
        .optimize = release_mode,
    });

    // Kernel module built for x86_64_v3. Its root is a separate file path
    // from the baseline module's root, so the two modules never share a file.
    const v3_kernels = b.addModule("avx2", .{
        .root_source_file = b.path("src/file0_copy.zig"),
        .target = v3_target,
        .optimize = release_mode,
    });

    // Dispatching module: compiled for the baseline target and able to
    // `@import` both kernel modules by name.
    const dispatch_module = b.addModule("lib", .{
        .root_source_file = b.path("src/file1.zig"),
        .target = baseline_target,
        .optimize = release_mode,
        .imports = &.{
            .{ .name = "noavx2", .module = baseline_kernels },
            .{ .name = "avx2", .module = v3_kernels },
        },
    });

    const library = b.addLibrary(.{
        .name = "lib",
        .root_module = dispatch_module,
    });
    b.installArtifact(library);
}
src/file0.zig, src/file0_copy.zig:
const std = @import("std");
/// Adds the four elements of `b` to the corresponding elements of `a`,
/// storing the sums back into `a`.
///
/// The addition is performed as a single 4-lane `u64` vector operation.
/// It uses the plain `+` operator, so a lane overflowing is illegal
/// behavior (trapped in safe build modes).
pub fn addArrays(a: *[4]u64, b: *const [4]u64) void {
    const Lanes = @Vector(4, u64);

    // Arrays coerce to vectors of the same length and element type.
    const lhs: Lanes = a.*;
    const rhs: Lanes = b.*;

    // The vector result coerces back to `[4]u64` on assignment.
    a.* = lhs + rhs;
}
src/file1.zig:
const noavx2 = @import("noavx2");
const avx2 = @import("avx2");
/// Exported entry point: adds `b` into `a` element-wise by forwarding to
/// one of the two imported modules.
///
/// `supports_avx2` is supplied by the caller; when it is true the call is
/// forwarded to the `avx2` module, otherwise to the `noavx2` module.
export fn addArrays(a: *[4]u64, b: *const [4]u64, supports_avx2: bool) void {
    // Guard clause: take the `noavx2` path unless the caller says otherwise.
    if (!supports_avx2) {
        noavx2.addArrays(a, b);
        return;
    }
    avx2.addArrays(a, b);
}
$ zig build -Doptimize=ReleaseFast
$ objdump -S zig-out/lib/liblib.a --visualize-jumps
In archive zig-out/lib/liblib.a:
.zig-cache/o/3d6aae1c966852a2ef0dd7d54fe12a06/liblib_zcu.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <addArrays>:
const noavx2 = @import("noavx2");
const avx2 = @import("avx2");
export fn addArrays(a: *[4]u64, b: *const [4]u64, supports_avx2: bool) void {
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
if (supports_avx2) {
4: 85 d2 test %edx,%edx
6: ,-- 74 03 je b <addArrays+0xb>
avx2.addArrays(a, b);
8: | 5d pop %rbp
9: | eb 25 jmp 30 <file0_copy.addArrays>
} else {
noavx2.addArrays(a, b);
b: '-> f3 0f 6f 06 movdqu (%rsi),%xmm0
f: f3 0f 6f 4e 10 movdqu 0x10(%rsi),%xmm1
const std = @import("std");
pub fn addArrays(a: *[4]u64, b: *const [4]u64) void {
a.* = @as(@Vector(4, u64), a.*) + @as(@Vector(4, u64), b.*);
14: f3 0f 6f 17 movdqu (%rdi),%xmm2
18: 66 0f d4 d0 paddq %xmm0,%xmm2
1c: f3 0f 6f 47 10 movdqu 0x10(%rdi),%xmm0
21: 66 0f d4 c1 paddq %xmm1,%xmm0
25: f3 0f 7f 17 movdqu %xmm2,(%rdi)
29: f3 0f 7f 47 10 movdqu %xmm0,0x10(%rdi)
2e: 5d pop %rbp
2f: c3 ret
0000000000000030 <file0_copy.addArrays>:
const std = @import("std");
pub fn addArrays(a: *[4]u64, b: *const [4]u64) void {
30: 55 push %rbp
31: 48 89 e5 mov %rsp,%rbp
a.* = @as(@Vector(4, u64), a.*) + @as(@Vector(4, u64), b.*);
34: c5 fe 6f 06 vmovdqu (%rsi),%ymm0
38: c5 fd d4 07 vpaddq (%rdi),%ymm0,%ymm0
3c: c5 fe 7f 07 vmovdqu %ymm0,(%rdi)
40: 5d pop %rbp
41: c5 f8 77 vzeroupper
44: c3 ret
Is there any other option than copying the file?