Compile a function to two targets

Is it possible to compile a function to two targets in one compilation unit? This would make it possible to detect at runtime which CPU features are available and, depending on the result, use faster instructions.

When I use separate modules for each target, I get this error:

src/file1.zig:1:1: error: file exists in modules 'noavx2' and 'avx2'
src/file1.zig:1:1: note: files must belong to only one module
src/file1.zig:1:1: note: file is the root of module 'noavx2'
src/file1.zig:1:1: note: file is the root of module 'avx2'
But it seems to work if I copy the file.

build.zig:

const std = @import("std");

/// Build entry point.
///
/// Registers three modules and installs one library artifact:
///   * "noavx2" — rooted at `src/file0.zig`, compiled for the baseline x86_64 CPU model.
///   * "avx2"   — rooted at `src/file0_copy.zig`, compiled for the x86_64_v3 CPU model.
///   * "lib"    — rooted at `src/file1.zig`, compiled for the baseline target and
///                importing the two modules above under their own names.
pub fn build(b: *std.Build) void {
    const optimize_mode = b.standardOptimizeOption(.{});

    // The two targets differ only in the CPU model they are allowed to assume.
    const baseline_target = b.resolveTargetQuery(.{
        .cpu_arch = .x86_64,
        .os_tag = .linux,
        .abi = .musl,
        .cpu_model = .baseline,
    });
    const v3_target = b.resolveTargetQuery(.{
        .cpu_arch = .x86_64,
        .os_tag = .linux,
        .abi = .musl,
        .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 },
    });

    // Each variant module has its own root source file; the second one is a
    // copy of the first on disk.
    const baseline_module = b.addModule("noavx2", .{
        .root_source_file = b.path("src/file0.zig"),
        .target = baseline_target,
        .optimize = optimize_mode,
    });
    const v3_module = b.addModule("avx2", .{
        .root_source_file = b.path("src/file0_copy.zig"),
        .target = v3_target,
        .optimize = optimize_mode,
    });

    // The library's root module is built for the baseline target and reaches
    // both variants through named imports.
    const root_module = b.addModule("lib", .{
        .root_source_file = b.path("src/file1.zig"),
        .target = baseline_target,
        .optimize = optimize_mode,
        .imports = &.{
            .{ .name = "noavx2", .module = baseline_module },
            .{ .name = "avx2", .module = v3_module },
        },
    });

    const library = b.addLibrary(.{
        .name = "lib",
        .root_module = root_module,
    });
    b.installArtifact(library);
}

src/file0.zig, src/file0_copy.zig:

const std = @import("std");

/// Adds `b` to `a` element-wise and stores the sums back into `a`.
///
/// Both arrays are converted to a 4-lane `u64` vector so the addition is
/// expressed as a single vector operation.
pub fn addArrays(a: *[4]u64, b: *const [4]u64) void {
    const Lanes = @Vector(4, u64);
    const lhs: Lanes = a.*;
    const rhs: Lanes = b.*;
    a.* = lhs + rhs;
}

src/file1.zig:

const noavx2 = @import("noavx2");
const avx2 = @import("avx2");

/// Exported entry point: adds `b` into `a` element-wise, forwarding to the
/// `avx2` module's implementation when `supports_avx2` is true and to the
/// `noavx2` module's implementation otherwise.
export fn addArrays(a: *[4]u64, b: *const [4]u64, supports_avx2: bool) void {
    if (!supports_avx2) {
        noavx2.addArrays(a, b);
        return;
    }
    avx2.addArrays(a, b);
}
$ zig build -Doptimize=ReleaseFast
$ objdump -S zig-out/lib/liblib.a --visualize-jumps
In archive zig-out/lib/liblib.a:

.zig-cache/o/3d6aae1c966852a2ef0dd7d54fe12a06/liblib_zcu.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <addArrays>:
const noavx2 = @import("noavx2");
const avx2 = @import("avx2");

export fn addArrays(a: *[4]u64, b: *const [4]u64, supports_avx2: bool) void {
   0:       55                          push   %rbp
   1:       48 89 e5                    mov    %rsp,%rbp
    if (supports_avx2) {
   4:       85 d2                       test   %edx,%edx
   6:   ,-- 74 03                       je     b <addArrays+0xb>
        avx2.addArrays(a, b);
   8:   |   5d                          pop    %rbp
   9:   |   eb 25                       jmp    30 <file0_copy.addArrays>
    } else {
        noavx2.addArrays(a, b);
   b:   '-> f3 0f 6f 06                 movdqu (%rsi),%xmm0
   f:       f3 0f 6f 4e 10              movdqu 0x10(%rsi),%xmm1
const std = @import("std");

pub fn addArrays(a: *[4]u64, b: *const [4]u64) void {
    a.* = @as(@Vector(4, u64), a.*) + @as(@Vector(4, u64), b.*);
  14:       f3 0f 6f 17                 movdqu (%rdi),%xmm2
  18:       66 0f d4 d0                 paddq  %xmm0,%xmm2
  1c:       f3 0f 6f 47 10              movdqu 0x10(%rdi),%xmm0
  21:       66 0f d4 c1                 paddq  %xmm1,%xmm0
  25:       f3 0f 7f 17                 movdqu %xmm2,(%rdi)
  29:       f3 0f 7f 47 10              movdqu %xmm0,0x10(%rdi)
  2e:       5d                          pop    %rbp
  2f:       c3                          ret

0000000000000030 <file0_copy.addArrays>:
const std = @import("std");

pub fn addArrays(a: *[4]u64, b: *const [4]u64) void {
  30:   55                      push   %rbp
  31:   48 89 e5                mov    %rsp,%rbp
    a.* = @as(@Vector(4, u64), a.*) + @as(@Vector(4, u64), b.*);
  34:   c5 fe 6f 06             vmovdqu (%rsi),%ymm0
  38:   c5 fd d4 07             vpaddq (%rdi),%ymm0,%ymm0
  3c:   c5 fe 7f 07             vmovdqu %ymm0,(%rdi)
  40:   5d                      pop    %rbp
  41:   c5 f8 77                vzeroupper
  44:   c3                      ret

Is there any other option than copying the file?

Relevant issue:

4 Likes