Compile a function to two targets

rpkak · August 14, 2025, 11:27am

Is it possible to compile a function for two targets in one compilation unit? This would make it possible to detect at runtime which CPU features are available and, depending on the result, use faster instructions.

When I use separate modules for each target, I get this error:

src/file1.zig:1:1: error: file exists in modules 'noavx2' and 'avx2'
src/file1.zig:1:1: note: files must belong to only one module
src/file1.zig:1:1: note: file is the root of module 'noavx2'
src/file1.zig:1:1: note: file is the root of module 'avx2'

But it seems to work if I copy the file.

build.zig:

const std = @import("std");

/// Build-script entry point.
///
/// Registers three modules and installs one library artifact named `lib`:
///   * `noavx2` — `src/file0.zig` compiled for the baseline x86_64 CPU model,
///   * `avx2`   — `src/file0_copy.zig` compiled for the `x86_64_v3` CPU model,
///   * `lib`    — `src/file1.zig`, built for the baseline target, importing
///                both of the modules above under those names.
pub fn build(b: *std.Build) void {
    const optimize = b.standardOptimizeOption(.{});

    // Both variants share architecture, OS and ABI; only the CPU model differs.
    const baseline_query: std.Target.Query = .{
        .cpu_arch = .x86_64,
        .os_tag = .linux,
        .abi = .musl,
        .cpu_model = .baseline,
    };
    var v3_query = baseline_query;
    v3_query.cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 };

    const noavx2_target = b.resolveTargetQuery(baseline_query);
    const avx2_target = b.resolveTargetQuery(v3_query);

    // Same source text, two files: each module needs its own root file.
    const mod_noavx2 = b.addModule("noavx2", .{
        .root_source_file = b.path("src/file0.zig"),
        .target = noavx2_target,
        .optimize = optimize,
    });
    const mod_avx2 = b.addModule("avx2", .{
        .root_source_file = b.path("src/file0_copy.zig"),
        .target = avx2_target,
        .optimize = optimize,
    });

    // The dispatcher module is built for the baseline target and picks one of
    // the two imported implementations at runtime.
    const mod = b.addModule("lib", .{
        .root_source_file = b.path("src/file1.zig"),
        .target = noavx2_target,
        .optimize = optimize,
        .imports = &.{
            .{ .name = "noavx2", .module = mod_noavx2 },
            .{ .name = "avx2", .module = mod_avx2 },
        },
    });

    b.installArtifact(b.addLibrary(.{
        .name = "lib",
        .root_module = mod,
    }));
}

src/file0.zig, src/file0_copy.zig:

const std = @import("std");

/// Adds `b` to `a` element-wise and stores the sums back into `a`.
///
/// Both arrays are reinterpreted as 4-lane `u64` vectors so the addition is
/// expressed as a single vector operation.
pub fn addArrays(a: *[4]u64, b: *const [4]u64) void {
    const Lanes = @Vector(4, u64);
    const lhs: Lanes = a.*;
    const rhs: Lanes = b.*;
    a.* = lhs + rhs;
}

src/file1.zig:

const noavx2 = @import("noavx2");
const avx2 = @import("avx2");

/// Exported entry point: adds `b` into `a` element-wise, forwarding to the
/// `avx2` module's implementation when `supports_avx2` is true and to the
/// `noavx2` module's implementation otherwise.
export fn addArrays(a: *[4]u64, b: *const [4]u64, supports_avx2: bool) void {
    switch (supports_avx2) {
        true => avx2.addArrays(a, b),
        false => noavx2.addArrays(a, b),
    }
}

$ zig build -Doptimize=ReleaseFast
$ objdump -S zig-out/lib/liblib.a --visualize-jumps
In archive zig-out/lib/liblib.a:

.zig-cache/o/3d6aae1c966852a2ef0dd7d54fe12a06/liblib_zcu.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <addArrays>:
const noavx2 = @import("noavx2");
const avx2 = @import("avx2");

export fn addArrays(a: *[4]u64, b: *const [4]u64, supports_avx2: bool) void {
   0:       55                          push   %rbp
   1:       48 89 e5                    mov    %rsp,%rbp
    if (supports_avx2) {
   4:       85 d2                       test   %edx,%edx
   6:   ,-- 74 03                       je     b <addArrays+0xb>
        avx2.addArrays(a, b);
   8:   |   5d                          pop    %rbp
   9:   |   eb 25                       jmp    30 <file0_copy.addArrays>
    } else {
        noavx2.addArrays(a, b);
   b:   '-> f3 0f 6f 06                 movdqu (%rsi),%xmm0
   f:       f3 0f 6f 4e 10              movdqu 0x10(%rsi),%xmm1
const std = @import("std");

pub fn addArrays(a: *[4]u64, b: *const [4]u64) void {
    a.* = @as(@Vector(4, u64), a.*) + @as(@Vector(4, u64), b.*);
  14:       f3 0f 6f 17                 movdqu (%rdi),%xmm2
  18:       66 0f d4 d0                 paddq  %xmm0,%xmm2
  1c:       f3 0f 6f 47 10              movdqu 0x10(%rdi),%xmm0
  21:       66 0f d4 c1                 paddq  %xmm1,%xmm0
  25:       f3 0f 7f 17                 movdqu %xmm2,(%rdi)
  29:       f3 0f 7f 47 10              movdqu %xmm0,0x10(%rdi)
  2e:       5d                          pop    %rbp
  2f:       c3                          ret

0000000000000030 <file0_copy.addArrays>:
const std = @import("std");

pub fn addArrays(a: *[4]u64, b: *const [4]u64) void {
  30:   55                      push   %rbp
  31:   48 89 e5                mov    %rsp,%rbp
    a.* = @as(@Vector(4, u64), a.*) + @as(@Vector(4, u64), b.*);
  34:   c5 fe 6f 06             vmovdqu (%rsi),%ymm0
  38:   c5 fd d4 07             vpaddq (%rdi),%ymm0,%ymm0
  3c:   c5 fe 7f 07             vmovdqu %ymm0,(%rdi)
  40:   5d                      pop    %rbp
  41:   c5 f8 77                vzeroupper
  44:   c3                      ret

Is there any other option than copying the file?

matklad · August 14, 2025, 11:50am

Relevant issue:

github.com/ziglang/zig

Proposal: Function multi-versioning

opened 06:20PM - 17 May 18 UTC

bheads

proposal accepted

A really interesting concept is function multi-versioning. The general idea is to support implementing multiple versions of a function for different hardware and having the correct version of the function selected at run time. Made up sample code:

```C
pub fn someMathFunction(vec: Vector) Vector [target: sse4.2] {
    // optimized for SSE 4.2
}

pub fn someMathFunction(vec: Vector) Vector [target: avx2] {
    // optimized for avx2
}

pub fn someMathFunction(vec: Vector) Vector [target: default] {
    // no asm/intrinsics optimization
}

// later on
const v = giveMeAVect();
const v2 = someMathFunction(v); // calls the best version based on run time selection
```

There are ways to simulate this using function pointers, but the compiler would be better at optimizing this, plus implementing that over and over by hand would suck.

LLVM https://llvm.org/docs/LangRef.html#ifuncs
GCC https://lwn.net/Articles/691932/