C/C++ macro challenge #3: omitting an argument

chung-leong · December 5, 2023, 5:24am

For this week’s challenge, we’ll be looking at macros from a couple of decades ago. They’re from the source code of PHP 4, more specifically its thread-safe resource management module.

When PHP is compiled to operate in a multi-threaded environment, the macros are defined as follows:

/* Declaration side: the thread-local-storage parameter. */
#define TSRMLS_D	void ***tsrm_ls
/* Same parameter with a leading comma, for use after other parameters. */
#define TSRMLS_DC	, TSRMLS_D
/* Call side: the thread-local-storage argument. */
#define TSRMLS_C	tsrm_ls
/* Same argument with a leading comma, for use after other arguments. */
#define TSRMLS_CC	, TSRMLS_C

The “D” macros are used in function declarations, while the “C” macros are used in function calls.

When PHP is compiled for single-threaded operation, the macros resolve to basically nothing:

/* Single-thread build: a parameter list consisting only of TSRMLS_D becomes `(void)`. */
#define TSRMLS_D	void
/* The remaining three macros expand to nothing. */
#define TSRMLS_DC
#define TSRMLS_C
#define TSRMLS_CC

tsrm_ls points to the thread’s local storage. It’s passed from function to function so that when some of them need to access this or that global structure, they don’t need to query the operating system and thereby incur a performance hit.

The following is a macro used to access the interpreter’s core settings:

/* PG(v): access field `v` of the PHP core globals. */
#ifdef ZTS
/* ZTS defined: go through TSRMG using the core globals id and pointer type. */
# define PG(v) TSRMG(core_globals_id, php_core_globals *, v)
#else
/* ZTS not defined: direct field access on the global `core_globals`. */
# define PG(v) (core_globals.v)
#endif

As you can see, when thread-safety is off, it’s simply a reference to a field of a global struct. The address is known at comptime. Accessing the field requires zero indirection.

When thread-safety is on, the following macros are used:

/* Converts a resource id into an array index by subtracting one. */
#define TSRM_UNSHUFFLE_RSRC_ID(rsrc_id)		((rsrc_id)-1)
/* ... */
/* Dereferences tsrm_ls, indexes the result by the unshuffled id, casts that
 * entry to `type`, and selects `element` from it with `->`. */
#define TSRMG(id, type, element)	(((type) (*((void ***) tsrm_ls))[TSRM_UNSHUFFLE_RSRC_ID(id)])->element)

If I’m reading the code right, there are two indirections.

How would we deal with the same scenario in Zig? What strategy should be employed to maximize performance for both configurations?

chung-leong · December 7, 2023, 11:20am

Think we need some answers from Godbolt:

const std = @import("std");

/// Entry point of the experiment: calls one function with a `usize` argument
/// and one with a `void` argument so the two call sites can be compared.
export fn main() void {
    control(1234); // baseline: an argument that carries data
    subject({}); // `{}` is the value of type `void`
}

/// Baseline: prints an argument of type `usize`.
fn control(arg: usize) void {
    std.debug.print("{any}\n", .{arg});
}

/// Test case: prints an argument of the zero-bit type `void`.
fn subject(arg: void) void {
    std.debug.print("{any}\n", .{arg});
}

main:
        push    rbp
        mov     rbp, rsp
        mov     edi, 1234
        call    example.control
        call    example.subject
        pop     rbp
        ret

example.control:
        push    rbp
        mov     rbp, rsp
        sub     rsp, 16
        mov     qword ptr [rbp - 8], rdi
        lea     rdi, [rbp - 8]
        call    debug.print__anon_306
        add     rsp, 16
        pop     rbp
        ret

example.subject:
        push    rbp
        mov     rbp, rsp
        call    debug.print__anon_307
        pop     rbp
        ret

const std = @import("std");

/// Sample struct; every field has a default, so it can be initialized with `.{}`.
const Burger = struct {
    cheese: u32 = 1,
    patties: u32 = 2,
};

/// Ordinary file-scope global, read by `control()` as the baseline.
var burger1: Burger = .{};

/// Returns a pointer to an instance of `T` held in a container-level variable
/// of a struct declared inside this function.
/// `T` must be initializable with `.{}`.
fn getPointer(comptime T: type) *T {
    const ns = struct {
        // Container-level `var`, not a stack local, so returning its address is fine.
        var object: T = .{};
    };
    return &ns.object;
}

/// Prints the value returned by each accessor so that the code generated for
/// the two can be compared.
export fn main() void {
    std.debug.print("{d}\n", .{control()});
    std.debug.print("{d}\n", .{subject()});
}

/// Baseline: reads a field of the file-scope global `burger1`.
fn control() u32 {
    return burger1.patties;
}

/// Test case: reads the same field through a pointer obtained from
/// `getPointer` at compile time.
fn subject() u32 {
    // `comptime` forces the call to be evaluated during compilation.
    const ptr = comptime getPointer(Burger);
    return ptr.patties;
}

main:
        push    rbp
        mov     rbp, rsp
        sub     rsp, 16
        call    example.control
        mov     dword ptr [rbp - 16], eax
        lea     rdi, [rbp - 16]
        call    debug.print__anon_309
        call    example.subject
        mov     dword ptr [rbp - 8], eax
        lea     rdi, [rbp - 8]
        call    debug.print__anon_310
        add     rsp, 16
        pop     rbp
        ret

example.control:
        push    rbp
        mov     rbp, rsp
        mov     eax, dword ptr [example.burger1+4]
        pop     rbp
        ret

example.subject:
        push    rbp
        mov     rbp, rsp
        mov     eax, dword ptr [example.getPointer.ns.object+4]
        pop     rbp
        ret

const std = @import("std");

var id1: u32 = undefined; // baseline: mutable global of a type that holds data
var id2: void = undefined; // test case: `var` global of the zero-bit type `void`

export fn main() void {
    id1 = control(); // no error is reported for this assignment
    id2 = subject(); // rejected: "cannot assign to constant" (see compiler output below)
}

/// Baseline: yields a `u32` for `main` to store in `id1`.
fn control() u32 {
    const value: u32 = 1234;
    return value;
}

/// Test case: yields the value of the zero-bit type `void`.
fn subject() void {
    return {};
}

<source>:8:5: error: cannot assign to constant
    id2 = subject();
    ^~~
Compiler returned: 1

chung-leong · December 10, 2023, 12:41am

The most obvious way to deal with the problem at hand is to take advantage of the fact that Zig supports 0-bit types. We can simply make the local storage argument empty when compiling for non-thread-safe operation:

// nts.zig
const std = @import("std");

/// Local-storage stand-in for non-thread-safe builds.
///
/// Neither this struct nor `Position` has any fields, so both are zero-bit
/// types. Each "global" is a static variable declared inside `getPointer`.
pub const LocalStorage = struct {
    const Self = @This();

    /// Zero-bit placeholder for a location within the storage.
    pub const Position = struct {};

    /// Returns an empty storage handle; `allocator` is not used.
    pub fn create(allocator: std.mem.Allocator) !Self {
        _ = allocator;
        return Self{};
    }

    /// Returns an empty position; nothing is reserved for `T`.
    pub fn reserve(comptime T: type) Position {
        _ = T;
        return Position{};
    }

    /// Returns a pointer to the static instance of `T`, ignoring both `self`
    /// and `pos`. The instance starts out `undefined`.
    pub fn getPointer(self: Self, comptime T: type, pos: Position) *T {
        _ = pos;
        _ = self;
        const statics = struct {
            var instance: T = undefined;
        };
        return &statics.instance;
    }
};

For thread-safe operation (ts.zig), reserve() would reserve space for the struct and return its offset as a Position, while getPointer() would calculate the address from that offset and the storage’s byte buffer:

const std = @import("std");

/// Local storage for thread-safe builds: every reserved type gets its own slot
/// inside one byte buffer owned by the storage instance.
///
/// All `reserve()` calls must happen before `create()`, because the buffer is
/// sized from the running totals at the moment `create()` runs.
pub const LocalStorage = struct {
    /// The complete allocation as returned by the allocator. It may be up to
    /// `alignment - 1` bytes longer than the reserved size.
    bytes: []u8,
    /// Offset into `bytes` of the first slot, chosen so that the slot area
    /// starts at an address aligned for every reserved type.
    base: usize = 0,

    /// Location of a reserved slot, relative to the aligned start of the buffer.
    pub const Position = struct {
        offset: usize,
    };

    /// Total number of bytes reserved so far.
    var size: usize = 0;
    /// Largest alignment requested by any `reserve()` call so far.
    var alignment: usize = 1;

    /// Returns a pointer to the slot reserved for `T` at `pos`.
    /// The slot's contents are whatever was last written there; a fresh buffer
    /// is uninitialized.
    pub fn getPointer(self: @This(), comptime T: type, pos: Position) *T {
        const start = self.base + pos.offset;
        const end = start + @sizeOf(T);
        // Slicing keeps the bounds check in safe builds; the alignment cast is
        // valid because `base` aligns the slot area and `offset` is aligned for T.
        return @ptrCast(@alignCast(self.bytes[start..end].ptr));
    }

    /// Reserves a suitably aligned slot for `T` and returns its position.
    pub fn reserve(comptime T: type) Position {
        const offset = std.mem.alignForward(usize, size, @alignOf(T));
        size = offset + @sizeOf(T);
        if (@alignOf(T) > alignment) alignment = @alignOf(T);
        return .{ .offset = offset };
    }

    /// Allocates a buffer large enough for every slot reserved so far.
    /// Returns any error from `allocator.alloc`.
    pub fn create(allocator: std.mem.Allocator) !@This() {
        // A `u8` allocation is only guaranteed byte alignment, so over-allocate
        // by the worst-case padding and skip forward to an aligned address.
        const bytes = try allocator.alloc(u8, size + alignment - 1);
        const address = @intFromPtr(bytes.ptr);
        const base = std.mem.alignForward(usize, address, alignment) - address;
        return .{ .bytes = bytes, .base = base };
    }
};

And here’s how you’d employ the code:

const std = @import("std");
const ts = false;
const tsrm = if (ts) @import("./ts.zig") else @import("./nts.zig");
const LocalStorage = tsrm.LocalStorage;

/// First group of globals kept in local storage.
const Globals1 = struct {
    /// Location of this group inside a `LocalStorage`; assigned in `main`
    /// before any accessor runs.
    var pos: LocalStorage.Position = undefined;

    number1: u32,
    number2: u32,
};

/// Second group of globals kept in local storage.
const Globals2 = struct {
    /// Location of this group inside a `LocalStorage`; assigned in `main`
    /// before any accessor runs.
    var pos: LocalStorage.Position = undefined;

    number1: u32,
    number2: u32,
    number3: u32,
};

/// Stores fixed sample values into the `Globals1` instance held by `ls`.
fn set1(ls: LocalStorage) void {
    const globals = ls.getPointer(Globals1, Globals1.pos);
    globals.* = .{ .number1 = 123, .number2 = 456 };
}

/// Prints both fields of the `Globals1` instance held by `ls`.
fn show1(ls: LocalStorage) void {
    const globals = ls.getPointer(Globals1, Globals1.pos);
    const first = globals.number1;
    const second = globals.number2;
    std.debug.print("{d} {d}\n", .{ first, second });
}

/// Stores fixed sample values into the `Globals2` instance held by `ls`.
fn set2(ls: LocalStorage) void {
    // Look up with Globals2.pos. Globals1.pos designates the slot reserved for
    // Globals1, so using it here would alias that struct's storage.
    const g2 = ls.getPointer(Globals2, Globals2.pos);
    g2.number1 = 123;
    g2.number2 = 456;
    g2.number3 = 789;
}

/// Prints all three fields of the `Globals2` instance held by `ls`.
fn show2(ls: LocalStorage) void {
    // Same position as set2 uses, so this reads back what set2 wrote.
    const g2 = ls.getPointer(Globals2, Globals2.pos);
    std.debug.print("{d} {d} {d}\n", .{ g2.number1, g2.number2, g2.number3 });
}

/// Reserves a slot for each globals struct, creates the storage, then writes
/// and prints both structs through it.
///
/// Returns any error produced by `LocalStorage.create`.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    // Never reassigned, so `const`; Zig 0.12 and later reject a `var` that is
    // never mutated.
    const allocator = gpa.allocator();
    // Reservations come first: the thread-safe implementation sizes its buffer
    // from the reservations made before create() is called.
    Globals1.pos = LocalStorage.reserve(Globals1);
    Globals2.pos = LocalStorage.reserve(Globals2);
    // NOTE(review): the storage is never released and `gpa` is never
    // deinitialized, so no leak check runs at exit.
    const ls = try LocalStorage.create(allocator);
    set1(ls);
    set2(ls);
    show1(ls);
    show2(ls);
}

The only wrinkle in this scheme is that functions accepting 0-bit types cannot be exported.