Cryptography performance of Zig vs Go

I am busy building a SigV4 package (for S3 authentication). When benchmarking the signing process with -Doptimize=ReleaseFast, it takes 3575ns, which seems fast enough. However, I made the exact same package in Go about 2 months ago and managed to do this in closer to 1000ns. I am running on an M2 Pro, so I am not sure if there are some ARM-specific optimizations that Go applies and Zig does not apply by default. It is worth pointing out that I am still in my first month of writing Zig, so if there are any further optimizations I could make to the code below, please let me know.

```

const std = @import("std");
const Allocator = std.mem.Allocator;
const ArrayList = std.ArrayList;
const HmacSha256 = std.crypto.auth.hmac.sha2.HmacSha256;
const assert = std.debug.assert;
const fmt = std.fmt;
const testing = std.testing;
const Bench = @import("bench").Bench;
const Mutex = std.Io.Mutex;
const Io = std.Io;

test {
    // Known-answer check: sign a fixed timestamp and canonical-request hash
    // with an example key and compare against the expected hex signature.
    const timestamp = "20130524T000000Z".*;
    const request_hash = "7344ae5b7ee6c3e7e6b0fe0640412a37625d1fbfff95c48bbb2dc43964946972".*;
    const expected_signature = "f0e8bdb87c964420e857bd35b5d6ed310bd44f0170aba48dd91039c6036bdb41";

    var signer: Signer = .init(testing.io, .{
        .algorithm = "AWS4-HMAC-SHA256",
        .key = "AWS4wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
        .region = "us-east-1",
        .service = "s3",
        .request_type = "aws4_request",
    });

    const actual_signature = try signer.sign(testing.allocator, timestamp, request_hash);
    try testing.expectEqualStrings(expected_signature, &actual_signature);

    // Benchmark the same call and require that signing never allocates.
    var bench: Bench = try .init(".benchmarks/Signer", testing.io, std.heap.smp_allocator);
    const result = try bench.run("sign", sign, .{ &signer, bench.allocator(), timestamp, request_hash });
    try testing.expectEqual(0, result.allocs_per_call);
}

// Refer to following doc for best overview of how signature is created.
// https://docs.aws.amazon.com/images/AmazonS3/latest/API/images/sigV4-auth-header-chunked-seed-signature.png
//
// This file is itself the `Signer` struct: the fields below are its state and
// the functions in this file are its methods.
const Signer = @This();

/// Static signing parameters (secret key, region, service, ...) supplied at construction.
config: Config,
/// Date (`yyyymmdd`, the first 8 bytes of a request timestamp) that
/// `last_date_signing_key` was derived for. Compared against each request's
/// date to decide whether the cached key can be reused.
/// Not meaningful until the first signing key has been derived.
last_date: [8]u8 = undefined,
/// Cached result of the four-step HMAC-SHA256 key derivation for `last_date`;
/// used as the HMAC key when signing.
/// Not meaningful until the first signing key has been derived.
last_date_signing_key: [32]u8 = undefined,
/// Guards updates to `last_date` and `last_date_signing_key`.
mutex: Mutex = .init,
/// I/O implementation passed to the mutex lock/unlock calls.
io: Io,

/// Static parameters for a signer. All fields are borrowed slices; nothing is
/// copied, so the referenced memory has to stay valid for as long as the
/// signer that stores this config is in use.
const Config = struct {
    /// Mostly AWS4-HMAC-SHA256 or GOOG4-HMAC-SHA256.
    /// Written as the first line of the string to sign.
    algorithm: []const u8,
    /// Format: AWS4{SecretAccessKey}
    /// `init` asserts the "AWS4" prefix and a non-empty secret after it.
    key: []const u8,
    /// E.g. "auto" or "us-east-1"
    region: []const u8,
    /// E.g. "s3" or "kms"
    service: []const u8,
    /// E.g. "aws4_request" or "goog4_request"
    request_type: []const u8,
};

/// Creates a signer that signs with `config` and performs its mutex
/// operations through `io`.
///
/// `config` is stored as-is: its slices are borrowed, not copied, so they must
/// outlive the returned signer.
///
/// Asserts that `config.key` starts with "AWS4" and has at least one byte of
/// secret after that prefix.
pub inline fn init(io: Io, config: Config) Signer {
    // The key must be "AWS4" followed by a non-empty secret.
    assert(config.key.len > 4);
    assert(std.mem.startsWith(u8, config.key, "AWS4"));

    return .{
        .io = io,
        .config = config,
        // Start from a defined state. An all-zero date can never equal the
        // ASCII `yyyymmdd` prefix of a real timestamp, so the first signing
        // call is guaranteed to derive the key instead of comparing against
        // (and possibly trusting) uninitialized memory.
        .last_date = @splat(0),
        .last_date_signing_key = @splat(0),
    };
}

// DateKey              = HMAC-SHA256 ("AWS4" + "<SecretAccessKey>", "<yyyymmdd>")
// DateRegionKey        = HMAC-SHA256(DateKey, "<aws-region>")
// DateRegionServiceKey = HMAC-SHA256(DateRegionKey, "<aws-service>")
// SigningKey           = HMAC-SHA256(DateRegionServiceKey, "aws4_request")
//
// Signature = hex(HMAC-SHA256(SigningKey, StringToSign))

/// Runs the four-step key derivation shown above for `date_string` and returns
/// the resulting signing key. Pure function: touches no signer state, and every
/// step writes to its own buffer so no HMAC call has its output aliasing its key.
fn deriveSigningKey(config: Config, date_string: [8]u8) [32]u8 {
    var date_key: [32]u8 = undefined;
    HmacSha256.create(&date_key, &date_string, config.key);

    var date_region_key: [32]u8 = undefined;
    HmacSha256.create(&date_region_key, config.region, &date_key);

    var date_region_service_key: [32]u8 = undefined;
    HmacSha256.create(&date_region_service_key, config.service, &date_region_key);

    var signing_key: [32]u8 = undefined;
    HmacSha256.create(&signing_key, config.request_type, &date_region_service_key);
    return signing_key;
}

/// Returns a copy of the signing key for `date_string`, re-deriving and caching
/// it first when the cached key belongs to a different date.
///
/// The date check, the cache update and the copy all happen while holding
/// `mutex`, so a caller never observes a key that is being rewritten or that
/// belongs to another date.
///
/// Fails only if acquiring the mutex fails.
fn signingKeyFor(self: *Signer, date_string: [8]u8) ![32]u8 {
    try self.mutex.lock(self.io);
    defer self.mutex.unlock(self.io);

    if (!std.mem.eql(u8, &self.last_date, &date_string)) {
        // Derive into a temporary first, then publish key and date together.
        self.last_date_signing_key = deriveSigningKey(self.config, date_string);
        self.last_date = date_string;
    }
    return self.last_date_signing_key;
}

/// Ensures the cached signing key matches `date_string` (`yyyymmdd`),
/// re-deriving it if the cached key was computed for a different date.
/// Does nothing beyond taking the lock when the date is unchanged.
///
/// Fails only if acquiring the mutex fails.
pub fn resetSigningKey(self: *Signer, date_string: [8]u8) !void {
    _ = try self.signingKeyFor(date_string);
}

/// Computes the lowercase hex signature for one request.
///
/// The string to sign is never materialized; its parts are streamed into the
/// HMAC in this order:
///
///     <algorithm> "\n" <timestamp> "\n"
///     <yyyymmdd> "/" <region> "/" <service> "/" <request_type> "\n"
///     <canonical_request_hash>
///
/// Returns the 64-character hex encoding of the 32-byte HMAC-SHA256 result.
/// Fails only if acquiring the mutex fails.
pub fn sign(
    self: *Signer,
    /// Unused: signing performs no allocation. Kept so existing callers compile.
    allocator: Allocator,
    /// UTC time in ISO 8601 Basic Format
    /// E.g. 20260501T120524Z
    timestamp_string: [16]u8,
    /// Sha256 hash of canonical request
    canonical_request_hash: [64]u8,
) ![64]u8 {
    _ = allocator;

    // The signing key only depends on the date part of the timestamp.
    const date_string: [8]u8 = timestamp_string[0..8].*;

    // Work on a private copy taken under the lock; the HMAC itself then runs
    // without holding the mutex, so concurrent signers do not serialize on it.
    const signing_key = try self.signingKeyFor(date_string);

    var hasher: HmacSha256 = .init(&signing_key);
    hasher.update(self.config.algorithm);
    hasher.update("\n");
    hasher.update(&timestamp_string);
    hasher.update("\n");
    // Credential scope: <date>/<region>/<service>/<request_type>
    hasher.update(&date_string);
    hasher.update("/");
    hasher.update(self.config.region);
    hasher.update("/");
    hasher.update(self.config.service);
    hasher.update("/");
    hasher.update(self.config.request_type);
    hasher.update("\n");
    hasher.update(&canonical_request_hash);

    var out: [32]u8 = undefined;
    hasher.final(&out);
    return fmt.bytesToHex(out, .lower);
}