Writing a program using TurboJPEG. I have a C version and a Zig version. The C version completely blows the Zig version out of the water. What am I doing wrong?
Zig code:
const std = @import("std");
const c = @cImport({
@cInclude("turbojpeg.h");
});
const stdout = std.io.getStdOut().writer();
// const density = "N@#W$987654321!abc;:+=-,._ ";
pub fn main() !void {
const allocator = std.heap.c_allocator;
var arena = std.heap.ArenaAllocator.init(allocator);
defer arena.deinit();
const alloc = arena.allocator();
if (std.os.argv.len != 2) {
std.debug.print("Usage: {s} <JPEG file>\n", .{std.os.argv[0]});
return;
}
var args = std.process.ArgIteratorPosix.init();
_ = args.skip();
const filename = args.next().?;
var file = try std.fs.cwd().openFile(filename, .{});
defer file.close();
const file_size = try file.getEndPos();
const jpeg_buf = try alloc.alloc(u8, file_size);
defer alloc.free(jpeg_buf);
_ = try file.read(jpeg_buf);
const tj_instance = c.tjInitDecompress();
if (tj_instance == null) {
std.debug.print("Error initializing TurboJPEG decompressor: {s}\n", .{c.tjGetErrorStr()});
return;
}
var width: c_int = 0;
var height: c_int = 0;
var jpeg_subsamp: c_int = 0;
var jpeg_colorspace: c_int = 0;
const jpeg_len: c_ulong = @intCast(jpeg_buf.len);
if (c.tjDecompressHeader3(tj_instance, jpeg_buf.ptr, jpeg_len, &width, &height, &jpeg_subsamp, &jpeg_colorspace) != 0) {
std.debug.print("Error reading JPEG header: {s}\n", .{c.tjGetErrorStr()});
return;
}
const pixel_size = 3;
const img_buf_size: usize = @intCast(width * height * pixel_size);
const img_buf = try alloc.alloc(u8, img_buf_size);
if (c.tjDecompress2(tj_instance, jpeg_buf.ptr, jpeg_len, img_buf.ptr, width, 0, height, c.TJPF_RGB, 0) != 0) {
std.debug.print("Error decompressing JPEG image: {s}\n", .{c.tjGetErrorStr()});
return;
}
_ = c.tjDestroy(tj_instance);
var i: usize = 0;
while (i < width * height) : (i += 1) {
const r: u32 = img_buf[i * pixel_size];
const g: u32 = img_buf[i * pixel_size + 1];
const b: u32 = img_buf[i * pixel_size + 2];
const rf: f32 = @floatFromInt(r);
const gf: f32 = @floatFromInt(g);
const bf: f32 = @floatFromInt(b);
const rgray: f32 = 0.299 * rf;
const ggray: f32 = 0.587 * gf;
const bgray: f32 = 0.114 * bf;
const gray: u8 = @intFromFloat(rgray + ggray + bgray);
try stdout.print("Pixel {d}: Brightness={d}\n", .{ i, gray });
// std.debug.print("Pixel {d}: Brightness={d}\n", .{ i, gray });
// std.debug.print("Pixel {d}: R={d}, G={d}, B={d}\n", .{i, img_buf[i * pixel_size], img_buf[i * pixel_size + 1], img_buf[i * pixel_size + 2]});
}
// std.debug.print("Image dimensions: {d} x {d}\n", .{ width, height });
}
C:
#include <stdio.h>
#include <stdlib.h>
#include <turbojpeg.h>
#include <string.h>
int main(int argc, char *argv[]) {
if (argc != 2) {
fprintf(stderr, "Usage: %s <JPEG file>\n", argv[0]);
return EXIT_FAILURE;
}
const char *filename = argv[1];
FILE *jpegFile = fopen(filename, "rb");
if (!jpegFile) {
perror("Error opening file");
return EXIT_FAILURE;
}
fseek(jpegFile, 0, SEEK_END);
long jpegSize = ftell(jpegFile);
fseek(jpegFile, 0, SEEK_SET);
unsigned char *jpegBuf = (unsigned char *)malloc(jpegSize);
if (!jpegBuf) {
perror("Error allocating memory");
fclose(jpegFile);
return EXIT_FAILURE;
}
fread(jpegBuf, jpegSize, 1, jpegFile);
fclose(jpegFile);
tjhandle tjInstance = tjInitDecompress();
if (!tjInstance) {
fprintf(stderr, "Error initializing TurboJPEG decompressor: %s\n", tjGetErrorStr());
free(jpegBuf);
return EXIT_FAILURE;
}
int width, height, jpegSubsamp, jpegColorspace;
if (tjDecompressHeader3(tjInstance, jpegBuf, jpegSize, &width, &height, &jpegSubsamp, &jpegColorspace) != 0) {
fprintf(stderr, "Error reading JPEG header: %s\n", tjGetErrorStr());
tjDestroy(tjInstance);
free(jpegBuf);
return EXIT_FAILURE;
}
printf("Image dimensions: %d x %d\n", width, height);
unsigned char *imgBuf = (unsigned char *)malloc(width * height * tjPixelSize[TJPF_RGB]);
if (!imgBuf) {
perror("Error allocating memory");
tjDestroy(tjInstance);
free(jpegBuf);
return EXIT_FAILURE;
}
if (tjDecompress2(tjInstance, jpegBuf, jpegSize, imgBuf, width, 0 /* pitch */, height, TJPF_RGB, 0) != 0) {
fprintf(stderr, "Error decompressing JPEG image: %s\n", tjGetErrorStr());
tjDestroy(tjInstance);
free(jpegBuf);
free(imgBuf);
return EXIT_FAILURE;
}
tjDestroy(tjInstance);
free(jpegBuf);
for (int i = 0; i < width * height; i++) {
int r = imgBuf[i * 3];
int g = imgBuf[i * 3 + 1];
int b = imgBuf[i * 3 + 2];
unsigned char gray = (unsigned char)(0.299 * r + 0.587 * g + 0.114 * b);
printf("Pixel %d: Brightness=%d\n", i, gray);
}
free(imgBuf);
return EXIT_SUCCESS;
}
using gcc 13.2.1 20240210
zig 0.14.0-dev.43+96501d338
performance tests using hyperfine on a clean reboot:
hyperfine -r 10 "/home/wizard/fun/c/things/tjpg/decode_jpeg /home/wizard/downloads/suisei-pfp.jpg" "/home/wizard/fun/zig/learn/tag/zig-out/bin/tag /home/wizard/downloads/suisei-pfp.jpg"
Benchmark 1: /home/wizard/fun/c/things/tjpg/decode_jpeg /home/wizard/downloads/suisei-pfp.jpg
Time (mean ± σ): 154.5 ms ± 4.0 ms [User: 148.0 ms, System: 5.6 ms]
Range (min … max): 149.3 ms … 163.4 ms 10 runs
Benchmark 2: /home/wizard/fun/zig/learn/tag/zig-out/bin/tag /home/wizard/downloads/suisei-pfp.jpg
Time (mean ± σ): 1.894 s ± 0.074 s [User: 0.777 s, System: 1.144 s]
Range (min … max): 1.746 s … 2.002 s 10 runs
Summary
/home/wizard/fun/c/things/tjpg/decode_jpeg /home/wizard/downloads/suisei-pfp.jpg ran
12.26 ± 0.58 times faster than /home/wizard/fun/zig/learn/tag/zig-out/bin/tag /home/wizard/downloads/suisei-pfp.jpg
clang benchmarks using 17.0.6
wizard@oz ~/fun/zig/learn/tag/src $ hyperfine -r 10 "/home/wizard/fun/c/things/tjpg/clang_jpeg /home/wizard/downloads/suisei-pfp.jpg" "/home/wizard/fun/zig/learn/tag/zig-out/bin/tag /home/wizard/downloads/suisei-pfp.jpg"
Benchmark 1: /home/wizard/fun/c/things/tjpg/clang_jpeg /home/wizard/downloads/suisei-pfp.jpg
Time (mean ± σ): 153.6 ms ± 3.6 ms [User: 148.8 ms, System: 4.6 ms]
Range (min … max): 147.1 ms … 157.3 ms 10 runs
Benchmark 2: /home/wizard/fun/zig/learn/tag/zig-out/bin/tag /home/wizard/downloads/suisei-pfp.jpg
Time (mean ± σ): 1.945 s ± 0.058 s [User: 0.742 s, System: 1.201 s]
Range (min … max): 1.882 s … 2.015 s 10 runs
Summary
/home/wizard/fun/c/things/tjpg/clang_jpeg /home/wizard/downloads/suisei-pfp.jpg ran
12.66 ± 0.48 times faster than /home/wizard/fun/zig/learn/tag/zig-out/bin/tag /home/wizard/downloads/suisei-pfp.jpg