Segmentation fault in embree when using any allocator anywhere in code

Hello good zig people!

I’m replicating the simplest embree example, taken from here: API Examples

It works fine and well,

but

If I try to use an allocator to alloc a slice anywhere in the code (even in the end, in an unreachable branch) - embree call segfaults.

I’m new to ZIG, but this seems highly strange.

example is here:

const std = @import("std");
const print = std.debug.print;
const embree = @cImport({
    @cInclude("embree3/rtcore.h");
});

// Error set returned from main when the vertex buffer request yields null.
const Err = error{general};

/// Builds an embree scene containing a single triangle, casts one ray at it
/// with rtcIntersect1 and prints whether the ray hit.
///
/// Returns `Err.general` if the vertex buffer request returns null.
pub fn main() !void {
    // NOTE(review): device and scene are never released in this function.
    var device = embree.rtcNewDevice(null);
    var scene = embree.rtcNewScene(device);
    var geo = embree.rtcNewGeometry(device, embree.RTC_GEOMETRY_TYPE_TRIANGLE);

    // Vertex buffer: 3 items with a stride of 3 * @sizeOf(f32), i.e. 9 floats.
    var vertex_buff: [*]f32 = @ptrCast(@alignCast(embree.rtcSetNewGeometryBuffer(geo, embree.RTC_BUFFER_TYPE_VERTEX, 0, embree.RTC_FORMAT_FLOAT3, 3 * @sizeOf(f32), 3) orelse {
        return Err.general;
    }));
    vertex_buff[0..9].* = .{
        0.0, 0.0, 0.0,
        1.0, 0.0, 0.0,
        0.0, 1.0, 0.0,
    };
    // Index buffer: 1 item with a stride of 3 * @sizeOf(u32), i.e. 3 indices.
    // NOTE(review): unlike the vertex buffer above, the result is not checked
    // with `orelse` before being cast.
    var index_buff: [*]u32 = @ptrCast(@alignCast(embree.rtcSetNewGeometryBuffer(geo, embree.RTC_BUFFER_TYPE_INDEX, 0, embree.RTC_FORMAT_UINT3, 3 * @sizeOf(u32), 1)));
    index_buff[0..3].* = .{
        0, 1, 2,
    };

    embree.rtcCommitGeometry(geo);
    _ = embree.rtcAttachGeometry(scene, geo);
    embree.rtcReleaseGeometry(geo);
    embree.rtcCommitScene(scene);

    // Only the fields assigned below are initialized; the remaining fields
    // (time, mask, id, flags and most of `hit`) are left undefined.
    // NOTE(review): this variable is declared without an explicit alignment.
    // The crash reported for rtcIntersect1 is on an aligned load (vmovaps)
    // from the ray/hit pointer, which suggests embree expects this structure
    // to be 16-byte aligned and that the stack slot is not always so --
    // confirm against the embree headers/docs. If so,
    // `var rayhit: embree.RTCRayHit align(16) = undefined;` is the fix to try.
    var rayhit: embree.RTCRayHit = undefined;
    rayhit.ray.org_x = 0;
    rayhit.ray.org_y = 0;
    rayhit.ray.org_z = -1;
    rayhit.ray.dir_x = 0;
    rayhit.ray.dir_y = 0;
    rayhit.ray.dir_z = 1;
    rayhit.ray.tnear = 0;
    rayhit.ray.tfar = std.math.inf(f32);
    rayhit.hit.geomID = embree.RTC_INVALID_GEOMETRY_ID;

    var inter_context: embree.RTCIntersectContext = undefined;
    embree.rtcInitIntersectContext(&inter_context);

    // This is the call that segfaults once the commented-out block at the end
    // of the function is enabled.
    embree.rtcIntersect1(scene, &inter_context, &rayhit);

    // geomID still equal to RTC_INVALID_GEOMETRY_ID is treated as "no hit".
    if (rayhit.hit.geomID != embree.RTC_INVALID_GEOMETRY_ID) {
        print("hit at distance {}", .{rayhit.ray.tfar});
    } else {
        print("no hit", .{});
    }

    // Uncommenting the block below makes the rtcIntersect1 call above crash,
    // even though the allocation happens after that call.
    // if (rayhit.hit.geomID == embree.RTC_INVALID_GEOMETRY_ID) {
    //     const alloc: std.mem.Allocator = std.heap.c_allocator;
    //     var data: []f32 = try alloc.alloc(f32, 0);
    //     defer alloc.free(data);
    // }
}

this all is basically one-to-one embree example, with that commented extra code below. If you uncomment that - the call to rtcIntersect1 crashes with

Segmentation fault at address 0x0
???:?:?: 0x7f76a0268098 in ??? (libembree3.so.3)
Unwind information for `libembree3.so.3:0x7f76a0268098` was not available, trace may be incomplete

???:?:?: 0x0 in ??? (???)
Aborted (core dumped)

tried other allocators, such as page_allocator - all same. also the size passed to alloc does not make any difference, it crashes with size 0 as well as size 100k

using zig 0.11.0, embree 3.13.5

does anyone have any insight?

It’s a known problem when a library needs libc.
Link to libc using exe.linkLibC(); or -lc.

i do have it in my build.zig

things I added to default one are these:

    exe.linkLibC();
    exe.addSystemIncludePath(.{ .path = "/usr/include" });
    exe.linkSystemLibrary("embree3");

Since you have a core file, try to get a backtrace using a debugger.
e.g.

zig-out/bin/exe
lldb --core core zig-out/bin/exe
(lldb) bt all

or

zig-out/bin/exe
gdb -c core zig-out/bin/exe
(gdb) bt full

debugging is something i’m not experienced with, but the relevant part here is

#5  <signal handler called>
No symbol table info available.
#6  0x00007fc3dfe00098 in embree::avx2::BVHNIntersector1<4, 1, false, embree::avx2::ArrayIntersector1<embree::avx2::TriangleMIntersector1Moeller<4, true> > >::intersect(embree::Accel::Intersectors const*, embree::RayHitK<1>&, embree::IntersectContext*) () from /usr/lib/libembree3.so.3
No symbol table info available.
#7  0x00007fc3de851ae2 in rtcIntersect1 () from /usr/lib/libembree3.so.3
No symbol table info available.
#8  0x000000000022d8da in bug.main () at bug.zig:46
        device = 0x1671470
        geo = 0x16738d0
        index_buff = 0x16727d0
        scene = 0x16727f0
        vertex_buff = 0x1674750
        rayhit = {ray = {org_x = 0, org_y = 0, org_z = -1, tnear = 0, dir_x = 0, dir_y = 0, dir_z = 1, time = -3.03164883e-13, tfar = inf, mask = 2863311530, id = 2863311530, flags = 2863311530}, 
          hit = {Ng_x = -3.03164883e-13, Ng_y = -3.03164883e-13, Ng_z = -3.03164883e-13, u = -3.03164883e-13, v = -3.03164883e-13, primID = 2863311530, geomID = 4294967295, instID = {2863311530}}}
        inter_context = {flags = 0, filter = 0x0, instID = {4294967295}}

that does not really tell me anything

#7 ... in rtcIntersect1 () from /usr/lib/libembree3.so.3 means the crash is in this call
#8 ... in bug.main () at bug.zig:46 means the call is in main function of module bug, line 46 in file bug.zig

embree.rtcIntersect1(scene, &inter_context, &rayhit);

These are the contents of the variables while executing the line that crash.

scene = 0x16727f0
rayhit = {ray = {org_x = 0, org_y = 0, org_z = -1, tnear = 0, dir_x = 0, dir_y = 0, dir_z = 1, time = -3.03164883e-13, tfar = inf, mask = 2863311530, id = 2863311530, flags = 2863311530}, 
          hit = {Ng_x = -3.03164883e-13, Ng_y = -3.03164883e-13, Ng_z = -3.03164883e-13, u = -3.03164883e-13, v = -3.03164883e-13, primID = 2863311530, geomID = 4294967295, instID = {2863311530}}}
inter_context = {flags = 0, filter = 0x0, instID = {4294967295}}

rayhit is partially initialized. You can zero the entire structure using std.mem.zeros:

      var rayhit = std.mem.zeros(embree.RTCRayHit);
      rayhit.ray.org_x = 0;
      rayhit.ray.org_y = 0;
      rayhit.ray.org_z = -1;
      rayhit.ray.dir_x = 0;
      rayhit.ray.dir_y = 0;
      rayhit.ray.dir_z = 1;
      rayhit.ray.tnear = 0;
      rayhit.ray.tfar = std.math.inf(f32);
      rayhit.hit.geomID = embree.RTC_INVALID_GEOMETRY_ID;

did that, it still segfaults, now with prettier values in the trace :slight_smile:

#5  <signal handler called>
No symbol table info available.
#6  0x00007ff52f95f098 in embree::avx2::BVHNIntersector1<4, 1, false, embree::avx2::ArrayIntersector1<embree::avx2::TriangleMIntersector1Moeller<4, true> > >::intersect(embree::Accel::Intersectors const*, embree::RayHitK<1>&, embree::IntersectContext*) () from /usr/lib/libembree3.so.3
No symbol table info available.
#7  0x00007ff52e3b0ae2 in rtcIntersect1 () from /usr/lib/libembree3.so.3
No symbol table info available.
#8  0x000000000022d91d in bug.main () at bug.zig:46
        device = 0x193b470
        geo = 0x193d8d0
        index_buff = 0x193c7d0
        scene = 0x193c7f0
        vertex_buff = 0x193e750
        rayhit = {ray = {org_x = 0, org_y = 0, org_z = -1, tnear = 0, dir_x = 0, dir_y = 0, dir_z = 1, time = 0, tfar = inf, mask = 0, id = 0, flags = 0}, hit = {Ng_x = 0, Ng_y = 0, Ng_z = 0, 
            u = 0, v = 0, primID = 0, geomID = 4294967295, instID = {0}}}
        inter_context = {flags = 0, filter = 0x0, instID = {4294967295}}

but if not using alloc.alloc in the end of the file - everything works fine either way.
btw using alloc.create / alloc.destroy does not cause a segfault in embree; only the presence of alloc.alloc in the code (even in an unreachable branch) causes it.

Try to build the original hello_embree.cpp using zig c++ hello_embree.cpp, maybe it is a problem with the installed version of the library.

Another idea is that embree3 requires more libraries to work.
Try to link also with:

exe.linkSystemLibrary("tbb");
exe.linkSystemLibrary("glfw3");

zig c++ hello_embree.cpp works fine, just as the zig version without alloc part - they both work fine and as expected, no segfaults, correct result.

it’s only when you add that alloc part in the end when embree starts to crash.

i’ve tried adding tbb - same problem,
and glfw i’m not using here and embree does not depend on it, so it should not be required

Replace the comment with these, to see which fails and which does not fail.

const alloc: std.mem.Allocator = std.heap.c_allocator;
_ = alloc;
const alloc: std.mem.Allocator = std.heap.c_allocator;
_ = try alloc.alloc(f32, 1);
const alloc: std.mem.Allocator = std.heap.c_allocator;
const data = try alloc.alloc(f32, 1);
alloc.free(data);
const alloc: std.mem.Allocator = std.heap.c_allocator;
const data = try alloc.alloc(f32, 1);
defer alloc.free(data);

1 = ok
2 = segfault (same as before)
3 = segfault (same as before)
4 = segfault (same as before)

var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit();
const allocator = gpa.allocator();
_ = try allocator.alloc(f32, 1);

interesting. This works.

but: duplicate the last line, and it segfaults again (in the same place, before reaching this code)

var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit();
const allocator = gpa.allocator();
_ = try allocator.alloc(f32, 1);
_ = try allocator.alloc(f32, 1);

also the size being allocated does not seem to matter. behaviour is the same with 0, 1 and 10000

  1. Generate rtcore.zig from rtcore.h (it’s the same output with @cInclude).
zig translate-c /path/to/embree3/rtcore.h -I /usr/include > rtcore.zig
  2. Find in rtcore.zig all the definitions for the types and functions used in the example.
    e.g. How is embree.RTCIntersectContext defined? We give a pointer to an allocation in stack, is the allocated size correct?

file generated

but i’m not sure how to check for size correctness.

just in case - generated file is here: https://privatebin.net/?c2e743ee3f15f93a#6r89tUAHRaSQARc74wyDb2mGTrrZ42e8nAuaRzsanSdS

and it should correspond to this blob of embree: embree/include/embree3/rtcore_common.h at 698442324ccddd11725fb8875275dc1384f7fb40 · RenderKit/embree · GitHub

looking at RTCIntersectContext - i see some macro ifs and now i wonder, since they can change struct’s size

from what I could gather - zig created correct struct, at least in case of RTCIntersectContext since mentioned inside RTC_MAX_INSTANCE_LEVEL_COUNT should default to 1. actually, how did zig manage to generate it correctly? :slight_smile: how did it know default value should be 1 ? (ok, my bad, found all defines in rtcore_config.h)

run: lldb ./bug

in lldb:

breakpoint set -b bug.main
run

frame variable displays all the local variables
s runs the next line
print name where name is a variable name, prints the variable contents
gui enters a curses ui, s also single steps there

Single step the lines, until the program crashes.
Try to print the variables before crashing.

I am out of ideas.

Thank you for the help so far!

did that,
right before segfault variables are these:

(lldb) frame variable 
(.home.user.projects.zigembree.zig-cache.o.17a1433213ec067c54214c531e7f664f.cimport.struct_RTCDeviceTy *) device = 0x00000000002fd470
(.home.user.projects.zigembree.zig-cache.o.17a1433213ec067c54214c531e7f664f.cimport.struct_RTCSceneTy *) scene = 0x00000000002fe810
(.home.user.projects.zigembree.zig-cache.o.17a1433213ec067c54214c531e7f664f.cimport.struct_RTCGeometryTy *) geo = 0x0000000000300210
(float *) vertex_buff = 0x0000000000300450
(unsigned int *) index_buff = 0x00000000002fe7f0
(.home.user.projects.zigembree.zig-cache.o.17a1433213ec067c54214c531e7f664f.cimport.struct_RTCRayHit) rayhit = {
  ray = {
    org_x = 0
    org_y = 0
    org_z = -1
    tnear = 0
    dir_x = 0
    dir_y = 0
    dir_z = 1
    time = 0
    tfar = +Inf
    mask = 1
    id = 0
    flags = 0
  }
  hit = {
    Ng_x = 0
    Ng_y = 0
    Ng_z = 0
    u = 0
    v = 0
    primID = 0
    geomID = 4294967295
    instID = ([0] = 0)
  }
}
(.home.user.projects.zigembree.zig-cache.o.17a1433213ec067c54214c531e7f664f.cimport.struct_RTCIntersectContext) inter_context = {
  flags = 0
  filter = 0x0000000000000000
  instID = ([0] = 4294967295)
}
(lldb) 

next s results in crash:

(lldb) s
Process 549 stopped
* thread #1, name = 'zigembree', stop reason = signal SIGSEGV: invalid address (fault address: 0x0)
    frame #0: 0x00007ffff720c098 libembree3.so.3`embree::avx2::BVHNIntersector1<4, 1, false, embree::avx2::ArrayIntersector1<embree::avx2::TriangleMIntersector1Moeller<4, true>>>::intersect(embree::Accel::Intersectors const*, embree::RayHitK<1>&, embree::IntersectContext*) + 72
libembree3.so.3`embree::avx2::BVHNIntersector1<4, 1, false, embree::avx2::ArrayIntersector1<embree::avx2::TriangleMIntersector1Moeller<4, true>>>::intersect:
->  0x7ffff720c098 <+72>: vmovaps 0x10(%rsi), %xmm5
    0x7ffff720c09d <+77>: vmovss 0x20(%rsi), %xmm10        ; xmm10 = mem[0],zero,zero,zero 
    0x7ffff720c0a2 <+82>: movl   $0x0, 0x228(%rsp)
    0x7ffff720c0ad <+93>: leaq   0x220(%rsp), %r11

and this is frame variable of the working binary from the same point, everything seems to be the same, except addresses

(lldb) frame variable 
(.home.user.projects.zigembree.zig-cache.o.17a1433213ec067c54214c531e7f664f.cimport.struct_RTCDeviceTy *) device = 0x00000000002ec470
(.home.user.projects.zigembree.zig-cache.o.17a1433213ec067c54214c531e7f664f.cimport.struct_RTCSceneTy *) scene = 0x00000000002ed810
(.home.user.projects.zigembree.zig-cache.o.17a1433213ec067c54214c531e7f664f.cimport.struct_RTCGeometryTy *) geo = 0x00000000002ef210
(float *) vertex_buff = 0x00000000002ef450
(unsigned int *) index_buff = 0x00000000002ed7f0
(.home.user.projects.zigembree.zig-cache.o.17a1433213ec067c54214c531e7f664f.cimport.struct_RTCRayHit) rayhit = {
  ray = {
    org_x = 0
    org_y = 0
    org_z = -1
    tnear = 0
    dir_x = 0
    dir_y = 0
    dir_z = 1
    time = 0
    tfar = +Inf
    mask = 1
    id = 0
    flags = 0
  }
  hit = {
    Ng_x = 0
    Ng_y = 0
    Ng_z = 0
    u = 0
    v = 0
    primID = 0
    geomID = 4294967295
    instID = ([0] = 0)
  }
}
(.home.user.projects.zigembree.zig-cache.o.17a1433213ec067c54214c531e7f664f.cimport.struct_RTCIntersectContext) inter_context = {
  flags = 0
  filter = 0x0000000000000000
  instID = ([0] = 4294967295)
}

Try to build for a generic target like: -target x86_64-linux-gnu or -Dtarget=x86_64-linux-gnu.