Skiplist fail, how to use std.rand.Random?

Hi,

I have written the following Skiplist implementation based on Pugh’s paper:

const std = @import("std");

pub fn Pugh(
    comptime K: anytype,
    comptime V: anytype,
    context: anytype,
    comptime lessThan: fn (@TypeOf(context), lhs: K, rhs: K) bool,
) type {
    return struct {
        rnd: std.rand.Random,
        allocator: std.mem.Allocator,
        /// Determines average skip size for each level of SkipList. Equals `1/P`.
        skip_size: f32,
        /// Maximum number of levels in the SkipList.
        max_level: usize = undefined,
        /// Current maximum level of the SkipList.
        level: usize = 0,
        /// Starting Node pointer of the SkipList.
        head: *Node,

        pub const Node = struct {
            key: ?K,
            value: V,
            forward: []?*Node,

            fn init(allocator: std.mem.Allocator, key: K, level: usize) !*@This() {
                var n = try allocator.create(Node);
                n.key = key;
                n.forward = try allocator.alloc(?*Node, level + 1);
                std.mem.set(?*Node, n.forward, null);
                return n;
            }

            fn deinit(self: *@This(), allocator: std.mem.Allocator) void {
                allocator.free(self.forward);
                allocator.destroy(self);
            }
        };

        fn getMaxLevel(self: @This(), n: usize) usize {
            return @floatToInt(usize, std.math.log(f32, self.skip_size, @intToFloat(f32, n)));
        }

        pub fn init(
            /// Allocator to use when creating SkipList and its Node items.
            allocator: std.mem.Allocator,
            /// Number of items to skip with each level. This is a probabilistic value.
            skip_size: f32,
            /// Maximum number of items to be stored in the SkipList.
            /// This is not a hard limit, but SkipList performance will degrade when badly choosen.
            n_max: usize,
        ) !@This() {
            if (skip_size <= 1) {
                std.log.err("skip_size {d} not allowed, must be > 1", .{skip_size});
                return error.InvalidSkipSize;
            }
            var rnd_tmp = std.rand.DefaultPrng.init(@intCast(u64, std.time.milliTimestamp()));
            var sl = @This(){
                .rnd = rnd_tmp.random(),
                .skip_size = skip_size,
                .allocator = allocator,
                .head = undefined,
            };
            sl.max_level = sl.getMaxLevel(n_max);
            // XXX: removing `catch` causing segfault
            sl.head = Node.init(sl.allocator, 0, sl.max_level) catch unreachable;
            sl.head.key = null;
            return sl;
        }

        pub fn deinit(self: @This()) void {
            _ = self;
            var current: ?*Node = self.head;
            while (true) {
                const next = current.?.forward[0];

                self.allocator.free(current.?.forward);
                self.allocator.destroy(current.?);

                if (next == null) break;

                current = next;
            }
        }

        pub fn search(self: @This(), search_key: K) ?*Node {
            var current: *Node = self.head;
            var level: usize = self.max_level + 1;
            while (level > 0) : (level -= 1) {
                while (current.forward[level - 1]) |next| : (current = next) {
                    if (!lessThan(context, next.key.?, search_key)) break;
                }
            }
            current = current.forward[0].?;
            if (current.key == search_key) return current else return null;
        }

        fn randomLevel(self: @This()) usize {
            var level: usize = 0;
            // while (self.rnd.float(f32) < (1.0 / self.skip_size) and
            //     level < self.max_level) level += 1;
            while (level < self.max_level) {
                const r = self.rnd.float(f32);
                // FIXME: strange r values...
                std.debug.print("r: {d}\n", .{r});
                if (r < 1 / self.skip_size) level += 1 else break;
            }
            std.debug.print("{d}\n", .{level});
            return level;
        }

        pub fn insert(self: *@This(), insert_key: K, insert_value: V) !void {
            var current: ?*Node = self.head;

            // create update array for `*Node` items which should be updated
            var update = try self.allocator.alloc(?*Node, self.max_level + 1);
            std.mem.set(?*Node, update, null);
            defer self.allocator.free(update);

            // Start from highest level move the current pointer forward while `insert_key`
            // is greater than key of node next to current. Otherwise inserted current in
            // update and move one level down and continue search.
            var l: usize = self.level + 1;
            while (l > 0) : (l -= 1) {
                while (current.?.forward[l - 1]) |next| : (current = next) {
                    if (next.key.? >= insert_key) break;
                }
                std.debug.assert(current != null);
                update[l - 1] = current;
            }

            // reached level 0 and forward pointer to right, which is the desired
            // point of insertion
            current = current.?.forward[0];

            // If current is null, then we have reached the end of the level.
            // If it is not null and current.key == insert_key, then key already exists.
            if (current != null and current.?.key == insert_key) return;

            // We have to insert our new Node.
            const rlevel = self.randomLevel();

            if (rlevel > self.level) {
                var level: usize = self.level + 1;
                while (level < rlevel + 1) : (level += 1) {
                    update[level] = self.head;
                }

                // update level
                self.level = rlevel;
            }

            // create node with rlevel
            const n = try Node.init(self.allocator, insert_key, rlevel);
            n.value = insert_value;

            // insert new node to SkipList
            var i: usize = 0;
            while (i <= rlevel) : (i += 1) {
                n.forward[i] = update[i].?.forward[i];
                update[i].?.forward[i] = n;
            }
        }

        pub fn display(self: @This()) void {
            std.debug.print("SkipList structure - P:{d}, max_level: {d}\n", .{ 1 / self.skip_size, self.max_level });
            var level: usize = self.max_level + 1;
            while (level > 0) : (level -= 1) {
                var node: ?*Node = self.head.forward[level - 1];
                std.debug.print("Level {d}: ", .{level - 1});
                while (node != null) {
                    std.debug.print("{any} ", .{node.?.key});
                    node = node.?.forward[level - 1];
                }
                std.debug.print("\n", .{});
            }
        }

        pub fn remove(self: *@This(), delete_key: K) !bool {
            var current: ?*Node = self.head;

            // create update array for `*Node` items which should be updated
            var update = try self.allocator.alloc(?*Node, self.max_level + 1);
            std.mem.set(?*Node, update, null);
            defer self.allocator.free(update);

            // Start from highest level move the current pointer forward while `delete_key`
            // is greater than key of node next to current. Otherwise inserted current in
            // update and move one level down and continue search.
            var l: usize = self.level + 1;
            while (l > 0) : (l -= 1) {
                while (current.?.forward[l - 1]) |next| : (current = next) {
                    if (next.key.? >= delete_key) break;
                }
                std.debug.assert(current != null);
                update[l - 1] = current;
            }

            // Reached level 0 and forward pointer to right, which is the desired
            // point of deletion.
            current = current.?.forward[0];

            // If current is `null` or `current.key != delete_key` the item is not
            // in the SkipList.
            if (current == null or current.?.key != delete_key) return false;

            // We have found the node to delete.
            var level: usize = 0;
            while (level <= self.level) : (level += 1) {
                // If next Node is not the current one then we do not need to
                // update any more Nodes.
                if (update[level].?.forward[level] != current) break;

                update[level].?.forward[level] = current.?.forward[level];
            }

            // Remove levels without elements.
            level = 1;
            while (level < self.level) : (level += 1) {
                if (self.head.forward[level] == null) self.level -= 1;
            }

            current.?.deinit(self.allocator);

            return true;
        }
    };
}

pub fn main() anyerror!void {
    const KeyType = usize;
    const SL = Pugh(KeyType, void, {}, comptime std.sort.asc(KeyType));
    var sl = try SL.init(std.heap.page_allocator, 4, 16);
    defer sl.deinit();

    try sl.insert(3, {});
    try sl.insert(6, {});
    try sl.insert(7, {});
    try sl.insert(9, {});
    try sl.insert(0, {});
    try sl.insert(std.math.minInt(KeyType), {});
    try sl.insert(12, {});
    try sl.insert(12, {}); // does nothing
    try sl.insert(12, {}); // does nothing
    try sl.insert(19, {});
    try sl.insert(17, {});
    try sl.insert(26, {});
    try sl.insert(21, {});
    try sl.insert(25, {});

    sl.display();

    try std.testing.expectEqual(@as(KeyType, 12), sl.search(12).?.key.?);
    try std.testing.expectEqual(@as(KeyType, 26), sl.search(26).?.key.?);
    try std.testing.expectEqual(@as(?*SL.Node, null), sl.search(13));

    try std.testing.expectEqual(true, try sl.remove(12));
    try std.testing.expectEqual(false, try sl.remove(12));

    _ = try sl.remove(std.math.minInt(KeyType));
}

It only compiles with -fno-stage1 because of some error.
When I run it on my rpi4 the output is a bit strange:

voroskoi ~/code/skiplist.zig via ↯ v0.10.0-dev.3385+c0a1b4fa4 ❯ zig run src/pugh.zig -fno-stage1
r: 0.9147371053695679
0
r: 0.14794936776161194
r: 0.7877211570739746
1
r: 0.3927851617336273
0
r: 0.8867189884185791
0
r: 0.000013530261639971286
r: 0.8805093765258789
1
r: 0.8242531418800354
0
r: 0.001243592007085681
r: 0.001155853969976306
2
r: 0.0000015804827171450597
r: 0.8286284804344177
1
r: 0.000012576623703353107
r: 0.4579407572746277
1
r: 0.051516659557819366
r: 0.4122631549835205
1
r: 0.010437771677970886
r: 0.084312804043293
2
SkipList structure - P:0.25, max_level: 2
Level 2: 19 25
Level 1: 0 6 17 19 21 25 26
Level 0: 0 3 6 7 9 12 17 19 21 25 26

Notice the very small r values. As skip_size is 4, there are way too much items in level 1.

On my amd64 box this code does not even compile: terminated by signal SIGSEGV (Address boundary error), which makes me think there is something wrong with my random initialization/use, but the error message does not give me any clue where to look.

I have built a debug version of the compiler, which runs the code and the output is below:

~/code/skiplist.zig via ↯ v0.10.0 ❯ ~/code/zig/build/zig run src/pugh.zig -fno-stage1
r: 0.00000000000000000000000020679515313825692
r: 0.7525559067726135
1
r: 0.00000000000000000000000041359030627651385
r: 0.9998949766159058
1
r: 0.00000000000000000000000041359030627651385
r: 0.6141244769096375
1
r: 0.00000000000000000000000041359030627651385
r: 0.0156288743019104
2
r: 0.00000000000000000000000041359030627651385
r: 0.7501263618469238
1
r: 0.00000000000000000000000041359030627651385
r: 0.7501254081726074
1
r: 0.00000000000000000000000041359030627651385
r: 0.8751866817474365
1
r: 0.00000000000000000000000041359030627651385
r: 0.8751866817474365
1
r: 0.00000000000000000000000041359030627651385
r: 0.8751866817474365
1
r: 0.00000000000000000000000041359030627651385
r: 0.8751866817474365
1
r: 0.00000000000000000000000041359030627651385
r: 0.8751866817474365
1
SkipList structure - P:0.25, max_level: 2
Level 2: 9 
Level 1: 0 3 6 7 9 12 17 19 21 25 26 
Level 0: 0 3 6 7 9 12 17 19 21 25 26 

This gives even more bad random values. Where should I look?

1 Like

I haven’t read all your code but I can at least point out the most obvious issue with it.
std.rand.Random is an interface. As such, it contains pointers to a concrete implementation, which would be an instance of std.rand.DefaultPrng in your code.

Unfortunately your code is storing the concrete implementation in a local variable that goes out of scope as soon as you return from init().

My recommended solution would be to store the concrete implementation inside the struct, instead of just the interface, and instead create an interface instance (ie call .random()) whenever you need to get a random number.

1 Like

By reading the source code of DefaultPrng which is an alias of Xoshiro256. If your intention is to support different random implementation, there is a way to work around.

The init function() of Xoshiro256:

pub fn init(init_s: u64) Xoshiro256 {
    var x = Xoshiro256{
        .s = undefined,
    };

    x.seed(init_s);
    return x;
}

The problem here is the s field is on stack. We can just manually alloc the data on heap.

var slice = try allocator.alloc([4]u64, 1);
var rnd_tmp = std.rand.DefaultPrng { .s = slice[0] };
rnd_tmp.seed(@intCast(u64, std.time.milliTimestamp()));
var sl = @This(){
    .rnd = rnd_tmp.random(),
    .skip_size = skip_size,
    .allocator = allocator,
    .head = undefined,
};

(NOTE: above code not tested, just the idea)
And don’t forgot free the memory when deinit the struct. You may find once DefaultPrng saved as Random there is no way to deinit it, because the type information is lost, so a wrapper struct is required to have the ability to deinit the .rnd field.

So, in your case use std.rand.DefaultPrng as rnd field’s type is a more clean and easy way to go.

1 Like