Hi,
I have written the following Skiplist implementation based on Pugh’s paper:
const std = @import("std");
pub fn Pugh(
comptime K: anytype,
comptime V: anytype,
context: anytype,
comptime lessThan: fn (@TypeOf(context), lhs: K, rhs: K) bool,
) type {
return struct {
rnd: std.rand.Random,
allocator: std.mem.Allocator,
/// Determines average skip size for each level of SkipList. Equals `1/P`.
skip_size: f32,
/// Maximum number of levels in the SkipList.
max_level: usize = undefined,
/// Current maximum level of the SkipList.
level: usize = 0,
/// Starting Node pointer of the SkipList.
head: *Node,
pub const Node = struct {
key: ?K,
value: V,
forward: []?*Node,
fn init(allocator: std.mem.Allocator, key: K, level: usize) !*@This() {
var n = try allocator.create(Node);
n.key = key;
n.forward = try allocator.alloc(?*Node, level + 1);
std.mem.set(?*Node, n.forward, null);
return n;
}
fn deinit(self: *@This(), allocator: std.mem.Allocator) void {
allocator.free(self.forward);
allocator.destroy(self);
}
};
fn getMaxLevel(self: @This(), n: usize) usize {
return @floatToInt(usize, std.math.log(f32, self.skip_size, @intToFloat(f32, n)));
}
pub fn init(
/// Allocator to use when creating SkipList and its Node items.
allocator: std.mem.Allocator,
/// Number of items to skip with each level. This is a probabilistic value.
skip_size: f32,
/// Maximum number of items to be stored in the SkipList.
/// This is not a hard limit, but SkipList performance will degrade when badly choosen.
n_max: usize,
) !@This() {
if (skip_size <= 1) {
std.log.err("skip_size {d} not allowed, must be > 1", .{skip_size});
return error.InvalidSkipSize;
}
var rnd_tmp = std.rand.DefaultPrng.init(@intCast(u64, std.time.milliTimestamp()));
var sl = @This(){
.rnd = rnd_tmp.random(),
.skip_size = skip_size,
.allocator = allocator,
.head = undefined,
};
sl.max_level = sl.getMaxLevel(n_max);
// XXX: removing `catch` causing segfault
sl.head = Node.init(sl.allocator, 0, sl.max_level) catch unreachable;
sl.head.key = null;
return sl;
}
pub fn deinit(self: @This()) void {
_ = self;
var current: ?*Node = self.head;
while (true) {
const next = current.?.forward[0];
self.allocator.free(current.?.forward);
self.allocator.destroy(current.?);
if (next == null) break;
current = next;
}
}
pub fn search(self: @This(), search_key: K) ?*Node {
var current: *Node = self.head;
var level: usize = self.max_level + 1;
while (level > 0) : (level -= 1) {
while (current.forward[level - 1]) |next| : (current = next) {
if (!lessThan(context, next.key.?, search_key)) break;
}
}
current = current.forward[0].?;
if (current.key == search_key) return current else return null;
}
fn randomLevel(self: @This()) usize {
var level: usize = 0;
// while (self.rnd.float(f32) < (1.0 / self.skip_size) and
// level < self.max_level) level += 1;
while (level < self.max_level) {
const r = self.rnd.float(f32);
// FIXME: strange r values...
std.debug.print("r: {d}\n", .{r});
if (r < 1 / self.skip_size) level += 1 else break;
}
std.debug.print("{d}\n", .{level});
return level;
}
pub fn insert(self: *@This(), insert_key: K, insert_value: V) !void {
var current: ?*Node = self.head;
// create update array for `*Node` items which should be updated
var update = try self.allocator.alloc(?*Node, self.max_level + 1);
std.mem.set(?*Node, update, null);
defer self.allocator.free(update);
// Start from highest level move the current pointer forward while `insert_key`
// is greater than key of node next to current. Otherwise inserted current in
// update and move one level down and continue search.
var l: usize = self.level + 1;
while (l > 0) : (l -= 1) {
while (current.?.forward[l - 1]) |next| : (current = next) {
if (next.key.? >= insert_key) break;
}
std.debug.assert(current != null);
update[l - 1] = current;
}
// reached level 0 and forward pointer to right, which is the desired
// point of insertion
current = current.?.forward[0];
// If current is null, then we have reached the end of the level.
// If it is not null and current.key == insert_key, then key already exists.
if (current != null and current.?.key == insert_key) return;
// We have to insert our new Node.
const rlevel = self.randomLevel();
if (rlevel > self.level) {
var level: usize = self.level + 1;
while (level < rlevel + 1) : (level += 1) {
update[level] = self.head;
}
// update level
self.level = rlevel;
}
// create node with rlevel
const n = try Node.init(self.allocator, insert_key, rlevel);
n.value = insert_value;
// insert new node to SkipList
var i: usize = 0;
while (i <= rlevel) : (i += 1) {
n.forward[i] = update[i].?.forward[i];
update[i].?.forward[i] = n;
}
}
pub fn display(self: @This()) void {
std.debug.print("SkipList structure - P:{d}, max_level: {d}\n", .{ 1 / self.skip_size, self.max_level });
var level: usize = self.max_level + 1;
while (level > 0) : (level -= 1) {
var node: ?*Node = self.head.forward[level - 1];
std.debug.print("Level {d}: ", .{level - 1});
while (node != null) {
std.debug.print("{any} ", .{node.?.key});
node = node.?.forward[level - 1];
}
std.debug.print("\n", .{});
}
}
pub fn remove(self: *@This(), delete_key: K) !bool {
var current: ?*Node = self.head;
// create update array for `*Node` items which should be updated
var update = try self.allocator.alloc(?*Node, self.max_level + 1);
std.mem.set(?*Node, update, null);
defer self.allocator.free(update);
// Start from highest level move the current pointer forward while `delete_key`
// is greater than key of node next to current. Otherwise inserted current in
// update and move one level down and continue search.
var l: usize = self.level + 1;
while (l > 0) : (l -= 1) {
while (current.?.forward[l - 1]) |next| : (current = next) {
if (next.key.? >= delete_key) break;
}
std.debug.assert(current != null);
update[l - 1] = current;
}
// Reached level 0 and forward pointer to right, which is the desired
// point of deletion.
current = current.?.forward[0];
// If current is `null` or `current.key != delete_key` the item is not
// in the SkipList.
if (current == null or current.?.key != delete_key) return false;
// We have found the node to delete.
var level: usize = 0;
while (level <= self.level) : (level += 1) {
// If next Node is not the current one then we do not need to
// update any more Nodes.
if (update[level].?.forward[level] != current) break;
update[level].?.forward[level] = current.?.forward[level];
}
// Remove levels without elements.
level = 1;
while (level < self.level) : (level += 1) {
if (self.head.forward[level] == null) self.level -= 1;
}
current.?.deinit(self.allocator);
return true;
}
};
}
pub fn main() anyerror!void {
const KeyType = usize;
const SL = Pugh(KeyType, void, {}, comptime std.sort.asc(KeyType));
var sl = try SL.init(std.heap.page_allocator, 4, 16);
defer sl.deinit();
try sl.insert(3, {});
try sl.insert(6, {});
try sl.insert(7, {});
try sl.insert(9, {});
try sl.insert(0, {});
try sl.insert(std.math.minInt(KeyType), {});
try sl.insert(12, {});
try sl.insert(12, {}); // does nothing
try sl.insert(12, {}); // does nothing
try sl.insert(19, {});
try sl.insert(17, {});
try sl.insert(26, {});
try sl.insert(21, {});
try sl.insert(25, {});
sl.display();
try std.testing.expectEqual(@as(KeyType, 12), sl.search(12).?.key.?);
try std.testing.expectEqual(@as(KeyType, 26), sl.search(26).?.key.?);
try std.testing.expectEqual(@as(?*SL.Node, null), sl.search(13));
try std.testing.expectEqual(true, try sl.remove(12));
try std.testing.expectEqual(false, try sl.remove(12));
_ = try sl.remove(std.math.minInt(KeyType));
}
It only compiles with -fno-stage1
because of some error.
When I run it on my rpi4 the output is a bit strange:
voroskoi ~/code/skiplist.zig via ↯ v0.10.0-dev.3385+c0a1b4fa4 ❯ zig run src/pugh.zig -fno-stage1
r: 0.9147371053695679
0
r: 0.14794936776161194
r: 0.7877211570739746
1
r: 0.3927851617336273
0
r: 0.8867189884185791
0
r: 0.000013530261639971286
r: 0.8805093765258789
1
r: 0.8242531418800354
0
r: 0.001243592007085681
r: 0.001155853969976306
2
r: 0.0000015804827171450597
r: 0.8286284804344177
1
r: 0.000012576623703353107
r: 0.4579407572746277
1
r: 0.051516659557819366
r: 0.4122631549835205
1
r: 0.010437771677970886
r: 0.084312804043293
2
SkipList structure - P:0.25, max_level: 2
Level 2: 19 25
Level 1: 0 6 17 19 21 25 26
Level 0: 0 3 6 7 9 12 17 19 21 25 26
Notice the very small r
values. As skip_size
is 4, there are way too much items in level 1.
On my amd64 box this code does not even compile: terminated by signal SIGSEGV (Address boundary error)
, which makes me think there is something wrong with my random initialization/use, but the error message does not give me any clue where to look.
I have built a debug version of the compiler, which runs the code and the output is below:
~/code/skiplist.zig via ↯ v0.10.0 ❯ ~/code/zig/build/zig run src/pugh.zig -fno-stage1
r: 0.00000000000000000000000020679515313825692
r: 0.7525559067726135
1
r: 0.00000000000000000000000041359030627651385
r: 0.9998949766159058
1
r: 0.00000000000000000000000041359030627651385
r: 0.6141244769096375
1
r: 0.00000000000000000000000041359030627651385
r: 0.0156288743019104
2
r: 0.00000000000000000000000041359030627651385
r: 0.7501263618469238
1
r: 0.00000000000000000000000041359030627651385
r: 0.7501254081726074
1
r: 0.00000000000000000000000041359030627651385
r: 0.8751866817474365
1
r: 0.00000000000000000000000041359030627651385
r: 0.8751866817474365
1
r: 0.00000000000000000000000041359030627651385
r: 0.8751866817474365
1
r: 0.00000000000000000000000041359030627651385
r: 0.8751866817474365
1
r: 0.00000000000000000000000041359030627651385
r: 0.8751866817474365
1
SkipList structure - P:0.25, max_level: 2
Level 2: 9
Level 1: 0 3 6 7 9 12 17 19 21 25 26
Level 0: 0 3 6 7 9 12 17 19 21 25 26
This gives even more bad random values. Where should I look?