Jittered backoff implementation for retries

Hi. New to Zig. Mainly from C/C++, then Go. I’m evaluating Zig at the moment for some of the distributed systems I’m working on, both at work and in personal projects. Here’s one of my first forays into the language: jittered backoff for retries.

4 Likes

There is an error in the example:

/// Stand-in for an operation that may fail: reads the current instant
/// (propagating any error from that call) and then reports the value 1.
fn funcThatCanFail() !u64 {
  const now = try std.time.Instant.now();
  _ = now; // only the possibility of failure matters here
  return 1;
}

// Example under discussion, reproduced as posted. `funcThatCanFail` returns an
// error union (`!u64`), and this code never unwraps it before comparing.
pub fn main() void {
  var bo = zbackoff.Backoff{};
  // Up to three attempts.
  for (0..3) |_| {
    // `ret` holds the whole error union, not a plain u64.
    const ret = funcThatCanFail();
    // The error union is compared against 0 directly, without `try`, `catch`
    // or an `if` capture; this is the problem pointed out in the reply.
    if (ret != 0) {
      std.time.sleep(bo.pause());
    }
  }
}

The code doesn’t deal with the error union returned by funcThatCanFail; you could use something like this:

/// Makes at most three attempts at `funcThatCanFail`, sleeping for a backoff
/// period after every attempt that either errors or yields a non-zero value.
/// A zero result counts as success and ends the loop.
fn oldExample() void {
    var backoff: zbackoff.Backoff = .{};
    var attempt: usize = 0;
    while (attempt < 3) : (attempt += 1) {
        const value = funcThatCanFail() catch {
            // The call itself failed: back off, then try again.
            std.time.sleep(backoff.pause());
            continue;
        };
        // Success: stop retrying (this break was missing from the example).
        if (value == 0) break;
        // Unwanted (non-zero) result: back off, then try again.
        std.time.sleep(backoff.pause());
    }
}

However, that is a lot of code. I think that if you write your function so that every retry reason is communicated as an error and every success value is a non-error return value, the calling code becomes simpler:

/// Stand-in for an operation that never succeeds: reads the current instant
/// (propagating any error from that call), logs its own name, and then
/// returns `error.UnwantedResult1`.
fn funcThatFails() !u64 {
    const now = try std.time.Instant.now();
    _ = now; // only the possibility of failure matters here
    std.debug.print("funcThatFails()\n", .{});
    return error.UnwantedResult1;
}
/// Makes at most three attempts at `funcThatFails`. Any error triggers a
/// backoff sleep before the next attempt; any successful value ends the loop.
fn oldExample2() void {
    var backoff: zbackoff.Backoff = .{};
    var attempt: usize = 0;
    while (attempt < 3) : (attempt += 1) {
        if (funcThatFails()) |_| {
            // Every non-error return is a success, so stop here.
            break;
        } else |_| {
            std.time.sleep(backoff.pause());
        }
    }
}

I would also consider adding a field max_tries: ?u32 = null and a next iterator method. With those you can use a while loop driven by the optional returned from the next method, which lets you put an else branch on the while loop that runs when max_tries is exceeded (the waiting gets baked into the next call):

/// Retries `funcThatFails` under a three-try budget. `Backoff.next` performs
/// any waiting itself and returns null once the budget is used up, which
/// routes control to the loop's `else` branch.
fn exampleNext() void {
    var backoff: zbackoff.Backoff = .{ .max_tries = 3 };
    while (backoff.next()) |_| {
        // On error go straight to the next `next()` call; on success stop.
        _ = funcThatFails() catch continue;
        break;
    } else {
        std.debug.print("failed after 3 tries\n", .{});
    }
}

nextBackoff is a variant of next where you still call sleep yourself.
Here is the modified example:

//! Backoff is a struct for getting a jittered backoff value (in nanoseconds) for operations
//! that need to sleep with backoff between retries. The implementation is based on
//! https://www.awsarchitectureblog.com/2015/03/backoff.html.

const std = @import("std");

/// Jittered exponential backoff state for retry loops.
///
/// All durations are in nanoseconds. Use `pause` to obtain the next delay,
/// `nextBackoff` to obtain it subject to `max_tries`, or `next` to have the
/// waiting performed for you between attempts.
pub const Backoff = struct {
    /// The initial value of the retry period in ns, defaults to 1s.
    initial: u64 = default_initial,

    /// The max value of the retry period in ns, defaults to 30s.
    max: u64 = default_max,

    /// The factor by which the retry period increases. It should be greater than 1, defaults to 2.
    multiplier: f64 = default_multiplier,

    /// Internal: the most recently issued retry period in ns. It is re-seeded
    /// from `initial` when the sequence starts, so this default is a placeholder.
    last: u64 = default_initial,

    /// Internal: number of attempts/pauses handed out so far.
    iter: u64 = 0,

    /// Optional cap on how many attempts `next`/`nextBackoff` grant;
    /// `null` means unlimited.
    max_tries: ?u32 = null,

    const Self = @This();

    const default_initial: u64 = std.time.ns_per_s;
    const default_max: u64 = 30 * std.time.ns_per_s;
    const default_multiplier: f64 = 2.0;

    /// Replaces unusable configuration (zero durations, multiplier below 1 or
    /// NaN) with the documented defaults.
    fn applyDefaults(self: *Self) void {
        if (self.initial == 0) self.initial = default_initial;
        if (self.max == 0) self.max = default_max;
        // Written as a negated `>=` so that a NaN multiplier is rejected too.
        if (!(self.multiplier >= 1.0)) self.multiplier = default_multiplier;
    }

    /// Returns the next nanosecond duration that the caller should use to backoff.
    ///
    /// The first call returns `initial` (clamped to `max`). Every later call
    /// returns a uniformly random value in `[1, last * multiplier + 1]`,
    /// clamped to `max`, where `last` is the previously returned duration.
    pub fn pause(self: *Self) u64 {
        self.applyDefaults();
        self.iter += 1;

        if (self.iter == 1) {
            // Seed `last` from the configured `initial` so that growth starts
            // from the caller's value rather than from the field default.
            self.last = @min(self.initial, self.max);
            return self.last;
        }

        const grown = @as(f64, @floatFromInt(self.last)) * self.multiplier;
        // Saturating float -> int conversion: a plain @intFromFloat is illegal
        // behavior once the product no longer fits in a u64.
        const ceiling = std.math.lossyCast(u64, grown);
        // `+|` saturates instead of overflowing when the draw is maxInt(u64).
        const jittered = std.crypto.random.uintAtMost(u64, ceiling) +| 1;
        self.last = @min(self.max, jittered);
        return self.last;
    }

    /// Like `pause`, but returns null once `max_tries` durations have been
    /// handed out. The caller is responsible for sleeping.
    pub fn nextBackoff(self: *Self) ?u64 {
        if (self.max_tries) |tries| {
            if (self.iter >= tries) return null;
        }
        return self.pause();
    }

    /// Blocks before an attempt: the very first call returns immediately,
    /// every later call sleeps for the duration produced by `pause`.
    pub fn wait(self: *Self) void {
        if (self.iter == 0) {
            // First attempt: nothing to wait for, but start the sequence so
            // that the first real delay grows from `initial`.
            self.applyDefaults();
            self.iter = 1;
            self.last = @min(self.initial, self.max);
            return;
        }
        std.time.sleep(self.pause());
    }

    /// Iterator-style driver for `while (bo.next()) |_| { ... } else { ... }`.
    /// Waits as `wait` does and yields a value for each granted attempt;
    /// returns null once `max_tries` attempts have been granted.
    pub fn next(self: *Self) ?void {
        if (self.max_tries) |tries| {
            if (self.iter >= tries) return null;
        }
        self.wait();
        return;
    }
};

const zbackoff = @This();

/// Stand-in for an operation that may fail: reads the current instant
/// (propagating any error from that call), logs its own name, and then
/// reports the value 1.
fn funcThatCanFail() !u64 {
    const now = try std.time.Instant.now();
    _ = now; // only the possibility of failure matters here
    std.debug.print("funcThatCanFail()\n", .{});
    return 1;
}

/// Stand-in for an operation that never succeeds: reads the current instant
/// (propagating any error from that call), logs its own name, and then
/// returns `error.UnwantedResult1`.
fn funcThatFails() !u64 {
    const now = try std.time.Instant.now();
    _ = now; // only the possibility of failure matters here
    std.debug.print("funcThatFails()\n", .{});
    return error.UnwantedResult1;
}

/// Makes at most three attempts at `funcThatCanFail`, sleeping for a backoff
/// period after every attempt that either errors or yields a non-zero value.
/// A zero result counts as success and ends the loop.
fn oldExample() void {
    var backoff: zbackoff.Backoff = .{};
    var attempt: usize = 0;
    while (attempt < 3) : (attempt += 1) {
        const value = funcThatCanFail() catch {
            // The call itself failed: back off, then try again.
            std.time.sleep(backoff.pause());
            continue;
        };
        // Success: stop retrying (this break was missing from the example).
        if (value == 0) break;
        // Unwanted (non-zero) result: back off, then try again.
        std.time.sleep(backoff.pause());
    }
}

/// Makes at most three attempts at `funcThatFails`. Any error triggers a
/// backoff sleep before the next attempt; any successful value ends the loop.
fn oldExample2() void {
    var backoff: zbackoff.Backoff = .{};
    var attempt: usize = 0;
    while (attempt < 3) : (attempt += 1) {
        if (funcThatFails()) |_| {
            // Every non-error return is a success, so stop here.
            break;
        } else |_| {
            std.time.sleep(backoff.pause());
        }
    }
}

/// Retries `funcThatFails` under a three-try budget using `nextBackoff`,
/// which hands out the delay for each attempt while leaving the sleeping to
/// the caller. When the budget runs out the loop's `else` branch reports it.
fn exampleNextBackoff() void {
    var backoff: zbackoff.Backoff = .{ .max_tries = 3 };
    while (backoff.nextBackoff()) |delay_ns| {
        if (funcThatFails()) |_| {
            break;
        } else |_| {
            // Failed attempt: sleep for the delay issued for this attempt.
            std.time.sleep(delay_ns);
        }
    } else {
        std.debug.print("failed after 3 tries\n", .{});
    }
}

/// Retries `funcThatFails` under a three-try budget. `Backoff.next` performs
/// any waiting itself and returns null once the budget is used up, which
/// routes control to the loop's `else` branch.
fn exampleNext() void {
    var backoff: zbackoff.Backoff = .{ .max_tries = 3 };
    while (backoff.next()) |_| {
        // On error go straight to the next `next()` call; on success stop.
        _ = funcThatFails() catch continue;
        break;
    } else {
        std.debug.print("failed after 3 tries\n", .{});
    }
}

/// Runs every example in turn, printing a separator line before each one.
pub fn main() void {
    const examples = [_]*const fn () void{
        &oldExample,
        &oldExample2,
        &exampleNextBackoff,
        &exampleNext,
    };
    for (examples) |example| {
        std.debug.print("------------\n", .{});
        example();
    }
}

Wow, thanks for these ideas, especially the wait, next, and nextBackoff. I think I’m already so used to just having the pause function (in Go, at least) and the retry/error management moved to the caller’s layer.

1 Like