Algorithm Translation Challenge 1: Symmetric Difference

Hello everyone! I’m starting an algorithms series where we take famous algorithms written in other languages (I’m focusing on C/C++) and translate them to Zig code.

Our target is Symmetric Difference - this algorithm populates an output set with the difference between the two input sets.

Note:

Input sets are sorted.
Output set is sorted.

Example:

Input: 

    Set A: {1, 2, 3, 4, 5, 6, 7, 8};

    Set B: {5, 7, 9, 10};

Output: 

     Set C: { 1, 2, 3, 4, 6, 8, 9, 10 }

Here is a possible implementation in C++ from: std::set_symmetric_difference - cppreference.com

template<class InputIt1, class InputIt2, class OutputIt, class Compare>
OutputIt set_symmetric_difference(InputIt1 first1, InputIt1 last1,
                                  InputIt2 first2, InputIt2 last2,
                                  OutputIt d_first, Compare comp)
{
    while (first1 != last1)
    {
        if (first2 == last2)
            return std::copy(first1, last1, d_first);
 
        if (comp(*first1, *first2))
            *d_first++ = *first1++;
        else
        {
            if (comp(*first2, *first1))
                *d_first++ = *first2;
            else
                ++first1;
            ++first2;
        }
    }
    return std::copy(first2, last2, d_first);
}

Bonus Challenge:

Since this algorithm only requires Forward Iterators, write a version that uses a theoretical ForwardIterator (can be represented as anytype) that has a next function that advances the iterator by one place and a get function that returns a pointer to the value being referenced.

5 Likes

Totally cheating by using std.bit_set but I just love using those bit sets. lol It isn’t generic enough as it only works with integer types. Maybe could be made more generic by taking a function that maps any type to an integer but don’t know how feasible that would be.

const std = @import("std");

fn max(comptime T: type, s: []const T) T {
    var m = s[0];
    for (s) |n| {
        if (n > m) m = n;
    }
    return m;
}

/// Returns the symmetric difference of sets `a` and `b`
/// as a slice of `out`. If `out` is too small, an
/// index out-of-bounds runtime panic will occur.
/// `T` must be an integer type and the max value
/// of both `a` and `b` cannot be greater than `bits`.
pub fn symDiff(
    comptime bits: usize,
    comptime T: type,
    a: []const T,
    b: []const T,
    out: []T,
) []const T {
    comptime std.debug.assert(@typeInfo(T) == .Int);
    const max_value = @max(max(T, a), max(T, b));
    std.debug.assert(max_value <= bits);

    const BitSet = std.bit_set.StaticBitSet(bits);

    var set_a = BitSet.initEmpty();
    for (a) |i| set_a.set(@intCast(i));
    var set_b = BitSet.initEmpty();
    for (b) |i| set_b.set(@intCast(i));

    const sym_diff = set_a.xorWith(set_b);

    var iter = sym_diff.iterator(.{});
    var i: usize = 0;
    while (iter.next()) |n| : (i += 1) {
        if (n > max_value) break;
        out[i] = @intCast(n);
    }

    return out[0..i];
}

pub fn main() !void {
    const bits = 16;
    const T = u8;

    const a: []const T = &.{ 1, 2, 3, 4, 5, 6, 7, 8 };
    const b: []const T = &.{ 5, 7, 9, 10 };
    var buf: [10]T = undefined;

    const sd = symDiff(bits, T, a, b, &buf);

    for (sd) |n| std.debug.print("{}, ", .{n});
    std.debug.print("\n", .{});
}
4 Likes

“set” isn’t the best term for this. Even the C++ standard got this wrong. The inputs are supposed to be two sorted ranges. They can even have repeating values, as long as everything is sorted.

Here’s my entry:

const std = @import("std");

/// Computes the symmetric difference between two sorted ranges.
/// Returns a slice of dst containing the output.
/// Caller ensures dst has enough space for the output.
/// Comparison is an object (passed by value or reference), that must
/// have a method named compare, with the following signature:
/// pub fn compare(@TypeOf(comparison), T, T) std.math.Order.
/// The first parameter of the compare methods can be a value,
/// pointer or const pointer.
pub fn symmetricDifference(
    comptime T: type,
    inputA: []const T,
    inputB: []const T,
    dst: [*]T,
    comparison: anytype,
) []T {
    var localA = inputA;
    var localB = inputB;
    var dstPtr = dst;
    var result = dst[0 .. localA.len + localB.len];

    while (true) {
        if (localA.len == 0) {
            @memcpy(dstPtr, localB);
            break;
        }
        if (localB.len == 0) {
            @memcpy(dstPtr, localA);
            break;
        }

        switch (comparison.compare(localA[0], localB[0])) {
            .lt => {
                dstPtr[0] = localA[0];
                dstPtr = dstPtr[1..];
                localA = localA[1..];
            },
            .gt => {
                dstPtr[0] = localB[0];
                dstPtr = dstPtr[1..];
                localB = localB[1..];
            },
            .eq => {
                localA = localA[1..];
                localB = localB[1..];
                result.len -= 2;
            },
        }
    }

    return result;
}

pub fn main() void {
    const a = [_]u8{ 1, 2, 3, 4, 5, 6, 7, 8 };
    const b = [_]u8{ 5, 7, 9, 10 };
    var dst: [a.len + b.len]u8 = undefined;
    const Comparison = struct {
        pub fn compare(_: @This(), inputA: u8, inputB: u8) std.math.Order {
            return std.math.order(inputA, inputB);
        }
    };

    const output = symmetricDifference(
        u8,
        &a,
        &b,
        &dst,
        Comparison{},
    );

    std.debug.print("{any}\n", .{output});
    // Prints: { 1, 2, 3, 4, 6, 8, 9, 10 }
}
1 Like

I decided to deviate from the proposed solution by using an allocator and an ArrayList.
Also no cheating here :wink:

You could easily transform this into an iterator version, by replacing ia += 1 with iteratorA.next() and the a[ia] with iteratorA.get(), and the appendSlice bits can be replaced with a while loop over the remaining elements of the iterator.

const std = @import("std");

pub fn symDiff(
    allocator: std.mem.Allocator,
    comptime T: type,
    a: []const T,
    b: []const T,
) ![]const T {
    var result = std.ArrayList(T).init(allocator);
    errdefer result.deinit();
    
    var ia: usize = 0;
    var ib: usize = 0;
    while(ia < a.len and ib < b.len) {
        if(a[ia] == b[ib]) {
            ia += 1;
            ib += 1;
        } else if(a[ia] < b[ib]) {
            try result.append(a[ia]);
            ia += 1;
        } else {
            try result.append(b[ib]);
            ib += 1;
        }
    }
    try result.appendSlice(a[ia..]);
    try result.appendSlice(b[ib..]);

    return try result.toOwnedSlice();
}

pub fn main() !void {
    const T = u64;

    const a: []const T = &.{ 1, 2, 3, 4, 5, 6, 7, 8 };
    const b: []const T = &.{ 5, 7, 9, 10 };

    const sd = try symDiff(std.heap.page_allocator, T, a, b);
    defer std.heap.page_allocator.free(sd);

    for (sd) |n| std.debug.print("{}, ", .{n});
    std.debug.print("\n", .{});
}
3 Likes

Here’s my take on this…

const std = @import("std");

const DualIterator = struct {
    a: []const u8,
    b: []const u8,
    aPos: u8 = 0,
    bPos: u8 = 0,

    fn aNext(self: *DualIterator) u8 {
        const val = self.a[self.aPos];
        self.aPos += 1;
        return val;
    }

    fn bNext(self: *DualIterator) u8 {
        const val = self.b[self.bPos];
        self.bPos += 1;
        return val;
    }
};

fn symDiff(it: *DualIterator, out: []u8) !void {
    var pos: usize = 0;
    while (it.aPos < it.a.len and it.bPos < it.b.len) {
        if (comp(it.a[it.aPos], it.b[it.bPos])) {
            out[pos] = it.aNext();
            pos += 1;
        } else if (comp(it.b[it.bPos], it.a[it.aPos])) {
            out[pos] = it.bNext();
            pos += 1;
        } else {
            it.aPos += 1;
            it.bPos += 1;
        }
    }

    while (it.aPos < it.a.len) : (pos += 1) {
        out[pos] = it.aNext();
    }

    while (it.bPos < it.b.len) : (pos += 1) {
        out[pos] = it.bNext();
    }
}

fn comp(a: u8, b: u8) bool {
    return a < b;
}

pub fn main() !void {
    const allocator = std.heap.page_allocator;
    const writer = std.io.getStdOut().writer();

    var range1 = [_]u8{ 1, 2, 3, 4, 5, 5, 6, 7, 8, 11, 15, 15, 15 };
    var range2 = [_]u8{ 5, 5, 5, 7, 9, 10, 11, 11, 12, 15 };

    var out: []u8 = undefined;

    if (range1.len < range2.len) {
        out = try allocator.alloc(u8, range2.len);
    } else {
        out = try allocator.alloc(u8, range1.len);
    }

    var it = DualIterator{ .a = &range1, .b = &range2 };

    try symDiff(&it, out);

    try writer.print("> {any}\n", .{out});
}

Edit: I updated this so that it doesn’t use an ArrayList. I would prefer this method for simple console output, and it should be easy to adjust for different situations. Wouldn’t recommend it for highly optimized environments, but it would still be a good place to start.

2 Likes