Unexpected Data Corruption?

Hello, I’m currently new at Zig and trying to implement a simple HTTP server from scratch.

A net.Server.Connection is passed to a thread that runs this function

const TestStruct = struct {
    name: []const u8,
    age: u16,
};

pub fn handle_connection(alloc: std.mem.Allocator, conn: net.Server.Connection) !void {
    std.debug.print("A client is connecting from {}\n", .{conn.address});

    const start = time.milliTimestamp();

    var req = try request.Request.init(alloc, conn.stream);
    const encoding = req.headers.get("Accept-Encoding") orelse "";

    var res = response.Response.init(alloc, encoding);

    var hit = req.headers.iterator();
    while (hit.next()) |k| {
        std.debug.print("{s}: {s}\n", .{ k.key_ptr.*, k.value_ptr.* });
    }

    const dummy_object = try req.json(
        alloc,
        TestStruct,
    );

    var hit2 = req.headers.iterator();
    while (hit2.next()) |k| {
        std.debug.print("{s}: {s}\n", .{ k.key_ptr.*, k.value_ptr.* });
    }

    _ = try res.status(HTTPStatus.ok).json(alloc, TestStruct, dummy_object);

    const serialized = try res.serialize(alloc);
    defer {
        req.deinit();
        res.deinit();
        alloc.free(serialized);
    }

    _ = try conn.stream.write(serialized);

    conn.stream.close();

    const end = time.milliTimestamp();

    std.debug.print("{s} {s} - {}ms\n", .{ @tagName(req.method), req.path, end - start });
}

A request is then handled using this struct

const StringMap = std.StringHashMap([]const u8);
const RequestLine = struct { method: core.HTTPMethod, path: []const u8, protocol_version: []const u8 };

const BUFFER_SIZE = 8192;

pub const Request = struct {
    method: core.HTTPMethod,

    path: []const u8,
    query: StringMap,

    protocol_version: []const u8,

    headers: StringMap,
    raw_body: []const u8,
    cookies: cookie.CookieManager,

    pub fn init(alloc: std.mem.Allocator, stream: net.Stream) !Request {
        var buffer: [BUFFER_SIZE]u8 = undefined;
        const recv = stream.read(&buffer) catch return core.HTTPError.MalformedRequest;
        const raw_data = buffer[0..recv];

        var it = mem.splitSequence(u8, raw_data, "\r\n");

        const req_line = try parseRequestLine(&it);
        const headers = try parseHeaders(alloc, &it);

        const header_end = mem.indexOf(u8, raw_data, "\r\n\r\n") orelse return core.HTTPError.MalformedRequest;
        const body_start = header_end + 4;

        const content_length_raw = headers.get("Content-Length") orelse "0";
        const content_length = fmt.parseInt(u64, content_length_raw, 10) catch return core.HTTPError.MalformedHeaders;

        var raw_body: []u8 = "";

        if (content_length > 0) {
            raw_body = try alloc.alloc(u8, content_length);

            const already_read = recv - body_start;
            if (already_read > 0) {
                @memcpy(raw_body[0..already_read], raw_data[body_start..recv]);
            }

            if (already_read < content_length) {
                _ = stream.read(raw_body[already_read..]) catch return core.HTTPError.MalformedRequest;
            }
        }

        const raw_cookie = headers.get("Cookie") orelse "";
        const cookies = try cookie.CookieManager.init(alloc, raw_cookie);

        const query = StringMap.init(alloc);

        return Request{
            .method = req_line.method,
            .path = req_line.path,
            .protocol_version = req_line.protocol_version,
            .headers = headers,
            .raw_body = raw_body,
            .cookies = cookies,
            .query = query,
        };
    }

    pub fn deinit(self: *Request) void {
        self.query.deinit();
        self.headers.deinit();
        self.cookies.deinit();
    }

    fn parseRequestLine(it: *std.mem.SplitIterator(u8, .sequence)) core.HTTPError!RequestLine {
        const line = it.next() orelse return core.HTTPError.MalformedRequest;

        var line_it = mem.splitSequence(u8, line, " ");

        const method = line_it.next() orelse return core.HTTPError.MalformedRequest;
        const host = line_it.next() orelse return core.HTTPError.MalformedRequest;
        const protocol_version = line_it.next() orelse return core.HTTPError.MalformedRequest;

        return RequestLine{
            .method = std.meta.stringToEnum(core.HTTPMethod, method) orelse return core.HTTPError.UnsupportedMethod,
            .path = host,
            .protocol_version = protocol_version,
        };
    }

    fn parseHeaders(alloc: std.mem.Allocator, it: *std.mem.SplitIterator(u8, .sequence)) !StringMap {
        var headers = StringMap.init(alloc);

        while (it.next()) |line| {
            if (line.len > 0) {
                var header_iterator = mem.splitSequence(u8, line, ": ");

                const key = header_iterator.next() orelse return core.HTTPError.MalformedRequest;
                const value = header_iterator.next() orelse return core.HTTPError.MalformedRequest;

                try headers.put(key, value);
            } else {
                break;
            }
        }

        return headers;
    }

    pub fn text(self: *Request) []const u8 {
        return self.raw_body;
    }

    pub fn json(self: *Request, alloc: std.mem.Allocator, comptime T: type) !T {
        const parsed = try std.json.parseFromSlice(T, alloc, self.raw_body, .{});
        defer parsed.deinit();

        return parsed.value;
    }

    pub fn formData(_: *Request, _: std.mem.Allocator, comptime T: type) !T {
        return error.NotImplemented;
    }
};

However, I started to notice that any string value is getting corrupted after .json calls.

Before

Content-Length: 37
User-Agent: insomnia/8.6.1
Content-Type: application/json
Accept: */*
Accept-Encoding: gzip
Host: localhost:6969

After

��_�:
0�_��: W��_��
��_���: ����_����_
�␦: (i�
��: �
:  �_�
� �

Unfortunately, I didn’t find any reason for the value to be corrupted. Am I missing something here?

For the allocator, I’m currently using GeneralPurposeAllocator.

You are returning pointers to stack memory:

        var buffer: [BUFFER_SIZE]u8 = undefined;

This buffer lives on the stack and becomes invalid after leaving the function.

const raw_data = buffer[0..recv];

raw_data points to the buffer.

var it = mem.splitSequence(u8, raw_data, "\r\n");

Splitting does not allocate, so any output produced by it will still point into the stack buffer.

        const req_line = try parseRequestLine(&it);
        const headers = try parseHeaders(alloc, &it);

In both of these functions you create data structures from the split results which point to the stack buffer.


        return Request{
            .method = req_line.method,
            .path = req_line.path,
            .protocol_version = req_line.protocol_version,
            .headers = headers,

And then you return these data structures, which, after returning, may point to invalid data at any point.

The solution to this would be to allocate a copy for each string that you want to store permanently. Or if you want to save on tedious small allocations you could also allocate a copy of the entire string, and store it in the struct to clean up after everything is done.

5 Likes