Strange assembly generated by inline assembly

qwn · August 1, 2025, 1:49pm

So I’m trying to load my GDT on an x86_64 long mode emulator. This is the inline assembly I’ve written to reload the code and data segments:

   // Reload the segment registers: zero rax, load the DATA_SEG immediate into
   // ax, copy ax into ds/ss/es/fs/gs, then push CODE_SEG and the address of
   // dummy_ret and execute a far return (lretq) — presumably so that CS is
   // reloaded with CODE_SEG and execution continues at dummy_ret.
   asm volatile (
        \\xor %%rax, %%rax
        \\movw %[ds], %%ax
        \\mov %%ax, %%ds
        \\movw %%ax, %%ss
        \\movw %%ax, %%es
        \\movw %%ax, %%fs
        \\movw %%ax, %%gs
        \\push %[cs]
        \\push %[dummy]
        \\lretq
        :
        : [ds] "i" (DATA_SEG), // immediate operand: data segment selector
          [cs] "i" (CODE_SEG), // immediate operand: code segment selector
          [dummy] "r" (&dummy_ret), // far-return target, passed in a compiler-chosen register
        : "stack", "rax" // clobbers declared to the compiler
    );

Now, the code and data segment selectors on x86_64 are 16 bits wide, which means the generated assembly should use some form of the “ax” register. But dumping the disassembly, I can see that the generated code is using eax:

  400001ce3e:   48 31 c0                xor    %rax,%rax
  400001ce41:   66 b8 10 00             mov    $0x10,%ax
  400001ce45:   8e d8                   mov    %eax,%ds
  400001ce47:   8e d0                   mov    %eax,%ss
  400001ce49:   8e c0                   mov    %eax,%es
  400001ce4b:   8e e0                   mov    %eax,%fs
  400001ce4d:   8e e8                   mov    %eax,%gs
  400001ce4f:   6a 08                   push   $0x8
  400001ce51:   51                      push   %rcx
  400001ce52:   48 cb                   lretq

And sure enough, QEMU triple faults on the same instruction:

Triple fault
CPU Reset (CPU 0)
RAX=0000000000000010 RBX=0000000000000000 RCX=000000400001d640 RDX=0000004000055190
RSI=00af00000000ffff RDI=00000040000578a0 RBP=0000004000057760 RSP=0000004000057750
R8 =000000000000000a R9 =000000000000000a R10=0000000000000000 R11=000000400005767f
R12=0000000000000000 R13=000000001e280000 R14=000000001e9ee098 R15=000000001e14b018
RIP=000000400001ce45 RFL=00000046 [---Z-P-] CPL=0 II=0 A20=1 SMM=0 HLT=0

Am I missing something?

CuckooEXE · August 1, 2025, 3:01pm

EDIT: I’m a very silly person. Please ignore my response because I can’t read and just realized I did come up with the same result.

I tried compiling it myself and didn’t get your results. Here’s the code I used, compiled with zig build-exe test-zig.zig.

// Placeholder selector values for the reproduction; they are passed to the
// inline assembly as immediate ("i") operands.
const DATA_SEG = 0x10; // Example data segment selector
const CODE_SEG = 0x08; // Example code segment selector

// Stand-alone reproduction of the segment-reload inline assembly, compiled to
// inspect the instructions the compiler emits for it.
//
// Sequence: zero rax, load DATA_SEG into ax, copy ax into ds/ss/es/fs/gs,
// push CODE_SEG and the address of dummy_ret, then far-return (lretq).
//
// NOTE(review): dummy_ret is a local u64 here, so the pushed "return address"
// is the address of a stack slot rather than of code — presumably acceptable
// because this is only meant to be disassembled, not executed.
fn asmtest() void {
    // Local whose address is used as the [dummy] operand below.
    var dummy_ret: u64 = 0;
    asm volatile (
        \\xor %%rax, %%rax
        \\movw %[ds], %%ax
        \\mov %%ax, %%ds
        \\movw %%ax, %%ss
        \\movw %%ax, %%es
        \\movw %%ax, %%fs
        \\movw %%ax, %%gs
        \\push %[cs]
        \\push %[dummy]
        \\lretq
        :
        : [ds] "i" (DATA_SEG), // immediate operand: data segment selector
          [cs] "i" (CODE_SEG), // immediate operand: code segment selector
          [dummy] "r" (&dummy_ret), // address passed in a compiler-chosen register
        : "stack", "rax" // clobbers declared to the compiler
    );
}

// Program entry point: calls asmtest() and does nothing else.
pub fn main() !void {
    asmtest();
}

And here’s the disassembly:

lldb -o 'disassemble --name test-zig.asmtest' test-zig
(lldb) target create "test-zig"
Current executable set to '/workspaces/zig-curl/test-zig' (x86_64).
(lldb) disassemble --name test-zig.asmtest
test-zig`test-zig.asmtest:
test-zig[0x10ddbe0] <+0>:  pushq  %rbp
test-zig[0x10ddbe1] <+1>:  movq   %rsp, %rbp
test-zig[0x10ddbe4] <+4>:  pushq  %rax
test-zig[0x10ddbe5] <+5>:  movq   $0x0, -0x8(%rbp)
test-zig[0x10ddbed] <+13>: leaq   -0x8(%rbp), %rcx
test-zig[0x10ddbf1] <+17>: xorq   %rax, %rax
test-zig[0x10ddbf4] <+20>: movw   $0x10, %ax
test-zig[0x10ddbf8] <+24>: movl   %eax, %ds
test-zig[0x10ddbfa] <+26>: movl   %eax, %ss
test-zig[0x10ddbfc] <+28>: movl   %eax, %es
test-zig[0x10ddbfe] <+30>: movl   %eax, %fs
test-zig[0x10ddc00] <+32>: movl   %eax, %gs
test-zig[0x10ddc02] <+34>: pushq  $0x8
test-zig[0x10ddc04] <+36>: pushq  %rcx
test-zig[0x10ddc05] <+37>: lretq
test-zig[0x10ddc07] <+39>: addq   $0x8, %rsp
test-zig[0x10ddc0b] <+43>: popq   %rbp
test-zig[0x10ddc0c] <+44>: retq

joed · August 1, 2025, 3:38pm

movl %eax, %ds should have the same effect as movw %ax, %ds, but it encodes one byte shorter (that is, if encoding it with a 16-bit operand-size prefix is even valid; I can’t remember).

What do your GDT and GDTR structures look like?