“The Old New Thing”

For this week’s lab, we’re asked to write assembly-language programs (for both x86-64 and AArch64) that generate the following printout:

Loop: 0
Loop: 1
Loop: 2
Loop: 3
Loop: 4
Loop: 5
Loop: 6
Loop: 7
Loop: 8
Loop: 9
Loop: 10
Loop: 11
Loop: 12
Loop: 13
Loop: 14
Loop: 15
Loop: 16
Loop: 17
Loop: 18
Loop: 19
Loop: 20
Loop: 21
Loop: 22
Loop: 23
Loop: 24
Loop: 25
Loop: 26
Loop: 27
Loop: 28
Loop: 29
Loop: 30

Note that if we put a space between the digit and the newline on each of the first ten lines, every output line would have the same, predictable length, and we could simplify the logic of our programs a bit. But we won’t go that way; that is, no output line will contain any extra spaces.

Here’s the x86-64 version:

/* loop.s - x86-64 version
 *
 * GNU as, AT&T syntax; Linux x86-64 raw syscalls, entry point _start.
 * Writes "Loop: N" plus a newline to stdout for N = 0 .. max-1, using one
 * write syscall per line.  The return value of write is not checked.
 *
 * prefix and buffer are referenced by absolute address, so this is meant
 * to be linked as a plain static, non-PIE executable.
 *
 * Register roles:
 *   rcx      offset of the next free byte in buffer
 *   r15      length of the prefix (rcx is rewound to it on every pass)
 *   rbx      loop index (bl doubles as scratch during the prefix copy)
 *   al / ah  tens digit / ones digit after divb
 */
.section .text

        SYS_WRITE = 1                   /* Linux x86-64 syscall numbers */
        SYS_EXIT  = 60
        STDOUT    = 1                   /* file descriptor */
        max       = 31                  /* one past the last index printed */

.globl  _start
_start:

        /* copy the prefix into buffer (do-while shape: assumes len > 0) */
        xorl    %ecx,%ecx               /* rcx = 0; a 32-bit write clears bits 63..32 */
strcopy:
        movb    prefix(%rcx),%bl
        movb    %bl,buffer(%rcx)
        inc     %rcx
        cmp     $len,%rcx
        jne     strcopy

        movq    %rcx,%r15               /* remember where the digits start */
        xorl    %ebx,%ebx               /* loop index in [0..max-1] */

loop:
        /* rcx was advanced past the previous digits and is also clobbered
           by syscall, so rewind it to just past the prefix */
        movq    %r15,%rcx

        /* split the index into tens and ones */
        movq    %rbx,%rax               /* index < max, so it fits in ax */
        movb    $10,%dl
        divb    %dl                     /* ax / 10: al = quotient, ah = remainder */

        testb   %al,%al                 /* tens digit zero? */
        jz      ones                    /* then print no leading zero */
        addb    $'0',%al                /* digit -> ASCII */
        movb    %al,buffer(%rcx)
        inc     %rcx
ones:
        addb    $'0',%ah                /* digit -> ASCII */
        movb    %ah,buffer(%rcx)
        inc     %rcx

        movb    $'\n',buffer(%rcx)
        inc     %rcx

        movq    %rcx,%rdx               /* rcx now holds the line length */
        movq    $buffer,%rsi            /* message location */
        movq    $STDOUT,%rdi            /* file descriptor */
        movq    $SYS_WRITE,%rax
        syscall

        inc     %rbx                    /* next index */
        cmp     $max,%rbx
        jne     loop                    /* stop once the index reaches max */

        xorl    %edi,%edi               /* exit status 0 */
        movq    $SYS_EXIT,%rax
        syscall

.section .rodata

prefix: .ascii  "Loop: "
len = . - prefix
maxLen = len + 3                        /* prefix + up to 2 digits + newline */

.section .bss

buffer: .skip maxLen


And, the Aarch64 version:

/* loop.s - AArch64 version
 *
 * GNU as; Linux AArch64 raw syscalls (number in x8, svc 0), entry _start.
 * Writes "Loop: N" plus a newline to stdout for N = 0 .. max-1, using one
 * write syscall per line.  The return value of write is not checked.
 *
 * Register roles (all of them are relied on to survive svc, except x0):
 *   x4       address of buffer
 *   x5       address of prefix
 *   x9       loop index
 *   x10      the constant 10 (divisor)
 *   x11      length of the prefix
 *   x12      offset of the next free byte in buffer
 *   x6 / x7  tens digit / ones digit
 *   x0 / w0  scratch for the byte being stored
 */
.section .text

        SYS_WRITE = 64                  /* Linux AArch64 syscall numbers */
        SYS_EXIT  = 93
        STDOUT    = 1                   /* file descriptor */
        max       = 31                  /* one past the last index printed */

.globl _start
_start:
        /* copy the prefix into buffer (do-while shape: assumes len > 0) */
        mov     x12, #0          /* # characters written to buffer */
        adr     x5, prefix
        adr     x4, buffer
strcopy:
        ldrb    w0, [x5, x12]
        strb    w0, [x4, x12]
        add     x12, x12, #1
        cmp     x12, #len
        bne     strcopy

        mov     x11, x12         /* remember where the digits start */
        mov     x10, #10         /* divisor, constant for the whole loop */
        mov     x9, #0           /* loop index in [0..max-1] */
loop:
        mov     x12, x11         /* rewind to just past the prefix */

        /* split the index into tens and ones */
        udiv    x6, x9, x10      /* quotient in x6 */
        msub    x7, x6, x10, x9  /* remainder in x7 = x9 - x6 * 10 */

        cbz     x6, ones         /* tens digit zero: print no leading zero */

        add     x0, x6, #'0'     /* digit -> ASCII */
        strb    w0, [x4, x12]
        add     x12, x12, #1
ones:
        add     x0, x7, #'0'     /* digit -> ASCII */
        strb    w0, [x4, x12]
        add     x12, x12, #1

        mov     x0, #'\n'
        strb    w0, [x4, x12]
        add     x12, x12, #1

        mov     x0, #STDOUT      /* file descriptor */
        mov     x1, x4           /* message location: x4 already points at buffer */
        mov     x2, x12          /* message length (bytes) */
        mov     x8, #SYS_WRITE
        svc     0                /* invoke syscall */

        add     x9, x9, #1       /* next index */
        cmp     x9, #max
        bne     loop             /* stop once the index reaches max */

        mov     x0, #0           /* exit status 0 */
        mov     x8, #SYS_EXIT
        svc     0                /* invoke syscall */

.section .rodata
prefix: .ascii "Loop: "
len = . - prefix
maxLen = len + 3                 /* prefix + up to 2 digits + newline */

.section .bss

buffer: .skip maxLen

For someone who has been exposed to C’s array access and pointer arithmetic, x86’s memory addressing is intuitive and, to some extent, straightforward. In contrast, AArch64’s addressing scheme needs much more getting used to.

Of course, both the x86 and AArch64 assemblers have their idiosyncrasies. It’s interesting and rather bewildering to notice how these two architectures deal with 8-, 16-, 32-, and 64-bit data.

Leave a comment