[vox-tech] Memory addressing?

Tue Jun 22 09:46:59 PDT 2010

On Tue, Jun 22, 2010 at 09:11:44AM -0700, Brian Lavender wrote:
> Can someone confirm what is correct? 
> 
> Tim and I were discussing memory addressing at Crepeville last night
> and we had a disagreement about how memory is addressable. I say that
> on today's common intel i386 32 bit architecture (in case you are one of
> those souls who builds your hardware from scratch), that memory is byte
> (octet) addressable. You can load a byte from memory into the lower 8
> bits of a register. Tim says that memory is only addressable on 32 bit
> word boundaries.
> 
> Say you look at memory in bits and then on the left is the memory
> address. 
> 
> I say that memory is normally byte addressable and the addressing
> corresponds to byte (octet) boundaries.
> 
> Address  bits
> 0        0     7      15     23     31
> 3        0     7      15     23     31
> 7        0     7      15     23     31
> 11       0     7      15     23     31
> 15       0     7      15     23     31
> 
> Tim says that memory is only 32 bit word addressable 
> 
> Address  bits
> 0        0     7      15     23     31
> 1        0     7      15     23     31
> 2        0     7      15     23     31
> 3        0     7      15     23     31
> 4        0     7      15     23     31

Consider the following program. The fact that you can get pointers to
arbitrary characters should be enough proof that the architecture is
byte addressable. (I don't know of any modern architectures that aren't
byte addressable, though MIPS takes some shortcuts in its various jump
instructions because the instructions have to be word-aligned.)

Now what can happen is that the computer crashes when dereferencing
the integer pointer, complaining that the access is unaligned -- the
addresses would still refer to individual bytes, but the computer
would crash if the two least significant bits were nonzero. MIPS
behaves this way, but since the following program works, you can see
that Intel doesn't even require word-aligned integer accesses.
(word-aligned integer accesses may be faster, but unaligned accesses
don't require special instructions to perform them).

And just to prove that the compiler isn't performing any funny
business, I included an assembly dump from this program.

#include <stdio.h>

/*
 * Demonstrates byte addressing: takes a char pointer, advances it by one
 * byte, and reads an int through the resulting (odd) address.
 *
 * argc and argv are accepted but never used.
 * NOTE: reading an int through a cast char pointer at an unaligned address
 * is not portable C; it is done here deliberately to show what the hardware
 * permits.
 */
int main(int argc, char** argv){
   /* 26 letters; mystring+1 is the address of the 'B'. */
   char* mystring="ABCDEFGHIJKLMNOPQRSTUVWXYZ";
   /* Load an int starting one byte past the start of the string. */
   int theint= *((int*)(mystring+1));
   /* Print the loaded value in hex; piping the output through
      `xxd -r -p` turns the hex digits back into the bytes they encode. */
   printf("%x\n",theint);
}

$ ./test | xxd -r -p
EDCB

AMD64 assembly language:

        # x86-64 listing in GAS / AT&T syntax (op src, dst), as emitted by
        # GCC 4.4.4 for test.c. The '#' annotations were added by hand.
        #
        # main stores the address of .LC0 in a stack slot, adds 1 to it,
        # performs a 32-bit load from that address, and passes the loaded
        # value to printf together with the "%x\n" format string in .LC1.
        #
        # Stack slots used (relative to %rbp):
        #   -20  copy of %edi (first integer argument)
        #   -32  copy of %rsi (second integer argument)
        #   -16  pointer to the string at .LC0
        #    -4  the 32-bit value loaded from .LC0 + 1
        .file   "test.c"
        .section        .rodata
.LC0:
        .string "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
.LC1:
        .string "%x\n"
        .text
.globl main
        .type   main, @function
main:
.LFB0:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        movq    %rsp, %rbp
        .cfi_offset 6, -16
        .cfi_def_cfa_register 6
        subq    $32, %rsp
        movl    %edi, -20(%rbp)
        movq    %rsi, -32(%rbp)
        movq    $.LC0, -16(%rbp)
        movq    -16(%rbp), %rax
        addq    $1, %rax          # we're adding 1 to an address
                                  # for one character
        movl    (%rax), %eax      # and here we're dereferencing it
                                  # successfully
        movl    %eax, -4(%rbp)
        movl    $.LC1, %eax       # address of the "%x\n" format string
        movl    -4(%rbp), %edx
        movl    %edx, %esi        # second printf argument: the loaded value
        movq    %rax, %rdi        # first printf argument: the format string
        movl    $0, %eax          # variadic call: no vector registers used
        call    printf
        leave
        ret
        .cfi_endproc
.LFE0:
        .size   main, .-main
        .ident  "GCC: (Debian 4.4.4-5) 4.4.4"
        .section        .note.GNU-stack,"",@progbits

Intel 32-bit x86 assembly language:

        # 32-bit x86 listing in GAS / AT&T syntax (op src, dst), as emitted
        # by GCC 4.4.4 for test.c. The '#' annotations were added by hand.
        #
        # main stores the address of .LC0 in a stack slot, adds 1 to it,
        # performs a 32-bit load from that address, and passes the loaded
        # value to printf together with the "%x\n" format string in .LC1.
        #
        # Stack slots used (relative to %esp after the frame is set up):
        #   24  pointer to the string at .LC0
        #   28  the 32-bit value loaded from .LC0 + 1
        #    0  outgoing printf argument: format string address
        #    4  outgoing printf argument: the loaded value
        .file   "test.c"
        .section        .rodata
.LC0:
        .string "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
.LC1:
        .string "%x\n"
        .text
.globl main
        .type   main, @function
main:
        pushl   %ebp
        movl    %esp, %ebp
        andl    $-16, %esp         # round %esp down to a 16-byte boundary
        subl    $32, %esp
        movl    $.LC0, 24(%esp)
        movl    24(%esp), %eax
        addl    $1, %eax           # we're adding 1 to an address
                                   # for one character
        movl    (%eax), %eax       # and we're loading from that
                                   # address without a special instruction
        movl    %eax, 28(%esp)
        movl    $.LC1, %eax        # address of the "%x\n" format string
        movl    28(%esp), %edx
        movl    %edx, 4(%esp)      # second printf argument: the loaded value
        movl    %eax, (%esp)       # first printf argument: the format string
        call    printf
        leave
        ret
        .size   main, .-main
        .ident  "GCC: (Debian 4.4.4-5) 4.4.4"
        .section        .note.GNU-stack,"",@progbits

-- 
Chanoch (Ken) Bloom. PhD candidate. Linguistic Cognition Laboratory.
Department of Computer Science. Illinois Institute of Technology.
http://www.iit.edu/~kbloom1/