CSE411

CSE411: Introduction to Compilers

Code Generation

Jooyong Yi (UNIST)

CSE411

Code Generation Example (Expression)

c = a + 3 - b   -- IR -->             ASSIGN           --- Assembly -->   MOVEQ a, R0
                                      /     \                             MOVEQ $3, R1
                                    ISUB     c                            ADDQ R0, R1
                                   /    \                                 MOVQ b, R0
                                IADD      b                               SUBQ R0, R1
                               /    \                                     MOVQ R1, c
                              a      3
CSE411

How to Generate Code?

c = a + 3 - b   -- IR -->             ASSIGN           --- Assembly -->   MOVEQ a, R0
                                      /     \                             MOVEQ $3, R1
                                    ISUB     c                            ADDQ R0, R1
                                   /    \                                 MOVQ b, R0
                                IADD      b                               SUBQ R0, R1
                               /    \                                     MOVQ R1, c
                              a      3
  • Post-order traversal of the IR tree
CSE411

How to Generate Code?

c = a + 3 - b   -- IR -->             ASSIGN           --- Assembly -->   MOVEQ a, R0
                                      /     \                             MOVEQ $3, R1
                                    ISUB     c                            ADDQ R0, R1
                                   /    \                                 MOVQ b, R0
                                IADD      b                               SUBQ R0, R1
                               /    \                                     MOVQ R1, c
                              a      3
void expr_codegen( struct expr *e ) {
  switch(e->kind) {
    case EXPR_ASSIGN:
      expr_codegen(e->left);
      printf("MOVQ %s, %s\n",
                scratch_name(e->left->reg),
                symbol_codegen(e->right->symbol));
      e->reg = e->left->reg;
      break;
  }                                                                                          
}
CSE411

How to Generate Code?

c = a + 3 - b   -- IR -->             ASSIGN           --- Assembly -->   MOVEQ a, R0
                                      /     \                             MOVEQ $3, R1
                                    ISUB     c                            ADDQ R0, R1
                                   /    \                                 MOVQ b, R0
                                IADD      b                               SUBQ R0, R1
                               /    \                                     MOVQ R1, c
                              a      3
void expr_codegen( struct expr *e ) {
  switch(e->kind) {
    case EXPR_SUB:
      expr_codegen(e->left);
      expr_codegen(e->right);
      printf("SUBQ %s, %s\n",
                scratch_name(e->left->reg),
                scratch_name(e->right->reg),                
      e->reg = e->right->reg;
      scratch_free(e->left->reg);
      break;
  }                                                                                          
}
CSE411

How to Generate Code?

c = a + 3 - b   -- IR -->             ASSIGN           --- Assembly -->   MOVEQ a, R0
                                      /     \                             MOVEQ $3, R1
                                    ISUB     c                            ADDQ R0, R1
                                   /    \                                 MOVQ b, R0
                                IADD      b                               SUBQ R0, R1
                               /    \                                     MOVQ R1, c
                              a      3
void expr_codegen( struct expr *e ) {
  switch(e->kind) {
    case EXPR_NAME:
      e->reg = scratch_alloc();
      printf("MOVQ %s, %s\n",
                symbol_codegen(e->symbol),
                scratch_name(e->reg));
      break;
  }                                                                                          
}
CSE411

Machine Specifics: Variables

c = a + 3 - b   -- IR -->             ASSIGN           --- Assembly -->   MOVEQ a, R0
                                      /     \                             MOVEQ $3, R1
                                    ISUB     c                            ADDQ R0, R1
                                   /    \                                 MOVQ b, R0
                                IADD      b                               SUBQ R0, R1
                               /    \                                     MOVQ R1, c
                              a      3
void expr_codegen( struct expr *e ) {
  switch(e->kind) {
    case EXPR_NAME:
      e->reg = scratch_alloc();
      printf("MOVQ %s, %s\n",
                symbol_codegen(e->symbol), // a: -8(%rbp) if a is the first parameter
                scratch_name(e->reg));
      break;
  }                                                                                          
}
CSE411

Machine Specifics: Stack Frames

          |            |  ↑ higher address
          |            |
          |------------| ← %rbp (base pointer or frame pointer)
          |    arg0    | ← -8(%rbp)
          |    arg1    | ← -16(%rbp)
          |     ...    |
          |   local0   |
          |   local1   |
          |            |
          |            |
          |------------| ← %rsp (stack pointer)
          |            |
          |            | ↓ lower address 
  • pushq R is equivalent to subq $8, %rsp; movq R, (%rsp)
  • popq R is equivalent to movq (%rsp), R; addq $8, %rsp
CSE411

Code Generation Example (Function Call)

a = f(10, b+c)   -- IR -->             ASSIGN           --- Assembly -->   MOVEQ $10, $rbx
                                      /     \                              MOVEQ b, $r10
                                    CALL     a                             MOVEQ c, $r11
                                   /    \                                  ADDQ  %r10, $r11
                                  f     ARG                                MOVQ  %r11, %rsi
                                       /   \                               MOVQ  %rbx, %rdi      
                                      10   ARG                             PUSHQ %r10
                                          /   \                            PUSHQ %r11
                                        IADD  (null)                       CALL  f
                                        /  \                               POPQ  %r11
                                       b    c                              POPQ  %r10
                                                                           MOVQ  %rax, %rbx
                                                                           MOVQ  %rbx, a
  • caller-saved registers: %r10, %r11
    • callees can use them without saving/restoring
CSE411

Code Generation Example (Function)

                                                     # function preamble
int compute(int a, int b, int c) {                   pushq %rbp          # save the base pointer
  int x = a+b+c;                                     movq  %rsp, %rbp    
  int y = x*5;                                       pushq %rdi          # save the first parameter (a)
  return y;                                          pushq %rsi          # save the second parameter (b)
}                                                    pushq %rdx          # save the third parameter (c)
                                                     subq  $16, %rsp     # allocate two more local variables
                                                     pushq %rbx          # save callee-saved registers
                                                     pushq %r12
                                                     pushq %r13
                                                     pushq %r14
                                                     pushq %r15
                                                     ...
CSE411

Code Generation Example (Function)

                                                     # function preamble
int compute(int a, int b, int c) {                   pushq %rbp          # save the base pointer
  int x = a+b+c;                                     movq  %rsp, %rbp    
  int y = x*5;                                       pushq %rdi          # save the first parameter (a)
  return y;                                          pushq %rsi          # save the second parameter (b)
}                                                    pushq %rdx          # save the third parameter (c)
                                                     subq  $16, %rsp     # allocate two more local variables
                                                     pushq %rbx          # save callee-saved registers
                                                     pushq %r12
                                                     pushq %r13
                                                     pushq %r14
                                                     pushq %r15
                                                     ...
          |            |  ↑ higher address
          |            |
          |  old %rbp  | ← %rbp (base pointer or frame pointer)
          |    arg0    | ← -8(%rbp)
          |    arg1    | ← -16(%rbp)
          |     ...    |
          |   local0   |
          |   local1   |
          | saved reg  |
          | saved reg  |
          |     ...    |
          |------------| ← %rsp (stack pointer)
          |            |
          |            | ↓ lower address                                                                          
CSE411

Code Generation Example (Function)

                                                     # function body
int compute(int a, int b, int c) {                   movq  -8(%rbp),  %rbx   # load each arg into a register
  int x = a+b+c;                                     movq  -16(%rbp), %rcx
  int y = x*5;                                       movq  -24(%rbp), %rdx
  return y;                                          addq  %rdx, %rcx
}                                                    addq  %rcx, %rbx
                                                     movq  %rbx, -32(%rbp)   # store the result into local 0 (x)
                                                     movq  -32(%rbp), %rbx   # load local 0 (x) into a register.
                                                     movq  $5, %rcx          # load 5 into a register
                                                     movq  %rbx, %rax        # move argument in rax
                                                     imulq %rcx              # %rax = %rax * %rcx
                                                     movq  %rax, -40(%rbp)   # store the result into local 1 (y)
                                                     ...
CSE411

Code Generation Example (Function)

                                                     # function epilogue
int compute(int a, int b, int c) {                   popq %r15               # restore callee-saved registers
  int x = a+b+c;                                     popq %r14
  int y = x*5;                                       popq %r13
  return y;                                          popq %r12
}                                                    popq %rbx
                                                     movq %rbp, %rsp         # reset stack to base pointer.
                                                     popq %rbp               # restore the old base pointer
                                                     ret                                                        
  • popq R is equivalent to movq (%rsp), R; addq $8, %rsp
          |            |  ↑ higher address
          |            |
          |  old %rbp  | ← %rbp (base pointer or frame pointer)
          |    arg0    | ← -8(%rbp)
          |    arg1    | ← -16(%rbp)
          |     ...    |
          |   local0   |
          |   local1   |
          | saved reg  |
          | saved reg  |
          |     ...    |
          |------------| ← %rsp (stack pointer)
          |            |
          |            | ↓ lower address                                                                          
CSE411

Code Generation Example (If Statement)

if (expr) {                              [[expr]]
  true-statements                        CMP $0, register
} else {                                 JE false-label
  false-statements                       [[true-statements]]
}                                        JMP done-label
                                       false-label:
                                         [[false-statements]]
                                       done-label:                                                               
CSE411

Code Generation Example (If Statement)

if (expr) {                              [[expr]]
  true-statements                        CMP $0, register
} else {                                 JE false-label
  false-statements                       [[true-statements]]
}                                        JMP done-label
                                       false-label:
                                         [[false-statements]]
                                       done-label:                                                               
void stmt_codegen( struct stmt *s ) {
   switch(s->kind) {
     case STMT_IF:
        int else_label = label_create();
        int done_label = label_create();
        expr_codegen(s->expr);
        printf("CMP $0, %s\n",scratch_name(s->expr->reg));
        scratch_free(s->expr->reg);
        printf("JE  %s\n",label_name(else_label));
        stmt_codegen(s->body);
        printf("JMP %s\n",label_name(done_label));
        printf("%s:\n",label_name(else_label));
        stmt_codegen(s->else_body);
        printf("%s:\n",label_name(done_label));
        break;
   }                                                                                                 
}