Control: Condition codes
Processor State(x86-64,Partial)
Information about currently executing program
Temporary data(%rax, …)
Location of runtime stack(%rsp)
Location of current code control point(%rip,…)
Status of recent tests(CF,ZF,SF,OF)
Condition Codes(Implicit Setting)
Single bit registers
Example: addq Src,Dest <-> t = a + b
CF Carry Flag(for unsigned): if carry out from most significant bit(unsigned overflow)
ZF Zero Flag: set if t == 0
SF Sign Flag(for signed): set if t < 0 (as signed)
OF Overflow Flag(for signed): set if two’s-complement(signed) overflow.
No set by leaq instruction
Condition Codes(Explicit Setting:Compare)
Explicit Setting by Compare Instruction
Cmpq Src2, Src1.
Cmpq b, a like computing a-b without setting destination
CF set if carry out from most significant bit (used for unsigned comparisons)
ZF set if a == b
SF set if (a-b) < 0 (as signed)
OF set if two’s-complemet(signed) overflow
Other: testq Src2, Src1
Reading Condition Codes
We can use a series of setX instructions to read flag registers’ values.
setX:Set single byte based on combination of condition codes
Like this:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 #include <stdio.h> int main () { int value = 50 ; char result; __asm__("testl %1, %1\n\t" "setz %0" : "=r" (result) : "r" (value) ); if (result) { printf ("Value is zero.\n" ); } else { printf ("Value is non-zero.\n" ); } return 0 ; }
testl: Used in assembly language to perform logical AND operations, but instead of saving the result, it reflects the state of the result in the flag register.
setz:set target register value by ZF.
Let’s look at another example.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 #include <stdio.h> int gt (long x, long y) { return x > y; } int main () { int z = gt (3 ,4 ); return 0 ; } gt: .LFB0: pushq %rbp movq %rsp, %rbp movq %rdi, -8 (%rbp) movq %rsi, -16 (%rbp) movq -8 (%rbp), %rax cmpq -16 (%rbp), %rax # compare x:y setg %al # Set when > movzbl %al, %eax # Zero rest of %rax popq %rbp ret
Setg: ~(SF^OF)&~ZF
Conditional branches
jumping
jX instructions: jump to different parts of code depending on condition codes.
Conditional Branch Example(Old Style)
1 2 3 4 5 6 7 8 long absdiff (long x, long y) { long result; if (x > y) result = x - y; else result = y - x; return result; }
gcc -s -Og -fno-if-conversion reading\ condition.c
1 2 3 4 5 6 7 8 9 10 11 absdiff: .LFB23: cmpq %rsi, %rdi # x:y jle .L2 movq %rdi, %rax subq %rsi, %rax ret .L2: movq %rsi, %rax subq %rdi, %rax ret
Using conditional moves
1 2 3 4 5 6 7 8 9 10 11 absdiff: .LFB23: .cfi_startproc endbr64 movq %rdi, %rdx # x movq %rsi, %rax # y subq %rsi, %rdx # eval = x - y subq %rdi, %rax # reslut = y - x cmpq %rsi, %rdi # x:y cmovg %rdx, %rax # if >, result = eval ret
Bad Cases for Conditional Move
Expensive Computations
1 val = Test(x) ? Hard1(x) : Hard2(x);
Both values get computed, Only makes sense when computations are very simple
Risky Computations
Both values get computed, May have undesirable effects
Computations with side effects
1 Val = x > 0 ? x*=7 : x+=3 ;
Must be side-effect free
Loops
Do-While
C Code
1 2 3 4 5 6 7 8 long pcount_do (unsigned long x) { long result = 0 ; do { result += x & 0x1 ; x >>= 1 ; } while (x); return result; }
Goto Version
1 2 3 4 5 6 7 8 long pcount_goto (unsigned long x) { long result = 0 ; loop: result += x & 0x1 ; x >>= 1 ; if (x) goto loop; return result; }
Assembly code
1 2 3 4 5 6 7 8 movl $0, %eax # result = 0 .L9: # loop: movq %rdi, %rdx andl $1, %edx # t = x & 0x1; addq %rdx, %rax # result += t; shrq %rdi # x >>= 1 jne .L9 # if (x) goto loop ret
While
C Code
1 2 3 4 5 6 7 8 long pcount_while (unsigned long x) { long result = 0 ; while (x) { result += x & 0x1 ; x >>= 1 ; } return result; }
Goto version
1 2 3 4 5 6 7 8 9 10 long pcount_goto_jtm (unsigned long x) { long result = 0 ; goto test; loop: result += x & 0x1 ; x >> = 1 ; test: if (x) goto loop; return result; }
Used with -O1.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 // While Version while(Test) Body // Do-While Version if(!Test) goto Done; do Body while(Test); done: // Goto Version if(!Test) goto done; loop: Body if (Test) goto loop; done: // assembly _Z12pcount_whilem: .LFB1812: testq %rdi, %rdi je .L4 movl $0, %eax .L3: movq %rdi, %rdx andl $1, %edx addq %rdx, %rax shrq %rdi jne .L3 ret .L4: movl $0, %eax ret
For
C Code
1 2 3 4 5 6 7 8 9 10 #define WSIZE 8*sizeof(int) long pcount_while (unsigned long x) { size_t i; long result = 0 ; for (int i = 0 ; i < WSIZE; i ++) { unsigned bit = (x >> i) & 0x1 ; result += bit; } return result; }
For loop->while loop
1 2 3 4 5 6 7 8 9 10 for (Init; Test; Update) Body; Init; while (Test) { Body Update; }
Switch Statements
C Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 long switch_eg (long x, long y, long z) { long w = 1 ; switch (x) { case 1 : w = y*z; break ; case 2 : w = y/z; case 3 : w += z; break ; case 5 : case 6 : w -= 2 ; break ; default : w = 2 ; } return w; }
Jump Table Structure
Switch Form
关于跳转表的生成条件,在我的测试中,上述代码不会生产跳转表,无论是-Og,-O1,-O2,还是-O3优化。在case5中加入一条语句(w+=y)后,再次编译,就生产了跳转表。这取决于gcc的具体实现。
Assembly Code
有跳转表
.L11 是跳转表的基地址,跳转表中存储了target代码块相对于.L11的偏移地址。
所以实现跳转的汇编代码的主要逻辑如下:
leaq .L11(%rip), %rdx 取出.L11的地址
movslq (%rdx,%rdi,4), %rax 基于.L11的基地址,计算出跳转表第idx个元素(里面存储的是target代码块相对于.L11的偏移,而不是target代码块的地址)的地址。rax = rdx + 4 * rdi
addq %rdx, %rax 将rdx与rax相加=.L11基地址+target代码块于.LL11的偏移=target代码块地址
Jmp *%rax 跳转到target代码块。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 _Z9switch_eglll: .LFB1812: .cfi_startproc endbr64 movq %rdx, %rcx cmpq $6, %rdi # x : 6 ja .L16 # use default leaq .L11(%rip), %rdx movslq (%rdx,%rdi,4), %rax addq %rdx, %rax notrack jmp *%rax # goto *JTab[x] 跳转表 .section .rodata .align 4 .align 4 .L11: .long .L16-.L11 # default .long .L15-.L11 # case 1 .long .L14-.L11 # case 2 .long .L17-.L11 # case 3 .long .L16-.L11 # case 4 .long .L12-.L11 # case 5 .long .L18-.L11 # case 6 .text .L15: movq %rsi, %rax imulq %rcx, %rax ret .L14: movq %rsi, %rax cqto idivq %rcx jmp .L13 .L17: movl $1, %eax .L13: addq %rcx, %rax ret .L12: leaq 1(%rsi), %rax jmp .L10 .L18: movl $1, %eax .L10: subq $2, %rax ret .L16: movl $2, %eax ret .cfi_endproc