Control: Condition codes

Processor State(x86-64,Partial)

Information about currently executing program

  • Temporary data(%rax, …)
  • Location of runtime stack(%rsp)
  • Location of current code control point(%rip,…)
  • Status of recent tests(CF,ZF,SF,OF)

img

Condition Codes(Implicit Setting)

Single bit registers

Example: addq Src,Dest <-> t = a + b

  • CF Carry Flag(for unsigned): if carry out from most significant bit(unsigned overflow)
  • ZF Zero Flag: set if t == 0
  • SF Sign Flag(for signed): set if t < 0 (as signed)
  • OF Overflow Flag(for signed): set if two’s-complement(signed) overflow.
    • (a>0&&b>0&&t<0)(a<0&&b<0&&t>=0)(a>0\&\&b>0\&\&t<0)||(a<0\&\&b<0\&\&t>=0)

No set by leaq instruction

Condition Codes(Explicit Setting:Compare)

Explicit Setting by Compare Instruction

Cmpq Src2, Src1.

Cmpq b, a like computing a-b without setting destination

  • CF set if carry out from most significant bit (used for unsigned comparisons)
  • ZF set if a == b
  • SF set if (a-b) < 0 (as signed)
  • OF set if two’s-complemet(signed) overflow

Other: testq Src2, Src1

Reading Condition Codes

We can use a series of setX instructions to read flag registers’ values.

setX:Set single byte based on combination of condition codes

Like this:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#include <stdio.h>

int main() {
int value = 50;
char result; // 使用 char 类型来匹配 8 位寄存器

__asm__("testl %1, %1\n\t"
"setz %0"
: "=r"(result) // 约束:输出到8位寄存器
: "r"(value) // 约束:输入为32位寄存器
);

if (result) {
printf("Value is zero.\n");
} else {
printf("Value is non-zero.\n");
}

return 0;
}

testl: Used in assembly language to perform logical AND operations, but instead of saving the result, it reflects the state of the result in the flag register.

setz:set target register value by ZF.

img

Let’s look at another example.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#include <stdio.h>
int gt(long x, long y) {
return x > y;
}
int main() {
int z = gt(3,4);
return 0;
}
gt:
.LFB0:
pushq %rbp
movq %rsp, %rbp
movq %rdi, -8(%rbp)
movq %rsi, -16(%rbp)
movq -8(%rbp), %rax
cmpq -16(%rbp), %rax # compare x:y
setg %al # Set when >
movzbl %al, %eax # Zero rest of %rax
popq %rbp
ret

Setg: ~(SF^OF)&~ZF

Conditional branches

jumping

jX instructions: jump to different parts of code depending on condition codes.

Conditional Branch Example(Old Style)

1
2
3
4
5
6
7
8
long absdiff(long x, long y) {
long result;
if(x > y)
result = x - y;
else
result = y - x;
return result;
}

gcc -s -Og -fno-if-conversion reading\ condition.c

1
2
3
4
5
6
7
8
9
10
11
absdiff:
.LFB23:
cmpq %rsi, %rdi # x:y
jle .L2
movq %rdi, %rax
subq %rsi, %rax
ret
.L2:
movq %rsi, %rax
subq %rdi, %rax
ret

Using conditional moves

1
2
3
4
5
6
7
8
9
10
11
absdiff:
.LFB23:
.cfi_startproc
endbr64
movq %rdi, %rdx # x
movq %rsi, %rax # y
subq %rsi, %rdx # eval = x - y
subq %rdi, %rax # reslut = y - x
cmpq %rsi, %rdi # x:y
cmovg %rdx, %rax # if >, result = eval
ret

Bad Cases for Conditional Move

Expensive Computations

1
val = Test(x) ? Hard1(x) : Hard2(x);

Both values get computed, Only makes sense when computations are very simple

Risky Computations

1
Val = p ? *p : 0;

Both values get computed, May have undesirable effects

Computations with side effects

1
Val = x > 0 ? x*=7 : x+=3;

Must be side-effect free

Loops

Do-While

C Code

1
2
3
4
5
6
7
8
long pcount_do(unsigned long x) {
long result = 0;
do {
result += x & 0x1;
x >>= 1;
} while(x);
return result;
}

Goto Version

1
2
3
4
5
6
7
8
long pcount_goto(unsigned long x) {
long result = 0;
loop:
result += x & 0x1;
x >>= 1;
if(x) goto loop;
return result;
}

Assembly code

1
2
3
4
5
6
7
8
        movl        $0, %eax    # result = 0
.L9: # loop:
movq %rdi, %rdx
andl $1, %edx # t = x & 0x1;
addq %rdx, %rax # result += t;
shrq %rdi # x >>= 1
jne .L9 # if (x) goto loop
ret

While

C Code

1
2
3
4
5
6
7
8
long pcount_while(unsigned long x) {
long result = 0;
while(x) {
result += x & 0x1;
x >>= 1;
}
return result;
}

Goto version

1
2
3
4
5
6
7
8
9
10
long pcount_goto_jtm(unsigned long x) {
long result = 0;
goto test;
loop:
result += x & 0x1;
x >> = 1;
test:
if(x) goto loop;
return result;
}

Used with -O1.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
// While Version
while(Test)
Body

// Do-While Version
if(!Test)
goto Done;
do
Body
while(Test);
done:
// Goto Version
if(!Test)
goto done;
loop:
Body
if (Test)
goto loop;
done:
// assembly
_Z12pcount_whilem:
.LFB1812:
testq %rdi, %rdi
je .L4
movl $0, %eax
.L3:
movq %rdi, %rdx
andl $1, %edx
addq %rdx, %rax
shrq %rdi
jne .L3
ret
.L4:
movl $0, %eax
ret

For

C Code

1
2
3
4
5
6
7
8
9
10
#define WSIZE 8*sizeof(int)
long pcount_while(unsigned long x) {
size_t i;
long result = 0;
for (int i = 0; i < WSIZE; i ++) {
unsigned bit = (x >> i) & 0x1;
result += bit;
}
return result;
}

For loop->while loop

1
2
3
4
5
6
7
8
9
10
// for version
for (Init; Test; Update)
Body;

// while version
Init;
while(Test) {
Body
Update;
}

Switch Statements

C Code

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
long switch_eg(long x, long y, long z) {
long w = 1;
switch(x) {
case 1:
w = y*z;
break;
case 2:
w = y/z;
case 3:
w += z;
break;
case 5:
case 6:
w -= 2;
break;
default:
w = 2;
}
return w;
}

Jump Table Structure

Switch Form

img

关于跳转表的生成条件,在我的测试中,上述代码不会生产跳转表,无论是-Og,-O1,-O2,还是-O3优化。在case5中加入一条语句(w+=y)后,再次编译,就生产了跳转表。这取决于gcc的具体实现。

Assembly Code

有跳转表

.L11 是跳转表的基地址,跳转表中存储了target代码块相对于.L11的偏移地址。

所以实现跳转的汇编代码的主要逻辑如下:

  1. leaq .L11(%rip), %rdx 取出.L11的地址
  2. movslq (%rdx,%rdi,4), %rax 基于.L11的基地址,计算出跳转表第idx个元素(里面存储的是target代码块相对于.L11的偏移,而不是target代码块的地址)的地址。rax = rdx + 4 * rdi
  3. addq %rdx, %rax 将rdx与rax相加=.L11基地址+target代码块于.LL11的偏移=target代码块地址
  4. Jmp *%rax 跳转到target代码块。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
_Z9switch_eglll:
.LFB1812:
.cfi_startproc
endbr64
movq %rdx, %rcx
cmpq $6, %rdi # x : 6
ja .L16 # use default
leaq .L11(%rip), %rdx
movslq (%rdx,%rdi,4), %rax
addq %rdx, %rax
notrack jmp *%rax # goto *JTab[x] 跳转表
.section .rodata
.align 4
.align 4
.L11:
.long .L16-.L11 # default
.long .L15-.L11 # case 1
.long .L14-.L11 # case 2
.long .L17-.L11 # case 3
.long .L16-.L11 # case 4
.long .L12-.L11 # case 5
.long .L18-.L11 # case 6
.text
.L15:
movq %rsi, %rax
imulq %rcx, %rax
ret
.L14:
movq %rsi, %rax
cqto
idivq %rcx
jmp .L13
.L17:
movl $1, %eax
.L13:
addq %rcx, %rax
ret
.L12:
leaq 1(%rsi), %rax
jmp .L10
.L18:
movl $1, %eax
.L10:
subq $2, %rax
ret
.L16:
movl $2, %eax
ret
.cfi_endproc