Skip to content

Commit 0537d14

Browse files
committedOct 10, 2021
Added vm tracer
1 parent b64c33f commit 0537d14

File tree

5 files changed

+1929
-0
lines changed

5 files changed

+1929
-0
lines changed
 

‎tracer/README.md

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# vm_disassembler_tracer.py
2+
3+
This script takes the values obtained from *miasm* while applying symbolic execution, to follow the control flow of the program and generating a *VM Stack* and *local variables* used during the execution of the binary.
4+
5+
This will be helpful to follow the execution of the obfuscated function, as the obfuscation works with a virtual stack, and virtual variables, we cannot easily watch this on a debugger, this tracer allows you to follow the execution watching after each instruction the result on stack and in variables.
6+
7+
* Example 1:
8+
9+
![Example1](example1.jpg)
10+
11+
* Example 2:
12+
13+
![Example2](example2.jpg)
14+
15+
## Execution output
16+
17+
You can see one of the outputs using as input parameter the value *5*, in the file *output.txt*.

‎tracer/example1.jpg

55.6 KB
Loading

‎tracer/example2.jpg

75.4 KB
Loading

‎tracer/output.txt

+1,612
Large diffs are not rendered by default.

‎tracer/vm_disassembler_tracer.py

+300
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,300 @@
1+
#!/usr/bin/python3
2+
import sys
3+
from miasm.analysis.binary import Container
4+
from miasm.analysis.machine import Machine
5+
from miasm.core.locationdb import LocationDB
6+
from miasm.expression.expression import *
7+
from miasm.expression.simplifications import expr_simp
8+
from miasm.ir.symbexec import SymbolicExecutionEngine
9+
10+
11+
# hardcoded list of VM handlers taken from the binary
12+
VM_HANDLERS = set([
13+
0x129e,
14+
0x1238,
15+
0x126d,
16+
0x11c4,
17+
0x1262,
18+
0x11a9,
19+
0x1245,
20+
0x11f1,
21+
0x11e1,
22+
0x1281,
23+
0x1226,
24+
])
25+
26+
27+
# program stack used for the operations
28+
stack = dict()
29+
# global variables used during stack operations
30+
# here will be stored the values extracted from
31+
# the stack
32+
g_vars = dict()
33+
# number used as parameter for the function
34+
fib_number = 1
35+
36+
37+
def constraint_memory(address, num_of_bytes):
38+
"""
39+
Reads `num_of_bytes` from the binary at a given address
40+
and builds symbolic formulas to pre-configure the symbolic
41+
execution engine for concolinc execution.
42+
"""
43+
global container
44+
# read bytes from binary
45+
byte_stream = container.bin_stream.getbytes(address, num_of_bytes)
46+
# build symbolic memory address
47+
sym_address = ExprMem(ExprInt(address, 64), num_of_bytes * 8)
48+
# build symbolic memory value
49+
sym_value = ExprInt(int.from_bytes(
50+
byte_stream, byteorder='little'), num_of_bytes * 8)
51+
52+
return sym_address, sym_value
53+
54+
55+
56+
def disassemble(sb, address):
57+
"""
58+
Callback to dump individual VM handler information,
59+
execution context etc.
60+
"""
61+
global stack
62+
global g_vars
63+
# fetch value of current virtual instruction pointer
64+
vip = sb.symbols[ExprId("RDX", 64)]
65+
vsp = sb.symbols[ExprId("RCX", 64)]
66+
var = sb.symbols[ExprId("RSI", 64)]
67+
r8 = sb.symbols[ExprId("R8",64)]
68+
69+
# catch the individual handlers and print execution context
70+
if int(address) == 0x129e:
71+
first_val = stack[list(stack.keys())[-1]]
72+
second_val = stack[list(stack.keys())[-2]]
73+
74+
print(f"{vip}: {second_val} <= {first_val} ", end="")
75+
76+
# check ExprInt values and push if true or false
77+
if (int(second_val) <= int(first_val)):
78+
print("(true)")
79+
stack[list(stack.keys())[-1]] = ExprInt(1,32)
80+
else:
81+
print("(false)")
82+
stack[list(stack.keys())[-1]] = ExprInt(0,32)
83+
84+
# clean one of the values from the stack
85+
stack.pop(list(stack.keys())[-2], None)
86+
87+
elif int(address) == 0x1238:
88+
print(f"{vip}: [VM_STK] = *VAR")
89+
# retrieve from the top of the stack, the address
90+
top_stack = stack[list(stack.keys())[-1]]
91+
# insert the value pointed by a global var.
92+
stack[list(stack.keys())[-1]] = g_vars[top_stack]
93+
94+
elif int(address) == 0x126d:
95+
96+
# get the address pointed by vip
97+
address = expr_simp((vip + ExprInt(1,64)).signExtend(64))
98+
# get now the number in that address
99+
number = expr_simp(sb.symbols[ExprMem(address, 32)])
100+
101+
print(f"{vip}: if [V_PC+1] == 0: push [r8] ({number} == 0)")
102+
103+
# if the number is equals to zero,
104+
# push a value from r8 and create it
105+
# as variable.
106+
107+
# R8 will hold a pointer to the parameter
108+
# of the fibonacci number
109+
if (number == ExprInt(0,32)):
110+
stack[vsp] = expr_simp(r8)
111+
g_vars[r8] = ExprInt(fib_number,32)
112+
113+
114+
elif int(address) == 0x11c4:
115+
116+
# get the address pointed by vip
117+
address = expr_simp((vip + ExprInt(1,64)).signExtend(64))
118+
# get now the number in that address
119+
number = expr_simp(sb.symbols[ExprMem(address, 32)])
120+
# address of var
121+
local_var = expr_simp(var+number.zeroExtend(64))
122+
123+
print(f"{vip}: PUSH ADDR [RSI+{number}] = {local_var} (push address of local variable)")
124+
125+
# push the address of local var on the stack
126+
stack[vsp] = local_var
127+
128+
elif int(address) == 0x1262:
129+
# get the address where to jump and
130+
# calculate target
131+
address = expr_simp(vip + ExprInt(1,64))
132+
offset = expr_simp(sb.symbols[ExprMem(address, 32)]).zeroExtend(64)
133+
target = expr_simp(address + offset)
134+
print(f"{vip}: GOTO {target}")
135+
elif int(address) == 0x11a9:
136+
137+
first_val = stack[list(stack.keys())[-1]]
138+
second_val = stack[list(stack.keys())[-2]]
139+
140+
print(f"{vip}: [VM_STACK] = {first_val} + {second_val}")
141+
142+
# remove the two values from the stack
143+
stack.pop(list(stack.keys())[-2], None)
144+
stack.pop(list(stack.keys())[-1], None)
145+
146+
# push the result
147+
stack[vsp] = expr_simp(first_val + second_val)
148+
149+
elif int(address) == 0x1245:
150+
print(f"{vip}: VM_RET")
151+
152+
result = stack[list(stack.keys())[-1]]
153+
print("================================")
154+
print(f"| Result value: {result} |")
155+
print("================================")
156+
157+
elif int(address) == 0x11f1:
158+
# retrieve the values from the stack
159+
first_val = stack[list(stack.keys())[-1]]
160+
second_val = stack[list(stack.keys())[-2]]
161+
# clean the stack
162+
stack.pop(list(stack.keys())[-1], None)
163+
stack.pop(list(stack.keys())[-1], None)
164+
165+
print(f"{vip}: {first_val} == {second_val} ", end="")
166+
167+
# push the result as 1 for true, or 0 for false
168+
if (first_val == second_val):
169+
print("(true)")
170+
stack[vsp] = ExprInt(1,32)
171+
else:
172+
print("(false)")
173+
stack[vsp] = ExprInt(0,32)
174+
175+
elif int(address) == 0x11e1:
176+
# calculate address for bytecode
177+
address = expr_simp(vip + ExprInt(1, 64))
178+
# read from bytecode
179+
constant = expr_simp(sb.symbols[ExprMem(address, 32)])
180+
181+
print(f"{vip}: PUSH {constant}")
182+
183+
# write value on stack
184+
stack[vsp] = constant
185+
186+
elif int(address) == 0x1281:
187+
188+
address = expr_simp(vip + ExprInt(1,64))
189+
constant = expr_simp(sb.symbols[ExprMem(address, 32)])
190+
target = expr_simp(vip + constant.zeroExtend(64) + ExprInt(1, 64))
191+
print(f"{vip}: VM_COND_JUMP {target} ", end="")
192+
193+
if stack[list(stack.keys())[-1]] == ExprInt(1,32):
194+
print("(taken)")
195+
else:
196+
print("(not taken)")
197+
198+
# clean the stack from the boolean value
199+
stack.pop(list(stack.keys())[-1], None)
200+
201+
elif int(address) == 0x1226:
202+
203+
print(f"{vip}: POP VAR")
204+
top_stack = stack[list(stack.keys())[-1]]
205+
value = stack[list(stack.keys())[-2]]
206+
# remove stack
207+
stack.pop(list(stack.keys())[-1], None)
208+
stack.pop(list(stack.keys())[-1], None)
209+
# create global vars
210+
g_vars[top_stack] = value
211+
212+
i = 0
213+
print("\t\t========STACK=========")
214+
for key, value in stack.items():
215+
if i == len(stack)-1:
216+
print(f"\t\tVM_SP => {key} ==> {value}")
217+
else:
218+
print(f"\t\t\t{key} ==> {value}")
219+
i += 1
220+
print("\t\t======================")
221+
222+
223+
print("\t\t========VARS=========")
224+
for key, value in g_vars.items():
225+
print(f"\t\t\t{key} ==> {value}")
226+
print("\t\t=====================")
227+
228+
# check arguments
229+
if len(sys.argv) != 2:
230+
print(f"[*] Syntax: {sys.argv[0]} <file>")
231+
exit()
232+
233+
# parse file path
234+
file_path = sys.argv[1]
235+
236+
# address of vm entry
237+
start_addr = 0x115a
238+
239+
# init symbol table
240+
loc_db = LocationDB()
241+
# read binary file
242+
container = Container.from_stream(open(file_path, 'rb'), loc_db)
243+
# get CPU abstraction
244+
machine = Machine(container.arch)
245+
# disassembly engine
246+
mdis = machine.dis_engine(container.bin_stream, loc_db=loc_db)
247+
248+
# initialize lifter to intermediate representation
249+
lifter = machine.lifter_model_call(mdis.loc_db)
250+
251+
# disassemble the function at address
252+
asm_cfg = mdis.dis_multiblock(start_addr)
253+
254+
# translate asm_cfg into ira_cfg
255+
ira_cfg = lifter.new_ircfg_from_asmcfg(asm_cfg)
256+
257+
# init SE engine
258+
sb = SymbolicExecutionEngine(lifter)
259+
260+
# constraint bytecode -- start address and size (highest address - lowest address)
261+
sym_address, sym_value = constraint_memory(0x4060, 0x4140 - 0x4060)
262+
sb.symbols[sym_address] = sym_value
263+
264+
# constraint VM input (rdi, first function argument). The value in `ExprInt` rerpesents the function's input value.
265+
rdi = ExprId("RDI", 64)
266+
sb.symbols[rdi] = ExprInt(fib_number, 64)
267+
268+
sb.symbols[ExprId("RCX", 64)] = ExprId("VM_STACK", 64)
269+
sb.symbols[ExprId("RDX", 64)] = ExprId("VM_PC", 64)
270+
271+
# init worklist
272+
basic_block_worklist = [ExprInt(start_addr, 64)]
273+
274+
# worklist algorithm
275+
while basic_block_worklist:
276+
# get current block
277+
current_block = basic_block_worklist.pop()
278+
279+
# print(f"current block: {current_block}")
280+
281+
# if current block is a VM handler, dump handler-specific knowledge
282+
if current_block.is_int() and int(current_block) in VM_HANDLERS:
283+
disassemble(sb, current_block)
284+
285+
# symbolical execute block -> next_block: symbolic value/address to execute
286+
next_block = sb.run_block_at(ira_cfg, current_block, step=False)
287+
288+
# print(f"next block: {next_block}")
289+
290+
# is next block is integer or label, continue execution
291+
if next_block.is_int() or next_block.is_loc():
292+
basic_block_worklist.append(next_block)
293+
294+
# dump symbolic state
295+
# sb.dump()
296+
297+
# dump VMs/functions' return value -- only works if SE runs until the end
298+
# rax = ExprId("RAX", 64)
299+
# value = sb.symbols[rax]
300+
# print(f"VM return value: {value}")

0 commit comments

Comments
 (0)
Please sign in to comment.