# Angr 学习

学习 angr 借鉴了 Syclover 战队大佬 SYJ 的笔记 https://agate-colony-3f5.notion.site/angr_ctf-8a26aa74a4a74c428ef6129e2df6dd02

# Angr

angr 是一个二进制分析框架,它可以用于静态和动态分析,以及漏洞发现等方面。它被设计用来处理二进制文件,包括 ELF,PE 和 Mach-O 等文件格式。

angr 提供了一个 Python API,可以用来编写脚本来自动化分析过程。它支持多种分析技术,包括符号执行,动态符号执行和模糊测试等。它还提供了一些有用的工具,如符号执行器、符号内存和符号寄存器等,以帮助用户进行深入的分析。

angr 的一个优点是它可以在不需要人工干预的情况下进行自动化分析。这使其成为安全研究人员和漏洞猎人的有用工具,他们可以使用 angr 来自动发现漏洞并生成利用代码。

# 使用 angr 的大概步骤

  • 创建 project
  • 设置 state
  • 新建 符号量 : BVS (bitvector symbolic) 或 BVV (bitvector value) [约束求解时的自变量,可类比 z3]
  • 把符号量设置到内存或者其他地方 (这两步非必须)
  • 设置 Simulation Managers , 进行路径探索的对象
  • 运行,探索满足路径需要的值
  • 约束求解,获取执行结果

# 00_angr_find

import angr
import sys
# 加载可执行文件路径
path_to_binary = "./00_angr_find"
project = angr.Project(path_to_binary)
# 找到入口
initial_state = project.factory.entry_state()
# 创建模拟管理器,支持搜索和执行
simulation = project.factory.simgr(initial_state)
# 设置要进入的位置:integer (probably in hexadecimal)
simulation.explore(find=0x08048678)
# 查看结果状态
solution_state = simulation.found[0]
# 输出结果
print(solution_state.posix.dumps(sys.stdin.fileno()))

image-20230407212823955

image-20230407212849218

# 01_angr_avoid

使用.explore () 方法的另一个参数 avoid 来回避 avoid_me 函数即可 (如果 IDA 按 TAB 会提示 main 函数过大)image-20230410164448520

直接找到 2 个函数的地址就能写脚本了

import angr
import sys
# 加载可执行文件路径
path_to_binary = "./01_angr_avoid"
project = angr.Project(path_to_binary)
# 找到入口
initial_state = project.factory.entry_state()
# 创建模拟管理器,支持搜索和执行
simulation = project.factory.simgr(initial_state)
# 设置要进入的位置,回避的函数
simulation.explore(find=0x80485f7,avoid=0x80485bf)
# 查看结果状态
solution_state = simulation.found[0]
# 输出结果
print(solution_state.posix.dumps(sys.stdin.fileno()))

结果:

root@mou-virtual-machine:/home/mou/桌面/angr_ctf-master/solutions/01_angr_avoid#
 python3 try.py
WARNING  | 2023-04-10 16:51:27,641 | angr.storage.memory_mixins.default_filler_mixin | The program is accessing register with an unspecified value. This could indicate unwanted behavior.
WARNING  | 2023-04-10 16:51:27,641 | angr.storage.memory_mixins.default_filler_mixin | angr will cope with this by generating an unconstrained symbolic variable and continuing. You can resolve this by:
WARNING  | 2023-04-10 16:51:27,642 | angr.storage.memory_mixins.default_filler_mixin | 1) setting a value to the initial state
WARNING  | 2023-04-10 16:51:27,642 | angr.storage.memory_mixins.default_filler_mixin | 2) adding the state option ZERO_FILL_UNCONSTRAINED_{MEMORY,REGISTERS}, to make unknown regions hold null
WARNING  | 2023-04-10 16:51:27,642 | angr.storage.memory_mixins.default_filler_mixin | 3) adding the state option SYMBOL_FILL_UNCONSTRAINED_{MEMORY,REGISTERS}, to suppress these messages.
WARNING  | 2023-04-10 16:51:27,642 | angr.storage.memory_mixins.default_filler_mixin | Filling register edi with 4 unconstrained bytes referenced from 0x80d45b1 (__libc_csu_init+0x1 in 01_angr_avoid (0x80d45b1))
WARNING  | 2023-04-10 16:51:31,839 | angr.storage.memory_mixins.default_filler_mixin | Filling memory at 0x7ffeff3d with 11 unconstrained bytes referenced from 0x819b3b0 (strncmp+0x0 in libc.so.6 (0x9b3b0))
WARNING  | 2023-04-10 16:51:31,839 | angr.storage.memory_mixins.default_filler_mixin | Filling memory at 0x7ffeff60 with 4 unconstrained bytes referenced from 0x819b3b0 (strncmp+0x0 in libc.so.6 (0x9b3b0))
b'JLVUSGJZ'
root@mou-virtual-machine:/home/mou/桌面/angr_ctf-master/solutions/01_angr_avoid#
 ./01_angr_avoid
Enter the password: JLVUSGJZ
Good Job.

# 02_angr_find_condition

学习根据程序本身的输出来告诉 angr 应避免或保留的内容,而不是每次都找打印错误提示的函数地址

import angr
import sys
def Go():
    path_to_binary = "./02_angr_find_condition"
    project = angr.Project(path_to_binary, auto_load_libs=False)
    initial_state = project.factory.entry_state()
    simulation = project.factory.simgr(initial_state)
    def is_successful(state):
        stdout_output = state.posix.dumps(sys.stdout.fileno())#输出内容放到变量中
        if b'Good Job.' in stdout_output:
            return True
        else:
            return False
    def should_abort(state):
        stdout_output = state.posix.dumps(sys.stdout.fileno())
        if b'Try again.' in  stdout_output:
            return True
        else:
            return False
    simulation.explore(find=is_successful, avoid=should_abort)
    if simulation.found:
        solution_state = simulation.found[0]
        solution = solution_state.posix.dumps(sys.stdin.fileno())
        print("Solution is: {}".format(solution.decode("utf-8")))
    else:
        raise Exception('Could not find the solution')
if __name__ == "__main__":
    Go()

注意,这不是字符串,而是字节对象,这意味着我们必须使用 b’Good Job.' 而不是仅 "Good Job." 来检查我们是否正确输出了 “Good Job.”

# 03_angr_simbolic_registers

学会符号化寄存器

import sys
import angr
import claripy
def is_successful(state):
    stdout_output = state.posix.dumps(sys.stdout.fileno())
    if b'Good Job.\n' in stdout_output:
        return True
    else:
        return False
def should_abort(state):
    stdout_output = state.posix.dumps(sys.stdout.fileno())
    if b'Try again.\n' in stdout_output:
        return True
    else:
        return False
def Go():
    path_to_binary = "./03_angr_symbolic_registers"
    project = angr.Project(path_to_binary)
    start_address = 0x80488c7 
    initial_state = project.factory.blank_state(addr=start_address)
    passwd_size_in_bits = 32
    passwd0 = claripy.BVS('passwd0', passwd_size_in_bits)
    passwd1 = claripy.BVS('passwd1', passwd_size_in_bits)
    passwd2 = claripy.BVS('passwd2', passwd_size_in_bits)
    initial_state.regs.eax = passwd0
    initial_state.regs.ebx = passwd1
    initial_state.regs.edx = passwd2
    simulation = project.factory.simgr(initial_state)
    simulation.explore(find=is_successful, avoid=should_abort)
    if simulation.found:
        for i in simulation.found:
            solution_state = i
            solution0 = format(solution_state.solver.eval(passwd0), 'x')
            solution1 = format(solution_state.solver.eval(passwd1), 'x')
            solution2 = format(solution_state.solver.eval(passwd2), 'x')
            solution = solution0 + " " + solution1 + " " + solution2
            print("[+] Success! Solution is: {}".format(solution))
            # print(simgr.found[0].posix.dumps(0))
    else:
        raise Exception('Could not find the solution')
if __name__ == '__main__':
    Go()

这里用到了 claripy 模块,claripy 主要将变量符号化,生成约束式并求解约束式,这也是符号执行的核心所在,在 angr 中主要是利用微软提供的 z3 库去解约束式。

# 04_angr_symbolic_stack

学习符号化栈区 (从之前的寄存器传参变成了利用栈空间传参)

image-20230411205729603

手动布置栈

import angr
import claripy
filename = "04_angr_symbolic_stack"
p = angr.Project(filename)
startAddr = 0x080486AE
init_state = p.factory.blank_state(addr=startAddr) 
init_state.stack_push(init_state.regs.ebp)
init_state.regs.ebp = init_state.regs.esp
init_state.regs.esp -= 8
pass1 = init_state.solver.BVS('pass1', 32)
pass2 = init_state.solver.BVS('pass2', 32)
init_state.stack_push(pass1)
init_state.stack_push(pass2)
sm = p.factory.simulation_manager(init_state)
def is_good(state):
    return b'Good Job' in state.posix.dumps(1)
def is_bad(state):
    return b'Try again' in state.posix.dumps(1)
sm.explore(find=is_good, avoid=is_bad)
if sm.found:
    found_state = sm.found[0]
    passwodr1 = found_state.solver.eval(pass1)
    passwodr2 = found_state.solver.eval(pass2)
    print("Solution:",int(passwodr1), int(passwodr2))
else:
    raise Exception("No solution found!")

# 05_angr_symbolic_memory

学习符号化内存 (符号化.bss 段)

4 个输入字符串存于内存地址

import angr
import claripy
def is_successful(state):
    stdout_output = state.posix.dumps(1)
    if b'Good Job.\n' in stdout_output:
        return True
    else:
        return False
def should_abort(state):
    stdout_output = state.posix.dumps(1)
    if b'Try again.\n' in stdout_output:
        return True
    else:
        return False
def Go():
    path_to_binary = "./05_angr_symbolic_memory"
    project = angr.Project(path_to_binary, auto_load_libs=False)
    start_address = 0x08048618
    initial_state = project.factory.blank_state(addr=start_address)
    passwd_size_in_bits = 64
    passwd0 = claripy.BVS('passwd0', passwd_size_in_bits)
    passwd1 = claripy.BVS('passwd1', passwd_size_in_bits)
    passwd2 = claripy.BVS('passwd2', passwd_size_in_bits)
    passwd3 = claripy.BVS('passwd3', passwd_size_in_bits)
    passwd0_address = 0x0AB232C0 # user_input addr
    initial_state.memory.store(passwd0_address, passwd0)
    initial_state.memory.store(passwd0_address + 0x8, passwd1)
    initial_state.memory.store(passwd0_address + 0x10, passwd2)
    initial_state.memory.store(passwd0_address + 0x18, passwd3)
    simulation = project.factory.simgr(initial_state)
    simulation.explore(find=is_successful, avoid=should_abort)
    if simulation.found:
        for i in simulation.found:
            solution_state = i
            solution0 = solution_state.solver.eval(passwd0, cast_to=bytes)
            solution1 = solution_state.solver.eval(passwd1, cast_to=bytes)
            solution2 = solution_state.solver.eval(passwd2, cast_to=bytes)
            solution3 = solution_state.solver.eval(passwd3, cast_to=bytes)
            solution = solution0 + b" " + solution1 + b" " + solution2 + b" " + solution3
            print("[+] Success! Solution is: {}".format(solution.decode("utf-8")))
            # print(solution0, solution1, solution2, solution3)
    else:
        raise Exception('Could not find the solution')
if __name__ == "__main__":
    Go()

[+] Success! Solution is: OJQVXIVX LLEAOODW UVCWUVVC AJXJMVKA

# 06_angr_symbolic_dynamic_memory

这题主要是学会符号化动态内存,这个题与上题没有太大区别,除了字符串的内存是通过堆 malloc () 而不是堆栈分配的

import angr
import claripy
import sys
def main(argv):
  path_to_binary = "./06_angr_symbolic_dynamic_memory"
  project = angr.Project(path_to_binary)
  start_address = 0x80486af
  initial_state = project.factory.blank_state(
    addr=start_address,
    add_options = { angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
                    angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS}
  )
  password0 = claripy.BVS('password0', 8*8)
  password1 = claripy.BVS('password0', 8*8)
  fake_heap_address0 = 0x4444444
  pointer_to_malloc_memory_address0 = 0xa2def74
  initial_state.memory.store(pointer_to_malloc_memory_address0, fake_heap_address0, endness=project.arch.memory_endness, size=4)
  fake_heap_address1 = 0x4444454
  pointer_to_malloc_memory_address1 = 0xa2def7c
  initial_state.memory.store(pointer_to_malloc_memory_address1, fake_heap_address1, endness=project.arch.memory_endness, size=4)
  initial_state.memory.store(fake_heap_address0, password0)
  initial_state.memory.store(fake_heap_address1, password1)
  simulation = project.factory.simgr(initial_state)
  def is_successful(state):
    stdout_output = state.posix.dumps(sys.stdout.fileno())
    return 'Good Job.'.encode() in stdout_output
  def should_abort(state):
    stdout_output = state.posix.dumps(sys.stdout.fileno())
    return 'Try again.'.encode() in stdout_output
  simulation.explore(find=is_successful, avoid=should_abort)
  if simulation.found:
    solution_state = simulation.found[0]
    solution0 = solution_state.solver.eval(password0,cast_to=bytes).decode()
    solution1 = solution_state.solver.eval(password1,cast_to=bytes).decode()
    solution = ' '.join([ solution0, solution1 ])
    print(solution)
  else:
    raise Exception('Could not find the solution')
if __name__ == '__main__':
  main(sys.argv)

实际上,(IDA 中) 可以看到在两次调用之后,根据 mov ds:buffer0, eax 和 mov ds:buffer1, eax 得知开辟后的缓冲区被复制到标识为 buffer0 和 buffer1 的两个存储区中

(这里总的逻辑是这样的,之前是 buffer 指向的是 malloc 分配好的内存地址,string 存在这里。现在是 buffer 指向的是我们伪造的地址,符号位向量存在这里)

将两个符号变量存储到一个预先分配的 fake_heap_address0 和 fake_heap_address1 地址中,这些地址是未使用的堆内存地址,Angr 使用 initial_state.memory.store 函数将符号变量存储到指定地址中。通过修改指向 malloc 分配的内存地址的指针,指向 fake_heap_address0 和 fake_heap_address1,从而欺骗程序,使其认为输入的密码已经存储在 malloc 分配的内存中。

# 07_angr_symbolic_file

学习如何符号化一个文件里面的内容

# 1. 确定 fread 读取的文件
# 2. 使用 Angr 模拟一个文件系统,让读取的文件被我们自己的文件替换
# 3. 使用符号值初始化文件,该值将使用 fread 读取
# 4. 求解符号输入以确定密码
import angr
import claripy
import sys
def main(argv):
  path_to_binary = "./07_angr_symbolic_file"
  project = angr.Project(path_to_binary)
  start_address = 0x80488bc
  initial_state = project.factory.blank_state(
    addr=start_address,
    add_options = { angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
                    angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS}
  )
  filename = 'FOQVSBZB.txt'  # :string
  symbolic_file_size_bytes = 8
  password = claripy.BVS('password', symbolic_file_size_bytes * 8)
  password_file = angr.storage.SimFile(filename, content=password)
  
  initial_state.fs.insert(filename, password_file)
  simulation = project.factory.simgr(initial_state)
  def is_successful(state):
    stdout_output = state.posix.dumps(sys.stdout.fileno())
    return 'Good Job.'.encode() in stdout_output
  def should_abort(state):
    stdout_output = state.posix.dumps(sys.stdout.fileno())
    return 'Try again.'.encode() in stdout_output
  simulation.explore(find=is_successful, avoid=should_abort)
  if simulation.found:
    solution_state = simulation.found[0]
    solution = solution_state.solver.eval(password,cast_to=bytes).decode()
    print(solution)
  else:
    raise Exception('Could not find the solution')
if __name__ == '__main__':
  main(sys.argv)

简单来说利用 SimFile 形成符号化的文件的格式:

simgr_file = angr.storage.SimFile(filename, content=xxxxxx, size=file_size)

该代码使用了 Angr 工具模拟了一个文件系统,以解决密码验证的问题。主要流程如下:

  1. 使用 Angr 加载二进制文件,并指定程序入口地址。
  2. 使用 initial_state.fs.insert 函数向模拟文件系统中添加一个名为’FOQVSBZB.txt’的文件,并使用 claripy.BVS 定义了一个名为 password 的符号变量,表示密码。
  3. 将符号变量 password 存储到模拟文件系统的’FOQVSBZB.txt’文件中,作为文件的内容。
  4. 使用 Angr 的 simgr 对象,对初始状态进行符号执行,通过指定的 find 和 avoid 条件,找到满足条件的状态。
  5. 如果找到满足条件的状态,则从该状态中提取出 password 的解,输出解。
  6. 如果无法找到满足条件的状态,则抛出异常。

该代码的核心思想是通过符号执行,对程序进行模拟,并在模拟的文件系统中添加一个文件,并将符号变量存储到该文件中。因此,当程序调用 fread 函数从文件中读取密码时,实际上读取的是符号变量,Angr 可以通过符号执行来解决密码验证的问题。

# 08_angr_constraints

学习通过添加约束条件来解决路径爆炸问题

# 路径爆炸

通过我们之前的学习体验感觉到 angr 这么强大的应用怎么没有在实际的测试生产中大规模应用,这是因为给符号执行技术在复杂程序的测试案例生成的应用中造成阻碍的两个大问题:一个是约束求解问题,另一个就是路径爆炸问题 (for 中 if)

所谓符号执行就是把程序中的变量符号化去模拟程序运行,搜集路径约束条件并使用约束求解器对其进行求解后得到结果。

当一个程序存在循环结构时,即使逻辑十分简单也可能会产生规模十分巨大的执行路径。在符号执行的过程中,每个分支点都会产生两个实例,当程序中存在循环结构展开时,可能会导致程序分支路径数呈指数级增长,即路径爆炸问题。故我们需要提供更多的约束条件控制路径爆炸问题。

# 约束求解

在 angr 中提供了可以用加入一个约束条件到一个 state 中的方法(state.solver.add),将每一个符号化的布尔值作为一个关于符号变量合法性的断言。之后可以通过使用 state.solver.eval (symbol) 对各个断言进行评测来求出一个合法的符号值(若有多个合法值,返回其中的一个)。简单来说就是通过 .add 对 state 对象添加约束,并使用 .eval 接口求解,得到符号变量的可行解

import angr
import claripy
import sys
def main(argv):
  path_to_binary = "./08_angr_constraints"
  project = angr.Project(path_to_binary)
  start_address = 0x804863c
  initial_state = project.factory.blank_state(
    addr=start_address,
    add_options = { angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
                    angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS}
  )
  password = claripy.BVS('password', 8*16)
  password_address = 0x804a040
  initial_state.memory.store(password_address, password)
  simulation = project.factory.simgr(initial_state)
  # Angr will not be able to reach the point at which the binary prints out
  # 'Good Job.'. We cannot use that as the target anymore.
  # (!)
  address_to_check_constraint = 0x8048683
  simulation.explore(find=address_to_check_constraint)
  if simulation.found:
    solution_state = simulation.found[0]
    constrained_parameter_address = 0x804a040
    constrained_parameter_size_bytes = 16
    constrained_parameter_bitvector = solution_state.memory.load(
      constrained_parameter_address,
      constrained_parameter_size_bytes
    )
    # We want to constrain the system to find an input that will make
    # constrained_parameter_bitvector equal the desired value.
    # (!)
    constrained_parameter_desired_value = 'OSIWHBXIFOQVSBZB'.encode() # :string (encoded)
  
    solution_state.add_constraints(constrained_parameter_bitvector == constrained_parameter_desired_value)
    solution = solution_state.solver.eval(password,cast_to=bytes).decode()
    print(solution)
  else:
    raise Exception('Could not find the solution')
if __name__ == '__main__':
  main(sys.argv)

该脚本使用 Angr 工具自动化解决二进制文件中的密码问题。它首先加载二进制文件,然后使用 angr.Project() 创建一个项目对象。接下来,它使用初始状态创建一个符号变量 password ,并将其存储在地址 0x804a040 中。然后,它使用 project.factory.simgr() 创建一个模拟器对象,该对象用于探索状态空间以找到目标状态。接着,它探索状态空间以找到特定地址 0x8048683 ,该地址检查密码是否正确。如果找到目标状态,则添加约束条件,使 constrained_parameter_bitvector 等于预期值。最后,它使用 solver.eval() 求解约束条件,并打印出解决方案。如果找不到解决方案,则会引发异常。

# 09_angr_hooks

之前三面学过 inline hook 但还没咋应用过

学习使用 angr 的 hook 技术解决路径爆炸问题,与刚刚利用的约束条件不同,hook 技术则更为强大!

# Hook

angr 提供了简便的 Hook 接口,能够用于 Hook 被 angr 模拟执行的程序,它通常有两种方式:

  • 装饰器 @project.hook

  • 函数 project.hook_symbol

    装饰器:

@project.hook(0x80486B3, length=5)

第一个参数即需要 Hook 的调用函数的地址,第二个参数 length 即指定执行引擎在完成挂钩后应跳过多少字节。具体多少字节由 Hook 处地址的指令长度确定

@project.hook(check_equals_called_address, length=instruction_to_skip_length)
    def skip_check_equals_(state):
        user_input_buffer_address = 0x804A054
        user_input_buffer_length = 16
        user_input_string = state.memory.load(
            user_input_buffer_address,
            user_input_buffer_length
        )
        check_against_string = 'XKSPZSJKJYQCQXZV'
        register_size_bit = 32
        state.regs.eax = claripy.If(
            user_input_string == check_against_string,
            claripy.BVV(1, register_size_bit),
            claripy.BVV(0, register_size_bit)
        )#不再循环了
    simulation = project.factory.simgr(initial_state)

脚本

import angr
import claripy
import sys
def main(argv):
  path_to_binary = "./09_angr_hooks"
  project = angr.Project(path_to_binary)
  # Since Angr can handle the initial call to scanf, we can start from the
  # beginning.
  initial_state = project.factory.entry_state(
    add_options = { angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
                    angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS}
  )
  # Hook the address of where check_equals_ is called.
  # (!)
  check_equals_called_address = 0x80486ca
  # The length parameter in angr.Hook specifies how many bytes the execution
  # engine should skip after completing the hook. This will allow hooks to
  # replace certain instructions (or groups of instructions). Determine the
  # instructions involved in calling check_equals_, and then determine how many
  # bytes are used to represent them in memory. This will be the skip length.
  # (!)
  instruction_to_skip_length = 5
  @project.hook(check_equals_called_address, length=instruction_to_skip_length)
  def skip_check_equals_(state):
    user_input_buffer_address = 0x804a044 
    user_input_buffer_length = 16
    user_input_string = state.memory.load(
      user_input_buffer_address,
      user_input_buffer_length
    )
    
    check_against_string = 'OSIWHBXIFOQVSBZB'.encode() # :string
    state.regs.eax = claripy.If(
      user_input_string == check_against_string, 
      claripy.BVV(1, 32), 
      claripy.BVV(0, 32)
    )
  simulation = project.factory.simgr(initial_state)
  def is_successful(state):
    stdout_output = state.posix.dumps(sys.stdout.fileno())
    return 'Good Job.'.encode() in stdout_output
  def should_abort(state):
    stdout_output = state.posix.dumps(sys.stdout.fileno())
    return 'Try again.'.encode() in stdout_output
  simulation.explore(find=is_successful, avoid=should_abort)
  if simulation.found:
    solution_state = simulation.found[0]
    # Since we are allowing Angr to handle the input, retrieve it by printing
    # the contents of stdin. Use one of the early levels as a reference.
    solution = solution_state.posix.dumps(sys.stdin.fileno()).decode()
    print(solution)
  else:
    raise Exception('Could not find the solution')
if __name__ == '__main__':
  main(sys.argv)

这段代码使用了 angr 工具包来通过符号执行解决程序的逆向分析问题。首先定义了程序路径 path_to_binary,然后使用 angr.Project 函数将其载入到 angr 中。接着使用 project.factory.entry_state 创建了一个初始状态 initial_state,该状态包含了程序的起始信息。然后,使用 project.hook 函数来注册了一个钩子函数,该钩子函数用于拦截到函数 check_equals_的调用,并修改其返回值。在钩子函数中,首先确定了用户输入的地址和长度,并使用 state.memory.load 方法从用户输入的地址中读取出用户输入的字符串。接着确定了需要与用户输入的字符串进行比较的字符串,并使用 claripy 库中的 If 方法来判断两者是否相等,从而修改 eax 寄存器中的值。最后,使用 project.factory.simgr 函数来创建了一个符号执行器 simulation,并使用其 explore 方法来探索符号执行的状态空间,直到找到满足 is_successful 条件的状态或避免 should_abort 条件的状态。如果找到了满足条件的状态,则从该状态中获取解决方案,并打印输出。否则,抛出异常并提示无法找到解决方案。

另一个版本 (其实差不多)

import angr
import sys
import claripy
def Go():
    path_to_binary = "./09_angr_hooks"
    project = angr.Project(path_to_binary, auto_load_libs=False)
    initial_state = project.factory.entry_state()
    check_equals_called_address = 0x80486CA
    instruction_to_skip_length = 5
    @project.hook(check_equals_called_address, length=instruction_to_skip_length)
    def skip_check_equals_(state):
        user_input_buffer_address = 0x804A044
        user_input_buffer_length = 16
        user_input_string = state.memory.load(
            user_input_buffer_address,
            user_input_buffer_length
        )
        check_against_string = 'OSIWHBXIFOQVSBZB'.encode()
        register_size_bit = 32
        state.regs.eax = claripy.If(
            user_input_string == check_against_string,
            claripy.BVV(1, register_size_bit),
            claripy.BVV(0, register_size_bit)
        )
    simulation = project.factory.simgr(initial_state)
    def is_successful(state):
        stdout_output = state.posix.dumps(1)
        if b'Good Job.' in stdout_output:
            return True
        else:
            return False
    def should_abort(state):
        stdout_output = state.posix.dumps(1)
        if b'Try again.' in  stdout_output:
            return True
        else:
            return False
    simulation.explore(find=is_successful, avoid=should_abort)
    if simulation.found:
        for i in simulation.found:
            solution_state = i
            solution = solution_state.posix.dumps(0)
            print("[+] Success! Solution is: {0}".format(solution.decode('utf-8')))
            #print(solution0)
    else:
        raise Exception('Could not find the solution')
if __name__ == "__main__":
    Go()

# 10_angr_simprocedures

学习如何利用函数名进行 hook,而不是复杂的利用函数的调用地址

第十题的路径爆炸仍然发生在 check_equals 函数当中,与上一题的区别在于,此时输入的字符串被保存在局部变量当中,我们无法提前得知它的地址,因此如果要 hook 掉 check 函数,则必须获取 check 函数的参数。

同时,我们可以发现 check_equals 被调用了很多次,以致于无法通过地址 Hook 每个调用位置。 这时我们必须使用 SimProcedure 编写我们自己的 check_equals 实现,然后通过函数名 Hook 所有对 check_equals 的调用

因此需要使用 hook_symbol 的方式来 hook 掉 check 函数

# Hooking Symbols

每一个程序都有一个符号表,angr 可以确保从每个导入符号都可以解析出地址,可以使用 angr 提供的 Project.hook_symbol API 来通过符号名来 Hook 函数所有的调用地址,这意味着可以用自己的代码替换函数

import angr
import time
import claripy
time_strat = time.perf_counter()
def good(state):
    tag = b'Good' in state.posix.dumps(1)
    return True if tag else False
def bad(state):
    tag = b'Try' in state.posix.dumps(1)
    return True if tag else False
path_to_binary = './10_angr_simprocedures'
p = angr.Project(path_to_binary, auto_load_libs=False)
init_state = p.factory.entry_state()
class Hook(angr.SimProcedure):
    def run(self, addr, size):
        # 后两个参数即为 check_equals 函数的参数
        input_buffer = addr
        input_size = size
        # 从内存中读取输入
        input_str = self.state.memory.load(input_buffer, input_size)
        right_str = 'OSIWHBXIFOQVSBZB'.encode()
        # 比较返回符号约束
        result = claripy.If(input_str == right_str, claripy.BVV(1, 32),
                            claripy.BVV(0, 32))
        return result
# funcname 一定要相同,可在 IDA 中查看
hooked_funcname = 'check_equals_OSIWHBXIFOQVSBZB'
p.hook_symbol(hooked_funcname, Hook())
simgr = p.factory.simgr(init_state)
simgr.explore(find=good, avoid=bad)
if simgr.found:
    solution_state = simgr.found[0]
    flag = solution_state.posix.dumps(0)
    print(flag)

# 11_angr_sim_scanf

如题,这题主要是学习如何 hook scanf 函数,步骤其实与上一题是几乎一致的,得先找到需要 hook 的函数符号,然后编写一个继承 angr.SimProcedure 的类,然后利用 hook_symbol 对函数进行 hook

import angr
import claripy
def good(state):
    tag = b'Good' in state.posix.dumps(1)
    return True if tag else False
def bad(state):
    tag = b'Try' in state.posix.dumps(1)
    return True if tag else False
path_to_binary = './11_angr_sim_scanf'
project = angr.Project(path_to_binary, auto_load_libs=False)
initial_state = project.factory.entry_state()
class ReplacementScanf(angr.SimProcedure):
    # 第一个参数是 scanf 的格式化字符串,接着是两个 buffer 地址
    def run(self, format_string, scanf0_address, scanf1_address):
        # 在 run 中完成 scanf 的功能,即向目标地址写入符号
        # 定义两个符号变量,用于写入 buffer
        scanf0 = claripy.BVS('scanf0', 32)
        scanf1 = claripy.BVS('scanf1', 32)
        # 写入内存,最后一个参数指定字节序
        self.state.memory.store(scanf0_address,
                                scanf0,
                                endness=project.arch.memory_endness)
        self.state.memory.store(scanf1_address,
                                scanf1,
                                endness=project.arch.memory_endness)
        # 保存变量到全局,方便后续约束求解
        self.state.globals['solution0'] = scanf0
        self.state.globals['solution1'] = scanf1
# scanf 的函数名称,可在 ida 中查看
scanf_symbol = '__isoc99_scanf'
project.hook_symbol(scanf_symbol, ReplacementScanf())
simulation = project.factory.simgr(initial_state)
simulation.explore(find=good, avoid=bad)
if simulation.found:
    solution_state = simulation.found[0]
    # 对符号变量进行约束求解
    stored_solutions0 = solution_state.globals['solution0']
    stored_solutions1 = solution_state.globals['solution1']
    scanf0_solution = solution_state.solver.eval(stored_solutions0)
    scanf1_solution = solution_state.solver.eval(stored_solutions1)
    print(scanf0_solution, scanf1_solution)
else:
    raise Exception('Could not find the solution')

run 方法用于执行 ReplacementScanf 类的实例。该方法接收三个参数,分别为 format_string、param0 和 param1。

# 12_angr_veritesting

此题与之前的题目不同的地方在于,它不仅是按位进行比较的,而且是按位进行混淆后马上比较。之前的题目都是整体混淆后比较。因此路径爆炸同时出现在混淆和比较两个地方。

考虑使用路径归并的搜索策略 veritesting 解题 (体验一下 veritesting)

import angr
import claripy
import sys
def test():
	file='12_angr_veritesting'
	project=angr.Project(file)
	initial_state=project.factory.entry_state()
	
	#Veritesting 结合了静态符合执行与动态符号执行,减少了路径爆炸的影响
	simulation=project.factory.simgr(initial_state,veritesting=True)
	def succ(state):
		return b'Good Job.' in state.posix.dumps(sys.stdout.fileno())
	def fail(state):
		return b'Try again.' in state.posix.dumps(sys.stdout.fileno())
	simulation.explore(find=succ,avoid=fail)
	if simulation.found:
		solution_state=simulation.found[0]
		solution = solution_state.posix.dumps(sys.stdin.fileno())
		print('Solution is {0}'.format(solution.decode('utf-8')))
	else:
		raise Exception('Could not find the solution')
if __name__=="__main__":
	test()