From 76655d6f037699dccbb4e467c9048cf2a451ac28 Mon Sep 17 00:00:00 2001
From: Han Dai
Date: Mon, 8 Jan 2024 17:41:46 -0500
Subject: [PATCH] add recompiler

---
Review notes (text between the three-dash line and the diff is not part of
the commit message):
- llvm_recomp.py: the clang builtin include directory now follows
  clang_version instead of being fixed to release 15; the repeated
  subprocess/try/except blocks are folded into LLVMRecomp._run_tool.
- function_patches.py: compile_opts=None is treated like "not given".
- The blob ids on the "index" lines of amended files predate these
  amendments.

 src/patcherex2/components/assets/assets.py    |   9 +-
 .../components/compilers/llvm_recomp.py       | 210 ++++++++++++++++++
 src/patcherex2/patches/function_patches.py    |  32 +++-
 src/patcherex2/targets/__init__.py            |   2 +
 .../targets/elf_x86_64_linux_recomp.py        |  22 ++
 5 files changed, 267 insertions(+), 8 deletions(-)
 create mode 100644 src/patcherex2/components/compilers/llvm_recomp.py
 create mode 100644 src/patcherex2/targets/elf_x86_64_linux_recomp.py

diff --git a/src/patcherex2/components/assets/assets.py b/src/patcherex2/components/assets/assets.py
index 92b0d35..119f90e 100644
--- a/src/patcherex2/components/assets/assets.py
+++ b/src/patcherex2/components/assets/assets.py
@@ -20,6 +20,10 @@ class Assets:
             "url": "https://f002.backblazeb2.com/file/patcherex/assets/powerpc-eabivle.tgz",
             "path": ASSETS_DIR / "ppc_vle" / "bin",
         },
+        "llvm_recomp": {
+            "url": "https://f002.backblazeb2.com/file/patcherex/assets/llvm_recomp.tgz",
+            "path": ASSETS_DIR / "llvm_recomp",
+        },
     }
 
     def __init__(self, name):
@@ -33,7 +37,8 @@ def __init__(self, name):
     def download(self):
         r = requests.get(self.url)
         with tempfile.TemporaryDirectory() as td:
-            with open(os.path.join(td, "asset.tar.xz"), "wb") as f:
+            with open(os.path.join(td, "asset.tgz"), "wb") as f:
                 f.write(r.content)
-            with tarfile.open(os.path.join(td, "asset.tar.xz")) as tar:
+            with tarfile.open(os.path.join(td, "asset.tgz")) as tar:
+                # FIXME: pass filter="data" to extractall once it can be relied on (tarfile extraction filters exist in Python 3.12+ and in security updates of older releases). All tarballs are manually verified to be safe, so this is fine for now.
                 tar.extractall(path=self.ASSETS_DIR / self.name)
diff --git a/src/patcherex2/components/compilers/llvm_recomp.py b/src/patcherex2/components/compilers/llvm_recomp.py
new file mode 100644
index 0000000..1c2f1fb
--- /dev/null
+++ b/src/patcherex2/components/compilers/llvm_recomp.py
@@ -0,0 +1,210 @@
+import json
+import logging
+import os
+import subprocess
+import tempfile
+
+import cle
+
+from ..assets.assets import Assets
+from .clang import Clang
+
+logger = logging.getLogger(__name__)
+
+
+class LLVMRecomp(Clang):
+    """Clang-based compiler that can apply the libRecompiler LLVM passes.
+
+    ``compile`` drives clang, ``opt`` and ``llc`` step by step so that the passes
+    shipped in the ``llvm_recomp`` asset (``libRecompiler.so``) can be applied
+    on request between the stages.
+    """
+
+    def __init__(self, p, clang_version=15, compiler_flags=None):
+        super().__init__(p, clang_version, compiler_flags)
+        self._clang_version = clang_version
+        self._assets_path = Assets("llvm_recomp").path
+
+    @staticmethod
+    def _run_tool(args):
+        """Run a toolchain command; log its stderr and re-raise if it fails."""
+        try:
+            subprocess.run(args, check=True, capture_output=True)
+        except subprocess.CalledProcessError as e:
+            logger.error(e.stderr.decode("utf-8", errors="replace"))
+            raise
+
+    def compile(
+        self,
+        code,
+        base=0,
+        symbols=None,
+        extra_compiler_flags=None,
+        is_thumb=False,
+        **kwargs,
+    ):
+        """Compile C ``code`` and return the bytes of the result linked at ``base``.
+
+        Args:
+            code: C source text.
+            base: Address assigned to the start of ``.text`` in the linker script.
+            symbols: Extra ``name -> address`` definitions; on a name clash they
+                override the patcher's and the binary analyzer's symbols.
+            extra_compiler_flags: Additional flags for the clang invocation.
+            is_thumb: Accepted for interface compatibility; unused here.
+            **kwargs: ``dso_local_fix`` (truthy: run the ``force-dso-local`` pass
+                over the IR) and ``stacklayout`` (truthy: dumped to JSON and
+                passed to the ``updated-prologepilog`` pass, which is run in
+                place of llc's own ``prologepilog`` stage).
+
+        Returns:
+            The bytes cle loads from the linked object, starting at its entry
+            point plus ``base``.
+
+        Raises:
+            subprocess.CalledProcessError: If a toolchain command fails.
+        """
+        if symbols is None:
+            symbols = {}
+        if extra_compiler_flags is None:
+            extra_compiler_flags = []
+        llc = f"llc-{self._clang_version}"
+        librecomp_path = os.path.join(self._assets_path, "libRecompiler.so")
+        with tempfile.TemporaryDirectory() as td:
+            c_path = os.path.join(td, "code.c")
+            ll_path = os.path.join(td, "code.ll")
+            obj_path = os.path.join(td, "obj.o")
+            ld_path = os.path.join(td, "linker.ld")
+            linked_path = os.path.join(td, "obj_linked.o")
+
+            # source file
+            with open(c_path, "w") as f:
+                f.write(code)
+
+            # linker script; later updates win on a name clash
+            _symbols = {}
+            _symbols.update(self.p.symbols)
+            _symbols.update(self.p.binary_analyzer.get_all_symbols())
+            _symbols.update(symbols)
+            assignments = "".join(
+                f"{name} = {hex(addr)};" for name, addr in _symbols.items()
+            )
+            linker_script = (
+                "SECTIONS { .text : SUBALIGN(0) { . = "
+                + hex(base)
+                + "; *(.text) "
+                + assignments
+                + "} }"
+            )
+            with open(ld_path, "w") as f:
+                f.write(linker_script)
+
+            # c -> ll
+            self._run_tool(
+                [self._compiler]
+                + self._compiler_flags
+                + extra_compiler_flags
+                + [
+                    "-Wno-incompatible-library-redeclaration",
+                    "-S",
+                    "-w",
+                    "-emit-llvm",
+                    "-g",
+                    "-o",
+                    ll_path,
+                    c_path,
+                    # follow clang_version instead of hard-coding release 15
+                    f"-I/usr/lib/clang/{self._clang_version}/include",
+                ]
+            )
+
+            # ll --force-dso-local --> ll (rewritten in place)
+            if kwargs.get("dso_local_fix"):
+                self._run_tool(
+                    [
+                        f"opt-{self._clang_version}",
+                        f"-load-pass-plugin={librecomp_path}",
+                        "-passes=force-dso-local",
+                        "-S",
+                        ll_path,
+                        "-o",
+                        ll_path,
+                    ]
+                )
+
+            # ll -> o
+            stacklayout = kwargs.get("stacklayout")
+            if stacklayout:
+                layout_path = os.path.join(td, "stacklayout.json")
+                mir_path = os.path.join(td, "code.mir")
+                mir2_path = os.path.join(td, "code.2.mir")
+                with open(layout_path, "w") as f:
+                    json.dump(stacklayout, f)
+                # 1) lower to MIR, stopping before llc's prologepilog pass
+                self._run_tool(
+                    [
+                        llc,
+                        "-stop-before=prologepilog",
+                        ll_path,
+                        "-o",
+                        mir_path,
+                        "-relocation-model=pic",
+                    ]
+                )
+                # 2) run the plugin's updated-prologepilog pass with the layout
+                self._run_tool(
+                    [
+                        llc,
+                        "-load",
+                        librecomp_path,
+                        "-run-pass=updated-prologepilog",
+                        f"-stkloc={layout_path}",
+                        "-o",
+                        mir2_path,
+                        mir_path,
+                        "-relocation-model=pic",
+                    ]
+                )
+                # 3) resume after prologepilog and emit the object file
+                self._run_tool(
+                    [
+                        llc,
+                        "-start-after=prologepilog",
+                        "-o",
+                        obj_path,
+                        mir2_path,
+                        "-relocation-model=pic",
+                        "--filetype=obj",
+                    ]
+                )
+            else:
+                self._run_tool(
+                    [
+                        llc,
+                        "-o",
+                        obj_path,
+                        ll_path,
+                        "-relocation-model=pic",
+                        "--filetype=obj",
+                    ]
+                )
+
+            # link object file
+            self._run_tool(
+                [
+                    self._linker,
+                    "-relocatable",
+                    obj_path,
+                    "-T",
+                    ld_path,
+                    "-o",
+                    linked_path,
+                ]
+            )
+
+            # extract compiled code
+            ld = cle.Loader(linked_path, main_opts={"base_addr": 0x0})
+            compiled = ld.memory.load(
+                ld.all_objects[0].entry + base, ld.memory.max_addr
+            )
+        return compiled
diff --git a/src/patcherex2/patches/function_patches.py b/src/patcherex2/patches/function_patches.py
index 8de04e7..9c2d7f4 100644
--- a/src/patcherex2/patches/function_patches.py
+++ b/src/patcherex2/patches/function_patches.py
@@ -7,16 +7,19 @@
 
 
 class ModifyFunctionPatch(Patch):
-    def __init__(self, addr_or_name, code, detour_pos=-1) -> None:
+    def __init__(self, addr_or_name, code, detour_pos=-1, **kwargs) -> None:
         self.code = code
         self.detour_pos = detour_pos
         self.addr_or_name = addr_or_name
+        self.compile_opts = kwargs.get("compile_opts") or {}
 
     def apply(self, p):
         func = p.binary_analyzer.get_function(self.addr_or_name)
         compiled_size = len(
             p.compiler.compile(
-                self.code, is_thumb=p.binary_analyzer.is_thumb(func["addr"])
+                self.code,
+                is_thumb=p.binary_analyzer.is_thumb(func["addr"]),
+                **self.compile_opts,
             )
         )
         if compiled_size < func["size"]:
@@ -46,13 +49,18 @@ def apply(self, p):
         p.binfmt_tool.update_binary_content(
             file_addr,
             p.compiler.compile(
-                self.code, mem_addr, is_thumb=p.binary_analyzer.is_thumb(func["addr"])
+                self.code,
+                mem_addr,
+                is_thumb=p.binary_analyzer.is_thumb(func["addr"]),
+                **self.compile_opts,
             ),
         )
 
 
 class InsertFunctionPatch(Patch):
-    def __init__(self, addr_or_name, code, detour_pos=-1, is_thumb=False) -> None:
+    def __init__(
+        self, addr_or_name, code, detour_pos=-1, is_thumb=False, **kwargs
+    ) -> None:
         self.addr = None
         self.name = None
         if isinstance(addr_or_name, int):
@@ -62,12 +70,19 @@ def __init__(self, addr_or_name, code, detour_pos=-1, is_thumb=False) -> None:
         self.code = code
         self.detour_pos = detour_pos
         self.is_thumb = is_thumb
+        self.compile_opts = kwargs.get("compile_opts") or {}
 
     def apply(self, p):
         if self.addr:
             raise NotImplementedError()
         elif self.name:
-            compiled_size = len(p.compiler.compile(self.code, is_thumb=self.is_thumb))
+            compiled_size = len(
+                p.compiler.compile(
+                    self.code,
+                    is_thumb=self.is_thumb,
+                    **self.compile_opts,
+                )
+            )
             if self.detour_pos == -1:
                 block = p.allocation_manager.allocate(
                     compiled_size + 0x20, align=0x4, flag=MemoryFlag.RX
@@ -80,7 +95,12 @@ def apply(self, p):
             p.symbols[self.name] = mem_addr
             p.binfmt_tool.update_binary_content(
                 file_addr,
-                p.compiler.compile(self.code, mem_addr, is_thumb=self.is_thumb),
+                p.compiler.compile(
+                    self.code,
+                    mem_addr,
+                    is_thumb=self.is_thumb,
+                    **self.compile_opts,
+                ),
             )
 
 
diff --git a/src/patcherex2/targets/__init__.py b/src/patcherex2/targets/__init__.py
index 649fc73..64bdf87 100644
--- a/src/patcherex2/targets/__init__.py
+++ b/src/patcherex2/targets/__init__.py
@@ -4,6 +4,7 @@
 from .elf_i386_linux import ElfI386Linux
 from .elf_leon3_bare import ElfLeon3Bare
 from .elf_x86_64_linux import ElfX8664Linux
+from .elf_x86_64_linux_recomp import ElfX8664LinuxRecomp
 from .ihex_ppc_bare import IHexPPCBare
 from .target import Target
 
@@ -14,6 +15,7 @@
     "ElfI386Linux",
     "ElfLeon3Bare",
     "ElfX8664Linux",
+    "ElfX8664LinuxRecomp",
     "IHexPPCBare",
     "Target",
 ]
diff --git a/src/patcherex2/targets/elf_x86_64_linux_recomp.py b/src/patcherex2/targets/elf_x86_64_linux_recomp.py
new file mode 100644
index 0000000..9b5e466
--- /dev/null
+++ b/src/patcherex2/targets/elf_x86_64_linux_recomp.py
@@ -0,0 +1,22 @@
+from ..components.compilers.llvm_recomp import LLVMRecomp
+from .elf_x86_64_linux import ElfX8664Linux
+
+
+class ElfX8664LinuxRecomp(ElfX8664Linux):
+    """x86-64 Linux ELF target whose default compiler is ``LLVMRecomp``."""
+
+    @staticmethod
+    def detect_target(binary_path):
+        """Always return False, i.e. never claim ``binary_path`` for this target."""
+        return False
+
+    def get_compiler(self, compiler):
+        """Return the compiler component named ``compiler``.
+
+        A falsy ``compiler`` selects ``"llvm_recomp"``; any other name raises
+        ``NotImplementedError``.
+        """
+        compiler = compiler or "llvm_recomp"
+        if compiler == "llvm_recomp":
+            return LLVMRecomp(self.p)
+        raise NotImplementedError()