Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Shorten the generated code in short mode #23

Merged
merged 3 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 34 additions & 6 deletions scripts/base91.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,29 @@
def encode(x):
def encode(x_in, use_rle=False):
sharp_insertion_points = []
if use_rle:
current_bits, current_bytes, i = 0, 0, 0
x = bytearray()
while i < len(x_in):
current_bits += 13
while current_bytes < current_bits // 8:
if i >= len(x_in):
break
x.append(x_in[i])
current_bytes += 1
i += 1
if len(x) > 0 and x[-1] == 0:
zeros_cnt = 1
while i - 1 + zeros_cnt < len(x_in) and zeros_cnt < 255 and x_in[i - 1 + zeros_cnt] == 0:
zeros_cnt += 1
if zeros_cnt >= 2:
x.pop()
x.append(zeros_cnt)
sharp_insertion_points.append((current_bits // 13 * 2) + len(sharp_insertion_points))
i += zeros_cnt - 1
sharp_insertion_points = list(reversed(sharp_insertion_points))
else:
x = x_in

out = []
i = 0
cnt5, stack5 = 0, []
Expand All @@ -21,11 +46,14 @@ def encode(x):
if bits < 13:
stack5.append((pos, bits))
cnt5 -= 8
ret = bytearray(2 * len(out) + 1)
for i in range(len(out)):
ret[2 * i + 0] = 0x24 + (out[i] % 91)
ret[2 * i + 1] = 0x24 + (out[i] // 91)
ret[-1] = ord('!')
ret = bytearray()
for pack in out:
ret.append(0x24 + (pack % 91))
ret.append(0x24 + (pack // 91))
if len(sharp_insertion_points) > 0 and len(ret) == sharp_insertion_points[-1]:
ret.append(ord(b'#'))
sharp_insertion_points.pop()
ret.append(ord(b'!'))
return bytes(ret)

if __name__ == '__main__':
Expand Down
65 changes: 40 additions & 25 deletions scripts/static-pie-gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@
sol[-1] = sol[-1].rstrip()
sol = "".join(sol)

# binary (raw)
code_raw_b91 = base91.encode(memory_bin[:-4], use_rle=True).decode('ascii')
code_raw_b91_len = len(code_raw_b91)
code_raw_b91 = '"' + code_raw_b91 + '"'

# binary
with open(compressed_binary_path, "rb") as f:
code = f.read()
Expand Down Expand Up @@ -115,29 +120,39 @@
stub_b85 = '"' + stub_b85 + '"'

# template
with open(template_path, encoding='utf8') as f:
template = f.read()
template = template.replace("\ufeff", "")

# putting it all together
# reference: https://stackoverflow.com/a/15448887
def multiple_replace(string, rep_dict):
    """Replace every occurrence of each key of ``rep_dict`` in ``string``.

    All keys are substituted in a single left-to-right pass, so text that
    was inserted as a replacement value is never scanned again. When
    several keys could match at the same position, the longest key wins.

    Args:
        string: The text to perform the substitutions on.
        rep_dict: Mapping of literal search strings to their replacements.

    Returns:
        A copy of ``string`` with every key replaced by its value. When
        ``rep_dict`` is empty, ``string`` is returned unchanged.
    """
    # An empty mapping would compile to the empty pattern, which matches at
    # every position and makes the lookup below fail with KeyError('').
    if not rep_dict:
        return string
    # Longest keys first: regex alternation takes the first alternative that
    # matches, so a key that is a prefix of another must come after it.
    keys = sorted(rep_dict, key=len, reverse=True)
    # Keys are escaped literals, so no regex flags are needed.
    pattern = re.compile("|".join(re.escape(k) for k in keys))
    return pattern.sub(lambda m: rep_dict[m.group(0)], string)

out = multiple_replace(template, {
"$$$$solution_src$$$$": sol,
"$$$$stub_raw$$$$": stub_raw,
"$$$$stub_base85$$$$": stub_b85,
"$$$$stub_len$$$$": str(len(stub)),
"$$$$stub_base85_len$$$$": str(stub_b85_len),
"$$$$stub_base91$$$$": stub_b91,
"$$$$stub_base91_len$$$$": str(stub_b91_len),
"$$$$binary_base85$$$$": r,
"$$$$binary_base85_len$$$$": str(len(code_b85)),
"$$$$binary_base91$$$$": code_b91,
"$$$$binary_base91_len$$$$": str(code_b91_len),
"$$$$min_len_4096$$$$": str(min(len(code_b85)+1, 4096)),
"$$$$entrypoint_offset$$$$": str(loader_fdict['entrypoint_offset']),
})
template_candidates = [template_path]
if lang_name == "Rust" and "x86_64" in target_name and "short" in template_path:
template_candidates.append(template_path.replace("short", "shorter"))

out = None
for each_template_path in template_candidates:
with open(each_template_path, encoding='utf8') as f:
template = f.read()
template = template.replace("\ufeff", "")

# putting it all together
# reference: https://stackoverflow.com/a/15448887
def multiple_replace(string, rep_dict):
    """Replace every occurrence of each key of ``rep_dict`` in ``string``.

    All keys are substituted in a single left-to-right pass, so text that
    was inserted as a replacement value is never scanned again. When
    several keys could match at the same position, the longest key wins.

    Args:
        string: The text to perform the substitutions on.
        rep_dict: Mapping of literal search strings to their replacements.

    Returns:
        A copy of ``string`` with every key replaced by its value. When
        ``rep_dict`` is empty, ``string`` is returned unchanged.
    """
    # An empty mapping would compile to the empty pattern, which matches at
    # every position and makes the lookup below fail with KeyError('').
    if not rep_dict:
        return string
    # Longest keys first: regex alternation takes the first alternative that
    # matches, so a key that is a prefix of another must come after it.
    keys = sorted(rep_dict, key=len, reverse=True)
    # Keys are escaped literals, so no regex flags are needed.
    pattern = re.compile("|".join(re.escape(k) for k in keys))
    return pattern.sub(lambda m: rep_dict[m.group(0)], string)

out_candidate = multiple_replace(template, {
"$$$$solution_src$$$$": sol,
"$$$$stub_raw$$$$": stub_raw,
"$$$$stub_base85$$$$": stub_b85,
"$$$$stub_len$$$$": str(len(stub)),
"$$$$stub_base85_len$$$$": str(stub_b85_len),
"$$$$stub_base91$$$$": stub_b91,
"$$$$stub_base91_len$$$$": str(stub_b91_len),
"$$$$binary_base85$$$$": r,
"$$$$binary_base85_len$$$$": str(len(code_b85)),
"$$$$binary_base91$$$$": code_b91,
"$$$$binary_base91_len$$$$": str(code_b91_len),
"$$$$binary_raw_base91$$$$": code_raw_b91,
"$$$$binary_raw_base91_len$$$$": str(code_raw_b91_len),
"$$$$min_len_4096$$$$": str(min(len(code_b85)+1, 4096)),
"$$$$entrypoint_offset$$$$": str(loader_fdict['entrypoint_offset']),
})
if out is None or len(out_candidate) < len(out):
out = out_candidate
print(out)
86 changes: 86 additions & 0 deletions scripts/static-pie-prestub-amd64-shorter.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
; -*- tab-width: 4 -*-
;
; The prestub for amd64-rust target
; (prestub: the code that runs before the stub and sets the stage)
;
; build: nasm -f bin -O9 static-pie-prestub-amd64-shorter.asm -o static-pie-prestub-amd64-shorter.bin
; note: after building with the above command, run static-pie-prestub-amd64-print.py static-pie-prestub-amd64-shorter.bin
; to obtain the form that can be embedded in Rust as inline assembly.
;
; Overview of what the code below does:
;   1. mmap an anonymous read/write/execute buffer.
;   2. Push the PLATFORM_DATA fields onto the stack.
;   3. Decode the text that rsi points to into the buffer (two characters
;      per 13-bit group, '#' marks a run of zero bytes, any character below
;      '#' ends the input).
;   4. Read a 32-bit offset from the last four decoded bytes, add the buffer
;      base, and call the result with rcx pointing at PLATFORM_DATA.
;
; Input: rsi = pointer to the encoded text.

BITS 64
ORG 0
section .text

; Align stack to 16 byte boundary
; [rsp+ 32, rsp+120): PLATFORM_DATA
; [rsp+ 0, rsp+ 32): (shadow space for win64 calling convention)
enter 56, 0

; svc_alloc_rwx for Linux
; Issues mmap(addr=0, length=1, prot=7, flags=0x22, fd=-1, offset=0).
; NOTE(review): the requested length is 1 byte. mmap works in whole pages,
; but nothing here shows that the decoded payload fits in the resulting
; mapping -- confirm.
_svc_alloc_rwx:
push 9
pop rax ; syscall id of x64 mmap
cdq ; rdx=0
xor r9d, r9d ; offset
push rsi ; save rsi
xor edi, edi ; rdi=0
push 1
pop rsi ; size
mov dl, 7 ; protect (safe since we have ensured rdx=0)
push 0x22
pop r10 ; flags
push -1
pop r8 ; fd
syscall
pop rsi ; restore rsi

; PLATFORM_DATA
; The fields are pushed from the highest offset down, so env_id ends up at
; the lowest address. On this path the two win_* slots simply receive
; whatever rdx and rax hold at this point (rax = value returned by mmap).
_t: ; PLATFORM_DATA[32..39] = ptr_alloc_rwx
push rdx ; PLATFORM_DATA[24..31] = win_GetProcAddress
push rax ; PLATFORM_DATA[16..23] = win_kernel32
push 1 ; PLATFORM_DATA[ 8..15] = env_flags (0=None, 1=ENV_FLAGS_LINUX_STYLE_CHKSTK)
push 2 ; PLATFORM_DATA[ 0.. 7] = env_id (1=Windows, 2=Linux)

; Current state: rax = new buffer
push rax ; keep the buffer base; popped again in _jump_to_entrypoint
xchg rax, rdi ; rdi = new buffer

; Base91 decoder
; rsi = read cursor (lodsb), rdi = write cursor (stosb).
; eax accumulates decoded bits between iterations.
; NOTE(review): the 0x1f loaded below appears to act as a marker sitting
; above the pending data bits, so that "test ah, 16" (bit 12 of eax) tells
; whether another full byte is ready to be written -- confirm against the
; encoder.
_decode:
mov al, 0x1f ; rax=0 here (the xchg swapped in rdi, which was zeroed before the syscall), so eax=0x1f
_decode_loop:
shl eax, 13 ; make room for the next 13-bit group
_decode_loop_2:
lodsb ; al = next input character
sub al, 0x23 ; compare against '#'; the flags set here drive jc/jz below
cdq ; edx = sign extension of eax; cdq leaves the flags untouched
jc _jump_to_entrypoint ; character below '#': end of input
jz _decode_zeros ; character is '#': expand a run of zeros
dec al ; al = first character - 0x24 (low base-91 digit)
xchg eax, edx ; park the accumulator in edx and take edx's value into eax
lodsb ; al = second character
sub al, 0x24 ; high base-91 digit
imul eax, eax, 91
add eax, edx ; eax = accumulator + low digit + 91 * high digit
_decode_output:
stosb ; write the low byte of eax to the buffer
shr eax, 8
test ah, 16 ; bit 12 of eax
jnz _decode_output ; keep writing bytes while that bit is set
jmp _decode_loop
; Run of zeros: the byte written just before the '#' is the run length.
_decode_zeros:
xchg eax, edx ; save the accumulator in edx; al is 0 after the sub above
movzx ecx, byte [rdi-1] ; ecx = run length taken from the last byte written
dec rdi ; step back so the run overwrites the length byte
rep stosb ; store ecx copies of al
xchg eax, edx ; restore the accumulator
jmp _decode_loop_2 ; continue without shifting: '#' carries no data bits

; Jump to entrypoint
_jump_to_entrypoint:
mov eax, dword [rdi-4] ; 32-bit value held in the last four decoded bytes
pop rcx ; rcx = buffer base saved before decoding
add rax, rcx ; rax = buffer base + that value
push rsp
pop rcx ; rcx = rsp, which now points at PLATFORM_DATA (env_id)
call rax
Binary file added scripts/static-pie-prestub-amd64-shorter.bin
Binary file not shown.
8 changes: 8 additions & 0 deletions scripts/static-pie-template-amd64-shorter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Generated with https://github.com/kiwiyou/basm-rs
// Learn rust (https://doc.rust-lang.org/book/) and get high performance out of the box!
// SOLUTION BEGIN
#![crate_type="cdylib"]#![no_std]#[cfg(any())]mod x{
$$$$solution_src$$$$
}
// SOLUTION END
#[no_link]extern crate std;#[no_mangle]unsafe fn _start(){std::arch::asm!(".quad 9958096a000038c8h,16aff3156c93145h,6a5a41226a07b25eh,50525e050f5841ffh,0b0974850026a016ah,99232cac0de0c11fh,0ac92c8fe16742572h,0aad0015bc06b242ch,0f77510c4f608e8c1h,48ff4fb60f92dfebh,8bd5eb92aaf3cfffh,5954c8014859fc47h,53503",in("rsi")r$$$$binary_raw_base91$$$$.as_ptr())}
Loading