Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Shorten the generated code with ./release-rs.sh --features short #22

Merged
merged 27 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
ab17df4
Generate short code with `--features short`
byeongkeunahn Nov 24, 2023
7b0813d
short: enable shorter I/O with `--features short`
byeongkeunahn Nov 24, 2023
f06d253
short: minify template for Linux-amd64-Rust
byeongkeunahn Nov 24, 2023
bee6abb
short: update writer.rs
byeongkeunahn Nov 24, 2023
cbe1bc4
short: omit stack size increment routine
byeongkeunahn Nov 24, 2023
b6dc2ea
short: update reader.rs
byeongkeunahn Nov 24, 2023
76ba8f7
short: update amd64_elf.rs
byeongkeunahn Nov 24, 2023
57b1efd
short: support minified C output
byeongkeunahn Nov 24, 2023
9865130
short: embed original solution
byeongkeunahn Nov 24, 2023
31124d4
short: update static-pie-template-amd64-short.c
byeongkeunahn Nov 24, 2023
f908098
short: remove .gcc_except_table and .gnu.hash
byeongkeunahn Nov 24, 2023
f63de6c
short: remove unnecessary inlining prevention
byeongkeunahn Nov 25, 2023
9a8e6dc
short: use custom target to override options
byeongkeunahn Nov 26, 2023
811b09f
short: amd64-rust: shorten the prestub
byeongkeunahn Nov 28, 2023
cfd4d6a
short: update reader.rs
byeongkeunahn Nov 28, 2023
7f9dcc5
short: amd64-rust: shorten the prestub
byeongkeunahn Nov 28, 2023
4296993
short: update codegen.rs
byeongkeunahn Nov 28, 2023
93774f0
Update services.rs
byeongkeunahn Nov 28, 2023
cec65a0
short: update reader.rs and writer.rs
byeongkeunahn Nov 28, 2023
3ca0d08
short: amd64-linux: shorten _start
byeongkeunahn Nov 28, 2023
f3528eb
short: update writer.rs
byeongkeunahn Nov 28, 2023
79c2296
short: update mod.rs
byeongkeunahn Nov 28, 2023
97ba8a5
short: amd64-linux: reduce level of indirections
byeongkeunahn Nov 28, 2023
9b05218
short: update reader.rs
byeongkeunahn Nov 28, 2023
c5225a8
Update codegen.rs
byeongkeunahn Nov 28, 2023
3099aef
short: update amd64_elf.rs
byeongkeunahn Nov 28, 2023
46f6f35
short: update linux.rs
byeongkeunahn Nov 28, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions .cargo/x86_64-unknown-linux-gnu-short.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
{
"arch": "x86_64",
"cpu": "x86-64",
"crt-objects-fallback": "false",
"crt-static-respected": true,
"data-layout": "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
"dynamic-linking": true,
"eh-frame-header": false,
"env": "gnu",
"has-rpath": true,
"has-thread-local": true,
"is-builtin": false,
"linker-flavor": "gnu-cc",
"llvm-target": "x86_64-unknown-linux-gnu",
"max-atomic-width": 64,
"os": "linux",
"plt-by-default": false,
"position-independent-executables": true,
"pre-link-args": {
"gnu-cc": [
"-m64"
],
"gnu-lld-cc": [
"-m64"
]
},
"relro-level": "off",
"stack-probes": {
"kind": "none"
},
"static-position-independent-executables": true,
"supported-sanitizers": [
"address",
"cfi",
"leak",
"memory",
"thread",
"safestack"
],
"supported-split-debuginfo": [
"packed",
"unpacked",
"off"
],
"supports-xray": true,
"target-family": [
"unix"
],
"target-pointer-width": "64"
}
13 changes: 12 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,21 @@ bench = false
path = "src/bin/basm-submit.rs"

[dependencies]
compiler_builtins = { version = "0.1.101", features = ["mem"] }
libm = "0.2.7"
ryu = "1.0"

[target.x86_64-pc-windows-msvc.dependencies]
compiler_builtins = { version = "0.1.103", features = ["mem"] }
[target.x86_64-unknown-linux-gnu.dependencies]
compiler_builtins = { version = "0.1.103", features = ["mem"] }
[target.i686-unknown-linux-gnu.dependencies]
compiler_builtins = { version = "0.1.103", features = ["mem"] }
[target.wasm32-unknown-unknown.dependencies]
compiler_builtins = { version = "0.1.103", features = ["mem"] }

[features]
short = []

[profile.dev]
panic = "abort"

Expand Down
8 changes: 4 additions & 4 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ fn main() {
link_args_basm.push("/EMITPOGOPHASEINFO");
link_args_basm_submit.push("/ALIGN:128");
},
"x86_64-unknown-linux-gnu" | "i686-unknown-linux-gnu" => {
"x86_64-unknown-linux-gnu" | "x86_64-unknown-linux-gnu-short" | "i686-unknown-linux-gnu" => {
link_args_basm.push("-nostartfiles");
link_args_basm.push("-nostdlib");
link_args_basm.push("-static-pie");
Expand All @@ -27,12 +27,12 @@ fn main() {
link_args_basm.push("-fno-unwind-tables");
link_args_basm.push("-fno-stack-protector");
link_args_basm.push("-fno-plt");
if target == "x86_64-unknown-linux-gnu" {
link_args_basm.push("-Wl,--build-id=none,--gc-sections,--no-eh-frame-hdr,-z,norelro");
} else {
if target == "i686-unknown-linux-gnu" {
// Prevent linker from putting data into text, which is non-writable and hence not relocatable.
// This prevents the hack for getting the _DYNAMIC symbol in the entrypoint.
link_args_basm.push("-Wl,--build-id=none,--gc-sections,--no-eh-frame-hdr,-z,norelro,-z,notext");
} else {
link_args_basm.push("-Wl,--build-id=none,--gc-sections,--no-eh-frame-hdr,-z,norelro");
}
link_args_basm_submit.push("-Wl,-z,max-page-size=128");
},
Expand Down
5 changes: 4 additions & 1 deletion scripts/static-pie-prestub-amd64-print.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import sys

# read prestub
with open("static-pie-prestub-amd64.bin", "rb") as f:
fname = "static-pie-prestub-amd64.bin" if len(sys.argv) <= 1 else sys.argv[1]
with open(fname, "rb") as f:
prestub = f.read()
prestub = bytearray(prestub)
if len(prestub) > 0 and prestub[-1] == 0:
Expand Down
86 changes: 86 additions & 0 deletions scripts/static-pie-prestub-amd64-short.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
; -*- tab-width: 4 -*-
;
; The prestub for amd64-rust target
; (prestub: the code that runs before the stub and sets the stage)
;
; build: nasm -f bin -O9 static-pie-prestub-amd64-short.asm -o static-pie-prestub-amd64-short.bin
; note: after building with the above command, run static-pie-prestub-amd64-print.py static-pie-prestub-amd64-short.bin
; to obtain the form that can be embedded in Rust as inline assembly.

BITS 64
ORG 0
section .text

; Overall flow (as written in this file):
;   1. Reserve stack space and build PLATFORM_DATA on the stack (_t).
;   2. Call _svc_alloc_rwx to mmap a read/write/execute buffer.
;   3. Re-enter _svc_alloc_rwx with ecx=0, which dispatches to the Base91
;      decoder; it decodes rsi -> rdi and its final `ret` pops the buffer
;      address pushed in step 2, transferring control to the decoded stub.

; Align stack to 16 byte boundary
; [rsp+ 32, rsp+120): PLATFORM_DATA
; [rsp+ 0, rsp+ 32): (shadow space for win64 calling convention)
enter 48, 0
push 1
pop rcx ; Enable ENV_FLAGS_LINUX_STYLE_CHKSTK outside Windows
; `call` is used only to obtain the address of _svc_alloc_rwx: the pushed
; return address is popped into rbx at _t.
call _t

; svc_alloc_rwx for Windows and Linux
; rcx = size
; rdi = pointer to VirtualAlloc (must be supplied before prepending the mov instruction)
; NOTE(review): the two lines above look inherited from the Windows-capable
; prestub; in this file the routine unconditionally zeroes edi and issues a
; Linux mmap syscall -- confirm and trim if no longer applicable.
;
; Dual-purpose entry point:
;   ecx != 0 : mmap(addr=0, len=ecx, prot=7, flags=0x22, fd=-1, off=0); result in rax
;   ecx == 0 : falls into the Base91 decoder via jecxz
_svc_alloc_rwx:
push 9
pop rax ; syscall id of x64 mmap
jecxz _decode ; ecx == 0 selects the decoder instead of allocating
cdq ; rdx=0
xor r9d, r9d ; offset
push rsi ; save rsi
xor edi, edi ; rdi=0
mov esi, ecx ; size
mov dl, 7 ; protect (safe since we have ensured rdx=0)
push 0x22
pop r10 ; flags
push -1
pop r8 ; fd
syscall
pop rsi ; restore rsi
; _ret is shared: it ends the allocation path and is also the decoder's exit.
_ret:
ret
_svc_alloc_rwx_end:

; Base91 decoder
; Reads from rsi (lodsb) and writes to rdi (stosb); eax is the accumulator.
; On entry eax is 9 (from the push/pop above), so after `mov al, 0x1f`
; eax == 0x1f.
_decode:
mov al, 0x1f
_decode_loop:
shl eax, 13 ; make room below the bits carried over from the previous pair
lodsb ; first character of the pair replaces al
sub al, 0x24 ; remove the 0x24 bias; CF is set when the input byte is below 0x24
jc _ret ; a byte below 0x24 ends decoding; `ret` pops the address pushed before `jmp rbx`
cdq ; edx = 0 (sign extension of eax)
xchg eax, edx ; edx = accumulator, eax = 0
lodsb ; second character of the pair
sub al, 0x24
imul eax, eax, 91
add eax, edx ; eax = accumulator + first + 91*second
_decode_output:
stosb ; emit the low byte
shr eax, 8
test ah, 16 ; bit 12 of eax decides whether another byte is emitted
jnz _decode_output
jmp _decode_loop

; PLATFORM_DATA
; Fields are pushed in reverse order so that env_id ends up at the lowest address.
_t: ; PLATFORM_DATA[32..39] = ptr_alloc_rwx
pop rbx ; rbx = address of _svc_alloc_rwx (return address of `call _t`)
push rbx
push rdx ; PLATFORM_DATA[24..31] = win_GetProcAddress
push rax ; PLATFORM_DATA[16..23] = win_kernel32
push rcx ; PLATFORM_DATA[ 8..15] = env_flags (0=None, 1=ENV_FLAGS_LINUX_STYLE_CHKSTK)
inc ecx
push rcx ; PLATFORM_DATA[ 0.. 7] = env_id (1=Windows, 2=Linux)
sub rsp, 40 ; shadow space + compensation
; ecx is still 2 here, so that is the length handed to mmap.
; NOTE(review): this relies on mmap granting at least one page and on the
; decoded stub fitting inside it -- confirm against the stub size.
call rbx ; svc_alloc_rwx

; Current state: rax = new buffer, rdi = pointer to VirtualAlloc
; NOTE(review): _svc_alloc_rwx executes `xor edi, edi`, so rdi appears to be 0
; at this point rather than a VirtualAlloc pointer -- confirm.
push rax ; consumed later by the decoder's `ret`
xchg rax, rdi ; rdi = new buffer

; Decode stub (rsi -> rdi)
; Current state: rdi = stub memory (by the previous instruction)
; rsi = STUB_BASE91 (by the Rust template)
xor ecx, ecx ; ecx = 0 makes _svc_alloc_rwx take the jecxz branch into _decode
jmp rbx ; This will jump to the start of the new buffer (stub) upon the ret instruction
Binary file added scripts/static-pie-prestub-amd64-short.bin
Binary file not shown.
67 changes: 67 additions & 0 deletions scripts/static-pie-template-amd64-short.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Generated with https://github.com/kiwiyou/basm-rs
// Learn rust (https://doc.rust-lang.org/book/) and get high performance out of the box!
// SOLUTION BEGIN
$$$$solution_src$$$$
// SOLUTION END
#include <unistd.h>
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
#define BASMCALL __attribute__((ms_abi))
// Base85 decoder. Code adapted from:
// https://github.com/rafagafe/base85/blob/master/base85.c
const char *b85 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>\?@^_`{|}~";
// Decodes base85 text at `src` into 32-bit words written to `dest`.
// - Every 5 input digits produce one u32, stored with its byte order reversed.
// - NUL bytes in front of a group are skipped.
// - Decoding stops at a ']' found at a group boundary.
// `dest` and `src` may point at the same buffer: this file calls it with
// `payload` as both arguments.
void b85tobin(void *dest, char const *src) {
u32 *p = (u32 *)dest;
// Reverse lookup table: character code -> digit value. Only the 85 alphabet
// entries are written; all other entries stay uninitialized.
u8 digittobin[256];
for (u8 i=0; i<85; i++) digittobin[(u8)b85[i]] = i;
while (1) {
while (*src == '\0') src++;
if (*src == ']') break;
// Accumulate five base-85 digits, most significant first.
u32 value = 0;
for (u32 i=0; i<5; i++) {
value *= 85;
value += digittobin[(u8)*src++];
}
// Reverse the byte order of the 32-bit value before storing it.
*p++ = (value >> 24) | ((value >> 8) & 0xff00) | ((value << 8) & 0xff0000) | (value << 24);
}
}
// Table handed to the stub as its first argument. Packed to 1-byte alignment,
// so the fields sit at offsets 0, 8, 16 and 32.
#pragma pack(push, 1)
typedef struct {
u64 env_id; // set to 2 by __libc_start_main below
u64 env_flags; // set to 1 by __libc_start_main below
u64 win[2]; // not written in this file -- presumably Windows-only handles; confirm
void *fn_table[6]; // only entry 0 is filled in this file
} PLATFORM_DATA;
#pragma pack(pop)
typedef int (BASMCALL *stub_ptr)(void *, void *);
char payload[][$$$$min_len_4096$$$$] = $$$$binary_base85$$$$;
int main() {}
// Entry point of the generated program. Defining __libc_start_main here means
// this code runs in place of the C library's startup routine (main() above is
// empty). None of the parameters are used.
//
// The $$$$...$$$$ tokens are template placeholders -- presumably substituted by
// the generator script before this file is compiled; confirm against the
// generator.
#ifdef __cplusplus
extern "C"
#endif
int __libc_start_main(
void *func_ptr,
int argc,
char* argv[],
void (*init_func)(void),
void (*fini_func)(void),
void (*rtld_fini_func)(void),
void *stack_end) {
PLATFORM_DATA pd;
pd.env_id = 2;
pd.env_flags = 1;
// Stack buffer: a fixed 68-byte prefix followed by the decoded stub.
u8 stubbuf[68 + $$$$stub_len$$$$];
b85tobin(stubbuf, "QMd~L002n8@6D@;XGJ3cz5oya01pLO>naZmS5~+Q0000n|450>x(5IN07=KfA^-pYO)<bp|Hw@-$qxlyU&9Xz]");
b85tobin(stubbuf + 68, $$$$stub_base85$$$$);
// Patch the page-aligned address of main and the constant 4096 into the
// decoded prefix at byte offsets 0x08 and 0x11.
// NOTE(review): presumably immediates of instructions in the prefix -- confirm.
size_t base = ((size_t)main) & 0xFFFFFFFFFFFFF000ULL;
*(u64 *)(stubbuf + 0x08) = (u64) base;
*(u32 *)(stubbuf + 0x11) = (u32) 4096;
// Compute the page-aligned range covering stubbuf and change its protection
// to 0x7 (read/write/execute) with syscall number 10 (mprotect on x86-64 Linux).
base = ((size_t)stubbuf) & 0xFFFFFFFFFFFFF000ULL;
size_t len = (((size_t)stubbuf) + 68 + $$$$stub_len$$$$) - base;
len = ((len + 0xFFF) >> 12) << 12;
syscall(10, base, len, 0x7);
// fn_table[0] points 0x1c bytes into the prefix.
// NOTE(review): presumably the RWX allocation service -- confirm.
pd.fn_table[0] = (void *) (stubbuf + 0x1c);
// Decode the payload in place, then call the start of stubbuf as an ms_abi
// function taking (&pd, payload).
b85tobin(payload, (char const *)payload);
return ((stub_ptr) stubbuf)(&pd, payload);
}
8 changes: 8 additions & 0 deletions scripts/static-pie-template-amd64-short.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Generated with https://github.com/kiwiyou/basm-rs
// Learn rust (https://doc.rust-lang.org/book/) and get high performance out of the box!
// SOLUTION BEGIN
#![crate_type="cdylib"]#![no_std]#[cfg(any())]mod x{
$$$$solution_src$$$$
}
// SOLUTION END
#[no_link]extern crate std;static mut P:[u8;$$$$binary_base91_len$$$$]=*br$$$$binary_base91$$$$;#[no_mangle]unsafe fn _start(){std::arch::asm!(".quad 0e859016a000030c8h,6758096a0000003ch,3156c931459917e3h,41226a07b2ce89ffh,5e050f5841ff6a5ah,2cac0de0c11fb0c3h,242cac9299f57224h,0e8c1aad0015bc06bh,0e3ebf77510c4f608h,51c1ff515052535bh,4850d3ff28ec8348h,0e3ffc93197h",in("r14")P.as_mut_ptr(),in("rsi")r$$$$stub_base91$$$$.as_ptr())}
28 changes: 23 additions & 5 deletions scripts/static-pie.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,17 @@ shift
if [[ "$target_name" == "x86_64-unknown-linux-gnu" ]]; then
stub="static-pie-stub-amd64.bin"
if [[ "$lang_name" == "C" ]]; then
template="static-pie-template-amd64.c"
if [[ "$*" == *"short"* ]]; then
template="static-pie-template-amd64-short.c"
else
template="static-pie-template-amd64.c"
fi
elif [[ "$lang_name" == "Rust" ]]; then
template="static-pie-template-amd64.rs"
if [[ "$*" == *"short"* ]]; then
template="static-pie-template-amd64-short.rs"
else
template="static-pie-template-amd64.rs"
fi
else
>&2 echo "Language ${lang_name} is not supported for target ${target_name}"
exit
Expand Down Expand Up @@ -48,20 +56,30 @@ else
>&2 echo "Unknown build mode ${build_mode}"
exit
fi

if [[ "$target_name" == "x86_64-unknown-linux-gnu" && "$*" == *"short"* ]]; then
target_name_cargo=".cargo/x86_64-unknown-linux-gnu-short.json"
target_name="x86_64-unknown-linux-gnu-short"
extra_config='-Zbuild-std=core,compiler_builtins,alloc -Zbuild-std-features=compiler-builtins-mem'
else
target_name_cargo="$target_name"
extra_config=""
fi

>&2 echo "Building project for target ${target_name}, language ${lang_name}, build mode ${build_mode}"

binary_path=basm.bin
if [[ "$build_mode" == "Debug" ]]; then
cargo +nightly build --target "$target_name" --bin basm-submit "$@"
cargo +nightly build $extra_config --target "$target_name_cargo" --bin basm-submit "$@"
else
cargo +nightly build --target "$target_name" --bin basm-submit --release "$@"
cargo +nightly build $extra_config --target "$target_name_cargo" --bin basm-submit --release "$@"
fi

if [[ "$target_name" == "x86_64-pc-windows-msvc" ]]; then
python3 scripts/static-pie-gen.py src/solution.rs "$target_name" target/"$target_name"/"$build_mode_dir"/basm-submit.exe scripts/"$stub" "$lang_name" scripts/"$template"
else
cp target/"$target_name"/"$build_mode_dir"/basm-submit target/"$target_name"/"$build_mode_dir"/basm-submit-stripped
objcopy --strip-all target/"$target_name"/"$build_mode_dir"/basm-submit-stripped
objcopy --remove-section .eh_frame target/"$target_name"/"$build_mode_dir"/basm-submit-stripped
objcopy --remove-section .eh_frame --remove-section .gcc_except_table --remove-section .gnu.hash target/"$target_name"/"$build_mode_dir"/basm-submit-stripped
python3 scripts/static-pie-gen.py src/solution.rs "$target_name" target/"$target_name"/"$build_mode_dir"/basm-submit-stripped scripts/"$stub" "$lang_name" scripts/"$template"
fi
21 changes: 19 additions & 2 deletions src/bin/codegen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ unsafe extern "win64" fn _start() -> ! {
// on the 16-byte boundary BEFORE `call` instruction.
// However, when called as the entrypoint by the Linux OS,
// RSP will be 16-byte aligned AFTER `call` instruction.
#[cfg(not(feature = "short"))]
asm!(
"clc", // CF=0 (running without loader) / CF=1 (running with loader)
"mov rbx, rcx", // Save PLATFORM_DATA table
Expand All @@ -80,6 +81,22 @@ unsafe extern "win64" fn _start() -> ! {
sym _start_rust,
options(noreturn)
);
// For "short", we always assume we are running with loader on Linux,
// since "short" is only meaningful when submitting to online judges (not local test runs).
#[cfg(feature = "short")]
asm!(
"clc", // Not needed but packager wants it
"push rcx", // Align stack
"mov rbx, rcx", // Save PLATFORM_DATA table
"lea rdi, [rip + __ehdr_start]",
"lea rsi, [rip + _DYNAMIC]",
"call {0}",
"mov rdi, rbx",
"call {1}", // This won't return since on Linux we invoke SYS_exitgroup in binary
sym loader::amd64_elf::relocate,
sym _start_rust,
options(noreturn)
);
}

#[cfg(target_os = "windows")]
Expand Down Expand Up @@ -234,11 +251,11 @@ extern "C" fn _start() {
}

/* We prevent inlining solution::main, since if the user allocates
* a large amount of stack memory there, it will be zero-initialized
* a large amount of stack memory there, it will be zero-initialized (or probed)
* *before* we increase the stack limits if it is inlined into _start_rust.
* This will cause stack overflow, thus we prevent it.
*/
#[inline(never)]
#[cfg_attr(not(feature = "short"), inline(never))]
fn _call_main() {
solution::main();
}
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#![feature(rustc_private)]
#![feature(fn_align)]
#![feature(maybe_uninit_slice)]
#![feature(maybe_uninit_uninit_array)]
Expand Down
Loading
Loading