From ab17df4bc08c16e31f1f68f92dc95d26fc149c14 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Fri, 24 Nov 2023 15:19:53 +0900 Subject: [PATCH 01/52] Generate short code with `--features short` Full command: `./release-rs.sh --features short` --- Cargo.toml | 3 +++ src/platform/mod.rs | 1 + 2 files changed, 4 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index f75934ff..dbd1f2d4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,6 +28,9 @@ compiler_builtins = { version = "0.1.101", features = ["mem"] } libm = "0.2.7" ryu = "1.0" +[features] +short = [] + [profile.dev] panic = "abort" diff --git a/src/platform/mod.rs b/src/platform/mod.rs index d72b2d3e..c16d4023 100644 --- a/src/platform/mod.rs +++ b/src/platform/mod.rs @@ -18,6 +18,7 @@ pub fn init(platform_data_by_loader: usize) { unsafe { match pd.env_id { #[cfg(not(target_arch = "wasm32"))] + #[cfg(not(feature = "short"))] services::ENV_ID_WINDOWS => { /* use OS APIs directly */ os::windows::init(); From 7b0813dc845f8e7d327e800bc6c69683e9d0f1bf Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Fri, 24 Nov 2023 15:44:42 +0900 Subject: [PATCH 02/52] short: enable shorter I/O with `--features short` --- src/platform/io/reader.rs | 16 ++++++++++++++++ src/platform/io/writer.rs | 22 ++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/src/platform/io/reader.rs b/src/platform/io/reader.rs index 4e2f3442..75e97165 100644 --- a/src/platform/io/reader.rs +++ b/src/platform/io/reader.rs @@ -228,6 +228,7 @@ impl Reader { buf } + #[cfg(not(feature = "short"))] fn noskip_u64(&mut self) -> u64 { const POW10: [u32; 9] = [1, 10, 100, 1_000, 10_000, 100_000, 1_000_000, 10_000_000, 100_000_000]; let mut out = 0; @@ -245,6 +246,21 @@ impl Reader { out += c; } } + #[cfg(feature = "short")] + fn noskip_u64(&mut self) -> u64 { + let mut n = 0; + while self.off < self.len { + let b = unsafe { self.buf[self.off].assume_init() }; + if b > 32 { + n *= 10; + n += b as u64 & 
0x0F; + self.off += 1; + } else { + break; + } + } + n + } fn noskip_u128(&mut self) -> u128 { let mut n = 0; while self.off < self.len { diff --git a/src/platform/io/writer.rs b/src/platform/io/writer.rs index 129c993d..92043b46 100644 --- a/src/platform/io/writer.rs +++ b/src/platform/io/writer.rs @@ -18,8 +18,10 @@ impl Drop for Writer { } } +#[cfg(not(feature = "short"))] #[repr(align(16))] struct B128([u8; 16]); +#[cfg(not(feature = "short"))] #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] #[target_feature(enable = "avx2")] unsafe fn cvt8(out: &mut B128, n: u32) -> usize { @@ -67,6 +69,7 @@ unsafe fn cvt8(out: &mut B128, n: u32) -> usize { _mm_store_si128(out.0.as_mut_ptr().cast(), ascii); offset } +#[cfg(not(feature = "short"))] #[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))] unsafe fn cvt8(out: &mut B128, mut n: u32) -> usize { let mut offset = 16; @@ -159,6 +162,7 @@ impl Writer { self.u32(n as u32); } } + #[cfg(not(feature = "short"))] pub fn u32(&mut self, n: u32) { self.try_flush(11); let mut b128 = B128([0u8; 16]); @@ -180,6 +184,10 @@ impl Writer { unsafe { MaybeUninit::slice_assume_init_mut(&mut self.buf[self.off..self.off + len]).copy_from_slice(&b128.0[off..]); } self.off += len; } + #[cfg(feature = "short")] + pub fn u32(&mut self, n: u32) { + self.u64(n as u64) + } pub fn i64(&mut self, n: i64) { if n < 0 { self.byte(b'-'); @@ -188,6 +196,7 @@ impl Writer { self.u64(n as u64); } } + #[cfg(not(feature = "short"))] pub fn u64(&mut self, n: u64) { self.try_flush(21); let mut hi128 = B128([0u8; 16]); @@ -225,6 +234,19 @@ impl Writer { unsafe { MaybeUninit::slice_assume_init_mut(&mut self.buf[self.off..self.off + len]).copy_from_slice(&lo128.0[looff..]); } self.off += len; } + #[cfg(feature = "short")] + pub fn u64(&mut self, mut n: u64) { + let mut buf: [MaybeUninit; 20] = MaybeUninit::uninit_array(); + let mut offset = buf.len() - 1; + buf[offset].write(b'0' + (n % 10) as u8); + n /= 10; + while n > 0 { + offset -= 1; + 
buf[offset].write(b'0' + (n % 10) as u8); + n /= 10; + } + self.bytes(unsafe { MaybeUninit::slice_assume_init_ref(&buf[offset..]) }); + } pub fn i128(&mut self, n: i128) { if n < 0 { self.byte(b'-'); From f06d2532cf35d6a2e6fe1013082c06ac0b7d6713 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Fri, 24 Nov 2023 15:56:27 +0900 Subject: [PATCH 03/52] short: minify template for Linux-amd64-Rust --- scripts/static-pie-template-amd64-short.rs | 1 + scripts/static-pie.sh | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 scripts/static-pie-template-amd64-short.rs diff --git a/scripts/static-pie-template-amd64-short.rs b/scripts/static-pie-template-amd64-short.rs new file mode 100644 index 00000000..138701db --- /dev/null +++ b/scripts/static-pie-template-amd64-short.rs @@ -0,0 +1 @@ +#![crate_type="cdylib"]#![no_std]#[no_link]extern crate std;static mut P:[u8;$$$$binary_base91_len$$$$]=*br$$$$binary_base91$$$$;#[no_mangle]unsafe fn _start(){std::arch::asm!("stc;.quad 19510173000030c8h,4ce8d9f7c9h,459927e36758096ah,870d74ff8548c931h,4100003000b841cah,0b2ce8956e7ff40b1h,41ff6a5a41226a07h,0c11fb0c35e050f58h,99f572242cac0de0h,15bc06b242cac92h,10c4f608e8c1aad0h,5052535be3ebf775h,20ec834851c1ff51h,0c93197485750d3ffh,90c9d0ff585fd3ffh",in("r14")P.as_mut_ptr(),in("rsi")r$$$$stub_base91$$$$.as_ptr())} \ No newline at end of file diff --git a/scripts/static-pie.sh b/scripts/static-pie.sh index f1969c9e..d3d1ab50 100755 --- a/scripts/static-pie.sh +++ b/scripts/static-pie.sh @@ -13,7 +13,11 @@ if [[ "$target_name" == "x86_64-unknown-linux-gnu" ]]; then if [[ "$lang_name" == "C" ]]; then template="static-pie-template-amd64.c" elif [[ "$lang_name" == "Rust" ]]; then - template="static-pie-template-amd64.rs" + if [[ "$*" == *"short"* ]]; then + template="static-pie-template-amd64-short.rs" + else + template="static-pie-template-amd64.rs" + fi else >&2 echo "Language ${lang_name} is not supported for target ${target_name}" exit From 
bee6abb593ad05ead86ffc414c1302eaaeb10349 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Fri, 24 Nov 2023 16:14:42 +0900 Subject: [PATCH 04/52] short: update writer.rs --- src/platform/io/writer.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/platform/io/writer.rs b/src/platform/io/writer.rs index 92043b46..32108bd9 100644 --- a/src/platform/io/writer.rs +++ b/src/platform/io/writer.rs @@ -129,13 +129,12 @@ impl Writer { } // This function ensures an extra byte in the buffer to make sure that // println() can safely use `byte_unchecked`. - pub fn bytes(&mut self, s: &[u8]) { - let mut i = 0; - while i < s.len() { - let rem = s[i..].len().min(self.buf[self.off..].len()); - unsafe { MaybeUninit::slice_assume_init_mut(&mut self.buf[self.off..self.off + rem]).copy_from_slice(&s[i..i + rem]); } + pub fn bytes(&mut self, mut s: &[u8]) { + while !s.is_empty() { + let rem = s.len().min(self.buf[self.off..].len()); + unsafe { MaybeUninit::slice_assume_init_mut(&mut self.buf[self.off..self.off + rem]).copy_from_slice(&s[..rem]); } self.off += rem; - i += rem; + s = &s[rem..]; self.try_flush(1); } } From cbe1bc41fa84810030279274d910f004feacf851 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Fri, 24 Nov 2023 16:33:22 +0900 Subject: [PATCH 05/52] short: omit stack size increment routine --- src/platform/os/linux.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/platform/os/linux.rs b/src/platform/os/linux.rs index 2eb2c528..e063b54b 100644 --- a/src/platform/os/linux.rs +++ b/src/platform/os/linux.rs @@ -241,13 +241,15 @@ pub unsafe fn init() { * by the runtime startup code (e.g., glibc). * Thus, instead of parsing the ELF section, we just invoke * the kernel APIs directly. 
*/ - let pd = services::platform_data(); - if pd.env_flags & services::ENV_FLAGS_NATIVE != 0 { - let mut rlim: syscall::RLimit = Default::default(); - let ret = syscall::getrlimit(syscall::RLIMIT_STACK, &mut rlim); - if ret == 0 && rlim.rlim_cur < 256 * 1024 * 1024 { - rlim.rlim_cur = 256 * 1024 * 1024; - syscall::setrlimit(syscall::RLIMIT_STACK, &rlim); + #[cfg(not(feature = "short"))] { + let pd = services::platform_data(); + if pd.env_flags & services::ENV_FLAGS_NATIVE != 0 { + let mut rlim: syscall::RLimit = Default::default(); + let ret = syscall::getrlimit(syscall::RLIMIT_STACK, &mut rlim); + if ret == 0 && rlim.rlim_cur < 256 * 1024 * 1024 { + rlim.rlim_cur = 256 * 1024 * 1024; + syscall::setrlimit(syscall::RLIMIT_STACK, &rlim); + } } } From b6dc2eae8bce1423a83d275675c308b2af9750d8 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Fri, 24 Nov 2023 16:52:55 +0900 Subject: [PATCH 06/52] short: update reader.rs --- src/platform/io/reader.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/platform/io/reader.rs b/src/platform/io/reader.rs index 75e97165..76bb27fa 100644 --- a/src/platform/io/reader.rs +++ b/src/platform/io/reader.rs @@ -249,17 +249,16 @@ impl Reader { #[cfg(feature = "short")] fn noskip_u64(&mut self) -> u64 { let mut n = 0; - while self.off < self.len { + loop { let b = unsafe { self.buf[self.off].assume_init() }; if b > 32 { n *= 10; - n += b as u64 & 0x0F; + n += (b - b'0') as u64; self.off += 1; } else { - break; + break n; } } - n } fn noskip_u128(&mut self) -> u128 { let mut n = 0; From 76ba8f78adabbb696086426062fd31a2b8965cf6 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Fri, 24 Nov 2023 18:35:06 +0900 Subject: [PATCH 07/52] short: update amd64_elf.rs --- src/platform/loader/amd64_elf.rs | 47 +++++++++++++++++--------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/src/platform/loader/amd64_elf.rs b/src/platform/loader/amd64_elf.rs index 
ebd4efe4..0ccf07e4 100644 --- a/src/platform/loader/amd64_elf.rs +++ b/src/platform/loader/amd64_elf.rs @@ -72,35 +72,37 @@ struct Elf64Rela { } -unsafe fn find_tag(mut ptr: *const Elf64Dyn, tag: u64) -> *const Elf64Dyn { - while (*ptr).d_tag != 0 { - if (*ptr).d_tag == tag { - return ptr; - } - ptr = ptr.add(1); - } - core::ptr::null() -} - pub unsafe extern "sysv64" fn relocate( addr_image_base: u64, addr_dynamic_section: u64 ) { - let ptr_dyn: *const Elf64Dyn = addr_dynamic_section as *const Elf64Dyn; - let ptr_rela = find_tag(ptr_dyn, DT_RELA); - let ptr_relasz = find_tag(ptr_dyn, DT_RELASZ); - let ptr_relaent = find_tag(ptr_dyn, DT_RELAENT); - - /* do not use .is_null() since the method itself requires relocations, at least in debug mode */ - if ptr_rela == core::ptr::null() || - ptr_relasz == core::ptr::null() || - ptr_relaent == core::ptr::null() { + let mut ptr_dyn: *const Elf64Dyn = addr_dynamic_section as *const Elf64Dyn; + let mut ptr_rela = 0; + let mut relasz = 0; + let mut relaent = 0; + loop { + match (*ptr_dyn).d_tag { + 0 => { break; } + DT_RELA => { ptr_rela = addr_image_base + (*ptr_dyn).d_val_or_ptr; }, + DT_RELASZ => { relasz = (*ptr_dyn).d_val_or_ptr; }, + DT_RELAENT => { relaent = (*ptr_dyn).d_val_or_ptr; }, + _ => () + } + ptr_dyn = ptr_dyn.add(1); + } + + /* 1) Do not use .is_null() since the method itself requires relocations, at least in debug mode. + * 2) When DT_RELA is present, the other entries DT_RELASZ and DT_RELAENT must exist. 
+ * Source: https://docs.oracle.com/cd/E19683-01/817-3677/chapter6-42444/index.html + * ("This element requires the DT_RELASZ and DT_RELAENT elements also be present.") + */ + if ptr_rela == 0 { return; } let mut j = 0; - while j < (*ptr_relasz).d_val_or_ptr { - let pst_rela = (addr_image_base + (*ptr_rela).d_val_or_ptr + j) as *mut Elf64Rela; + while j < relasz { + let pst_rela = ptr_rela as *mut Elf64Rela; let ul_offset = (*pst_rela).r_offset; let ul_info = (*pst_rela).r_info; let l_addend = (*pst_rela).r_addend; @@ -114,6 +116,7 @@ pub unsafe extern "sysv64" fn relocate( /* not implemented */ panic!(); } - j += (*ptr_relaent).d_val_or_ptr; + j += relaent; + ptr_rela += relaent; } } \ No newline at end of file From 57b1efd5e06cb340b0c99cb30f0bb447d96c16d0 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Fri, 24 Nov 2023 19:13:12 +0900 Subject: [PATCH 08/52] short: support minified C output --- scripts/static-pie-template-amd64-short.c | 71 +++++++++++++++++++++++ scripts/static-pie.sh | 6 +- 2 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 scripts/static-pie-template-amd64-short.c diff --git a/scripts/static-pie-template-amd64-short.c b/scripts/static-pie-template-amd64-short.c new file mode 100644 index 00000000..5f559811 --- /dev/null +++ b/scripts/static-pie-template-amd64-short.c @@ -0,0 +1,71 @@ +#include +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; +#define BASMCALL __attribute__((ms_abi)) +// Base85 decoder. 
Code adapted from: +// https://github.com/rafagafe/base85/blob/master/base85.c +const char *b85 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>\?@^_`{|}~"; +void b85tobin(void *dest, char const *src) { + u32 *p = (u32 *)dest; + u8 digittobin[256]; + for (u8 i=0; i<85; i++) digittobin[(u8)b85[i]] = i; + while (1) { + while (*src == '\0') src++; + if (*src == ']') break; + u32 value = 0; + for (u32 i=0; i<5; i++) { + value *= 85; + value += digittobin[(u8)*src++]; + } + *p++ = (value >> 24) | ((value >> 8) & 0xff00) | ((value << 8) & 0xff0000) | (value << 24); + } +} +#pragma pack(push, 1) +typedef struct { + u64 env_id; + u64 env_flags; + u64 win[2]; + void *fn_table[6]; +} PLATFORM_DATA; +#pragma pack(pop) +BASMCALL void *svc_alloc_rwx(size_t size) { + return (void *) syscall(9, NULL, size, 0x7, 0x22, -1, 0); +} +typedef int (BASMCALL *stub_ptr)(void *, void *); +__attribute__ ((section (".text#"))) const char stub_raw[] = $$$$stub_raw$$$$; +stub_ptr get_stub() { + return (stub_ptr) stub_raw; +} +char payload[][$$$$min_len_4096$$$$] = $$$$binary_base85$$$$; +int main() {} +#ifdef __cplusplus +extern "C" +#endif +int __libc_start_main( + void *func_ptr, + int argc, + char* argv[], + void (*init_func)(void), + void (*fini_func)(void), + void (*rtld_fini_func)(void), + void *stack_end) { + PLATFORM_DATA pd; + pd.env_id = 2; + pd.env_flags = 1; + u8 stubbuf[68 + $$$$stub_len$$$$] = "QMd~L002n8@6D@;XGJ3cz5oya01pLO>naZmS5~+Q0000n|450>x(5IN07=KfA^-pYO)> 12) << 12; + *(u64 *)(stubbuf + 0x08) = (u64) base; + *(u32 *)(stubbuf + 0x11) = (u32) len; + base = ((size_t)stubbuf) & 0xFFFFFFFFFFFFF000ULL; + len = (((size_t)stubbuf) + 68 + $$$$stub_len$$$$) - base; + len = ((len + 0xFFF) >> 12) << 12; + syscall(10, base, len, 0x7); + pd.fn_table[0] = (void *) (stubbuf + 0x1c); + b85tobin(payload, (char const *)payload); + return ((stub_ptr) stubbuf)(&pd, payload); +} \ No newline at end of file diff --git a/scripts/static-pie.sh b/scripts/static-pie.sh 
index d3d1ab50..ef3f135b 100755 --- a/scripts/static-pie.sh +++ b/scripts/static-pie.sh @@ -11,7 +11,11 @@ shift if [[ "$target_name" == "x86_64-unknown-linux-gnu" ]]; then stub="static-pie-stub-amd64.bin" if [[ "$lang_name" == "C" ]]; then - template="static-pie-template-amd64.c" + if [[ "$*" == *"short"* ]]; then + template="static-pie-template-amd64-short.c" + else + template="static-pie-template-amd64.c" + fi elif [[ "$lang_name" == "Rust" ]]; then if [[ "$*" == *"short"* ]]; then template="static-pie-template-amd64-short.rs" From 98651305275726c5f7fa290c3051cf208f93ec90 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Fri, 24 Nov 2023 19:19:23 +0900 Subject: [PATCH 09/52] short: embed original solution --- scripts/static-pie-template-amd64-short.c | 7 ++++++- scripts/static-pie-template-amd64-short.rs | 9 ++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/scripts/static-pie-template-amd64-short.c b/scripts/static-pie-template-amd64-short.c index 5f559811..d232f52e 100644 --- a/scripts/static-pie-template-amd64-short.c +++ b/scripts/static-pie-template-amd64-short.c @@ -1,4 +1,9 @@ -#include +// Generated with https://github.com/kiwiyou/basm-rs +// Learn rust (https://doc.rust-lang.org/book/) and get high performance out of the box! 
+// SOLUTION BEGIN +$$$$solution_src$$$$ +// SOLUTION END +#include typedef unsigned char u8; typedef unsigned int u32; typedef unsigned long long u64; diff --git a/scripts/static-pie-template-amd64-short.rs b/scripts/static-pie-template-amd64-short.rs index 138701db..a41a677e 100644 --- a/scripts/static-pie-template-amd64-short.rs +++ b/scripts/static-pie-template-amd64-short.rs @@ -1 +1,8 @@ -#![crate_type="cdylib"]#![no_std]#[no_link]extern crate std;static mut P:[u8;$$$$binary_base91_len$$$$]=*br$$$$binary_base91$$$$;#[no_mangle]unsafe fn _start(){std::arch::asm!("stc;.quad 19510173000030c8h,4ce8d9f7c9h,459927e36758096ah,870d74ff8548c931h,4100003000b841cah,0b2ce8956e7ff40b1h,41ff6a5a41226a07h,0c11fb0c35e050f58h,99f572242cac0de0h,15bc06b242cac92h,10c4f608e8c1aad0h,5052535be3ebf775h,20ec834851c1ff51h,0c93197485750d3ffh,90c9d0ff585fd3ffh",in("r14")P.as_mut_ptr(),in("rsi")r$$$$stub_base91$$$$.as_ptr())} \ No newline at end of file +// Generated with https://github.com/kiwiyou/basm-rs +// Learn rust (https://doc.rust-lang.org/book/) and get high performance out of the box! 
+// SOLUTION BEGIN +#![crate_type="cdylib"]#![no_std]#[cfg(any())]mod x{ +$$$$solution_src$$$$ +} +// SOLUTION END +#[no_link]extern crate std;static mut P:[u8;$$$$binary_base91_len$$$$]=*br$$$$binary_base91$$$$;#[no_mangle]unsafe fn _start(){std::arch::asm!("stc;.quad 19510173000030c8h,4ce8d9f7c9h,459927e36758096ah,870d74ff8548c931h,4100003000b841cah,0b2ce8956e7ff40b1h,41ff6a5a41226a07h,0c11fb0c35e050f58h,99f572242cac0de0h,15bc06b242cac92h,10c4f608e8c1aad0h,5052535be3ebf775h,20ec834851c1ff51h,0c93197485750d3ffh,90c9d0ff585fd3ffh",in("r14")P.as_mut_ptr(),in("rsi")r$$$$stub_base91$$$$.as_ptr())} \ No newline at end of file From 31124d44498dc7cdef72e3afedafa10ce7052fb3 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Fri, 24 Nov 2023 22:06:04 +0900 Subject: [PATCH 10/52] short: update static-pie-template-amd64-short.c --- scripts/static-pie-template-amd64-short.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/scripts/static-pie-template-amd64-short.c b/scripts/static-pie-template-amd64-short.c index d232f52e..25996119 100644 --- a/scripts/static-pie-template-amd64-short.c +++ b/scripts/static-pie-template-amd64-short.c @@ -34,14 +34,7 @@ typedef struct { void *fn_table[6]; } PLATFORM_DATA; #pragma pack(pop) -BASMCALL void *svc_alloc_rwx(size_t size) { - return (void *) syscall(9, NULL, size, 0x7, 0x22, -1, 0); -} typedef int (BASMCALL *stub_ptr)(void *, void *); -__attribute__ ((section (".text#"))) const char stub_raw[] = $$$$stub_raw$$$$; -stub_ptr get_stub() { - return (stub_ptr) stub_raw; -} char payload[][$$$$min_len_4096$$$$] = $$$$binary_base85$$$$; int main() {} #ifdef __cplusplus @@ -58,16 +51,14 @@ int __libc_start_main( PLATFORM_DATA pd; pd.env_id = 2; pd.env_flags = 1; - u8 stubbuf[68 + $$$$stub_len$$$$] = "QMd~L002n8@6D@;XGJ3cz5oya01pLO>naZmS5~+Q0000n|450>x(5IN07=KfA^-pYO)> 12) << 12; + u8 stubbuf[68 + $$$$stub_len$$$$]; + b85tobin(stubbuf, 
"QMd~L002n8@6D@;XGJ3cz5oya01pLO>naZmS5~+Q0000n|450>x(5IN07=KfA^-pYO)> 12) << 12; syscall(10, base, len, 0x7); pd.fn_table[0] = (void *) (stubbuf + 0x1c); From f908098eb420fa38797aebbf68092d3eca6bdbd9 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Fri, 24 Nov 2023 23:11:45 +0900 Subject: [PATCH 11/52] short: remove .gcc_except_table and .gnu.hash --- scripts/static-pie.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/static-pie.sh b/scripts/static-pie.sh index ef3f135b..9d9ee6ca 100755 --- a/scripts/static-pie.sh +++ b/scripts/static-pie.sh @@ -70,6 +70,6 @@ if [[ "$target_name" == "x86_64-pc-windows-msvc" ]]; then else cp target/"$target_name"/"$build_mode_dir"/basm-submit target/"$target_name"/"$build_mode_dir"/basm-submit-stripped objcopy --strip-all target/"$target_name"/"$build_mode_dir"/basm-submit-stripped - objcopy --remove-section .eh_frame target/"$target_name"/"$build_mode_dir"/basm-submit-stripped + objcopy --remove-section .eh_frame --remove-section .gcc_except_table --remove-section .gnu.hash target/"$target_name"/"$build_mode_dir"/basm-submit-stripped python3 scripts/static-pie-gen.py src/solution.rs "$target_name" target/"$target_name"/"$build_mode_dir"/basm-submit-stripped scripts/"$stub" "$lang_name" scripts/"$template" fi From f63de6c230c1b840ec207bbbde7f65b2e4da15f0 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Sat, 25 Nov 2023 17:33:04 +0900 Subject: [PATCH 12/52] short: remove unnecessary inlining prevention This was necessary when we used a hack to support debugging; this is no longer the case since we can now run the built executable natively. 
--- src/solution.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/solution.rs b/src/solution.rs index c8e6f2df..39565a53 100644 --- a/src/solution.rs +++ b/src/solution.rs @@ -1,11 +1,8 @@ use basm::platform::io::{Reader, Writer, Print}; - -#[cfg_attr(not(debug_assertions), inline(always))] -#[cfg_attr(debug_assertions, inline(never))] pub fn main() { let mut reader: Reader = Default::default(); let mut writer: Writer = Default::default(); let a = reader.i64(); let b = reader.i64(); writer.println(a + b); -} +} \ No newline at end of file From 9a8e6dc75997271693cd480538acceb89c823ff9 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Sun, 26 Nov 2023 21:52:00 +0900 Subject: [PATCH 13/52] short: use custom target to override options Specifically, we disable read-only relocations and stack probes. --- .cargo/x86_64-unknown-linux-gnu-short.json | 50 ++++++++++++++++++++++ Cargo.toml | 10 ++++- build.rs | 8 ++-- scripts/static-pie.sh | 14 +++++- src/lib.rs | 1 + 5 files changed, 76 insertions(+), 7 deletions(-) create mode 100644 .cargo/x86_64-unknown-linux-gnu-short.json diff --git a/.cargo/x86_64-unknown-linux-gnu-short.json b/.cargo/x86_64-unknown-linux-gnu-short.json new file mode 100644 index 00000000..56da8ed9 --- /dev/null +++ b/.cargo/x86_64-unknown-linux-gnu-short.json @@ -0,0 +1,50 @@ +{ + "arch": "x86_64", + "cpu": "x86-64", + "crt-objects-fallback": "false", + "crt-static-respected": true, + "data-layout": "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", + "dynamic-linking": true, + "eh-frame-header": false, + "env": "gnu", + "has-rpath": true, + "has-thread-local": true, + "is-builtin": false, + "linker-flavor": "gnu-cc", + "llvm-target": "x86_64-unknown-linux-gnu", + "max-atomic-width": 64, + "os": "linux", + "plt-by-default": false, + "position-independent-executables": true, + "pre-link-args": { + "gnu-cc": [ + "-m64" + ], + "gnu-lld-cc": [ + "-m64" + ] + }, + "relro-level": 
"off", + "stack-probes": { + "kind": "none" + }, + "static-position-independent-executables": true, + "supported-sanitizers": [ + "address", + "cfi", + "leak", + "memory", + "thread", + "safestack" + ], + "supported-split-debuginfo": [ + "packed", + "unpacked", + "off" + ], + "supports-xray": true, + "target-family": [ + "unix" + ], + "target-pointer-width": "64" +} \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index dbd1f2d4..a0047e08 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,10 +24,18 @@ bench = false path = "src/bin/basm-submit.rs" [dependencies] -compiler_builtins = { version = "0.1.101", features = ["mem"] } libm = "0.2.7" ryu = "1.0" +[target.x86_64-pc-windows-msvc.dependencies] +compiler_builtins = { version = "0.1.103", features = ["mem"] } +[target.x86_64-unknown-linux-gnu.dependencies] +compiler_builtins = { version = "0.1.103", features = ["mem"] } +[target.i686-unknown-linux-gnu.dependencies] +compiler_builtins = { version = "0.1.103", features = ["mem"] } +[target.wasm32-unknown-unknown.dependencies] +compiler_builtins = { version = "0.1.103", features = ["mem"] } + [features] short = [] diff --git a/build.rs b/build.rs index 398c9cf1..ddac1f7c 100644 --- a/build.rs +++ b/build.rs @@ -18,7 +18,7 @@ fn main() { link_args_basm.push("/EMITPOGOPHASEINFO"); link_args_basm_submit.push("/ALIGN:128"); }, - "x86_64-unknown-linux-gnu" | "i686-unknown-linux-gnu" => { + "x86_64-unknown-linux-gnu" | "x86_64-unknown-linux-gnu-short" | "i686-unknown-linux-gnu" => { link_args_basm.push("-nostartfiles"); link_args_basm.push("-nostdlib"); link_args_basm.push("-static-pie"); @@ -27,12 +27,12 @@ fn main() { link_args_basm.push("-fno-unwind-tables"); link_args_basm.push("-fno-stack-protector"); link_args_basm.push("-fno-plt"); - if target == "x86_64-unknown-linux-gnu" { - link_args_basm.push("-Wl,--build-id=none,--gc-sections,--no-eh-frame-hdr,-z,norelro"); - } else { + if target == "i686-unknown-linux-gnu" { // Prevent linker from putting data 
into text, which is non-writable and hence not relocatable. // This prevents the hack for getting the _DYNAMIC symbol in the entrypoint. link_args_basm.push("-Wl,--build-id=none,--gc-sections,--no-eh-frame-hdr,-z,norelro,-z,notext"); + } else { + link_args_basm.push("-Wl,--build-id=none,--gc-sections,--no-eh-frame-hdr,-z,norelro"); } link_args_basm_submit.push("-Wl,-z,max-page-size=128"); }, diff --git a/scripts/static-pie.sh b/scripts/static-pie.sh index 9d9ee6ca..63570c39 100755 --- a/scripts/static-pie.sh +++ b/scripts/static-pie.sh @@ -56,13 +56,23 @@ else >&2 echo "Unknown build mode ${build_mode}" exit fi + +if [[ "$target_name" == "x86_64-unknown-linux-gnu" && "$*" == *"short"* ]]; then + target_name_cargo=".cargo/x86_64-unknown-linux-gnu-short.json" + target_name="x86_64-unknown-linux-gnu-short" + extra_config='-Zbuild-std=core,compiler_builtins,alloc -Zbuild-std-features=compiler-builtins-mem' +else + target_name_cargo="$target_name" + extra_config="" +fi + >&2 echo "Building project for target ${target_name}, language ${lang_name}, build mode ${build_mode}" binary_path=basm.bin if [[ "$build_mode" == "Debug" ]]; then - cargo +nightly build --target "$target_name" --bin basm-submit "$@" + cargo +nightly build $extra_config --target "$target_name_cargo" --bin basm-submit "$@" else - cargo +nightly build --target "$target_name" --bin basm-submit --release "$@" + cargo +nightly build $extra_config --target "$target_name_cargo" --bin basm-submit --release "$@" fi if [[ "$target_name" == "x86_64-pc-windows-msvc" ]]; then diff --git a/src/lib.rs b/src/lib.rs index 7e3035c5..888774ac 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +#![feature(rustc_private)] #![feature(fn_align)] #![feature(maybe_uninit_slice)] #![feature(maybe_uninit_uninit_array)] From 811b09f259f62472263e1e51517fc77fe14f1f23 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Tue, 28 Nov 2023 09:18:56 +0900 Subject: [PATCH 14/52] short: amd64-rust: shorten the 
prestub --- scripts/static-pie-prestub-amd64-print.py | 5 +- scripts/static-pie-prestub-amd64-short.asm | 90 +++++++++++++++++++++ scripts/static-pie-prestub-amd64-short.bin | Bin 0 -> 96 bytes scripts/static-pie-template-amd64-short.rs | 2 +- 4 files changed, 95 insertions(+), 2 deletions(-) create mode 100644 scripts/static-pie-prestub-amd64-short.asm create mode 100644 scripts/static-pie-prestub-amd64-short.bin diff --git a/scripts/static-pie-prestub-amd64-print.py b/scripts/static-pie-prestub-amd64-print.py index de11e5eb..e8337d0d 100644 --- a/scripts/static-pie-prestub-amd64-print.py +++ b/scripts/static-pie-prestub-amd64-print.py @@ -1,5 +1,8 @@ +import sys + # read prestub -with open("static-pie-prestub-amd64.bin", "rb") as f: +fname = "static-pie-prestub-amd64.bin" if len(sys.argv) <= 1 else sys.argv[1] +with open(fname, "rb") as f: prestub = f.read() prestub = bytearray(prestub) if len(prestub) > 0 and prestub[-1] == 0: diff --git a/scripts/static-pie-prestub-amd64-short.asm b/scripts/static-pie-prestub-amd64-short.asm new file mode 100644 index 00000000..4fd26ddd --- /dev/null +++ b/scripts/static-pie-prestub-amd64-short.asm @@ -0,0 +1,90 @@ +; -*- tab-width: 4 -*- +; +; The prestub for amd64-rust target +; (prestub: the code that runs before the stub and sets the stage) +; +; build: nasm -f bin -O9 static-pie-prestub-amd64-short.asm -o static-pie-prestub-amd64-short.bin +; note: after building with the above command, run static-pie-prestub-amd64-print.py static-pie-prestub-amd64-short.bin +; to obtain the form that can be embedded in Rust as inline assembly. 
+ +BITS 64 +ORG 0 +section .text + +; Align stack to 16 byte boundary +; [rsp+ 32, rsp+120): PLATFORM_DATA +; [rsp+ 0, rsp+ 32): (shadow space for win64 calling convention) + enter 56, 0 + push 1 + pop rcx ; Enable ENV_FLAGS_LINUX_STYLE_CHKSTK outside Windows + call _t + +; svc_alloc_rwx for Windows and Linux +; rcx = size +; rdi = pointer to VirtualAlloc (must be supplied before prepending the mov instruction) +_svc_alloc_rwx: + push 9 + pop rax ; syscall id of x64 mmap + jecxz _decode + cdq ; rdx=0 + xor r9d, r9d ; offset + push rsi ; save rsi + xor edi, edi ; rdi=0 + mov esi, ecx ; size + mov dl, 7 ; protect (safe since we have ensured rdx=0) + push 0x22 + pop r10 ; flags + push -1 + pop r8 ; fd + syscall + pop rsi ; restore rsi +_ret: + ret +_svc_alloc_rwx_end: + +; Base91 decoder +_decode: + mov al, 0x1f +_decode_loop: + shl eax, 13 + lodsb + sub al, 0x24 + jc _ret + cdq + xchg eax, edx + lodsb + sub al, 0x24 + imul eax, eax, 91 + add eax, edx +_decode_output: + stosb + shr eax, 8 + test ah, 16 + jnz _decode_output + jmp _decode_loop + +; PLATFORM_DATA +_t: ; PLATFORM_DATA[32..39] = ptr_alloc_rwx + pop rbx + push rbx + push rdx ; PLATFORM_DATA[24..31] = win_GetProcAddress + push rax ; PLATFORM_DATA[16..23] = win_kernel32 + push rcx ; PLATFORM_DATA[ 8..15] = env_flags (0=None, 1=ENV_FLAGS_LINUX_STYLE_CHKSTK) + inc ecx + push rcx ; PLATFORM_DATA[ 0.. 
7] = env_id (1=Windows, 2=Linux) + sub rsp, 32 ; shadow space + call rbx ; svc_alloc_rwx + +; Current state: rax = new buffer, rdi = pointer to VirtualAlloc + push rax + xchg rax, rdi ; rdi = new buffer + +; Decode stub (rsi -> rdi) +; Current state: rdi = stub memory (by the previous instruction) +; rsi = STUB_BASE91 (by the Rust template) + xor ecx, ecx + call rbx + +; Call stub (it will perform the below operations) + pop rax + call rax ; This will jump to the start of the new buffer (stub) \ No newline at end of file diff --git a/scripts/static-pie-prestub-amd64-short.bin b/scripts/static-pie-prestub-amd64-short.bin new file mode 100644 index 0000000000000000000000000000000000000000..ae053f09468710c25082f59c804828a9b5c1caee GIT binary patch literal 96 zcmV-m0H6QJH~;`@0a@rg0001L30P<27nwyd$yPD{iO#YIY9c{eYX3o44+UPsuphzT z4Xi9Aa`l;#tSlsJz*_;(s=??8_QVi%_v_ Date: Tue, 28 Nov 2023 09:52:06 +0900 Subject: [PATCH 15/52] short: update reader.rs --- src/platform/io/reader.rs | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/platform/io/reader.rs b/src/platform/io/reader.rs index 76bb27fa..d10e2ae1 100644 --- a/src/platform/io/reader.rs +++ b/src/platform/io/reader.rs @@ -50,20 +50,23 @@ impl Reader { if end <= self.len { /* data already available */ } else { - /* secure space by discarding the already-consumed buffer contents at front */ - if end > Self::BUF_LEN { + unsafe { + /* Secure space by discarding the already-consumed buffer contents at front. + * Note that we expect `readahead` to be small (<100 bytes), so we unconditionally + * copy the contents to the front to reduce code size. When the default buffer size + * is used (which is >100K), this will not happen often and hence shouldn't affect + * performance by a noticeable amount. 
*/ let rem = self.len - self.off; - unsafe { core::ptr::copy(self.buf.as_ptr().add(self.off), self.buf.as_mut_ptr(), rem); } + core::ptr::copy(self.buf.as_ptr().add(self.off), self.buf.as_mut_ptr(), rem); self.len = rem; self.off = 0; - } - unsafe { + /* Although the buffer currently falls short of what has been requested, - * it may still be possible that a full token (which is short) - * is available within the remains. Thus, we check if we can return - * without invoking read_stdio. This is crucial for cases where - * the standard input is a pipe, which includes the local testing - * console environment. */ + * it may still be possible that a full token (which is short) + * is available within the remains. Thus, we check if we can return + * without invoking read_stdio. This is crucial for cases where + * the standard input is a pipe, which includes the local testing + * console environment. */ let mut white_pos = self.off; while white_pos < self.len { if self.buf[white_pos].assume_init() <= b' ' { @@ -73,11 +76,11 @@ impl Reader { } if white_pos == self.len { /* No whitespace has been found. We have to read. - * We try to read as much as possible at once. */ + * We try to read as much as possible at once. */ self.len += services::read_stdio(0, MaybeUninit::slice_assume_init_mut(&mut self.buf[self.len..Self::BUF_LEN])); } /* Add a null-terminator, whether or not the read was nonsaturating (for SIMD-accelerated unsafe integer read routines). - This is safe since we spare 8 bytes at the end of the buffer. */ + * This is safe since we spare 8 bytes at the end of the buffer. 
*/ *self.buf[self.len].assume_init_mut() = 0u8; } } From 7f9dcc5a9797b3f7da4f88ff8d5e11ff759ee332 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Tue, 28 Nov 2023 12:19:10 +0900 Subject: [PATCH 16/52] short: amd64-rust: shorten the prestub --- scripts/static-pie-prestub-amd64-short.asm | 10 +++------- scripts/static-pie-prestub-amd64-short.bin | Bin 96 -> 93 bytes scripts/static-pie-template-amd64-short.rs | 2 +- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/scripts/static-pie-prestub-amd64-short.asm b/scripts/static-pie-prestub-amd64-short.asm index 4fd26ddd..f4982524 100644 --- a/scripts/static-pie-prestub-amd64-short.asm +++ b/scripts/static-pie-prestub-amd64-short.asm @@ -14,7 +14,7 @@ section .text ; Align stack to 16 byte boundary ; [rsp+ 32, rsp+120): PLATFORM_DATA ; [rsp+ 0, rsp+ 32): (shadow space for win64 calling convention) - enter 56, 0 + enter 48, 0 push 1 pop rcx ; Enable ENV_FLAGS_LINUX_STYLE_CHKSTK outside Windows call _t @@ -72,7 +72,7 @@ _t: ; PLATFORM_DATA[32..39] = ptr_alloc_rwx push rcx ; PLATFORM_DATA[ 8..15] = env_flags (0=None, 1=ENV_FLAGS_LINUX_STYLE_CHKSTK) inc ecx push rcx ; PLATFORM_DATA[ 0.. 
7] = env_id (1=Windows, 2=Linux) - sub rsp, 32 ; shadow space + sub rsp, 40 ; shadow space + compensation call rbx ; svc_alloc_rwx ; Current state: rax = new buffer, rdi = pointer to VirtualAlloc @@ -83,8 +83,4 @@ _t: ; PLATFORM_DATA[32..39] = ptr_alloc_rwx ; Current state: rdi = stub memory (by the previous instruction) ; rsi = STUB_BASE91 (by the Rust template) xor ecx, ecx - call rbx - -; Call stub (it will perform the below operations) - pop rax - call rax ; This will jump to the start of the new buffer (stub) \ No newline at end of file + jmp rbx ; This will jump to the start of the new buffer (stub) upon the ret instruction \ No newline at end of file diff --git a/scripts/static-pie-prestub-amd64-short.bin b/scripts/static-pie-prestub-amd64-short.bin index ae053f09468710c25082f59c804828a9b5c1caee..31b5787fd7a1f612e610b3e8e2abc95d4e0a8a14 100644 GIT binary patch delta 19 bcmYd@WjbLnktvW% Date: Tue, 28 Nov 2023 14:09:45 +0900 Subject: [PATCH 17/52] short: update codegen.rs --- src/bin/codegen.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bin/codegen.rs b/src/bin/codegen.rs index 3086a395..eb294f14 100644 --- a/src/bin/codegen.rs +++ b/src/bin/codegen.rs @@ -234,11 +234,11 @@ extern "C" fn _start() { } /* We prevent inlining solution::main, since if the user allocates - * a large amount of stack memory there, it will be zero-initialized + * a large amount of stack memory there, it will be zero-initialized (or probed) * *before* we increase the stack limits if it is inlined into _start_rust. * This will cause stack overflow, thus we prevent it. 
*/ -#[inline(never)] +#[cfg_attr(not(feature = "short"), inline(never))] fn _call_main() { solution::main(); } From 93774f0a12a4c4e5fb4c029be3ec1d90edc5e365 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Tue, 28 Nov 2023 14:15:25 +0900 Subject: [PATCH 18/52] Update services.rs --- src/platform/services.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/platform/services.rs b/src/platform/services.rs index 6325b290..831654df 100644 --- a/src/platform/services.rs +++ b/src/platform/services.rs @@ -27,7 +27,6 @@ pub const ENV_ID_LINUX: u64 = 2; pub const ENV_ID_WASM: u64 = 3; pub const ENV_FLAGS_LINUX_STYLE_CHKSTK: u64 = 0x0001; // disables __chkstk in binaries compiled with Windows target pub const ENV_FLAGS_NATIVE: u64 = 0x0002; // indicates the binary is running without the loader -pub const ENV_FLAGS_BREAKPOINT: u64 = 0x0004; // breakpoint at entrypoint or startup routine #[repr(C, packed)] #[allow(non_snake_case)] From cec65a0737f909c2ee5ee8a93ae6814abd23d002 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Tue, 28 Nov 2023 14:29:22 +0900 Subject: [PATCH 19/52] short: update reader.rs and writer.rs --- src/platform/io/reader.rs | 4 +++- src/platform/io/writer.rs | 12 +++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/platform/io/reader.rs b/src/platform/io/reader.rs index d10e2ae1..b9c04154 100644 --- a/src/platform/io/reader.rs +++ b/src/platform/io/reader.rs @@ -57,7 +57,9 @@ impl Reader { * is used (which is >100K), this will not happen often and hence shouldn't affect * performance by a noticeable amount. 
*/ let rem = self.len - self.off; - core::ptr::copy(self.buf.as_ptr().add(self.off), self.buf.as_mut_ptr(), rem); + for i in 0..rem { + *self.buf[i].assume_init_mut() = self.buf[self.off + i].assume_init(); + } self.len = rem; self.off = 0; diff --git a/src/platform/io/writer.rs b/src/platform/io/writer.rs index 32108bd9..28472462 100644 --- a/src/platform/io/writer.rs +++ b/src/platform/io/writer.rs @@ -124,11 +124,12 @@ impl Writer { self.off += 1; } pub fn byte(&mut self, b: u8) { - self.try_flush(1); + self.try_flush(2); self.byte_unchecked(b); } // This function ensures an extra byte in the buffer to make sure that // println() can safely use `byte_unchecked`. + #[cfg(not(feature = "short"))] pub fn bytes(&mut self, mut s: &[u8]) { while !s.is_empty() { let rem = s.len().min(self.buf[self.off..].len()); @@ -138,6 +139,15 @@ impl Writer { self.try_flush(1); } } + // This function ensures an extra byte in the buffer to make sure that + // println() can safely use `byte_unchecked`. This is achieved by + // calling `self.try_flush(2)` (instead of `self.try_flush(1)`) in byte(). + #[cfg(feature = "short")] + pub fn bytes(&mut self, s: &[u8]) { + for x in s { + self.byte(*x); + } + } pub fn str(&mut self, s: &str) { self.bytes(s.as_bytes()); } From 3ca0d088af1d02ae67e1165a7e3a4af8fa7767dd Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Tue, 28 Nov 2023 14:50:27 +0900 Subject: [PATCH 20/52] short: amd64-linux: shorten _start --- src/bin/codegen.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/bin/codegen.rs b/src/bin/codegen.rs index eb294f14..266e565e 100644 --- a/src/bin/codegen.rs +++ b/src/bin/codegen.rs @@ -59,6 +59,7 @@ unsafe extern "win64" fn _start() -> ! { // on the 16-byte boundary BEFORE `call` instruction. // However, when called as the entrypoint by the Linux OS, // RSP will be 16-byte aligned AFTER `call` instruction. 
+ #[cfg(not(feature = "short"))] asm!( "clc", // CF=0 (running without loader) / CF=1 (running with loader) "mov rbx, rcx", // Save PLATFORM_DATA table @@ -80,6 +81,21 @@ unsafe extern "win64" fn _start() -> ! { sym _start_rust, options(noreturn) ); + // For "short", we always assume we are running with loader on Linux, + // since "short" is only meaningful when submitting to online judges (not local test runs). + #[cfg(feature = "short")] + asm!( + "clc", // Not needed but packager wants it + "mov rbx, rcx", // Save PLATFORM_DATA table + "lea rdi, [rip + __ehdr_start]", + "lea rsi, [rip + _DYNAMIC]", + "call {0}", + "mov rdi, rbx", + "call {1}", // This won't return since on Linux we invoke SYS_exitgroup in binary + sym loader::amd64_elf::relocate, + sym _start_rust, + options(noreturn) + ); } #[cfg(target_os = "windows")] From f3528eb4e5cef18590699b8e3dab141d20ea8e8c Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Tue, 28 Nov 2023 15:31:42 +0900 Subject: [PATCH 21/52] short: update writer.rs --- src/platform/io/writer.rs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/platform/io/writer.rs b/src/platform/io/writer.rs index 28472462..a32f2738 100644 --- a/src/platform/io/writer.rs +++ b/src/platform/io/writer.rs @@ -245,16 +245,21 @@ impl Writer { } #[cfg(feature = "short")] pub fn u64(&mut self, mut n: u64) { - let mut buf: [MaybeUninit; 20] = MaybeUninit::uninit_array(); - let mut offset = buf.len() - 1; - buf[offset].write(b'0' + (n % 10) as u8); - n /= 10; - while n > 0 { - offset -= 1; - buf[offset].write(b'0' + (n % 10) as u8); + self.try_flush(21); + let mut i = self.off; + loop { + self.buf[i].write(b'0' + (n % 10) as u8); n /= 10; + i += 1; + if n == 0 { break; } + } + let mut j = self.off; + self.off = i; + while j < i { + i -= 1; + unsafe { MaybeUninit::slice_assume_init_mut(&mut self.buf).swap(j, i); } + j += 1; } - self.bytes(unsafe { 
MaybeUninit::slice_assume_init_ref(&buf[offset..]) }); } pub fn i128(&mut self, n: i128) { if n < 0 { From 79c22963e0c8564035fd124a446060a16bf83143 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Tue, 28 Nov 2023 17:11:47 +0900 Subject: [PATCH 22/52] short: update mod.rs --- src/platform/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/platform/mod.rs b/src/platform/mod.rs index c16d4023..5f2e0f30 100644 --- a/src/platform/mod.rs +++ b/src/platform/mod.rs @@ -35,7 +35,10 @@ pub fn init(platform_data_by_loader: usize) { }, _ => { /* use loader services for allocation */ + #[cfg(not(feature = "short"))] os::unknown::init(); + #[cfg(feature = "short")] + unreachable!(); } } } From 97ba8a5be0e41aaf6d60c440b771a4fe5021f141 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Tue, 28 Nov 2023 17:21:18 +0900 Subject: [PATCH 23/52] short: amd64-linux: reduce level of indirections --- src/platform/os/linux.rs | 14 +++++++++++--- src/platform/services.rs | 22 ++++++++++------------ 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/platform/os/linux.rs b/src/platform/os/linux.rs index e063b54b..4e61b288 100644 --- a/src/platform/os/linux.rs +++ b/src/platform/os/linux.rs @@ -1,4 +1,4 @@ -use super::super::{allocator, services}; +use super::super::allocator; use super::super::malloc::{dlmalloc, dlmalloc_linux}; @@ -209,6 +209,7 @@ unsafe fn dlmalloc_realloc(ptr: *mut u8, old_size: usize, old_align: usize, new_ } } +#[cfg(not(all(feature = "short", target_os = "linux")))] #[cfg(target_arch = "x86_64")] mod services_override { #[inline(always)] @@ -220,6 +221,7 @@ mod services_override { super::syscall::write(fd, buf, count) } } +#[cfg(not(all(feature = "short", target_os = "linux")))] #[cfg(target_arch = "x86")] mod services_override { #[inline(always)] @@ -242,6 +244,7 @@ pub unsafe fn init() { * Thus, instead of parsing the ELF section, we just invoke * the kernel APIs directly. 
*/ #[cfg(not(feature = "short"))] { + use super::super::services; let pd = services::platform_data(); if pd.env_flags & services::ENV_FLAGS_NATIVE != 0 { let mut rlim: syscall::RLimit = Default::default(); @@ -259,6 +262,11 @@ pub unsafe fn init() { dlmalloc_dealloc, dlmalloc_realloc, ); - services::install_single_service(5, services_override::svc_read_stdio as usize); - services::install_single_service(6, services_override::svc_write_stdio as usize); + + /* "short" on "Linux" will use syscalls directly to reduce code size */ + #[cfg(not(all(feature = "short", target_os = "linux")))] { + use super::super::services; + services::install_single_service(5, services_override::svc_read_stdio as usize); + services::install_single_service(6, services_override::svc_write_stdio as usize); + } } \ No newline at end of file diff --git a/src/platform/services.rs b/src/platform/services.rs index 831654df..ba271538 100644 --- a/src/platform/services.rs +++ b/src/platform/services.rs @@ -39,66 +39,64 @@ pub struct PlatformData { pub fn_table: [usize; 7], } -#[inline(always)] pub fn install(platform_data_by_loader: usize) { unsafe { PLATFORM_DATA = platform_data_by_loader; } } -#[inline(always)] unsafe fn addr(fn_id: usize) -> usize { core::ptr::read((PLATFORM_DATA + 32 + fn_id * core::mem::size_of::()) as *mut usize) } -#[inline(always)] pub unsafe fn install_single_service(fn_id: usize, fn_ptr: usize) { core::ptr::write((PLATFORM_DATA + 32 + fn_id * core::mem::size_of::()) as *mut usize, fn_ptr) } -//#[inline(always)] pub unsafe fn alloc(size: usize, align: usize) -> *mut u8 { let fn_ptr: native_func::A = core::mem::transmute(addr(1)); fn_ptr(size, align) } -//#[inline(always)] pub unsafe fn alloc_zeroed(size: usize, align: usize) -> *mut u8 { let fn_ptr: native_func::A = core::mem::transmute(addr(2)); fn_ptr(size, align) } -//#[inline(always)] pub unsafe fn dealloc(ptr: *mut u8, size: usize, align: usize) { let fn_ptr: native_func::B = core::mem::transmute(addr(3)); fn_ptr(ptr, 
size, align) } -//#[inline(always)] pub unsafe fn realloc(ptr: *mut u8, old_size: usize, old_align: usize, new_size: usize) -> *mut u8 { let fn_ptr: native_func::C = core::mem::transmute(addr(4)); fn_ptr(ptr, old_size, old_align, new_size) } -#[inline(always)] pub fn read_stdio(fd: usize, buf: &mut [u8]) -> usize { + #[cfg(not(all(feature = "short", target_os = "linux")))] unsafe { let fn_ptr: native_func::E = core::mem::transmute(addr(5)); fn_ptr(fd, buf.as_mut_ptr(), buf.len()) } + #[cfg(all(feature = "short", target_os = "linux"))] + unsafe { + super::os::linux::syscall::read(fd, buf.as_mut_ptr(), buf.len()) + } } -#[inline(always)] pub fn write_stdio(fd: usize, buf: &[u8]) -> usize { + #[cfg(not(all(feature = "short", target_os = "linux")))] unsafe { let fn_ptr: native_func::F = core::mem::transmute(addr(6)); fn_ptr(fd, buf.as_ptr(), buf.len()) } + #[cfg(all(feature = "short", target_os = "linux"))] + unsafe { + super::os::linux::syscall::write(fd, buf.as_ptr(), buf.len()) + } } -#[inline(always)] pub fn platform_data() -> PlatformData { unsafe { let pd: *const PlatformData = PLATFORM_DATA as *const PlatformData; core::ptr::read_unaligned(pd) } } -#[inline(always)] pub fn get_exit_status() -> i32 { unsafe { EXIT_CODE } } -#[inline(always)] pub fn set_exit_status(code: i32) { unsafe { EXIT_CODE = code; } } \ No newline at end of file From 9b05218ddff592e5b363804a823baf75fef1f429 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Tue, 28 Nov 2023 23:28:15 +0900 Subject: [PATCH 24/52] short: update reader.rs --- src/platform/io/reader.rs | 41 ++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/src/platform/io/reader.rs b/src/platform/io/reader.rs index b9c04154..0d811eda 100644 --- a/src/platform/io/reader.rs +++ b/src/platform/io/reader.rs @@ -46,22 +46,24 @@ impl Reader { pub fn try_refill(&mut self, readahead: usize) -> usize { /* readahead cannot exceed the buffer size */ assert!(readahead 
<= Self::BUF_LEN); - let end = self.off + readahead; - if end <= self.len { - /* data already available */ - } else { - unsafe { + unsafe { + let mut rem = self.len - self.off; + if rem < readahead { /* Secure space by discarding the already-consumed buffer contents at front. * Note that we expect `readahead` to be small (<100 bytes), so we unconditionally * copy the contents to the front to reduce code size. When the default buffer size * is used (which is >100K), this will not happen often and hence shouldn't affect * performance by a noticeable amount. */ - let rem = self.len - self.off; + let mut white_cnt = 0u32; + let mut j = self.off; for i in 0..rem { - *self.buf[i].assume_init_mut() = self.buf[self.off + i].assume_init(); + let c = self.buf[j].assume_init(); + if c <= b' ' { + white_cnt += 1; + } + *self.buf[i].assume_init_mut() = c; + j += 1; } - self.len = rem; - self.off = 0; /* Although the buffer currently falls short of what has been requested, * it may still be possible that a full token (which is short) @@ -69,24 +71,23 @@ impl Reader { * without invoking read_stdio. This is crucial for cases where * the standard input is a pipe, which includes the local testing * console environment. */ - let mut white_pos = self.off; - while white_pos < self.len { - if self.buf[white_pos].assume_init() <= b' ' { - break; - } - white_pos += 1; - } - if white_pos == self.len { + if white_cnt == 0 { /* No whitespace has been found. We have to read. * We try to read as much as possible at once. */ - self.len += services::read_stdio(0, MaybeUninit::slice_assume_init_mut(&mut self.buf[self.len..Self::BUF_LEN])); + rem += services::read_stdio(0, MaybeUninit::slice_assume_init_mut(&mut self.buf[rem..Self::BUF_LEN])); } /* Add a null-terminator, whether or not the read was nonsaturating (for SIMD-accelerated unsafe integer read routines). * This is safe since we spare 8 bytes at the end of the buffer. 
*/ - *self.buf[self.len].assume_init_mut() = 0u8; + *self.buf[rem].assume_init_mut() = 0u8; + + /* Save the new data length */ + self.len = rem; + self.off = 0; + } else { + /* data already available */ } + rem } - self.len - self.off } pub fn try_consume(&mut self, bytes: usize) -> usize { let mut consumed = 0; From c5225a846e795a7885122f45dfa5d6cdfd261633 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Tue, 28 Nov 2023 23:37:54 +0900 Subject: [PATCH 25/52] Update codegen.rs --- src/bin/codegen.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/bin/codegen.rs b/src/bin/codegen.rs index 266e565e..c18f0bc6 100644 --- a/src/bin/codegen.rs +++ b/src/bin/codegen.rs @@ -86,6 +86,7 @@ unsafe extern "win64" fn _start() -> ! { #[cfg(feature = "short")] asm!( "clc", // Not needed but packager wants it + "push rcx", // Align stack "mov rbx, rcx", // Save PLATFORM_DATA table "lea rdi, [rip + __ehdr_start]", "lea rsi, [rip + _DYNAMIC]", From 3099aefb8847d187ac55c82c0e899d0b889450ee Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Tue, 28 Nov 2023 23:48:51 +0900 Subject: [PATCH 26/52] short: update amd64_elf.rs --- src/platform/loader/amd64_elf.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/platform/loader/amd64_elf.rs b/src/platform/loader/amd64_elf.rs index 0ccf07e4..07ae8431 100644 --- a/src/platform/loader/amd64_elf.rs +++ b/src/platform/loader/amd64_elf.rs @@ -99,9 +99,9 @@ pub unsafe extern "sysv64" fn relocate( if ptr_rela == 0 { return; } + relasz += ptr_rela; - let mut j = 0; - while j < relasz { + while ptr_rela < relasz { let pst_rela = ptr_rela as *mut Elf64Rela; let ul_offset = (*pst_rela).r_offset; let ul_info = (*pst_rela).r_info; @@ -116,7 +116,6 @@ pub unsafe extern "sysv64" fn relocate( /* not implemented */ panic!(); } - j += relaent; ptr_rela += relaent; } } \ No newline at end of file From 46f6f35863fa144a04a36423c67fe749be9f1c21 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn 
<7p54ks3@naver.com> Date: Wed, 29 Nov 2023 07:39:23 +0900 Subject: [PATCH 27/52] short: update linux.rs --- src/platform/os/linux.rs | 60 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 3 deletions(-) diff --git a/src/platform/os/linux.rs b/src/platform/os/linux.rs index 4e61b288..b101f589 100644 --- a/src/platform/os/linux.rs +++ b/src/platform/os/linux.rs @@ -42,6 +42,60 @@ pub mod syscall { pub rlim_max: usize, } + #[cfg(target_arch = "x86_64")] + #[inline(always)] + pub unsafe fn syscall1( + call_id: usize, + arg0: usize, + ) -> usize { + let out; + asm!( + "syscall", + in("rax") call_id, + in("rdi") arg0, + lateout("rax") out, + out("rcx") _, + out("r11") _ + ); + out + } + #[cfg(target_arch = "x86")] + pub unsafe fn syscall1( + call_id: usize, + arg0: usize, + ) -> usize { + syscall(call_id, arg0, 0, 0, 0, 0, 0) + } + #[cfg(target_arch = "x86_64")] + #[inline(always)] + pub unsafe fn syscall3( + call_id: usize, + arg0: usize, + arg1: usize, + arg2: usize, + ) -> usize { + let out; + asm!( + "syscall", + in("rax") call_id, + in("rdi") arg0, + in("rsi") arg1, + in("rdx") arg2, + lateout("rax") out, + out("rcx") _, + out("r11") _ + ); + out + } + #[cfg(target_arch = "x86")] + unsafe extern "cdecl" fn syscall3( + call_id: usize, + arg0: usize, + arg1: usize, + arg2: usize, + ) -> usize { + syscall(call_id, arg0, arg1, arg2, 0, 0, 0) + } #[cfg(target_arch = "x86_64")] #[inline(always)] pub unsafe fn syscall( @@ -149,7 +203,7 @@ pub mod syscall { buf: *mut u8, count: usize ) -> usize { - syscall(id_list::READ, fd, buf as usize, count, 0, 0, 0) + syscall3(id_list::READ, fd, buf as usize, count) } #[inline(always)] pub unsafe fn write( @@ -157,13 +211,13 @@ pub mod syscall { buf: *const u8, count: usize ) -> usize { - syscall(id_list::WRITE, fd, buf as usize, count, 0, 0, 0) + syscall3(id_list::WRITE, fd, buf as usize, count) } #[inline(always)] pub unsafe fn exit_group( status: usize ) -> ! 
{ - syscall(id_list::EXIT_GROUP, status, 0, 0, 0, 0, 0); + syscall1(id_list::EXIT_GROUP, status); unreachable!() } #[inline(always)] From a4e935b54ac272439ede713bf58a42d3e05064ce Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Thu, 30 Nov 2023 17:41:25 +0900 Subject: [PATCH 28/52] short: support packaging without compression Reduces the generated code length for extremely short codes. --- scripts/static-pie-gen.py | 65 ++++++++----- scripts/static-pie-prestub-amd64-shorter.asm | 93 +++++++++++++++++++ scripts/static-pie-prestub-amd64-shorter.bin | Bin 0 -> 100 bytes scripts/static-pie-template-amd64-shorter.rs | 8 ++ 4 files changed, 141 insertions(+), 25 deletions(-) create mode 100644 scripts/static-pie-prestub-amd64-shorter.asm create mode 100644 scripts/static-pie-prestub-amd64-shorter.bin create mode 100644 scripts/static-pie-template-amd64-shorter.rs diff --git a/scripts/static-pie-gen.py b/scripts/static-pie-gen.py index c4aadcae..e91fe862 100644 --- a/scripts/static-pie-gen.py +++ b/scripts/static-pie-gen.py @@ -67,6 +67,11 @@ sol[-1] = sol[-1].rstrip() sol = "".join(sol) +# binary (raw) +code_raw_b91 = base91.encode(memory_bin[:-4]).decode('ascii') +code_raw_b91_len = len(code_raw_b91) +code_raw_b91 = '"' + code_raw_b91 + '"' + # binary with open(compressed_binary_path, "rb") as f: code = f.read() @@ -115,29 +120,39 @@ stub_b85 = '"' + stub_b85 + '"' # template -with open(template_path, encoding='utf8') as f: - template = f.read() -template = template.replace("\ufeff", "") - -# putting it all together -# reference: https://stackoverflow.com/a/15448887 -def multiple_replace(string, rep_dict): - pattern = re.compile("|".join([re.escape(k) for k in sorted(rep_dict,key=len,reverse=True)]), flags=re.DOTALL) - return pattern.sub(lambda x: rep_dict[x.group(0)], string) - -out = multiple_replace(template, { - "$$$$solution_src$$$$": sol, - "$$$$stub_raw$$$$": stub_raw, - "$$$$stub_base85$$$$": stub_b85, - "$$$$stub_len$$$$": str(len(stub)), 
- "$$$$stub_base85_len$$$$": str(stub_b85_len), - "$$$$stub_base91$$$$": stub_b91, - "$$$$stub_base91_len$$$$": str(stub_b91_len), - "$$$$binary_base85$$$$": r, - "$$$$binary_base85_len$$$$": str(len(code_b85)), - "$$$$binary_base91$$$$": code_b91, - "$$$$binary_base91_len$$$$": str(code_b91_len), - "$$$$min_len_4096$$$$": str(min(len(code_b85)+1, 4096)), - "$$$$entrypoint_offset$$$$": str(loader_fdict['entrypoint_offset']), -}) +template_candidates = [template_path] +if lang_name == "Rust" and "x86_64" in target_name and "short" in template_path: + template_candidates.append(template_path.replace("short", "shorter")) + +out = None +for each_template_path in template_candidates: + with open(each_template_path, encoding='utf8') as f: + template = f.read() + template = template.replace("\ufeff", "") + + # putting it all together + # reference: https://stackoverflow.com/a/15448887 + def multiple_replace(string, rep_dict): + pattern = re.compile("|".join([re.escape(k) for k in sorted(rep_dict,key=len,reverse=True)]), flags=re.DOTALL) + return pattern.sub(lambda x: rep_dict[x.group(0)], string) + + out_candidate = multiple_replace(template, { + "$$$$solution_src$$$$": sol, + "$$$$stub_raw$$$$": stub_raw, + "$$$$stub_base85$$$$": stub_b85, + "$$$$stub_len$$$$": str(len(stub)), + "$$$$stub_base85_len$$$$": str(stub_b85_len), + "$$$$stub_base91$$$$": stub_b91, + "$$$$stub_base91_len$$$$": str(stub_b91_len), + "$$$$binary_base85$$$$": r, + "$$$$binary_base85_len$$$$": str(len(code_b85)), + "$$$$binary_base91$$$$": code_b91, + "$$$$binary_base91_len$$$$": str(code_b91_len), + "$$$$binary_raw_base91$$$$": code_raw_b91, + "$$$$binary_raw_base91_len$$$$": str(code_raw_b91_len), + "$$$$min_len_4096$$$$": str(min(len(code_b85)+1, 4096)), + "$$$$entrypoint_offset$$$$": str(loader_fdict['entrypoint_offset']), + }) + if out is None or len(out_candidate) < len(out): + out = out_candidate print(out) \ No newline at end of file diff --git a/scripts/static-pie-prestub-amd64-shorter.asm 
b/scripts/static-pie-prestub-amd64-shorter.asm new file mode 100644 index 00000000..3f3037b4 --- /dev/null +++ b/scripts/static-pie-prestub-amd64-shorter.asm @@ -0,0 +1,93 @@ +; -*- tab-width: 4 -*- +; +; The prestub for amd64-rust target +; (prestub: the code that runs before the stub and sets the stage) +; +; build: nasm -f bin -O9 static-pie-prestub-amd64-shorter.asm -o static-pie-prestub-amd64-shorter.bin +; note: after building with the above command, run static-pie-prestub-amd64-print.py static-pie-prestub-amd64-shorter.bin +; to obtain the form that can be embedded in Rust as inline assembly. + +BITS 64 +ORG 0 +section .text + +; Align stack to 16 byte boundary +; [rsp+ 32, rsp+120): PLATFORM_DATA +; [rsp+ 0, rsp+ 32): (shadow space for win64 calling convention) + enter 48, 0 + push 1 + pop rcx ; Enable ENV_FLAGS_LINUX_STYLE_CHKSTK outside Windows + call _t + +; svc_alloc_rwx for Windows and Linux +; rcx = size +; rdi = pointer to VirtualAlloc (must be supplied before prepending the mov instruction) +_svc_alloc_rwx: + push 9 + pop rax ; syscall id of x64 mmap + jecxz _decode + cdq ; rdx=0 + xor r9d, r9d ; offset + push rsi ; save rsi + xor edi, edi ; rdi=0 + mov esi, ecx ; size + mov dl, 7 ; protect (safe since we have ensured rdx=0) + push 0x22 + pop r10 ; flags + push -1 + pop r8 ; fd + syscall + pop rsi ; restore rsi +_ret: + ret +_svc_alloc_rwx_end: + +; Base91 decoder +_decode: + mov al, 0x1f +_decode_loop: + shl eax, 13 + lodsb + sub al, 0x24 + jc _ret + cdq + xchg eax, edx + lodsb + sub al, 0x24 + imul eax, eax, 91 + add eax, edx +_decode_output: + stosb + shr eax, 8 + test ah, 16 + jnz _decode_output + jmp _decode_loop + +; PLATFORM_DATA +_t: ; PLATFORM_DATA[32..39] = ptr_alloc_rwx + pop rbx + push rbx + push rdx ; PLATFORM_DATA[24..31] = win_GetProcAddress + push rax ; PLATFORM_DATA[16..23] = win_kernel32 + push rcx ; PLATFORM_DATA[ 8..15] = env_flags (0=None, 1=ENV_FLAGS_LINUX_STYLE_CHKSTK) + inc ecx + push rcx ; PLATFORM_DATA[ 0.. 
7] = env_id (1=Windows, 2=Linux) + push rsp + call rbx ; svc_alloc_rwx + +; Current state: rax = new buffer + push rax + xchg rax, rdi ; rdi = new buffer + +; Decode stub (rsi -> rdi) +; Current state: rdi = target memory (by the previous instruction) +; rsi = STUB_BASE91 (by the Rust template) + xor ecx, ecx + call rbx + +; Jump to entrypoint + mov eax, dword [rdi-4] + pop rcx + add rax, rcx + pop rcx + call rax \ No newline at end of file diff --git a/scripts/static-pie-prestub-amd64-shorter.bin b/scripts/static-pie-prestub-amd64-shorter.bin new file mode 100644 index 0000000000000000000000000000000000000000..0a281f78636564308ced959386e457a44ecbbc82 GIT binary patch literal 100 zcmV-q0Gt2FFaQ8*0a@rg0001L30P<27nwyd$yPD{iO#YIY9c{eYX3o44+UPsuphzT z4Xi9Aa`l;#tSlsJz*_;(s=??8_QVi%_v_ Date: Thu, 30 Nov 2023 18:29:53 +0900 Subject: [PATCH 29/52] short: run-length encoding (RLE) for zeros --- scripts/base91.py | 40 ++++++++++++++++--- scripts/static-pie-gen.py | 2 +- scripts/static-pie-prestub-amd64-shorter.asm | 14 ++++++- scripts/static-pie-prestub-amd64-shorter.bin | Bin 100 -> 117 bytes scripts/static-pie-template-amd64-shorter.rs | 2 +- 5 files changed, 48 insertions(+), 10 deletions(-) diff --git a/scripts/base91.py b/scripts/base91.py index 8778d797..c3ec28d2 100644 --- a/scripts/base91.py +++ b/scripts/base91.py @@ -1,4 +1,29 @@ -def encode(x): +def encode(x_in, use_rle=False): + sharp_insertion_points = [] + if use_rle: + current_bits, current_bytes, i = 0, 0, 0 + x = bytearray() + while i < len(x_in): + current_bits += 13 + while current_bytes < current_bits // 8: + if i >= len(x_in): + break + x.append(x_in[i]) + current_bytes += 1 + i += 1 + if len(x) > 0 and x[-1] == 0: + zeros_cnt = 1 + while i - 1 + zeros_cnt < len(x_in) and zeros_cnt < 255 and x_in[i - 1 + zeros_cnt] == 0: + zeros_cnt += 1 + if zeros_cnt >= 2: + x.pop() + x.append(zeros_cnt) + sharp_insertion_points.append((current_bits // 13 * 2) + len(sharp_insertion_points)) + i += zeros_cnt - 1 + 
sharp_insertion_points = list(reversed(sharp_insertion_points)) + else: + x = x_in + out = [] i = 0 cnt5, stack5 = 0, [] @@ -21,11 +46,14 @@ def encode(x): if bits < 13: stack5.append((pos, bits)) cnt5 -= 8 - ret = bytearray(2 * len(out) + 1) - for i in range(len(out)): - ret[2 * i + 0] = 0x24 + (out[i] % 91) - ret[2 * i + 1] = 0x24 + (out[i] // 91) - ret[-1] = ord('!') + ret = bytearray() + for pack in out: + ret.append(0x24 + (pack % 91)) + ret.append(0x24 + (pack // 91)) + if len(sharp_insertion_points) > 0 and len(ret) == sharp_insertion_points[-1]: + ret.append(ord(b'#')) + sharp_insertion_points.pop() + ret.append(ord(b'!')) return bytes(ret) if __name__ == '__main__': diff --git a/scripts/static-pie-gen.py b/scripts/static-pie-gen.py index e91fe862..64b72daf 100644 --- a/scripts/static-pie-gen.py +++ b/scripts/static-pie-gen.py @@ -68,7 +68,7 @@ sol = "".join(sol) # binary (raw) -code_raw_b91 = base91.encode(memory_bin[:-4]).decode('ascii') +code_raw_b91 = base91.encode(memory_bin[:-4], use_rle=True).decode('ascii') code_raw_b91_len = len(code_raw_b91) code_raw_b91 = '"' + code_raw_b91 + '"' diff --git a/scripts/static-pie-prestub-amd64-shorter.asm b/scripts/static-pie-prestub-amd64-shorter.asm index 3f3037b4..7ca0551f 100644 --- a/scripts/static-pie-prestub-amd64-shorter.asm +++ b/scripts/static-pie-prestub-amd64-shorter.asm @@ -47,10 +47,13 @@ _decode: mov al, 0x1f _decode_loop: shl eax, 13 +_decode_loop_2: lodsb - sub al, 0x24 - jc _ret + sub al, 0x23 cdq + jc _ret + jz _decode_zeros + dec al xchg eax, edx lodsb sub al, 0x24 @@ -62,6 +65,13 @@ _decode_output: test ah, 16 jnz _decode_output jmp _decode_loop +_decode_zeros: + xchg eax, edx + movzx ecx, byte [rdi-1] + dec rdi + rep stosb + xchg eax, edx + jmp _decode_loop_2 ; PLATFORM_DATA _t: ; PLATFORM_DATA[32..39] = ptr_alloc_rwx diff --git a/scripts/static-pie-prestub-amd64-shorter.bin b/scripts/static-pie-prestub-amd64-shorter.bin index 
0a281f78636564308ced959386e457a44ecbbc82..8e93ca37bc150f4261d8239636b5ca290f77d941 100644 GIT binary patch delta 60 zcmV-C0K@-ebqUBY003$MS?EoX2`4EdnR4`W7XHYRtSlsJz*_;(s=??8_QVi%_v_!1 S54KPLNdM3Cs*>x~kw_dHR~`=l delta 43 zcmV+`0M!3=WC_SH003$MS?D~G2`3mNa`l;#tSlsJz*_;(s=??8_QVi%_v_=4SsZBG B6AAzT diff --git a/scripts/static-pie-template-amd64-shorter.rs b/scripts/static-pie-template-amd64-shorter.rs index a80aa62b..000d6c7c 100644 --- a/scripts/static-pie-template-amd64-shorter.rs +++ b/scripts/static-pie-template-amd64-shorter.rs @@ -5,4 +5,4 @@ $$$$solution_src$$$$ } // SOLUTION END -#[no_link]extern crate std;#[no_mangle]unsafe fn _start(){std::arch::asm!(".quad 0e859016a000030c8h,6758096a0000003ch,3156c931459917e3h,41226a07b2ce89ffh,5e050f5841ff6a5ah,2cac0de0c11fb0c3h,242cac9299f57224h,0e8c1aad0015bc06bh,0e3ebf77510c4f608h,51c1ff515052535bh,0c931974850d3ff54h,14859fc478bd3ffh,0d0ff59c8h",in("rsi")r$$$$binary_raw_base91$$$$.as_ptr())} \ No newline at end of file +#[no_link]extern crate std;#[no_mangle]unsafe fn _start(){std::arch::asm!(".quad 0e859016a000030c8h,6758096a0000004dh,3156c931459917e3h,41226a07b2ce89ffh,5e050f5841ff6a5ah,2cac0de0c11fb0c3h,0c8fe1674f4729923h,15bc06b242cac92h,10c4f608e8c1aad0h,4fb60f92dfebf775h,0eb92aaf3cfff48ffh,0c1ff515052535bd5h,31974850d3ff5451h,4859fc478bd3ffc9h,0d0ff59c801h",in("rsi")r$$$$binary_raw_base91$$$$.as_ptr())} \ No newline at end of file From 5dff3515478b1d9aeb9c1e495cd707bbc23cf12d Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Thu, 30 Nov 2023 18:36:12 +0900 Subject: [PATCH 30/52] short: shorten the shorter stub --- scripts/static-pie-prestub-amd64-shorter.asm | 55 +++++++------------ scripts/static-pie-prestub-amd64-shorter.bin | Bin 117 -> 98 bytes scripts/static-pie-template-amd64-shorter.rs | 2 +- 3 files changed, 20 insertions(+), 37 deletions(-) diff --git a/scripts/static-pie-prestub-amd64-shorter.asm b/scripts/static-pie-prestub-amd64-shorter.asm index 7ca0551f..fed8872a 100644 --- 
a/scripts/static-pie-prestub-amd64-shorter.asm +++ b/scripts/static-pie-prestub-amd64-shorter.asm @@ -14,23 +14,18 @@ section .text ; Align stack to 16 byte boundary ; [rsp+ 32, rsp+120): PLATFORM_DATA ; [rsp+ 0, rsp+ 32): (shadow space for win64 calling convention) - enter 48, 0 - push 1 - pop rcx ; Enable ENV_FLAGS_LINUX_STYLE_CHKSTK outside Windows - call _t + enter 56, 0 -; svc_alloc_rwx for Windows and Linux -; rcx = size -; rdi = pointer to VirtualAlloc (must be supplied before prepending the mov instruction) +; svc_alloc_rwx for Linux _svc_alloc_rwx: push 9 pop rax ; syscall id of x64 mmap - jecxz _decode cdq ; rdx=0 xor r9d, r9d ; offset push rsi ; save rsi xor edi, edi ; rdi=0 - mov esi, ecx ; size + push 1 + pop rsi ; size mov dl, 7 ; protect (safe since we have ensured rdx=0) push 0x22 pop r10 ; flags @@ -38,20 +33,28 @@ _svc_alloc_rwx: pop r8 ; fd syscall pop rsi ; restore rsi -_ret: - ret -_svc_alloc_rwx_end: + +; PLATFORM_DATA +_t: ; PLATFORM_DATA[32..39] = ptr_alloc_rwx + push rdx ; PLATFORM_DATA[24..31] = win_GetProcAddress + push rax ; PLATFORM_DATA[16..23] = win_kernel32 + push 1 ; PLATFORM_DATA[ 8..15] = env_flags (0=None, 1=ENV_FLAGS_LINUX_STYLE_CHKSTK) + push 2 ; PLATFORM_DATA[ 0.. 7] = env_id (1=Windows, 2=Linux) + +; Current state: rax = new buffer + push rax + xchg rax, rdi ; rdi = new buffer ; Base91 decoder _decode: - mov al, 0x1f + mov al, 0x1f ; syscall preserves rax; hence at this point rax=9 _decode_loop: shl eax, 13 _decode_loop_2: lodsb sub al, 0x23 cdq - jc _ret + jc _jump_to_entrypoint jz _decode_zeros dec al xchg eax, edx @@ -73,31 +76,11 @@ _decode_zeros: xchg eax, edx jmp _decode_loop_2 -; PLATFORM_DATA -_t: ; PLATFORM_DATA[32..39] = ptr_alloc_rwx - pop rbx - push rbx - push rdx ; PLATFORM_DATA[24..31] = win_GetProcAddress - push rax ; PLATFORM_DATA[16..23] = win_kernel32 - push rcx ; PLATFORM_DATA[ 8..15] = env_flags (0=None, 1=ENV_FLAGS_LINUX_STYLE_CHKSTK) - inc ecx - push rcx ; PLATFORM_DATA[ 0.. 
7] = env_id (1=Windows, 2=Linux) - push rsp - call rbx ; svc_alloc_rwx - -; Current state: rax = new buffer - push rax - xchg rax, rdi ; rdi = new buffer - -; Decode stub (rsi -> rdi) -; Current state: rdi = target memory (by the previous instruction) -; rsi = STUB_BASE91 (by the Rust template) - xor ecx, ecx - call rbx - ; Jump to entrypoint +_jump_to_entrypoint: mov eax, dword [rdi-4] pop rcx add rax, rcx + push rsp pop rcx call rax \ No newline at end of file diff --git a/scripts/static-pie-prestub-amd64-shorter.bin b/scripts/static-pie-prestub-amd64-shorter.bin index 8e93ca37bc150f4261d8239636b5ca290f77d941..1f12607d06b6d2421752c5870b665c1cdccd2696 100644 GIT binary patch delta 68 zcmV-K0K5NnVlv1$003$USeZpJ$yPD{Y5`ud2WlcgT5A76SPunWQc!9EY64J5m#`nf a;0>%SBbjm~ku)U>i%0xfNCC)HS^v;6fEh^u delta 87 zcmV-d0I2_Bbu`E@003$MS?Emw003$USZCuGnME Date: Thu, 30 Nov 2023 20:57:06 +0900 Subject: [PATCH 31/52] short: shorten the shorter prestub --- scripts/static-pie-gen.py | 2 +- scripts/static-pie-prestub-amd64-shorter.asm | 5 ++--- scripts/static-pie-prestub-amd64-shorter.bin | Bin 98 -> 96 bytes scripts/static-pie-template-amd64-shorter.rs | 2 +- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/scripts/static-pie-gen.py b/scripts/static-pie-gen.py index 64b72daf..61a2f886 100644 --- a/scripts/static-pie-gen.py +++ b/scripts/static-pie-gen.py @@ -68,7 +68,7 @@ sol = "".join(sol) # binary (raw) -code_raw_b91 = base91.encode(memory_bin[:-4], use_rle=True).decode('ascii') +code_raw_b91 = base91.encode(memory_bin, use_rle=True).decode('ascii') code_raw_b91_len = len(code_raw_b91) code_raw_b91 = '"' + code_raw_b91 + '"' diff --git a/scripts/static-pie-prestub-amd64-shorter.asm b/scripts/static-pie-prestub-amd64-shorter.asm index fed8872a..9886b386 100644 --- a/scripts/static-pie-prestub-amd64-shorter.asm +++ b/scripts/static-pie-prestub-amd64-shorter.asm @@ -78,9 +78,8 @@ _decode_zeros: ; Jump to entrypoint _jump_to_entrypoint: - mov eax, dword [rdi-4] - pop rcx - add 
rax, rcx + pop rax + add rax, qword [rdi-8] push rsp pop rcx call rax \ No newline at end of file diff --git a/scripts/static-pie-prestub-amd64-shorter.bin b/scripts/static-pie-prestub-amd64-shorter.bin index 1f12607d06b6d2421752c5870b665c1cdccd2696..7b5d9308a9d45d92a77b62a9348235a679031c83 100644 GIT binary patch delta 14 VcmYdFm=Mkx;lb?wBP8F1`z-N delta 16 XcmYdDnh?(2?fxgygYiU2 Date: Thu, 30 Nov 2023 21:17:52 +0900 Subject: [PATCH 32/52] short: update amd64_elf.rs --- src/platform/loader/amd64_elf.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/platform/loader/amd64_elf.rs b/src/platform/loader/amd64_elf.rs index 07ae8431..f8ff2bce 100644 --- a/src/platform/loader/amd64_elf.rs +++ b/src/platform/loader/amd64_elf.rs @@ -49,6 +49,8 @@ There are currently three files licensed under GPLv2+: #![allow(clippy::cmp_null)] +use core::mem::MaybeUninit; + // Dynamic section entry types const DT_RELA: u64 = 7; const DT_RELASZ: u64 = 8; @@ -78,14 +80,14 @@ pub unsafe extern "sysv64" fn relocate( ) { let mut ptr_dyn: *const Elf64Dyn = addr_dynamic_section as *const Elf64Dyn; let mut ptr_rela = 0; - let mut relasz = 0; - let mut relaent = 0; + let mut relasz = MaybeUninit::::uninit(); + let mut relaent = MaybeUninit::::uninit(); loop { match (*ptr_dyn).d_tag { 0 => { break; } DT_RELA => { ptr_rela = addr_image_base + (*ptr_dyn).d_val_or_ptr; }, - DT_RELASZ => { relasz = (*ptr_dyn).d_val_or_ptr; }, - DT_RELAENT => { relaent = (*ptr_dyn).d_val_or_ptr; }, + DT_RELASZ => { relasz.write((*ptr_dyn).d_val_or_ptr); }, + DT_RELAENT => { relaent.write((*ptr_dyn).d_val_or_ptr); }, _ => () } ptr_dyn = ptr_dyn.add(1); @@ -99,9 +101,9 @@ pub unsafe extern "sysv64" fn relocate( if ptr_rela == 0 { return; } - relasz += ptr_rela; + relasz.write(relasz.assume_init() + ptr_rela); - while ptr_rela < relasz { + while ptr_rela < relasz.assume_init() { let pst_rela = ptr_rela as *mut Elf64Rela; let ul_offset = (*pst_rela).r_offset; let ul_info = 
(*pst_rela).r_info; @@ -116,6 +118,6 @@ pub unsafe extern "sysv64" fn relocate( /* not implemented */ panic!(); } - ptr_rela += relaent; + ptr_rela += relaent.assume_init(); } } \ No newline at end of file From dd8c4f39649cd6a36e46abd9af1088d02253b57d Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Thu, 30 Nov 2023 21:28:42 +0900 Subject: [PATCH 33/52] short: prevent potential aberrant shortening --- scripts/static-pie-gen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/static-pie-gen.py b/scripts/static-pie-gen.py index 61a2f886..1508a146 100644 --- a/scripts/static-pie-gen.py +++ b/scripts/static-pie-gen.py @@ -121,7 +121,7 @@ # template template_candidates = [template_path] -if lang_name == "Rust" and "x86_64" in target_name and "short" in template_path: +if lang_name == "Rust" and "x86_64" in target_name and "short" in template_path and len(memory_bin) <= 4096: template_candidates.append(template_path.replace("short", "shorter")) out = None From 811c82113fbd9fc86240e6939cba512a4d75fff7 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Thu, 30 Nov 2023 21:30:29 +0900 Subject: [PATCH 34/52] short add Visual Studio Code task for short --- .vscode/tasks.json | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 7e68140f..61a5980d 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -35,6 +35,14 @@ "$rustc" ], }, + { + "label": "build-release-amd64-rs-short-submit", + "type": "shell", + "command": "mkdir -p ./target/x86_64-unknown-linux-gnu-short/release; ./release-rs.sh --features short > ./target/x86_64-unknown-linux-gnu-short/release/loader.rs; code --reuse-window ./target/x86_64-unknown-linux-gnu-short/release/loader.rs", + "problemMatcher": [ + "$rustc" + ], + }, { "label": "build-release-i686-submit", "type": "shell", From 21dbb4d6124d25c7ed8e91937e7000930875cf6c Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> 
Date: Thu, 30 Nov 2023 22:16:47 +0900 Subject: [PATCH 35/52] short: update mod.rs --- src/platform/mod.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/platform/mod.rs b/src/platform/mod.rs index 5f2e0f30..46a3b091 100644 --- a/src/platform/mod.rs +++ b/src/platform/mod.rs @@ -45,9 +45,14 @@ pub fn init(platform_data_by_loader: usize) { } #[cfg(not(test))] pub fn try_exit() { - let pd = services::platform_data(); - if pd.env_id == services::ENV_ID_LINUX { - #[cfg(not(target_arch = "wasm32"))] + #[cfg(not(all(target_arch = "x86_64", feature = "short")))] { + let pd = services::platform_data(); + if pd.env_id == services::ENV_ID_LINUX { + #[cfg(not(target_arch = "wasm32"))] + unsafe { os::linux::syscall::exit_group(services::get_exit_status() as usize); } + } + } + #[cfg(all(target_arch = "x86_64", feature = "short"))] { unsafe { os::linux::syscall::exit_group(services::get_exit_status() as usize); } } } From 38b70459e28c9e1518870959ce2f28cc89703f4a Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Thu, 30 Nov 2023 22:22:17 +0900 Subject: [PATCH 36/52] short: synthesize PLATFORM_DATA in ELF (not stub) --- scripts/static-pie-gen.py | 4 +++- scripts/static-pie-prestub-amd64-shorter.asm | 20 ++----------------- scripts/static-pie-prestub-amd64-shorter.bin | Bin 96 -> 82 bytes scripts/static-pie-template-amd64-shorter.rs | 2 +- src/bin/codegen.rs | 8 +++++--- 5 files changed, 11 insertions(+), 23 deletions(-) diff --git a/scripts/static-pie-gen.py b/scripts/static-pie-gen.py index 1508a146..c0c43d63 100644 --- a/scripts/static-pie-gen.py +++ b/scripts/static-pie-gen.py @@ -68,7 +68,9 @@ sol = "".join(sol) # binary (raw) -code_raw_b91 = base91.encode(memory_bin, use_rle=True).decode('ascii') +code_raw = memory_bin[:-8] +code_raw += (len(code_raw) + 8 - loader_fdict['entrypoint_offset']).to_bytes(8, byteorder='little') +code_raw_b91 = base91.encode(code_raw, use_rle=True).decode('ascii') code_raw_b91_len = 
len(code_raw_b91) code_raw_b91 = '"' + code_raw_b91 + '"' diff --git a/scripts/static-pie-prestub-amd64-shorter.asm b/scripts/static-pie-prestub-amd64-shorter.asm index 9886b386..ea1c3201 100644 --- a/scripts/static-pie-prestub-amd64-shorter.asm +++ b/scripts/static-pie-prestub-amd64-shorter.asm @@ -11,11 +11,6 @@ BITS 64 ORG 0 section .text -; Align stack to 16 byte boundary -; [rsp+ 32, rsp+120): PLATFORM_DATA -; [rsp+ 0, rsp+ 32): (shadow space for win64 calling convention) - enter 56, 0 - ; svc_alloc_rwx for Linux _svc_alloc_rwx: push 9 @@ -34,15 +29,7 @@ _svc_alloc_rwx: syscall pop rsi ; restore rsi -; PLATFORM_DATA -_t: ; PLATFORM_DATA[32..39] = ptr_alloc_rwx - push rdx ; PLATFORM_DATA[24..31] = win_GetProcAddress - push rax ; PLATFORM_DATA[16..23] = win_kernel32 - push 1 ; PLATFORM_DATA[ 8..15] = env_flags (0=None, 1=ENV_FLAGS_LINUX_STYLE_CHKSTK) - push 2 ; PLATFORM_DATA[ 0.. 7] = env_id (1=Windows, 2=Linux) - ; Current state: rax = new buffer - push rax xchg rax, rdi ; rdi = new buffer ; Base91 decoder @@ -78,8 +65,5 @@ _decode_zeros: ; Jump to entrypoint _jump_to_entrypoint: - pop rax - add rax, qword [rdi-8] - push rsp - pop rcx - call rax \ No newline at end of file + sub rdi, qword [rdi-8] + jmp rdi \ No newline at end of file diff --git a/scripts/static-pie-prestub-amd64-shorter.bin b/scripts/static-pie-prestub-amd64-shorter.bin index 7b5d9308a9d45d92a77b62a9348235a679031c83..6ad336168204b5dc0e09ec66e023aeb43875b187 100644 GIT binary patch delta 39 vcmYcYlFH(YnCWVGGR*LQ7GvBd_ADjGsI32v5&W!i6IG4aJhbb7{C^Gr5Y-OR delta 30 lcmWGaU^!vIz%W6IJt!cHF^ef+qLdM5ga@33UJf diff --git a/scripts/static-pie-template-amd64-shorter.rs b/scripts/static-pie-template-amd64-shorter.rs index 70933ec1..ddfe621b 100644 --- a/scripts/static-pie-template-amd64-shorter.rs +++ b/scripts/static-pie-template-amd64-shorter.rs @@ -5,4 +5,4 @@ $$$$solution_src$$$$ } // SOLUTION END -#[no_link]extern crate std;#[no_mangle]unsafe fn _start(){std::arch::asm!(".quad 
9958096a000038c8h,16aff3156c93145h,6a5a41226a07b25eh,50525e050f5841ffh,0b0974850026a016ah,99232cac0de0c11fh,0ac92c8fe16742572h,0aad0015bc06b242ch,0f77510c4f608e8c1h,48ff4fb60f92dfebh,58d5eb92aaf3cfffh,0d0ff5954f8470348h",in("rsi")r$$$$binary_raw_base91$$$$.as_ptr())} \ No newline at end of file +#[no_link]extern crate std;#[no_mangle]unsafe fn _start(){std::arch::asm!(".quad 56c931459958096ah,6a07b25e016aff31h,0f5841ff6a5a4122h,0e0c11fb097485e05h,74257299232cac0dh,6b242cac92c8fe16h,8e8c1aad0015bc0h,92dfebf77510c4f6h,0f3cfff48ff4fb60fh,0f87f2b48d5eb92aah,59391",in("rsi")r$$$$binary_raw_base91$$$$.as_ptr())} \ No newline at end of file diff --git a/src/bin/codegen.rs b/src/bin/codegen.rs index c18f0bc6..a3db1249 100644 --- a/src/bin/codegen.rs +++ b/src/bin/codegen.rs @@ -86,12 +86,14 @@ unsafe extern "win64" fn _start() -> ! { #[cfg(feature = "short")] asm!( "clc", // Not needed but packager wants it - "push rcx", // Align stack - "mov rbx, rcx", // Save PLATFORM_DATA table + "sub rsp, 80", // 16 + 80 = 96 = 16*6 -> stack alignment preserved + "push 1", // env_flags = 1 (ENV_FLAGS_LINUX_STYLE_CHKSTK) + "push 2", // env_id = 2 (ENV_ID_LINUX) "lea rdi, [rip + __ehdr_start]", "lea rsi, [rip + _DYNAMIC]", "call {0}", - "mov rdi, rbx", + "push rsp", + "pop rcx", "call {1}", // This won't return since on Linux we invoke SYS_exitgroup in binary sym loader::amd64_elf::relocate, sym _start_rust, From d68c1dba7442182a780620c7b332184310b91637 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Thu, 30 Nov 2023 22:26:06 +0900 Subject: [PATCH 37/52] short: shorten the shorter prestub --- scripts/static-pie-prestub-amd64-shorter.asm | 4 +--- scripts/static-pie-prestub-amd64-shorter.bin | 2 +- scripts/static-pie-template-amd64-shorter.rs | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/scripts/static-pie-prestub-amd64-shorter.asm b/scripts/static-pie-prestub-amd64-shorter.asm index ea1c3201..39878f3f 100644 --- 
a/scripts/static-pie-prestub-amd64-shorter.asm +++ b/scripts/static-pie-prestub-amd64-shorter.asm @@ -56,11 +56,9 @@ _decode_output: jnz _decode_output jmp _decode_loop _decode_zeros: - xchg eax, edx movzx ecx, byte [rdi-1] dec rdi - rep stosb - xchg eax, edx + rep stosb ; the fact we jumped to here ensures al=0 jmp _decode_loop_2 ; Jump to entrypoint diff --git a/scripts/static-pie-prestub-amd64-shorter.bin b/scripts/static-pie-prestub-amd64-shorter.bin index 6ad33616..d133d946 100644 --- a/scripts/static-pie-prestub-amd64-shorter.bin +++ b/scripts/static-pie-prestub-amd64-shorter.bin @@ -1 +1 @@ -j X™E1ÉV1ÿj^²j"AZjÿAX^H—°Áà ¬,#™r%tþÈ’¬,$kÀ[ЪÁèöÄu÷ëß’¶OÿHÿÏóª’ëÕH+øÿç \ No newline at end of file +j X™E1ÉV1ÿj^²j"AZjÿAX^H—°Áà ¬,#™r#tþÈ’¬,$kÀ[ЪÁèöÄu÷ë߶OÿHÿÏóªë×H+øÿç \ No newline at end of file diff --git a/scripts/static-pie-template-amd64-shorter.rs b/scripts/static-pie-template-amd64-shorter.rs index ddfe621b..4c6a8470 100644 --- a/scripts/static-pie-template-amd64-shorter.rs +++ b/scripts/static-pie-template-amd64-shorter.rs @@ -5,4 +5,4 @@ $$$$solution_src$$$$ } // SOLUTION END -#[no_link]extern crate std;#[no_mangle]unsafe fn _start(){std::arch::asm!(".quad 56c931459958096ah,6a07b25e016aff31h,0f5841ff6a5a4122h,0e0c11fb097485e05h,74257299232cac0dh,6b242cac92c8fe16h,8e8c1aad0015bc0h,92dfebf77510c4f6h,0f3cfff48ff4fb60fh,0f87f2b48d5eb92aah,59391",in("rsi")r$$$$binary_raw_base91$$$$.as_ptr())} \ No newline at end of file +#[no_link]extern crate std;#[no_mangle]unsafe fn _start(){std::arch::asm!(".quad 56c931459958096ah,6a07b25e016aff31h,0f5841ff6a5a4122h,0e0c11fb097485e05h,74237299232cac0dh,6b242cac92c8fe16h,8e8c1aad0015bc0h,0fdfebf77510c4f6h,0aaf3cfff48ff4fb6h,0e7fff87f2b48d7ebh",in("rsi")r$$$$binary_raw_base91$$$$.as_ptr())} \ No newline at end of file From 4bb0da78d61d8dcbdcfb57b8d2d03dd46f46324b Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Thu, 30 Nov 2023 22:28:16 +0900 Subject: [PATCH 38/52] Update static-pie-prestub-amd64-shorter.asm 
Fix comment --- scripts/static-pie-prestub-amd64-shorter.asm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/static-pie-prestub-amd64-shorter.asm b/scripts/static-pie-prestub-amd64-shorter.asm index 39878f3f..895f6edc 100644 --- a/scripts/static-pie-prestub-amd64-shorter.asm +++ b/scripts/static-pie-prestub-amd64-shorter.asm @@ -34,7 +34,7 @@ _svc_alloc_rwx: ; Base91 decoder _decode: - mov al, 0x1f ; syscall preserves rax; hence at this point rax=9 + mov al, 0x1f ; syscall preserves all registers except rcx, r11, rax; hence at this point rax=(previous rdi)=0 _decode_loop: shl eax, 13 _decode_loop_2: From 6dcd596a58bf6776639df58cf5ed0cee84b8841b Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Thu, 30 Nov 2023 22:48:19 +0900 Subject: [PATCH 39/52] short: shorten the shorter prestub --- scripts/static-pie-prestub-amd64-shorter.asm | 2 +- scripts/static-pie-prestub-amd64-shorter.bin | 2 +- scripts/static-pie-template-amd64-shorter.rs | 2 +- src/bin/codegen.rs | 8 +++----- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/scripts/static-pie-prestub-amd64-shorter.asm b/scripts/static-pie-prestub-amd64-shorter.asm index 895f6edc..37c1e807 100644 --- a/scripts/static-pie-prestub-amd64-shorter.asm +++ b/scripts/static-pie-prestub-amd64-shorter.asm @@ -64,4 +64,4 @@ _decode_zeros: ; Jump to entrypoint _jump_to_entrypoint: sub rdi, qword [rdi-8] - jmp rdi \ No newline at end of file + call rdi \ No newline at end of file diff --git a/scripts/static-pie-prestub-amd64-shorter.bin b/scripts/static-pie-prestub-amd64-shorter.bin index d133d946..bf1c3b48 100644 --- a/scripts/static-pie-prestub-amd64-shorter.bin +++ b/scripts/static-pie-prestub-amd64-shorter.bin @@ -1 +1 @@ -j X™E1ÉV1ÿj^²j"AZjÿAX^H—°Áà ¬,#™r#tþÈ’¬,$kÀ[ЪÁèöÄu÷ë߶OÿHÿÏóªë×H+øÿç \ No newline at end of file +j X™E1ÉV1ÿj^²j"AZjÿAX^H—°Áà ¬,#™r#tþÈ’¬,$kÀ[ЪÁèöÄu÷ë߶OÿHÿÏóªë×H+øÿ× \ No newline at end of file diff --git a/scripts/static-pie-template-amd64-shorter.rs 
b/scripts/static-pie-template-amd64-shorter.rs index 4c6a8470..87d7034a 100644 --- a/scripts/static-pie-template-amd64-shorter.rs +++ b/scripts/static-pie-template-amd64-shorter.rs @@ -5,4 +5,4 @@ $$$$solution_src$$$$ } // SOLUTION END -#[no_link]extern crate std;#[no_mangle]unsafe fn _start(){std::arch::asm!(".quad 56c931459958096ah,6a07b25e016aff31h,0f5841ff6a5a4122h,0e0c11fb097485e05h,74237299232cac0dh,6b242cac92c8fe16h,8e8c1aad0015bc0h,0fdfebf77510c4f6h,0aaf3cfff48ff4fb6h,0e7fff87f2b48d7ebh",in("rsi")r$$$$binary_raw_base91$$$$.as_ptr())} \ No newline at end of file +#[no_link]extern crate std;#[no_mangle]unsafe fn _start(){std::arch::asm!(".quad 56c931459958096ah,6a07b25e016aff31h,0f5841ff6a5a4122h,0e0c11fb097485e05h,74237299232cac0dh,6b242cac92c8fe16h,8e8c1aad0015bc0h,0fdfebf77510c4f6h,0aaf3cfff48ff4fb6h,0d7fff87f2b48d7ebh",in("rsi")r$$$$binary_raw_base91$$$$.as_ptr())} \ No newline at end of file diff --git a/src/bin/codegen.rs b/src/bin/codegen.rs index a3db1249..9289009e 100644 --- a/src/bin/codegen.rs +++ b/src/bin/codegen.rs @@ -83,17 +83,15 @@ unsafe extern "win64" fn _start() -> ! { ); // For "short", we always assume we are running with loader on Linux, // since "short" is only meaningful when submitting to online judges (not local test runs). + // Note that the stub will ensure that stack is aligned before caling _start. + // Also, for "short" on x86_64 Linux, we don't need PLATFORM_DATA, so we don't fabricate it. 
#[cfg(feature = "short")] asm!( "clc", // Not needed but packager wants it - "sub rsp, 80", // 16 + 80 = 96 = 16*6 -> stack alignment preserved - "push 1", // env_flags = 1 (ENV_FLAGS_LINUX_STYLE_CHKSTK) - "push 2", // env_id = 2 (ENV_ID_LINUX) + "push rax", // Align stack "lea rdi, [rip + __ehdr_start]", "lea rsi, [rip + _DYNAMIC]", "call {0}", - "push rsp", - "pop rcx", "call {1}", // This won't return since on Linux we invoke SYS_exitgroup in binary sym loader::amd64_elf::relocate, sym _start_rust, From 55b515ec60ce2636ce9329006ab6593492ec477c Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Thu, 30 Nov 2023 22:48:24 +0900 Subject: [PATCH 40/52] Update static-pie-gen.py --- scripts/static-pie-gen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/static-pie-gen.py b/scripts/static-pie-gen.py index c0c43d63..0345ef7a 100644 --- a/scripts/static-pie-gen.py +++ b/scripts/static-pie-gen.py @@ -123,7 +123,7 @@ # template template_candidates = [template_path] -if lang_name == "Rust" and "x86_64" in target_name and "short" in template_path and len(memory_bin) <= 4096: +if lang_name == "Rust" and "x86_64" in target_name and "short" in template_path and len(code_raw) <= 4096: template_candidates.append(template_path.replace("short", "shorter")) out = None From 248313db58c45055acd6422b5407528d56ac1626 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Thu, 30 Nov 2023 23:00:41 +0900 Subject: [PATCH 41/52] short: shorten the shorter prestub --- scripts/static-pie-prestub-amd64-shorter.asm | 2 +- scripts/static-pie-prestub-amd64-shorter.bin | 2 +- scripts/static-pie-template-amd64-shorter.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/static-pie-prestub-amd64-shorter.asm b/scripts/static-pie-prestub-amd64-shorter.asm index 37c1e807..5aba00cd 100644 --- a/scripts/static-pie-prestub-amd64-shorter.asm +++ b/scripts/static-pie-prestub-amd64-shorter.asm @@ -56,8 +56,8 @@ 
_decode_output: jnz _decode_output jmp _decode_loop _decode_zeros: - movzx ecx, byte [rdi-1] dec rdi + movzx ecx, byte [rdi] rep stosb ; the fact we jumped to here ensures al=0 jmp _decode_loop_2 diff --git a/scripts/static-pie-prestub-amd64-shorter.bin b/scripts/static-pie-prestub-amd64-shorter.bin index bf1c3b48..87ba8fff 100644 --- a/scripts/static-pie-prestub-amd64-shorter.bin +++ b/scripts/static-pie-prestub-amd64-shorter.bin @@ -1 +1 @@ -j X™E1ÉV1ÿj^²j"AZjÿAX^H—°Áà ¬,#™r#tþÈ’¬,$kÀ[ЪÁèöÄu÷ë߶OÿHÿÏóªë×H+øÿ× \ No newline at end of file +j X™E1ÉV1ÿj^²j"AZjÿAX^H—°Áà ¬,#™r"tþÈ’¬,$kÀ[ЪÁèöÄu÷ëßHÿ϶óªëØH+øÿ× \ No newline at end of file diff --git a/scripts/static-pie-template-amd64-shorter.rs b/scripts/static-pie-template-amd64-shorter.rs index 87d7034a..616b5418 100644 --- a/scripts/static-pie-template-amd64-shorter.rs +++ b/scripts/static-pie-template-amd64-shorter.rs @@ -5,4 +5,4 @@ $$$$solution_src$$$$ } // SOLUTION END -#[no_link]extern crate std;#[no_mangle]unsafe fn _start(){std::arch::asm!(".quad 56c931459958096ah,6a07b25e016aff31h,0f5841ff6a5a4122h,0e0c11fb097485e05h,74237299232cac0dh,6b242cac92c8fe16h,8e8c1aad0015bc0h,0fdfebf77510c4f6h,0aaf3cfff48ff4fb6h,0d7fff87f2b48d7ebh",in("rsi")r$$$$binary_raw_base91$$$$.as_ptr())} \ No newline at end of file +#[no_link]extern crate std;#[no_mangle]unsafe fn _start(){std::arch::asm!(".octa 6a07b25e016aff3156c931459958096ah,0e0c11fb097485e050f5841ff6a5a4122h,6b242cac92c8fe1674227299232cac0dh,48dfebf77510c4f608e8c1aad0015bc0h,0d7fff87f2b48d8ebaaf30fb60fcfffh",in("rsi")r$$$$binary_raw_base91$$$$.as_ptr())} \ No newline at end of file From 5e9b73a07ff376be1efbe43d7c486db6d9871743 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Fri, 1 Dec 2023 00:12:53 +0900 Subject: [PATCH 42/52] Update VS Code task + Change python -> python3 --- .vscode/tasks.json | 8 ++++++++ release-html.sh | 2 +- release-wasm32.sh | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.vscode/tasks.json 
b/.vscode/tasks.json index 61a5980d..3254b66b 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -27,6 +27,14 @@ "$rustc" ], }, + { + "label": "build-release-amd64-short-submit", + "type": "shell", + "command": "mkdir -p ./target/x86_64-unknown-linux-gnu-short/release; ./release.sh --features short > ./target/x86_64-unknown-linux-gnu-short/release/loader.c; code --reuse-window ./target/x86_64-unknown-linux-gnu-short/release/loader.c", + "problemMatcher": [ + "$rustc" + ], + }, { "label": "build-release-amd64-rs-submit", "type": "shell", diff --git a/release-html.sh b/release-html.sh index 1085cc66..99da4a7e 100644 --- a/release-html.sh +++ b/release-html.sh @@ -1,3 +1,3 @@ >&2 echo "Building project for target wasm32-unknown-unknown, language JavaScript, build mode Release" cargo +nightly build --target wasm32-unknown-unknown --bin=basm-submit --release "$@" -python scripts/wasm-gen.py scripts/wasm-template.html \ No newline at end of file +python3 scripts/wasm-gen.py scripts/wasm-template.html \ No newline at end of file diff --git a/release-wasm32.sh b/release-wasm32.sh index 1262d3d0..e3263196 100755 --- a/release-wasm32.sh +++ b/release-wasm32.sh @@ -1,3 +1,3 @@ >&2 echo "Building project for target wasm32-unknown-unknown, language JavaScript, build mode Release" cargo +nightly build --target wasm32-unknown-unknown --bin=basm-submit --release "$@" -python scripts/wasm-gen.py scripts/wasm-template.js \ No newline at end of file +python3 scripts/wasm-gen.py scripts/wasm-template.js \ No newline at end of file From 309a6807cf304546d2ad8b9b6556f9ab34fb0a83 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Fri, 1 Dec 2023 00:29:51 +0900 Subject: [PATCH 43/52] short: fix rsp alignment issue --- scripts/static-pie-prestub-amd64-short.asm | 2 +- scripts/static-pie-prestub-amd64-short.bin | Bin 93 -> 93 bytes scripts/static-pie-template-amd64-short.rs | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/scripts/static-pie-prestub-amd64-short.asm b/scripts/static-pie-prestub-amd64-short.asm index f4982524..406e9b92 100644 --- a/scripts/static-pie-prestub-amd64-short.asm +++ b/scripts/static-pie-prestub-amd64-short.asm @@ -14,7 +14,7 @@ section .text ; Align stack to 16 byte boundary ; [rsp+ 32, rsp+120): PLATFORM_DATA ; [rsp+ 0, rsp+ 32): (shadow space for win64 calling convention) - enter 48, 0 + enter 56, 0 push 1 pop rcx ; Enable ENV_FLAGS_LINUX_STYLE_CHKSTK outside Windows call _t diff --git a/scripts/static-pie-prestub-amd64-short.bin b/scripts/static-pie-prestub-amd64-short.bin index 31b5787fd7a1f612e610b3e8e2abc95d4e0a8a14..0c17d9b89c68f15469b07aabe37e216355eb8b6f 100644 GIT binary patch delta 8 Pcma!zWjbLoktrGg3}FJR delta 8 Pcma!zWjbLnktrGg3_${; diff --git a/scripts/static-pie-template-amd64-short.rs b/scripts/static-pie-template-amd64-short.rs index e64a9046..44083009 100644 --- a/scripts/static-pie-template-amd64-short.rs +++ b/scripts/static-pie-template-amd64-short.rs @@ -5,4 +5,4 @@ $$$$solution_src$$$$ } // SOLUTION END -#[no_link]extern crate std;static mut P:[u8;$$$$binary_base91_len$$$$]=*br$$$$binary_base91$$$$;#[no_mangle]unsafe fn _start(){std::arch::asm!(".quad 0e859016a000030c8h,6758096a0000003ch,3156c931459917e3h,41226a07b2ce89ffh,5e050f5841ff6a5ah,2cac0de0c11fb0c3h,242cac9299f57224h,0e8c1aad0015bc06bh,0e3ebf77510c4f608h,51c1ff515052535bh,4850d3ff28ec8348h,0e3ffc93197h",in("r14")P.as_mut_ptr(),in("rsi")r$$$$stub_base91$$$$.as_ptr())} \ No newline at end of file +#[no_link]extern crate std;static mut P:[u8;$$$$binary_base91_len$$$$]=*br$$$$binary_base91$$$$;#[no_mangle]unsafe fn _start(){std::arch::asm!(".quad 0e859016a000038c8h,6758096a0000003ch,3156c931459917e3h,41226a07b2ce89ffh,5e050f5841ff6a5ah,2cac0de0c11fb0c3h,242cac9299f57224h,0e8c1aad0015bc06bh,0e3ebf77510c4f608h,51c1ff515052535bh,4850d3ff28ec8348h,0e3ffc93197h",in("r14")P.as_mut_ptr(),in("rsi")r$$$$stub_base91$$$$.as_ptr())} \ No newline at end of file From 
d672279736b507aaf514ced68db3252df298566e Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Fri, 1 Dec 2023 10:06:15 +0900 Subject: [PATCH 44/52] short: shorten the shorter prestub --- scripts/base91.py | 4 ++-- scripts/static-pie-prestub-amd64-print.py | 11 ++++++++--- scripts/static-pie-prestub-amd64-shorter.asm | 4 ++-- scripts/static-pie-prestub-amd64-shorter.bin | 2 +- scripts/static-pie-template-amd64-shorter.rs | 2 +- 5 files changed, 14 insertions(+), 9 deletions(-) diff --git a/scripts/base91.py b/scripts/base91.py index c3ec28d2..7f2e17fa 100644 --- a/scripts/base91.py +++ b/scripts/base91.py @@ -13,11 +13,11 @@ def encode(x_in, use_rle=False): i += 1 if len(x) > 0 and x[-1] == 0: zeros_cnt = 1 - while i - 1 + zeros_cnt < len(x_in) and zeros_cnt < 255 and x_in[i - 1 + zeros_cnt] == 0: + while i - 1 + zeros_cnt < len(x_in) and zeros_cnt < 256 and x_in[i - 1 + zeros_cnt] == 0: zeros_cnt += 1 if zeros_cnt >= 2: x.pop() - x.append(zeros_cnt) + x.append(zeros_cnt - 1) sharp_insertion_points.append((current_bits // 13 * 2) + len(sharp_insertion_points)) i += zeros_cnt - 1 sharp_insertion_points = list(reversed(sharp_insertion_points)) diff --git a/scripts/static-pie-prestub-amd64-print.py b/scripts/static-pie-prestub-amd64-print.py index e8337d0d..9e39f4d9 100644 --- a/scripts/static-pie-prestub-amd64-print.py +++ b/scripts/static-pie-prestub-amd64-print.py @@ -22,9 +22,14 @@ prestub = prestub[:j] # settings -SPECIFIER = ".quad" -CHUNK_SIZE = 8 -ENTRIES_PER_LINE = 4 +if "--octa" in sys.argv: + SPECIFIER = ".octa" + CHUNK_SIZE = 16 + ENTRIES_PER_LINE = 10 +else: + SPECIFIER = ".quad" + CHUNK_SIZE = 8 + ENTRIES_PER_LINE = 4 # pad to align at `CHUNK_SIZE`-byte boundary while len(prestub) % CHUNK_SIZE != 0: diff --git a/scripts/static-pie-prestub-amd64-shorter.asm b/scripts/static-pie-prestub-amd64-shorter.asm index 5aba00cd..b2ecfca7 100644 --- a/scripts/static-pie-prestub-amd64-shorter.asm +++ b/scripts/static-pie-prestub-amd64-shorter.asm @@ 
-31,6 +31,7 @@ _svc_alloc_rwx: ; Current state: rax = new buffer xchg rax, rdi ; rdi = new buffer + xor ecx, ecx ; ecx = 0 ; Base91 decoder _decode: @@ -56,8 +57,7 @@ _decode_output: jnz _decode_output jmp _decode_loop _decode_zeros: - dec rdi - movzx ecx, byte [rdi] + xchg byte [rdi-1], cl ; ecx = cl = ((number of zeros) - 1), byte [rdi-1] = 0 rep stosb ; the fact we jumped to here ensures al=0 jmp _decode_loop_2 diff --git a/scripts/static-pie-prestub-amd64-shorter.bin b/scripts/static-pie-prestub-amd64-shorter.bin index 87ba8fff..1f30a634 100644 --- a/scripts/static-pie-prestub-amd64-shorter.bin +++ b/scripts/static-pie-prestub-amd64-shorter.bin @@ -1 +1 @@ -j X™E1ÉV1ÿj^²j"AZjÿAX^H—°Áà ¬,#™r"tþÈ’¬,$kÀ[ЪÁèöÄu÷ëßHÿ϶óªëØH+øÿ× \ No newline at end of file +j X™E1ÉV1ÿj^²j"AZjÿAX^H—1É°Áà ¬,#™rtþÈ’¬,$kÀ[ЪÁèöÄu÷ë߆OÿóªëÛH+øÿ× \ No newline at end of file diff --git a/scripts/static-pie-template-amd64-shorter.rs b/scripts/static-pie-template-amd64-shorter.rs index 616b5418..07b7aed9 100644 --- a/scripts/static-pie-template-amd64-shorter.rs +++ b/scripts/static-pie-template-amd64-shorter.rs @@ -5,4 +5,4 @@ $$$$solution_src$$$$ } // SOLUTION END -#[no_link]extern crate std;#[no_mangle]unsafe fn _start(){std::arch::asm!(".octa 6a07b25e016aff3156c931459958096ah,0e0c11fb097485e050f5841ff6a5a4122h,6b242cac92c8fe1674227299232cac0dh,48dfebf77510c4f608e8c1aad0015bc0h,0d7fff87f2b48d8ebaaf30fb60fcfffh",in("rsi")r$$$$binary_raw_base91$$$$.as_ptr())} \ No newline at end of file +#[no_link]extern crate std;#[no_mangle]unsafe fn _start(){std::arch::asm!(".octa 6a07b25e016aff3156c931459958096ah,1fb0c93197485e050f5841ff6a5a4122h,2cac92c8fe16741f7299232cac0de0c1h,0ebf77510c4f608e8c1aad0015bc06b24h,0d7fff87f2b48dbebaaf3ff4f86dfh",in("rsi")r$$$$binary_raw_base91$$$$.as_ptr())} \ No newline at end of file From f571825d97b3797a5efa56a60755d0ef540f5a3f Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Fri, 1 Dec 2023 10:10:35 +0900 Subject: [PATCH 45/52] short: shorten the 
shorter prestub --- scripts/static-pie-gen.py | 2 +- scripts/static-pie-prestub-amd64-shorter.asm | 5 ++--- scripts/static-pie-prestub-amd64-shorter.bin | 2 +- scripts/static-pie-template-amd64-shorter.rs | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/scripts/static-pie-gen.py b/scripts/static-pie-gen.py index 0345ef7a..07e8dbc6 100644 --- a/scripts/static-pie-gen.py +++ b/scripts/static-pie-gen.py @@ -123,7 +123,7 @@ # template template_candidates = [template_path] -if lang_name == "Rust" and "x86_64" in target_name and "short" in template_path and len(code_raw) <= 4096: +if lang_name == "Rust" and "x86_64" in target_name and "short" in template_path and len(code_raw) <= 4096 - 256: template_candidates.append(template_path.replace("short", "shorter")) out = None diff --git a/scripts/static-pie-prestub-amd64-shorter.asm b/scripts/static-pie-prestub-amd64-shorter.asm index b2ecfca7..d338463e 100644 --- a/scripts/static-pie-prestub-amd64-shorter.asm +++ b/scripts/static-pie-prestub-amd64-shorter.asm @@ -42,8 +42,7 @@ _decode_loop_2: lodsb sub al, 0x23 cdq - jc _jump_to_entrypoint - jz _decode_zeros + jbe _decode_zeros dec al xchg eax, edx lodsb @@ -59,7 +58,7 @@ _decode_output: _decode_zeros: xchg byte [rdi-1], cl ; ecx = cl = ((number of zeros) - 1), byte [rdi-1] = 0 rep stosb ; the fact we jumped to here ensures al=0 - jmp _decode_loop_2 + jz _decode_loop_2 ; Jump to entrypoint _jump_to_entrypoint: diff --git a/scripts/static-pie-prestub-amd64-shorter.bin b/scripts/static-pie-prestub-amd64-shorter.bin index 1f30a634..408a944b 100644 --- a/scripts/static-pie-prestub-amd64-shorter.bin +++ b/scripts/static-pie-prestub-amd64-shorter.bin @@ -1 +1 @@ -j X™E1ÉV1ÿj^²j"AZjÿAX^H—1É°Áà ¬,#™rtþÈ’¬,$kÀ[ЪÁèöÄu÷ë߆OÿóªëÛH+øÿ× \ No newline at end of file +j X™E1ÉV1ÿj^²j"AZjÿAX^H—1É°Áà ¬,#™vþÈ’¬,$kÀ[ЪÁèöÄu÷ëá†OÿóªtÝH+øÿ× \ No newline at end of file diff --git a/scripts/static-pie-template-amd64-shorter.rs b/scripts/static-pie-template-amd64-shorter.rs index 
07b7aed9..53066157 100644 --- a/scripts/static-pie-template-amd64-shorter.rs +++ b/scripts/static-pie-template-amd64-shorter.rs @@ -5,4 +5,4 @@ $$$$solution_src$$$$ } // SOLUTION END -#[no_link]extern crate std;#[no_mangle]unsafe fn _start(){std::arch::asm!(".octa 6a07b25e016aff3156c931459958096ah,1fb0c93197485e050f5841ff6a5a4122h,2cac92c8fe16741f7299232cac0de0c1h,0ebf77510c4f608e8c1aad0015bc06b24h,0d7fff87f2b48dbebaaf3ff4f86dfh",in("rsi")r$$$$binary_raw_base91$$$$.as_ptr())} \ No newline at end of file +#[no_link]extern crate std;#[no_mangle]unsafe fn _start(){std::arch::asm!(".octa 6a07b25e016aff3156c931459958096ah,1fb0c93197485e050f5841ff6a5a4122h,6b242cac92c8fe167699232cac0de0c1h,86e1ebf77510c4f608e8c1aad0015bc0h,0d7fff87f2b48dd74aaf3ff4fh",in("rsi")r$$$$binary_raw_base91$$$$.as_ptr())} \ No newline at end of file From 78007327dd0711c972a2d63283265af07428f261 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Fri, 1 Dec 2023 13:47:01 +0900 Subject: [PATCH 46/52] short: shorten the shorter prestub --- scripts/static-pie-gen.py | 1 + scripts/static-pie-prestub-amd64-shorter.asm | 12 +++++------- scripts/static-pie-prestub-amd64-shorter.bin | 2 +- scripts/static-pie-template-amd64-shorter.rs | 2 +- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/scripts/static-pie-gen.py b/scripts/static-pie-gen.py index 07e8dbc6..a8f7488b 100644 --- a/scripts/static-pie-gen.py +++ b/scripts/static-pie-gen.py @@ -68,6 +68,7 @@ sol = "".join(sol) # binary (raw) +# Since we append a little-endian 8-byte nonnegative integer, we can practically ensure that the last byte is zero. 
code_raw = memory_bin[:-8] code_raw += (len(code_raw) + 8 - loader_fdict['entrypoint_offset']).to_bytes(8, byteorder='little') code_raw_b91 = base91.encode(code_raw, use_rle=True).decode('ascii') diff --git a/scripts/static-pie-prestub-amd64-shorter.asm b/scripts/static-pie-prestub-amd64-shorter.asm index d338463e..1316865d 100644 --- a/scripts/static-pie-prestub-amd64-shorter.asm +++ b/scripts/static-pie-prestub-amd64-shorter.asm @@ -19,8 +19,7 @@ _svc_alloc_rwx: xor r9d, r9d ; offset push rsi ; save rsi xor edi, edi ; rdi=0 - push 1 - pop rsi ; size + mov esi, eax ; size (anything in [1, 4096]) mov dl, 7 ; protect (safe since we have ensured rdx=0) push 0x22 pop r10 ; flags @@ -31,7 +30,6 @@ _svc_alloc_rwx: ; Current state: rax = new buffer xchg rax, rdi ; rdi = new buffer - xor ecx, ecx ; ecx = 0 ; Base91 decoder _decode: @@ -40,15 +38,15 @@ _decode_loop: shl eax, 13 _decode_loop_2: lodsb + xor ecx, ecx ; ecx = 0 sub al, 0x23 - cdq jbe _decode_zeros dec al - xchg eax, edx + xchg eax, ecx lodsb sub al, 0x24 imul eax, eax, 91 - add eax, edx + add eax, ecx _decode_output: stosb shr eax, 8 @@ -57,7 +55,7 @@ _decode_output: jmp _decode_loop _decode_zeros: xchg byte [rdi-1], cl ; ecx = cl = ((number of zeros) - 1), byte [rdi-1] = 0 - rep stosb ; the fact we jumped to here ensures al=0 + rep stosb ; we have made sure the last byte is zero (in the packager) jz _decode_loop_2 ; Jump to entrypoint diff --git a/scripts/static-pie-prestub-amd64-shorter.bin b/scripts/static-pie-prestub-amd64-shorter.bin index 408a944b..84271c4b 100644 --- a/scripts/static-pie-prestub-amd64-shorter.bin +++ b/scripts/static-pie-prestub-amd64-shorter.bin @@ -1 +1 @@ -j X™E1ÉV1ÿj^²j"AZjÿAX^H—1É°Áà ¬,#™vþÈ’¬,$kÀ[ЪÁèöÄu÷ëá†OÿóªtÝH+øÿ× \ No newline at end of file +j X™E1ÉV1ÿ‰Æ²j"AZjÿAX^H—°Áà ¬1É,#vþÈ‘¬,$kÀ[ȪÁèöÄu÷ëà†OÿóªtÜH+øÿ× \ No newline at end of file diff --git a/scripts/static-pie-template-amd64-shorter.rs b/scripts/static-pie-template-amd64-shorter.rs index 53066157..cca645c3 100644 --- 
a/scripts/static-pie-template-amd64-shorter.rs +++ b/scripts/static-pie-template-amd64-shorter.rs @@ -5,4 +5,4 @@ $$$$solution_src$$$$ } // SOLUTION END -#[no_link]extern crate std;#[no_mangle]unsafe fn _start(){std::arch::asm!(".octa 6a07b25e016aff3156c931459958096ah,1fb0c93197485e050f5841ff6a5a4122h,6b242cac92c8fe167699232cac0de0c1h,86e1ebf77510c4f608e8c1aad0015bc0h,0d7fff87f2b48dd74aaf3ff4fh",in("rsi")r$$$$binary_raw_base91$$$$.as_ptr())} \ No newline at end of file +#[no_link]extern crate std;#[no_mangle]unsafe fn _start(){std::arch::asm!(".octa 226a07b2c689ff3156c931459958096ah,0de0c11fb097485e050f5841ff6a5a41h,5bc06b242cac91c8fe1676232cc931ach,0ff4f86e0ebf77510c4f608e8c1aac801h,0d7fff87f2b48dc74aaf3h",in("rsi")r$$$$binary_raw_base91$$$$.as_ptr())} \ No newline at end of file From 1d4095964ab91cb814af22fccc88b7b4d2d0602e Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Fri, 1 Dec 2023 17:37:48 +0900 Subject: [PATCH 47/52] amd64-elf: streamline DYNAMIC section We eliminate all entries except necessary relocation-related ones. 
--- scripts/static-pie-elf2bin.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/scripts/static-pie-elf2bin.py b/scripts/static-pie-elf2bin.py index 7cb885d3..e32d6b73 100644 --- a/scripts/static-pie-elf2bin.py +++ b/scripts/static-pie-elf2bin.py @@ -160,7 +160,28 @@ def load_elf64(elf): continue # since bytearray is zero-initialized dst_off, src_off, cnt = sh_dict['sh_addr'], sh_dict['sh_offset'], sh_dict['sh_size'] - memory_bin[dst_off:dst_off+cnt] = elf[src_off:src_off+cnt] + blob = elf[src_off:src_off+cnt] + + if sh_dict['sh_type'] == SHT_DYNAMIC: + # Trim the DYNAMIC section, leaving only relocation-related entries + # 16 == sizeof(Elf64_Dyn) + dst = 0 + for src in range(0, len(blob), 16): + # Included entries: + # DT_PLTRELSZ = 2, DT_RELA = 7, DT_RELASZ = 8, DT_RELAENT = 9, + # DT_REL = 17, DT_RELSZ = 18, DT_RELENT = 19, DT_PLTREL = 20, + # DT_TEXTREL = 22, DT_JMPREL = 23. + # + # Note: DT_RELACOUNT = 0x6fff_fff9 and DT_RELCOUNT = 0x6fff_fffa + # are not included; they are redundant since + # DT_RELACOUNT = DT_RELASZ/DT_RELAENT and + # DT_RELCOUNT = DT_RELSZ/DT_RELENT. + if b2i(blob[src:src+8]) in [2, 7, 8, 9, 17, 18, 19, 20, 22, 23]: + blob[dst:dst+16] = blob[src:src+16] + dst += 16 + blob[dst:] = bytearray(len(blob[dst:])) # fill remaining part with zeros + + memory_bin[dst_off:dst_off+cnt] = blob entrypoint_offset = b2i(elf[24:32]) return memory_bin, pos_begin, entrypoint_offset From fdea9c5401f314679fe9b3d0529e384b1042e9d3 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Fri, 1 Dec 2023 23:13:52 +0900 Subject: [PATCH 48/52] short: introduce shorter template for C/C++ Enables 156KB/0ms with the shorter generation paradigm. 
--- scripts/static-pie-gen.py | 6 +- scripts/static-pie-prestub-amd64-print.py | 12 ++- .../static-pie-prestub-amd64-shorter-c.asm | 98 ++++++++++++++++++ .../static-pie-prestub-amd64-shorter-c.bin | Bin 0 -> 148 bytes scripts/static-pie-template-amd64-shorter.c | 10 ++ 5 files changed, 123 insertions(+), 3 deletions(-) create mode 100644 scripts/static-pie-prestub-amd64-shorter-c.asm create mode 100644 scripts/static-pie-prestub-amd64-shorter-c.bin create mode 100644 scripts/static-pie-template-amd64-shorter.c diff --git a/scripts/static-pie-gen.py b/scripts/static-pie-gen.py index a8f7488b..c7404639 100644 --- a/scripts/static-pie-gen.py +++ b/scripts/static-pie-gen.py @@ -74,6 +74,10 @@ code_raw_b91 = base91.encode(code_raw, use_rle=True).decode('ascii') code_raw_b91_len = len(code_raw_b91) code_raw_b91 = '"' + code_raw_b91 + '"' +if lang_name == "C": + # Escape '\' and '?' + code_raw_b91 = code_raw_b91.replace('\\', '\\\\') + code_raw_b91 = code_raw_b91.replace('?', '\\?') # binary with open(compressed_binary_path, "rb") as f: @@ -124,7 +128,7 @@ # template template_candidates = [template_path] -if lang_name == "Rust" and "x86_64" in target_name and "short" in template_path and len(code_raw) <= 4096 - 256: +if lang_name in ["C", "Rust"] and "x86_64" in target_name and "short" in template_path and len(code_raw) <= 4096 - 256: template_candidates.append(template_path.replace("short", "shorter")) out = None diff --git a/scripts/static-pie-prestub-amd64-print.py b/scripts/static-pie-prestub-amd64-print.py index 9e39f4d9..4fb77564 100644 --- a/scripts/static-pie-prestub-amd64-print.py +++ b/scripts/static-pie-prestub-amd64-print.py @@ -30,6 +30,14 @@ SPECIFIER = ".quad" CHUNK_SIZE = 8 ENTRIES_PER_LINE = 4 +if "--c" in sys.argv: + PREFIX = "0x" + SUFFIX = "" + SPECIFIER = "" + ENTRIES_PER_LINE = 100 +else: + PREFIX = "" + SUFFIX = "h" # pad to align at `CHUNK_SIZE`-byte boundary while len(prestub) % CHUNK_SIZE != 0: @@ -48,8 +56,8 @@ def to_hex_short(y): nonzero_idx 
= len(out) while nonzero_idx > 1 and out[nonzero_idx-1] == '0': nonzero_idx -= 1 - out2 = out[:nonzero_idx] + "h<<" + str((len(out) - nonzero_idx) * 4) - out = out + "h" + out2 = PREFIX + out[:nonzero_idx] + SUFFIX + "<<" + str((len(out) - nonzero_idx) * 4) + out = PREFIX + out + SUFFIX if len(out2) < len(out): out = out2 if ord(out[0]) >= ord('a'): diff --git a/scripts/static-pie-prestub-amd64-shorter-c.asm b/scripts/static-pie-prestub-amd64-shorter-c.asm new file mode 100644 index 00000000..f894356a --- /dev/null +++ b/scripts/static-pie-prestub-amd64-shorter-c.asm @@ -0,0 +1,98 @@ +; -*- tab-width: 4 -*- +; +; The prestub for amd64-rust target +; (prestub: the code that runs before the stub and sets the stage) +; +; build: nasm -f bin -O9 static-pie-prestub-amd64-shorter-c.asm -o static-pie-prestub-amd64-shorter-c.bin +; note: after building with the above command, run static-pie-prestub-amd64-print.py static-pie-prestub-amd64-shorter-c.bin --c +; to obtain the form that can be embedded in Rust as inline assembly. 
+ +BITS 64 +ORG 0 +section .text + +; Reserve space on stack + and rsp, 0xffffffffffffff80 ; ensures at least 128 bytes + +; mprotect: make stack executable + mov eax, 10 ; mprotect + mov esi, 0x1000 ; len + push rdi ; Save binary_raw_base91 + lea rdi, [rsp + 8] ; addr + push 7 ; protect (RWX) + pop rdx + and rdi, 0xfffffffffffff000 ; align to page boundary (4K) + syscall + +; Relocate to stack + lea rsi, [rel _start] + lea rdi, [rsp + 8] + push rdi ; _start of relocated stub + mov ecx, _end - _start + rep movsb + +; Jump to stack + pop rax ; _start of relocated stub + call rax + +_start: + +; Free the .text section + pop rdi ; Get RIP saved on stack by call instruction + and rdi, 0xfffffffffffff000 + mov esi, 0x1000 + mov eax, 11 + syscall + +; svc_alloc_rwx for Linux +_svc_alloc_rwx: + push 9 + pop rax ; syscall id of x64 mmap + cdq ; rdx=0 + xor r9d, r9d ; offset + xor edi, edi ; rdi=0 + mov esi, eax ; size (anything in [1, 4096]) + mov dl, 7 ; protect (safe since we have ensured rdx=0) + push 0x22 + pop r10 ; flags + push -1 + pop r8 ; fd + syscall + pop rsi ; restore rsi + +; Current state: rax = new buffer + xchg rax, rdi ; rdi = new buffer + +; Base91 decoder +_decode: + mov al, 0x1f ; syscall preserves all registers except rcx, r11, rax; hence at this point rax=(previous rdi)=0 +_decode_loop: + shl eax, 13 +_decode_loop_2: + lodsb + xor ecx, ecx ; ecx = 0 + sub al, 0x23 + jbe _decode_zeros + dec al + xchg eax, ecx + lodsb + sub al, 0x24 + imul eax, eax, 91 + add eax, ecx +_decode_output: + stosb + shr eax, 8 + test ah, 16 + jnz _decode_output + jmp _decode_loop +_decode_zeros: + xchg byte [rdi-1], cl ; ecx = cl = ((number of zeros) - 1), byte [rdi-1] = 0 + rep stosb ; we have made sure the last byte is zero (in the packager) + jz _decode_loop_2 + +; Jump to entrypoint +_jump_to_entrypoint: + sub rdi, qword [rdi-8] + call rdi + +_end: \ No newline at end of file diff --git a/scripts/static-pie-prestub-amd64-shorter-c.bin 
b/scripts/static-pie-prestub-amd64-shorter-c.bin new file mode 100644 index 0000000000000000000000000000000000000000..437271e31ea342fa3fcd13387174f74da2cb57d8 GIT binary patch literal 148 zcmeZhe$udmi-Cb*AA1ue=@PFsAP3&1pj!{|v9V7Tz<2 Date: Fri, 1 Dec 2023 23:29:53 +0900 Subject: [PATCH 49/52] Update static-pie-prestub-amd64-shorter-c.asm --- scripts/static-pie-prestub-amd64-shorter-c.asm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/static-pie-prestub-amd64-shorter-c.asm b/scripts/static-pie-prestub-amd64-shorter-c.asm index f894356a..1c73b353 100644 --- a/scripts/static-pie-prestub-amd64-shorter-c.asm +++ b/scripts/static-pie-prestub-amd64-shorter-c.asm @@ -1,11 +1,11 @@ ; -*- tab-width: 4 -*- ; -; The prestub for amd64-rust target +; The prestub for amd64-C target ; (prestub: the code that runs before the stub and sets the stage) ; ; build: nasm -f bin -O9 static-pie-prestub-amd64-shorter-c.asm -o static-pie-prestub-amd64-shorter-c.bin ; note: after building with the above command, run static-pie-prestub-amd64-print.py static-pie-prestub-amd64-shorter-c.bin --c -; to obtain the form that can be embedded in Rust as inline assembly. +; to obtain the form that can be embedded in C. 
BITS 64 ORG 0 @@ -95,4 +95,4 @@ _jump_to_entrypoint: sub rdi, qword [rdi-8] call rdi -_end: \ No newline at end of file +_end: From 97fffe16687786432553c0927a83d8855b67958f Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Sat, 2 Dec 2023 21:55:15 +0900 Subject: [PATCH 50/52] CI: test short --- .github/workflows/build-linux.yml | 21 ++++++++++++++++++++- .github/workflows/release-rs-short.sh | 1 + .github/workflows/release-short.sh | 1 + 3 files changed, 22 insertions(+), 1 deletion(-) create mode 100755 .github/workflows/release-rs-short.sh create mode 100755 .github/workflows/release-short.sh diff --git a/.github/workflows/build-linux.yml b/.github/workflows/build-linux.yml index 8790f156..66dd32a9 100644 --- a/.github/workflows/build-linux.yml +++ b/.github/workflows/build-linux.yml @@ -28,7 +28,7 @@ jobs: with: toolchain: nightly target: ${{ matrix.target }} - profile: minimal + profile: default override: true components: clippy - name: Install node.js @@ -39,6 +39,7 @@ jobs: run: | sudo apt update sudo apt install gcc-multilib nasm + rustup component add rust-src --toolchain nightly-x86_64-unknown-linux-gnu - name: Clippy run: cargo clippy env: @@ -57,6 +58,15 @@ jobs: python ./scripts/build-and-judge.py ${{ runner.temp }} ./release.sh C 64 ./tests/boj_3745.rs ./tests/boj_3745.in ./tests/boj_3745.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release.sh C 64 ./tests/boj_14939.rs ./tests/boj_14939.in ./tests/boj_14939.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release.sh C 64 ./tests/reloc.rs ./tests/reloc.in ./tests/reloc.out + - name: Check C (x86_64) - short + if: ${{ matrix.target == 'x86_64-unknown-linux-gnu' }} + run: | + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-short.sh C 64 ./src/solution.rs ./tests/boj_1000.in ./tests/boj_1000.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-short.sh C 64 ./tests/boj_1001.rs 
./tests/boj_1001.in ./tests/boj_1001.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-short.sh C 64 ./tests/boj_2751.rs ./tests/boj_2751.in.zip ./tests/boj_2751.out.zip + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-short.sh C 64 ./tests/boj_3745.rs ./tests/boj_3745.in ./tests/boj_3745.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-short.sh C 64 ./tests/boj_14939.rs ./tests/boj_14939.in ./tests/boj_14939.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-short.sh C 64 ./tests/reloc.rs ./tests/reloc.in ./tests/reloc.out - name: Check C (x86) if: ${{ matrix.target == 'i686-unknown-linux-gnu' }} run: | @@ -75,6 +85,15 @@ jobs: python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-rs.sh Rust 64 ./tests/boj_3745.rs ./tests/boj_3745.in ./tests/boj_3745.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-rs.sh Rust 64 ./tests/boj_14939.rs ./tests/boj_14939.in ./tests/boj_14939.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-rs.sh Rust 64 ./tests/reloc.rs ./tests/reloc.in ./tests/reloc.out + - name: Check Rust (x86_64) - short + if: ${{ matrix.target == 'x86_64-unknown-linux-gnu' }} + run: | + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-rs-short.sh Rust 64 ./src/solution.rs ./tests/boj_1000.in ./tests/boj_1000.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-rs-short.sh Rust 64 ./tests/boj_1001.rs ./tests/boj_1001.in ./tests/boj_1001.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-rs-short.sh Rust 64 ./tests/boj_2751.rs ./tests/boj_2751.in.zip ./tests/boj_2751.out.zip + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-rs-short.sh Rust 64 ./tests/boj_3745.rs ./tests/boj_3745.in ./tests/boj_3745.out + python 
./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-rs-short.sh Rust 64 ./tests/boj_14939.rs ./tests/boj_14939.in ./tests/boj_14939.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-rs-short.sh Rust 64 ./tests/reloc.rs ./tests/reloc.in ./tests/reloc.out - name: Check wasm32 if: ${{ matrix.target == 'wasm32-unknown-unknown' }} run: | diff --git a/.github/workflows/release-rs-short.sh b/.github/workflows/release-rs-short.sh new file mode 100755 index 00000000..089ed938 --- /dev/null +++ b/.github/workflows/release-rs-short.sh @@ -0,0 +1 @@ +./release-rs.sh --features short \ No newline at end of file diff --git a/.github/workflows/release-short.sh b/.github/workflows/release-short.sh new file mode 100755 index 00000000..7855cb5d --- /dev/null +++ b/.github/workflows/release-short.sh @@ -0,0 +1 @@ +./release.sh --features short \ No newline at end of file From 041ec04ee22fa0d00fe7e50b5f96043dd4c0f6cf Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Sat, 2 Dec 2023 22:06:11 +0900 Subject: [PATCH 51/52] CI: add Baekjoon Online Judge 2587 --- .github/workflows/build-linux.yml | 6 ++++++ .github/workflows/build-windows.yml | 3 +++ tests/boj_2587.in | 5 +++++ tests/boj_2587.out | 2 ++ tests/boj_2587.rs | 20 ++++++++++++++++++++ 5 files changed, 36 insertions(+) create mode 100644 tests/boj_2587.in create mode 100644 tests/boj_2587.out create mode 100644 tests/boj_2587.rs diff --git a/.github/workflows/build-linux.yml b/.github/workflows/build-linux.yml index 66dd32a9..609f904a 100644 --- a/.github/workflows/build-linux.yml +++ b/.github/workflows/build-linux.yml @@ -54,6 +54,7 @@ jobs: run: | python ./scripts/build-and-judge.py ${{ runner.temp }} ./release.sh C 64 ./src/solution.rs ./tests/boj_1000.in ./tests/boj_1000.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release.sh C 64 ./tests/boj_1001.rs ./tests/boj_1001.in ./tests/boj_1001.out + python 
./scripts/build-and-judge.py ${{ runner.temp }} ./release.sh C 64 ./tests/boj_2587.rs ./tests/boj_2587.in ./tests/boj_2587.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release.sh C 64 ./tests/boj_2751.rs ./tests/boj_2751.in.zip ./tests/boj_2751.out.zip python ./scripts/build-and-judge.py ${{ runner.temp }} ./release.sh C 64 ./tests/boj_3745.rs ./tests/boj_3745.in ./tests/boj_3745.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release.sh C 64 ./tests/boj_14939.rs ./tests/boj_14939.in ./tests/boj_14939.out @@ -63,6 +64,7 @@ jobs: run: | python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-short.sh C 64 ./src/solution.rs ./tests/boj_1000.in ./tests/boj_1000.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-short.sh C 64 ./tests/boj_1001.rs ./tests/boj_1001.in ./tests/boj_1001.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-short.sh C 64 ./tests/boj_2587.rs ./tests/boj_2587.in ./tests/boj_2587.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-short.sh C 64 ./tests/boj_2751.rs ./tests/boj_2751.in.zip ./tests/boj_2751.out.zip python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-short.sh C 64 ./tests/boj_3745.rs ./tests/boj_3745.in ./tests/boj_3745.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-short.sh C 64 ./tests/boj_14939.rs ./tests/boj_14939.in ./tests/boj_14939.out @@ -72,6 +74,7 @@ jobs: run: | python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-32bit.sh C 32 ./src/solution.rs ./tests/boj_1000.in ./tests/boj_1000.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-32bit.sh C 32 ./tests/boj_1001.rs ./tests/boj_1001.in ./tests/boj_1001.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-32bit.sh C 32 ./tests/boj_2587.rs ./tests/boj_2587.in ./tests/boj_2587.out python 
./scripts/build-and-judge.py ${{ runner.temp }} ./release-32bit.sh C 32 ./tests/boj_2751.rs ./tests/boj_2751.in.zip ./tests/boj_2751.out.zip python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-32bit.sh C 32 ./tests/boj_3745.rs ./tests/boj_3745.in ./tests/boj_3745.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-32bit.sh C 32 ./tests/boj_14939.rs ./tests/boj_14939.in ./tests/boj_14939.out @@ -81,6 +84,7 @@ jobs: run: | python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-rs.sh Rust 64 ./src/solution.rs ./tests/boj_1000.in ./tests/boj_1000.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-rs.sh Rust 64 ./tests/boj_1001.rs ./tests/boj_1001.in ./tests/boj_1001.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-rs.sh Rust 64 ./tests/boj_2587.rs ./tests/boj_2587.in ./tests/boj_2587.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-rs.sh Rust 64 ./tests/boj_2751.rs ./tests/boj_2751.in.zip ./tests/boj_2751.out.zip python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-rs.sh Rust 64 ./tests/boj_3745.rs ./tests/boj_3745.in ./tests/boj_3745.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-rs.sh Rust 64 ./tests/boj_14939.rs ./tests/boj_14939.in ./tests/boj_14939.out @@ -90,6 +94,7 @@ jobs: run: | python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-rs-short.sh Rust 64 ./src/solution.rs ./tests/boj_1000.in ./tests/boj_1000.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-rs-short.sh Rust 64 ./tests/boj_1001.rs ./tests/boj_1001.in ./tests/boj_1001.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-rs-short.sh Rust 64 ./tests/boj_2587.rs ./tests/boj_2587.in ./tests/boj_2587.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-rs-short.sh Rust 64 ./tests/boj_2751.rs ./tests/boj_2751.in.zip 
./tests/boj_2751.out.zip python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-rs-short.sh Rust 64 ./tests/boj_3745.rs ./tests/boj_3745.in ./tests/boj_3745.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-rs-short.sh Rust 64 ./tests/boj_14939.rs ./tests/boj_14939.in ./tests/boj_14939.out @@ -99,6 +104,7 @@ jobs: run: | python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-wasm32.sh JavaScript 32 ./src/solution.rs ./tests/boj_1000.in ./tests/boj_1000.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-wasm32.sh JavaScript 32 ./tests/boj_1001.rs ./tests/boj_1001.in ./tests/boj_1001.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-wasm32.sh JavaScript 32 ./tests/boj_2587.rs ./tests/boj_2587.in ./tests/boj_2587.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-wasm32.sh JavaScript 32 ./tests/boj_2751.rs ./tests/boj_2751.in.zip ./tests/boj_2751.out.zip python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-wasm32.sh JavaScript 32 ./tests/boj_3745.rs ./tests/boj_3745.in ./tests/boj_3745.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-wasm32.sh JavaScript 32 ./tests/boj_14939.rs ./tests/boj_14939.in ./tests/boj_14939.out diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml index 98e03af8..0b08b3bf 100644 --- a/.github/workflows/build-windows.yml +++ b/.github/workflows/build-windows.yml @@ -55,6 +55,7 @@ jobs: run: | python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows.cmd C 64 .\src\solution.rs .\tests\boj_1000.in .\tests\boj_1000.out python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows.cmd C 64 .\tests\boj_1001.rs .\tests\boj_1001.in .\tests\boj_1001.out + python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows.cmd C 64 .\tests\boj_2587.rs .\tests\boj_2587.in .\tests\boj_2587.out python 
.\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows.cmd C 64 .\tests\boj_2751.rs .\tests\boj_2751.in.zip .\tests\boj_2751.out.zip python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows.cmd C 64 .\tests\boj_3745.rs .\tests\boj_3745.in .\tests\boj_3745.out python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows.cmd C 64 .\tests\boj_14939.rs .\tests\boj_14939.in .\tests\boj_14939.out @@ -64,6 +65,7 @@ jobs: run: | python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows-rs.cmd Rust 64 .\src\solution.rs .\tests\boj_1000.in .\tests\boj_1000.out python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows-rs.cmd Rust 64 .\tests\boj_1001.rs .\tests\boj_1001.in .\tests\boj_1001.out + python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows-rs.cmd Rust 64 .\tests\boj_2587.rs .\tests\boj_2587.in .\tests\boj_2587.out python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows-rs.cmd Rust 64 .\tests\boj_2751.rs .\tests\boj_2751.in.zip .\tests\boj_2751.out.zip python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows-rs.cmd Rust 64 .\tests\boj_3745.rs .\tests\boj_3745.in .\tests\boj_3745.out python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows-rs.cmd Rust 64 .\tests\boj_14939.rs .\tests\boj_14939.in .\tests\boj_14939.out @@ -73,6 +75,7 @@ jobs: run: | python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-wasm32.cmd JavaScript 32 .\src\solution.rs .\tests\boj_1000.in .\tests\boj_1000.out python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-wasm32.cmd JavaScript 32 .\tests\boj_1001.rs .\tests\boj_1001.in .\tests\boj_1001.out + python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-wasm32.cmd JavaScript 32 .\tests\boj_2587.rs .\tests\boj_2587.in .\tests\boj_2587.out python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-wasm32.cmd JavaScript 32 
.\tests\boj_2751.rs .\tests\boj_2751.in.zip .\tests\boj_2751.out.zip python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-wasm32.cmd JavaScript 32 .\tests\boj_3745.rs .\tests\boj_3745.in .\tests\boj_3745.out python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-wasm32.cmd JavaScript 32 .\tests\boj_14939.rs .\tests\boj_14939.in .\tests\boj_14939.out diff --git a/tests/boj_2587.in b/tests/boj_2587.in new file mode 100644 index 00000000..3fcb0955 --- /dev/null +++ b/tests/boj_2587.in @@ -0,0 +1,5 @@ +10 +40 +30 +60 +30 \ No newline at end of file diff --git a/tests/boj_2587.out b/tests/boj_2587.out new file mode 100644 index 00000000..f9e5d27b --- /dev/null +++ b/tests/boj_2587.out @@ -0,0 +1,2 @@ +34 +30 \ No newline at end of file diff --git a/tests/boj_2587.rs b/tests/boj_2587.rs new file mode 100644 index 00000000..02320531 --- /dev/null +++ b/tests/boj_2587.rs @@ -0,0 +1,20 @@ +use basm::platform::io::{Reader, Writer, Print}; +pub fn main() { + let mut reader = Reader::<128>::new(); + let mut writer = Writer::<128>::new(); + let mut x = [0; 5]; + let mut sum = 0; + for i in 0..5 { + x[i] = reader.u32(); + sum += x[i]; + } + for i in 0..5 { + for j in i+1..5 { + if x[i] > x[j] { + x.swap(i, j); + } + } + } + writer.println(sum / 5); + writer.println(x[2]); +} \ No newline at end of file From 8bbf4690461ee6f33b4fad7c3ff7467da4f178d9 Mon Sep 17 00:00:00 2001 From: Byeongkeun Ahn <7p54ks3@naver.com> Date: Sat, 2 Dec 2023 22:12:29 +0900 Subject: [PATCH 52/52] short: amd64-rust-shorter: fix rsp alignment --- scripts/static-pie-prestub-amd64-shorter.asm | 3 ++- scripts/static-pie-prestub-amd64-shorter.bin | 2 +- scripts/static-pie-template-amd64-shorter.rs | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/static-pie-prestub-amd64-shorter.asm b/scripts/static-pie-prestub-amd64-shorter.asm index 1316865d..7f698051 100644 --- a/scripts/static-pie-prestub-amd64-shorter.asm +++ b/scripts/static-pie-prestub-amd64-shorter.asm 
@@ -4,7 +4,7 @@ ; (prestub: the code that runs before the stub and sets the stage) ; ; build: nasm -f bin -O9 static-pie-prestub-amd64-shorter.asm -o static-pie-prestub-amd64-shorter.bin -; note: after building with the above command, run static-pie-prestub-amd64-print.py static-pie-prestub-amd64-shorter.bin +; note: after building with the above command, run static-pie-prestub-amd64-print.py static-pie-prestub-amd64-shorter.bin --octa ; to obtain the form that can be embedded in Rust as inline assembly. BITS 64 @@ -61,4 +61,5 @@ _decode_zeros: ; Jump to entrypoint _jump_to_entrypoint: sub rdi, qword [rdi-8] + and rsp, 0xfffffffffffffff0 call rdi \ No newline at end of file diff --git a/scripts/static-pie-prestub-amd64-shorter.bin b/scripts/static-pie-prestub-amd64-shorter.bin index 84271c4b..bd4d3117 100644 --- a/scripts/static-pie-prestub-amd64-shorter.bin +++ b/scripts/static-pie-prestub-amd64-shorter.bin @@ -1 +1 @@ -j X™E1ÉV1ÿ‰Æ²j"AZjÿAX^H—°Áà ¬1É,#vþÈ‘¬,$kÀ[ȪÁèöÄu÷ëà†OÿóªtÜH+øÿ× \ No newline at end of file +j X™E1ÉV1ÿ‰Æ²j"AZjÿAX^H—°Áà ¬1É,#vþÈ‘¬,$kÀ[ȪÁèöÄu÷ëà†OÿóªtÜH+øHƒäðÿ× \ No newline at end of file diff --git a/scripts/static-pie-template-amd64-shorter.rs b/scripts/static-pie-template-amd64-shorter.rs index cca645c3..40636049 100644 --- a/scripts/static-pie-template-amd64-shorter.rs +++ b/scripts/static-pie-template-amd64-shorter.rs @@ -5,4 +5,4 @@ $$$$solution_src$$$$ } // SOLUTION END -#[no_link]extern crate std;#[no_mangle]unsafe fn _start(){std::arch::asm!(".octa 226a07b2c689ff3156c931459958096ah,0de0c11fb097485e050f5841ff6a5a41h,5bc06b242cac91c8fe1676232cc931ach,0ff4f86e0ebf77510c4f608e8c1aac801h,0d7fff87f2b48dc74aaf3h",in("rsi")r$$$$binary_raw_base91$$$$.as_ptr())} \ No newline at end of file +#[no_link]extern crate std;#[no_mangle]unsafe fn _start(){std::arch::asm!(".octa 
226a07b2c689ff3156c931459958096ah,0de0c11fb097485e050f5841ff6a5a41h,5bc06b242cac91c8fe1676232cc931ach,0ff4f86e0ebf77510c4f608e8c1aac801h,0d7fff0e48348f87f2b48dc74aaf3h",in("rsi")r$$$$binary_raw_base91$$$$.as_ptr())} \ No newline at end of file