diff --git a/.cargo/x86_64-unknown-linux-gnu-short.json b/.cargo/x86_64-unknown-linux-gnu-short.json new file mode 100644 index 00000000..56da8ed9 --- /dev/null +++ b/.cargo/x86_64-unknown-linux-gnu-short.json @@ -0,0 +1,50 @@ +{ + "arch": "x86_64", + "cpu": "x86-64", + "crt-objects-fallback": "false", + "crt-static-respected": true, + "data-layout": "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", + "dynamic-linking": true, + "eh-frame-header": false, + "env": "gnu", + "has-rpath": true, + "has-thread-local": true, + "is-builtin": false, + "linker-flavor": "gnu-cc", + "llvm-target": "x86_64-unknown-linux-gnu", + "max-atomic-width": 64, + "os": "linux", + "plt-by-default": false, + "position-independent-executables": true, + "pre-link-args": { + "gnu-cc": [ + "-m64" + ], + "gnu-lld-cc": [ + "-m64" + ] + }, + "relro-level": "off", + "stack-probes": { + "kind": "none" + }, + "static-position-independent-executables": true, + "supported-sanitizers": [ + "address", + "cfi", + "leak", + "memory", + "thread", + "safestack" + ], + "supported-split-debuginfo": [ + "packed", + "unpacked", + "off" + ], + "supports-xray": true, + "target-family": [ + "unix" + ], + "target-pointer-width": "64" +} \ No newline at end of file diff --git a/.github/workflows/build-linux.yml b/.github/workflows/build-linux.yml index 8790f156..609f904a 100644 --- a/.github/workflows/build-linux.yml +++ b/.github/workflows/build-linux.yml @@ -28,7 +28,7 @@ jobs: with: toolchain: nightly target: ${{ matrix.target }} - profile: minimal + profile: default override: true components: clippy - name: Install node.js @@ -39,6 +39,7 @@ jobs: run: | sudo apt update sudo apt install gcc-multilib nasm + rustup component add rust-src --toolchain nightly-x86_64-unknown-linux-gnu - name: Clippy run: cargo clippy env: @@ -53,15 +54,27 @@ jobs: run: | python ./scripts/build-and-judge.py ${{ runner.temp }} ./release.sh C 64 ./src/solution.rs ./tests/boj_1000.in ./tests/boj_1000.out python 
./scripts/build-and-judge.py ${{ runner.temp }} ./release.sh C 64 ./tests/boj_1001.rs ./tests/boj_1001.in ./tests/boj_1001.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./release.sh C 64 ./tests/boj_2587.rs ./tests/boj_2587.in ./tests/boj_2587.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release.sh C 64 ./tests/boj_2751.rs ./tests/boj_2751.in.zip ./tests/boj_2751.out.zip python ./scripts/build-and-judge.py ${{ runner.temp }} ./release.sh C 64 ./tests/boj_3745.rs ./tests/boj_3745.in ./tests/boj_3745.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release.sh C 64 ./tests/boj_14939.rs ./tests/boj_14939.in ./tests/boj_14939.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release.sh C 64 ./tests/reloc.rs ./tests/reloc.in ./tests/reloc.out + - name: Check C (x86_64) - short + if: ${{ matrix.target == 'x86_64-unknown-linux-gnu' }} + run: | + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-short.sh C 64 ./src/solution.rs ./tests/boj_1000.in ./tests/boj_1000.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-short.sh C 64 ./tests/boj_1001.rs ./tests/boj_1001.in ./tests/boj_1001.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-short.sh C 64 ./tests/boj_2587.rs ./tests/boj_2587.in ./tests/boj_2587.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-short.sh C 64 ./tests/boj_2751.rs ./tests/boj_2751.in.zip ./tests/boj_2751.out.zip + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-short.sh C 64 ./tests/boj_3745.rs ./tests/boj_3745.in ./tests/boj_3745.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-short.sh C 64 ./tests/boj_14939.rs ./tests/boj_14939.in ./tests/boj_14939.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-short.sh C 64 ./tests/reloc.rs 
./tests/reloc.in ./tests/reloc.out - name: Check C (x86) if: ${{ matrix.target == 'i686-unknown-linux-gnu' }} run: | python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-32bit.sh C 32 ./src/solution.rs ./tests/boj_1000.in ./tests/boj_1000.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-32bit.sh C 32 ./tests/boj_1001.rs ./tests/boj_1001.in ./tests/boj_1001.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-32bit.sh C 32 ./tests/boj_2587.rs ./tests/boj_2587.in ./tests/boj_2587.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-32bit.sh C 32 ./tests/boj_2751.rs ./tests/boj_2751.in.zip ./tests/boj_2751.out.zip python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-32bit.sh C 32 ./tests/boj_3745.rs ./tests/boj_3745.in ./tests/boj_3745.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-32bit.sh C 32 ./tests/boj_14939.rs ./tests/boj_14939.in ./tests/boj_14939.out @@ -71,15 +84,27 @@ jobs: run: | python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-rs.sh Rust 64 ./src/solution.rs ./tests/boj_1000.in ./tests/boj_1000.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-rs.sh Rust 64 ./tests/boj_1001.rs ./tests/boj_1001.in ./tests/boj_1001.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-rs.sh Rust 64 ./tests/boj_2587.rs ./tests/boj_2587.in ./tests/boj_2587.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-rs.sh Rust 64 ./tests/boj_2751.rs ./tests/boj_2751.in.zip ./tests/boj_2751.out.zip python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-rs.sh Rust 64 ./tests/boj_3745.rs ./tests/boj_3745.in ./tests/boj_3745.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-rs.sh Rust 64 ./tests/boj_14939.rs ./tests/boj_14939.in ./tests/boj_14939.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-rs.sh Rust 64 ./tests/reloc.rs ./tests/reloc.in ./tests/reloc.out + - name: Check 
Rust (x86_64) - short + if: ${{ matrix.target == 'x86_64-unknown-linux-gnu' }} + run: | + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-rs-short.sh Rust 64 ./src/solution.rs ./tests/boj_1000.in ./tests/boj_1000.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-rs-short.sh Rust 64 ./tests/boj_1001.rs ./tests/boj_1001.in ./tests/boj_1001.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-rs-short.sh Rust 64 ./tests/boj_2587.rs ./tests/boj_2587.in ./tests/boj_2587.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-rs-short.sh Rust 64 ./tests/boj_2751.rs ./tests/boj_2751.in.zip ./tests/boj_2751.out.zip + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-rs-short.sh Rust 64 ./tests/boj_3745.rs ./tests/boj_3745.in ./tests/boj_3745.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-rs-short.sh Rust 64 ./tests/boj_14939.rs ./tests/boj_14939.in ./tests/boj_14939.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./.github/workflows/release-rs-short.sh Rust 64 ./tests/reloc.rs ./tests/reloc.in ./tests/reloc.out - name: Check wasm32 if: ${{ matrix.target == 'wasm32-unknown-unknown' }} run: | python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-wasm32.sh JavaScript 32 ./src/solution.rs ./tests/boj_1000.in ./tests/boj_1000.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-wasm32.sh JavaScript 32 ./tests/boj_1001.rs ./tests/boj_1001.in ./tests/boj_1001.out + python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-wasm32.sh JavaScript 32 ./tests/boj_2587.rs ./tests/boj_2587.in ./tests/boj_2587.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-wasm32.sh JavaScript 32 ./tests/boj_2751.rs ./tests/boj_2751.in.zip ./tests/boj_2751.out.zip python ./scripts/build-and-judge.py ${{ runner.temp }} 
./release-wasm32.sh JavaScript 32 ./tests/boj_3745.rs ./tests/boj_3745.in ./tests/boj_3745.out python ./scripts/build-and-judge.py ${{ runner.temp }} ./release-wasm32.sh JavaScript 32 ./tests/boj_14939.rs ./tests/boj_14939.in ./tests/boj_14939.out diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml index 98e03af8..0b08b3bf 100644 --- a/.github/workflows/build-windows.yml +++ b/.github/workflows/build-windows.yml @@ -55,6 +55,7 @@ jobs: run: | python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows.cmd C 64 .\src\solution.rs .\tests\boj_1000.in .\tests\boj_1000.out python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows.cmd C 64 .\tests\boj_1001.rs .\tests\boj_1001.in .\tests\boj_1001.out + python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows.cmd C 64 .\tests\boj_2587.rs .\tests\boj_2587.in .\tests\boj_2587.out python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows.cmd C 64 .\tests\boj_2751.rs .\tests\boj_2751.in.zip .\tests\boj_2751.out.zip python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows.cmd C 64 .\tests\boj_3745.rs .\tests\boj_3745.in .\tests\boj_3745.out python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows.cmd C 64 .\tests\boj_14939.rs .\tests\boj_14939.in .\tests\boj_14939.out @@ -64,6 +65,7 @@ jobs: run: | python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows-rs.cmd Rust 64 .\src\solution.rs .\tests\boj_1000.in .\tests\boj_1000.out python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows-rs.cmd Rust 64 .\tests\boj_1001.rs .\tests\boj_1001.in .\tests\boj_1001.out + python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows-rs.cmd Rust 64 .\tests\boj_2587.rs .\tests\boj_2587.in .\tests\boj_2587.out python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows-rs.cmd Rust 64 .\tests\boj_2751.rs 
.\tests\boj_2751.in.zip .\tests\boj_2751.out.zip python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows-rs.cmd Rust 64 .\tests\boj_3745.rs .\tests\boj_3745.in .\tests\boj_3745.out python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-64bit-windows-rs.cmd Rust 64 .\tests\boj_14939.rs .\tests\boj_14939.in .\tests\boj_14939.out @@ -73,6 +75,7 @@ jobs: run: | python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-wasm32.cmd JavaScript 32 .\src\solution.rs .\tests\boj_1000.in .\tests\boj_1000.out python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-wasm32.cmd JavaScript 32 .\tests\boj_1001.rs .\tests\boj_1001.in .\tests\boj_1001.out + python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-wasm32.cmd JavaScript 32 .\tests\boj_2587.rs .\tests\boj_2587.in .\tests\boj_2587.out python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-wasm32.cmd JavaScript 32 .\tests\boj_2751.rs .\tests\boj_2751.in.zip .\tests\boj_2751.out.zip python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-wasm32.cmd JavaScript 32 .\tests\boj_3745.rs .\tests\boj_3745.in .\tests\boj_3745.out python .\scripts\build-and-judge.py ${{ runner.temp }} .\release-wasm32.cmd JavaScript 32 .\tests\boj_14939.rs .\tests\boj_14939.in .\tests\boj_14939.out diff --git a/.github/workflows/release-rs-short.sh b/.github/workflows/release-rs-short.sh new file mode 100755 index 00000000..089ed938 --- /dev/null +++ b/.github/workflows/release-rs-short.sh @@ -0,0 +1 @@ +./release-rs.sh --features short \ No newline at end of file diff --git a/.github/workflows/release-short.sh b/.github/workflows/release-short.sh new file mode 100755 index 00000000..7855cb5d --- /dev/null +++ b/.github/workflows/release-short.sh @@ -0,0 +1 @@ +./release.sh --features short \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 7e68140f..3254b66b 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -27,6 +27,14 @@ 
"$rustc" ], }, + { + "label": "build-release-amd64-short-submit", + "type": "shell", + "command": "mkdir -p ./target/x86_64-unknown-linux-gnu-short/release; ./release.sh --features short > ./target/x86_64-unknown-linux-gnu-short/release/loader.c; code --reuse-window ./target/x86_64-unknown-linux-gnu-short/release/loader.c", + "problemMatcher": [ + "$rustc" + ], + }, { "label": "build-release-amd64-rs-submit", "type": "shell", @@ -35,6 +43,14 @@ "$rustc" ], }, + { + "label": "build-release-amd64-rs-short-submit", + "type": "shell", + "command": "mkdir -p ./target/x86_64-unknown-linux-gnu-short/release; ./release-rs.sh --features short > ./target/x86_64-unknown-linux-gnu-short/release/loader.rs; code --reuse-window ./target/x86_64-unknown-linux-gnu-short/release/loader.rs", + "problemMatcher": [ + "$rustc" + ], + }, { "label": "build-release-i686-submit", "type": "shell", diff --git a/Cargo.toml b/Cargo.toml index f75934ff..a0047e08 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,10 +24,21 @@ bench = false path = "src/bin/basm-submit.rs" [dependencies] -compiler_builtins = { version = "0.1.101", features = ["mem"] } libm = "0.2.7" ryu = "1.0" +[target.x86_64-pc-windows-msvc.dependencies] +compiler_builtins = { version = "0.1.103", features = ["mem"] } +[target.x86_64-unknown-linux-gnu.dependencies] +compiler_builtins = { version = "0.1.103", features = ["mem"] } +[target.i686-unknown-linux-gnu.dependencies] +compiler_builtins = { version = "0.1.103", features = ["mem"] } +[target.wasm32-unknown-unknown.dependencies] +compiler_builtins = { version = "0.1.103", features = ["mem"] } + +[features] +short = [] + [profile.dev] panic = "abort" diff --git a/build.rs b/build.rs index 398c9cf1..ddac1f7c 100644 --- a/build.rs +++ b/build.rs @@ -18,7 +18,7 @@ fn main() { link_args_basm.push("/EMITPOGOPHASEINFO"); link_args_basm_submit.push("/ALIGN:128"); }, - "x86_64-unknown-linux-gnu" | "i686-unknown-linux-gnu" => { + "x86_64-unknown-linux-gnu" | 
"x86_64-unknown-linux-gnu-short" | "i686-unknown-linux-gnu" => { link_args_basm.push("-nostartfiles"); link_args_basm.push("-nostdlib"); link_args_basm.push("-static-pie"); @@ -27,12 +27,12 @@ fn main() { link_args_basm.push("-fno-unwind-tables"); link_args_basm.push("-fno-stack-protector"); link_args_basm.push("-fno-plt"); - if target == "x86_64-unknown-linux-gnu" { - link_args_basm.push("-Wl,--build-id=none,--gc-sections,--no-eh-frame-hdr,-z,norelro"); - } else { + if target == "i686-unknown-linux-gnu" { // Prevent linker from putting data into text, which is non-writable and hence not relocatable. // This prevents the hack for getting the _DYNAMIC symbol in the entrypoint. link_args_basm.push("-Wl,--build-id=none,--gc-sections,--no-eh-frame-hdr,-z,norelro,-z,notext"); + } else { + link_args_basm.push("-Wl,--build-id=none,--gc-sections,--no-eh-frame-hdr,-z,norelro"); } link_args_basm_submit.push("-Wl,-z,max-page-size=128"); }, diff --git a/release-html.sh b/release-html.sh index 1085cc66..99da4a7e 100644 --- a/release-html.sh +++ b/release-html.sh @@ -1,3 +1,3 @@ >&2 echo "Building project for target wasm32-unknown-unknown, language JavaScript, build mode Release" cargo +nightly build --target wasm32-unknown-unknown --bin=basm-submit --release "$@" -python scripts/wasm-gen.py scripts/wasm-template.html \ No newline at end of file +python3 scripts/wasm-gen.py scripts/wasm-template.html \ No newline at end of file diff --git a/release-wasm32.sh b/release-wasm32.sh index 1262d3d0..e3263196 100755 --- a/release-wasm32.sh +++ b/release-wasm32.sh @@ -1,3 +1,3 @@ >&2 echo "Building project for target wasm32-unknown-unknown, language JavaScript, build mode Release" cargo +nightly build --target wasm32-unknown-unknown --bin=basm-submit --release "$@" -python scripts/wasm-gen.py scripts/wasm-template.js \ No newline at end of file +python3 scripts/wasm-gen.py scripts/wasm-template.js \ No newline at end of file diff --git a/scripts/base91.py b/scripts/base91.py index 
8778d797..7f2e17fa 100644 --- a/scripts/base91.py +++ b/scripts/base91.py @@ -1,4 +1,29 @@ -def encode(x): +def encode(x_in, use_rle=False): + sharp_insertion_points = [] + if use_rle: + current_bits, current_bytes, i = 0, 0, 0 + x = bytearray() + while i < len(x_in): + current_bits += 13 + while current_bytes < current_bits // 8: + if i >= len(x_in): + break + x.append(x_in[i]) + current_bytes += 1 + i += 1 + if len(x) > 0 and x[-1] == 0: + zeros_cnt = 1 + while i - 1 + zeros_cnt < len(x_in) and zeros_cnt < 256 and x_in[i - 1 + zeros_cnt] == 0: + zeros_cnt += 1 + if zeros_cnt >= 2: + x.pop() + x.append(zeros_cnt - 1) + sharp_insertion_points.append((current_bits // 13 * 2) + len(sharp_insertion_points)) + i += zeros_cnt - 1 + sharp_insertion_points = list(reversed(sharp_insertion_points)) + else: + x = x_in + out = [] i = 0 cnt5, stack5 = 0, [] @@ -21,11 +46,14 @@ def encode(x): if bits < 13: stack5.append((pos, bits)) cnt5 -= 8 - ret = bytearray(2 * len(out) + 1) - for i in range(len(out)): - ret[2 * i + 0] = 0x24 + (out[i] % 91) - ret[2 * i + 1] = 0x24 + (out[i] // 91) - ret[-1] = ord('!') + ret = bytearray() + for pack in out: + ret.append(0x24 + (pack % 91)) + ret.append(0x24 + (pack // 91)) + if len(sharp_insertion_points) > 0 and len(ret) == sharp_insertion_points[-1]: + ret.append(ord(b'#')) + sharp_insertion_points.pop() + ret.append(ord(b'!')) return bytes(ret) if __name__ == '__main__': diff --git a/scripts/static-pie-elf2bin.py b/scripts/static-pie-elf2bin.py index 7cb885d3..e32d6b73 100644 --- a/scripts/static-pie-elf2bin.py +++ b/scripts/static-pie-elf2bin.py @@ -160,7 +160,28 @@ def load_elf64(elf): continue # since bytearray is zero-initialized dst_off, src_off, cnt = sh_dict['sh_addr'], sh_dict['sh_offset'], sh_dict['sh_size'] - memory_bin[dst_off:dst_off+cnt] = elf[src_off:src_off+cnt] + blob = elf[src_off:src_off+cnt] + + if sh_dict['sh_type'] == SHT_DYNAMIC: + # Trim the DYNAMIC section, leaving only relocation-related entries + # 16 == 
sizeof(Elf64_Dyn) + dst = 0 + for src in range(0, len(blob), 16): + # Included entries: + # DT_PLTRELSZ = 2, DT_RELA = 7, DT_RELASZ = 8, DT_RELAENT = 9, + # DT_REL = 17, DT_RELSZ = 18, DT_RELENT = 19, DT_PLTREL = 20, + # DT_TEXT_REL = 22, DT_JMPREL = 23. + # + # Note: DT_RELACOUNT = 0x6fff_fff9 and DT_RELCOUNT = 0x6fff_fffa + # are not included since they are redundant since + # DT_RELACOUNT = DT_RELASZ/DT_RELAENT and + # DT_RELCOUNT = DT_RELSZ/DT_RELENT. + if b2i(blob[src:src+8]) in [2, 7, 8, 9, 17, 18, 19, 20, 22, 23]: + blob[dst:dst+16] = blob[src:src+16] + dst += 16 + blob[dst:] = bytearray(len(blob[dst:])) # fill remaining part with zeros + + memory_bin[dst_off:dst_off+cnt] = blob entrypoint_offset = b2i(elf[24:32]) return memory_bin, pos_begin, entrypoint_offset diff --git a/scripts/static-pie-gen.py b/scripts/static-pie-gen.py index c4aadcae..c7404639 100644 --- a/scripts/static-pie-gen.py +++ b/scripts/static-pie-gen.py @@ -67,6 +67,18 @@ sol[-1] = sol[-1].rstrip() sol = "".join(sol) +# binary (raw) +# Since we append a little-endian 8-byte nonnegative integer, we can practically ensure that the last byte is zero. +code_raw = memory_bin[:-8] +code_raw += (len(code_raw) + 8 - loader_fdict['entrypoint_offset']).to_bytes(8, byteorder='little') +code_raw_b91 = base91.encode(code_raw, use_rle=True).decode('ascii') +code_raw_b91_len = len(code_raw_b91) +code_raw_b91 = '"' + code_raw_b91 + '"' +if lang_name == "C": + # Escape '\' and '?' 
+ code_raw_b91 = code_raw_b91.replace('\\', '\\\\') + code_raw_b91 = code_raw_b91.replace('?', '\\?') + # binary with open(compressed_binary_path, "rb") as f: code = f.read() @@ -115,29 +127,39 @@ stub_b85 = '"' + stub_b85 + '"' # template -with open(template_path, encoding='utf8') as f: - template = f.read() -template = template.replace("\ufeff", "") - -# putting it all together -# reference: https://stackoverflow.com/a/15448887 -def multiple_replace(string, rep_dict): - pattern = re.compile("|".join([re.escape(k) for k in sorted(rep_dict,key=len,reverse=True)]), flags=re.DOTALL) - return pattern.sub(lambda x: rep_dict[x.group(0)], string) - -out = multiple_replace(template, { - "$$$$solution_src$$$$": sol, - "$$$$stub_raw$$$$": stub_raw, - "$$$$stub_base85$$$$": stub_b85, - "$$$$stub_len$$$$": str(len(stub)), - "$$$$stub_base85_len$$$$": str(stub_b85_len), - "$$$$stub_base91$$$$": stub_b91, - "$$$$stub_base91_len$$$$": str(stub_b91_len), - "$$$$binary_base85$$$$": r, - "$$$$binary_base85_len$$$$": str(len(code_b85)), - "$$$$binary_base91$$$$": code_b91, - "$$$$binary_base91_len$$$$": str(code_b91_len), - "$$$$min_len_4096$$$$": str(min(len(code_b85)+1, 4096)), - "$$$$entrypoint_offset$$$$": str(loader_fdict['entrypoint_offset']), -}) +template_candidates = [template_path] +if lang_name in ["C", "Rust"] and "x86_64" in target_name and "short" in template_path and len(code_raw) <= 4096 - 256: + template_candidates.append(template_path.replace("short", "shorter")) + +out = None +for each_template_path in template_candidates: + with open(each_template_path, encoding='utf8') as f: + template = f.read() + template = template.replace("\ufeff", "") + + # putting it all together + # reference: https://stackoverflow.com/a/15448887 + def multiple_replace(string, rep_dict): + pattern = re.compile("|".join([re.escape(k) for k in sorted(rep_dict,key=len,reverse=True)]), flags=re.DOTALL) + return pattern.sub(lambda x: rep_dict[x.group(0)], string) + + out_candidate = 
multiple_replace(template, { + "$$$$solution_src$$$$": sol, + "$$$$stub_raw$$$$": stub_raw, + "$$$$stub_base85$$$$": stub_b85, + "$$$$stub_len$$$$": str(len(stub)), + "$$$$stub_base85_len$$$$": str(stub_b85_len), + "$$$$stub_base91$$$$": stub_b91, + "$$$$stub_base91_len$$$$": str(stub_b91_len), + "$$$$binary_base85$$$$": r, + "$$$$binary_base85_len$$$$": str(len(code_b85)), + "$$$$binary_base91$$$$": code_b91, + "$$$$binary_base91_len$$$$": str(code_b91_len), + "$$$$binary_raw_base91$$$$": code_raw_b91, + "$$$$binary_raw_base91_len$$$$": str(code_raw_b91_len), + "$$$$min_len_4096$$$$": str(min(len(code_b85)+1, 4096)), + "$$$$entrypoint_offset$$$$": str(loader_fdict['entrypoint_offset']), + }) + if out is None or len(out_candidate) < len(out): + out = out_candidate print(out) \ No newline at end of file diff --git a/scripts/static-pie-prestub-amd64-print.py b/scripts/static-pie-prestub-amd64-print.py index de11e5eb..4fb77564 100644 --- a/scripts/static-pie-prestub-amd64-print.py +++ b/scripts/static-pie-prestub-amd64-print.py @@ -1,5 +1,8 @@ +import sys + # read prestub -with open("static-pie-prestub-amd64.bin", "rb") as f: +fname = "static-pie-prestub-amd64.bin" if len(sys.argv) <= 1 else sys.argv[1] +with open(fname, "rb") as f: prestub = f.read() prestub = bytearray(prestub) if len(prestub) > 0 and prestub[-1] == 0: @@ -19,9 +22,22 @@ prestub = prestub[:j] # settings -SPECIFIER = ".quad" -CHUNK_SIZE = 8 -ENTRIES_PER_LINE = 4 +if "--octa" in sys.argv: + SPECIFIER = ".octa" + CHUNK_SIZE = 16 + ENTRIES_PER_LINE = 10 +else: + SPECIFIER = ".quad" + CHUNK_SIZE = 8 + ENTRIES_PER_LINE = 4 +if "--c" in sys.argv: + PREFIX = "0x" + SUFFIX = "" + SPECIFIER = "" + ENTRIES_PER_LINE = 100 +else: + PREFIX = "" + SUFFIX = "h" # pad to align at `CHUNK_SIZE`-byte boundary while len(prestub) % CHUNK_SIZE != 0: @@ -40,8 +56,8 @@ def to_hex_short(y): nonzero_idx = len(out) while nonzero_idx > 1 and out[nonzero_idx-1] == '0': nonzero_idx -= 1 - out2 = out[:nonzero_idx] + "h<<" + 
str((len(out) - nonzero_idx) * 4) - out = out + "h" + out2 = PREFIX + out[:nonzero_idx] + SUFFIX + "<<" + str((len(out) - nonzero_idx) * 4) + out = PREFIX + out + SUFFIX if len(out2) < len(out): out = out2 if ord(out[0]) >= ord('a'): diff --git a/scripts/static-pie-prestub-amd64-short.asm b/scripts/static-pie-prestub-amd64-short.asm new file mode 100644 index 00000000..406e9b92 --- /dev/null +++ b/scripts/static-pie-prestub-amd64-short.asm @@ -0,0 +1,86 @@ +; -*- tab-width: 4 -*- +; +; The prestub for amd64-rust target +; (prestub: the code that runs before the stub and sets the stage) +; +; build: nasm -f bin -O9 static-pie-prestub-amd64-short.asm -o static-pie-prestub-amd64-short.bin +; note: after building with the above command, run static-pie-prestub-amd64-print.py static-pie-prestub-amd64-short.bin +; to obtain the form that can be embedded in Rust as inline assembly. + +BITS 64 +ORG 0 +section .text + +; Align stack to 16 byte boundary +; [rsp+ 32, rsp+120): PLATFORM_DATA +; [rsp+ 0, rsp+ 32): (shadow space for win64 calling convention) + enter 56, 0 + push 1 + pop rcx ; Enable ENV_FLAGS_LINUX_STYLE_CHKSTK outside Windows + call _t + +; svc_alloc_rwx for Windows and Linux +; rcx = size +; rdi = pointer to VirtualAlloc (must be supplied before prepending the mov instruction) +_svc_alloc_rwx: + push 9 + pop rax ; syscall id of x64 mmap + jecxz _decode + cdq ; rdx=0 + xor r9d, r9d ; offset + push rsi ; save rsi + xor edi, edi ; rdi=0 + mov esi, ecx ; size + mov dl, 7 ; protect (safe since we have ensured rdx=0) + push 0x22 + pop r10 ; flags + push -1 + pop r8 ; fd + syscall + pop rsi ; restore rsi +_ret: + ret +_svc_alloc_rwx_end: + +; Base91 decoder +_decode: + mov al, 0x1f +_decode_loop: + shl eax, 13 + lodsb + sub al, 0x24 + jc _ret + cdq + xchg eax, edx + lodsb + sub al, 0x24 + imul eax, eax, 91 + add eax, edx +_decode_output: + stosb + shr eax, 8 + test ah, 16 + jnz _decode_output + jmp _decode_loop + +; PLATFORM_DATA +_t: ; PLATFORM_DATA[32..39] = 
ptr_alloc_rwx + pop rbx + push rbx + push rdx ; PLATFORM_DATA[24..31] = win_GetProcAddress + push rax ; PLATFORM_DATA[16..23] = win_kernel32 + push rcx ; PLATFORM_DATA[ 8..15] = env_flags (0=None, 1=ENV_FLAGS_LINUX_STYLE_CHKSTK) + inc ecx + push rcx ; PLATFORM_DATA[ 0.. 7] = env_id (1=Windows, 2=Linux) + sub rsp, 40 ; shadow space + compensation + call rbx ; svc_alloc_rwx + +; Current state: rax = new buffer, rdi = pointer to VirtualAlloc + push rax + xchg rax, rdi ; rdi = new buffer + +; Decode stub (rsi -> rdi) +; Current state: rdi = stub memory (by the previous instruction) +; rsi = STUB_BASE91 (by the Rust template) + xor ecx, ecx + jmp rbx ; This will jump to the start of the new buffer (stub) upon the ret instruction \ No newline at end of file diff --git a/scripts/static-pie-prestub-amd64-short.bin b/scripts/static-pie-prestub-amd64-short.bin new file mode 100644 index 00000000..0c17d9b8 Binary files /dev/null and b/scripts/static-pie-prestub-amd64-short.bin differ diff --git a/scripts/static-pie-prestub-amd64-shorter-c.asm b/scripts/static-pie-prestub-amd64-shorter-c.asm new file mode 100644 index 00000000..1c73b353 --- /dev/null +++ b/scripts/static-pie-prestub-amd64-shorter-c.asm @@ -0,0 +1,98 @@ +; -*- tab-width: 4 -*- +; +; The prestub for amd64-C target +; (prestub: the code that runs before the stub and sets the stage) +; +; build: nasm -f bin -O9 static-pie-prestub-amd64-shorter-c.asm -o static-pie-prestub-amd64-shorter-c.bin +; note: after building with the above command, run static-pie-prestub-amd64-print.py static-pie-prestub-amd64-shorter-c.bin --c +; to obtain the form that can be embedded in C. 
+ +BITS 64 +ORG 0 +section .text + +; Reserve space on stack + and rsp, 0xffffffffffffff80 ; ensures at least 128 bytes + +; mprotect: make stack executable + mov eax, 10 ; mprotect + mov esi, 0x1000 ; len + push rdi ; Save binary_raw_base91 + lea rdi, [rsp + 8] ; addr + push 7 ; protect (RWX) + pop rdx + and rdi, 0xfffffffffffff000 ; align to page boundary (4K) + syscall + +; Relocate to stack + lea rsi, [rel _start] + lea rdi, [rsp + 8] + push rdi ; _start of relocated stub + mov ecx, _end - _start + rep movsb + +; Jump to stack + pop rax ; _start of relocated stub + call rax + +_start: + +; Free the .text section + pop rdi ; Get RIP saved on stack by call instruction + and rdi, 0xfffffffffffff000 + mov esi, 0x1000 + mov eax, 11 + syscall + +; svc_alloc_rwx for Linux +_svc_alloc_rwx: + push 9 + pop rax ; syscall id of x64 mmap + cdq ; rdx=0 + xor r9d, r9d ; offset + xor edi, edi ; rdi=0 + mov esi, eax ; size (anything in [1, 4096]) + mov dl, 7 ; protect (safe since we have ensured rdx=0) + push 0x22 + pop r10 ; flags + push -1 + pop r8 ; fd + syscall + pop rsi ; restore rsi + +; Current state: rax = new buffer + xchg rax, rdi ; rdi = new buffer + +; Base91 decoder +_decode: + mov al, 0x1f ; syscall preserves all registers except rcx, r11, rax; hence at this point rax=(previous rdi)=0 +_decode_loop: + shl eax, 13 +_decode_loop_2: + lodsb + xor ecx, ecx ; ecx = 0 + sub al, 0x23 + jbe _decode_zeros + dec al + xchg eax, ecx + lodsb + sub al, 0x24 + imul eax, eax, 91 + add eax, ecx +_decode_output: + stosb + shr eax, 8 + test ah, 16 + jnz _decode_output + jmp _decode_loop +_decode_zeros: + xchg byte [rdi-1], cl ; ecx = cl = ((number of zeros) - 1), byte [rdi-1] = 0 + rep stosb ; we have made sure the last byte is zero (in the packager) + jz _decode_loop_2 + +; Jump to entrypoint +_jump_to_entrypoint: + sub rdi, qword [rdi-8] + call rdi + +_end: diff --git a/scripts/static-pie-prestub-amd64-shorter-c.bin b/scripts/static-pie-prestub-amd64-shorter-c.bin new file mode 
100644 index 00000000..437271e3 Binary files /dev/null and b/scripts/static-pie-prestub-amd64-shorter-c.bin differ diff --git a/scripts/static-pie-prestub-amd64-shorter.asm b/scripts/static-pie-prestub-amd64-shorter.asm new file mode 100644 index 00000000..7f698051 --- /dev/null +++ b/scripts/static-pie-prestub-amd64-shorter.asm @@ -0,0 +1,65 @@ +; -*- tab-width: 4 -*- +; +; The prestub for amd64-rust target +; (prestub: the code that runs before the stub and sets the stage) +; +; build: nasm -f bin -O9 static-pie-prestub-amd64-shorter.asm -o static-pie-prestub-amd64-shorter.bin +; note: after building with the above command, run static-pie-prestub-amd64-print.py static-pie-prestub-amd64-shorter.bin --octa +; to obtain the form that can be embedded in Rust as inline assembly. + +BITS 64 +ORG 0 +section .text + +; svc_alloc_rwx for Linux +_svc_alloc_rwx: + push 9 + pop rax ; syscall id of x64 mmap + cdq ; rdx=0 + xor r9d, r9d ; offset + push rsi ; save rsi + xor edi, edi ; rdi=0 + mov esi, eax ; size (anything in [1, 4096]) + mov dl, 7 ; protect (safe since we have ensured rdx=0) + push 0x22 + pop r10 ; flags + push -1 + pop r8 ; fd + syscall + pop rsi ; restore rsi + +; Current state: rax = new buffer + xchg rax, rdi ; rdi = new buffer + +; Base91 decoder +_decode: + mov al, 0x1f ; syscall preserves all registers except rcx, r11, rax; hence at this point rax=(previous rdi)=0 +_decode_loop: + shl eax, 13 +_decode_loop_2: + lodsb + xor ecx, ecx ; ecx = 0 + sub al, 0x23 + jbe _decode_zeros + dec al + xchg eax, ecx + lodsb + sub al, 0x24 + imul eax, eax, 91 + add eax, ecx +_decode_output: + stosb + shr eax, 8 + test ah, 16 + jnz _decode_output + jmp _decode_loop +_decode_zeros: + xchg byte [rdi-1], cl ; ecx = cl = ((number of zeros) - 1), byte [rdi-1] = 0 + rep stosb ; we have made sure the last byte is zero (in the packager) + jz _decode_loop_2 + +; Jump to entrypoint +_jump_to_entrypoint: + sub rdi, qword [rdi-8] + and rsp, 0xfffffffffffffff0 + call rdi \ No newline 
at end of file diff --git a/scripts/static-pie-prestub-amd64-shorter.bin b/scripts/static-pie-prestub-amd64-shorter.bin new file mode 100644 index 00000000..bd4d3117 --- /dev/null +++ b/scripts/static-pie-prestub-amd64-shorter.bin @@ -0,0 +1 @@ +j X™E1ÉV1ÿ‰Æ²j"AZjÿAX^H—°Áà ¬1É,#vþÈ‘¬,$kÀ[ȪÁèöÄu÷ëà†OÿóªtÜH+øHƒäðÿ× \ No newline at end of file diff --git a/scripts/static-pie-template-amd64-short.c b/scripts/static-pie-template-amd64-short.c new file mode 100644 index 00000000..25996119 --- /dev/null +++ b/scripts/static-pie-template-amd64-short.c @@ -0,0 +1,67 @@ +// Generated with https://github.com/kiwiyou/basm-rs +// Learn rust (https://doc.rust-lang.org/book/) and get high performance out of the box! +// SOLUTION BEGIN +$$$$solution_src$$$$ +// SOLUTION END +#include +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; +#define BASMCALL __attribute__((ms_abi)) +// Base85 decoder. Code adapted from: +// https://github.com/rafagafe/base85/blob/master/base85.c +const char *b85 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>\?@^_`{|}~"; +void b85tobin(void *dest, char const *src) { + u32 *p = (u32 *)dest; + u8 digittobin[256]; + for (u8 i=0; i<85; i++) digittobin[(u8)b85[i]] = i; + while (1) { + while (*src == '\0') src++; + if (*src == ']') break; + u32 value = 0; + for (u32 i=0; i<5; i++) { + value *= 85; + value += digittobin[(u8)*src++]; + } + *p++ = (value >> 24) | ((value >> 8) & 0xff00) | ((value << 8) & 0xff0000) | (value << 24); + } +} +#pragma pack(push, 1) +typedef struct { + u64 env_id; + u64 env_flags; + u64 win[2]; + void *fn_table[6]; +} PLATFORM_DATA; +#pragma pack(pop) +typedef int (BASMCALL *stub_ptr)(void *, void *); +char payload[][$$$$min_len_4096$$$$] = $$$$binary_base85$$$$; +int main() {} +#ifdef __cplusplus +extern "C" +#endif +int __libc_start_main( + void *func_ptr, + int argc, + char* argv[], + void (*init_func)(void), + void (*fini_func)(void), + void 
(*rtld_fini_func)(void), + void *stack_end) { + PLATFORM_DATA pd; + pd.env_id = 2; + pd.env_flags = 1; + u8 stubbuf[68 + $$$$stub_len$$$$]; + b85tobin(stubbuf, "QMd~L002n8@6D@;XGJ3cz5oya01pLO>naZmS5~+Q0000n|450>x(5IN07=KfA^-pYO)> 12) << 12; + syscall(10, base, len, 0x7); + pd.fn_table[0] = (void *) (stubbuf + 0x1c); + b85tobin(payload, (char const *)payload); + return ((stub_ptr) stubbuf)(&pd, payload); +} \ No newline at end of file diff --git a/scripts/static-pie-template-amd64-short.rs b/scripts/static-pie-template-amd64-short.rs new file mode 100644 index 00000000..44083009 --- /dev/null +++ b/scripts/static-pie-template-amd64-short.rs @@ -0,0 +1,8 @@ +// Generated with https://github.com/kiwiyou/basm-rs +// Learn rust (https://doc.rust-lang.org/book/) and get high performance out of the box! +// SOLUTION BEGIN +#![crate_type="cdylib"]#![no_std]#[cfg(any())]mod x{ +$$$$solution_src$$$$ +} +// SOLUTION END +#[no_link]extern crate std;static mut P:[u8;$$$$binary_base91_len$$$$]=*br$$$$binary_base91$$$$;#[no_mangle]unsafe fn _start(){std::arch::asm!(".quad 0e859016a000038c8h,6758096a0000003ch,3156c931459917e3h,41226a07b2ce89ffh,5e050f5841ff6a5ah,2cac0de0c11fb0c3h,242cac9299f57224h,0e8c1aad0015bc06bh,0e3ebf77510c4f608h,51c1ff515052535bh,4850d3ff28ec8348h,0e3ffc93197h",in("r14")P.as_mut_ptr(),in("rsi")r$$$$stub_base91$$$$.as_ptr())} \ No newline at end of file diff --git a/scripts/static-pie-template-amd64-shorter.c b/scripts/static-pie-template-amd64-shorter.c new file mode 100644 index 00000000..35141972 --- /dev/null +++ b/scripts/static-pie-template-amd64-shorter.c @@ -0,0 +1,10 @@ +// Generated with https://github.com/kiwiyou/basm-rs +// Learn rust (https://doc.rust-lang.org/book/) and get high performance out of the box! 
+// SOLUTION BEGIN +$$$$solution_src$$$$ +// SOLUTION END +__attribute__((section(".text#")))unsigned long long s[]={0xab880e48348,0x485700001000be00,0x485a076a08247c8d,0x50ffffff000e781,0x4800000010358d48,0x5db95708247c8d,0x5fd0ff58a4f30000,0xbefffff000e78148,0xbb800001000,0x459958096a050f00,0x7b2c689ff31c931,0x5841ff6a5a41226a,0xc11fb097485e050f,0x76232cc931ac0de0,0x6b242cac91c8fe16,0x8e8c1aac8015bc0,0x86e0ebf77510c4f6,0x2b48dc74aaf3ff4f,0xd7fff87f};char t[]=$$$$binary_raw_base91$$$$;int main(){return 0;} +#if defined(__cplusplus) +extern "C" +#endif +int __libc_start_main(){return((int(*)(void*))s)(t);} \ No newline at end of file diff --git a/scripts/static-pie-template-amd64-shorter.rs b/scripts/static-pie-template-amd64-shorter.rs new file mode 100644 index 00000000..40636049 --- /dev/null +++ b/scripts/static-pie-template-amd64-shorter.rs @@ -0,0 +1,8 @@ +// Generated with https://github.com/kiwiyou/basm-rs +// Learn rust (https://doc.rust-lang.org/book/) and get high performance out of the box! 
+// SOLUTION BEGIN +#![crate_type="cdylib"]#![no_std]#[cfg(any())]mod x{ +$$$$solution_src$$$$ +} +// SOLUTION END +#[no_link]extern crate std;#[no_mangle]unsafe fn _start(){std::arch::asm!(".octa 226a07b2c689ff3156c931459958096ah,0de0c11fb097485e050f5841ff6a5a41h,5bc06b242cac91c8fe1676232cc931ach,0ff4f86e0ebf77510c4f608e8c1aac801h,0d7fff0e48348f87f2b48dc74aaf3h",in("rsi")r$$$$binary_raw_base91$$$$.as_ptr())} \ No newline at end of file diff --git a/scripts/static-pie.sh b/scripts/static-pie.sh index f1969c9e..63570c39 100755 --- a/scripts/static-pie.sh +++ b/scripts/static-pie.sh @@ -11,9 +11,17 @@ shift if [[ "$target_name" == "x86_64-unknown-linux-gnu" ]]; then stub="static-pie-stub-amd64.bin" if [[ "$lang_name" == "C" ]]; then - template="static-pie-template-amd64.c" + if [[ "$*" == *"short"* ]]; then + template="static-pie-template-amd64-short.c" + else + template="static-pie-template-amd64.c" + fi elif [[ "$lang_name" == "Rust" ]]; then - template="static-pie-template-amd64.rs" + if [[ "$*" == *"short"* ]]; then + template="static-pie-template-amd64-short.rs" + else + template="static-pie-template-amd64.rs" + fi else >&2 echo "Language ${lang_name} is not supported for target ${target_name}" exit @@ -48,13 +56,23 @@ else >&2 echo "Unknown build mode ${build_mode}" exit fi + +if [[ "$target_name" == "x86_64-unknown-linux-gnu" && "$*" == *"short"* ]]; then + target_name_cargo=".cargo/x86_64-unknown-linux-gnu-short.json" + target_name="x86_64-unknown-linux-gnu-short" + extra_config='-Zbuild-std=core,compiler_builtins,alloc -Zbuild-std-features=compiler-builtins-mem' +else + target_name_cargo="$target_name" + extra_config="" +fi + >&2 echo "Building project for target ${target_name}, language ${lang_name}, build mode ${build_mode}" binary_path=basm.bin if [[ "$build_mode" == "Debug" ]]; then - cargo +nightly build --target "$target_name" --bin basm-submit "$@" + cargo +nightly build $extra_config --target "$target_name_cargo" --bin basm-submit "$@" else - cargo 
+nightly build --target "$target_name" --bin basm-submit --release "$@" + cargo +nightly build $extra_config --target "$target_name_cargo" --bin basm-submit --release "$@" fi if [[ "$target_name" == "x86_64-pc-windows-msvc" ]]; then @@ -62,6 +80,6 @@ if [[ "$target_name" == "x86_64-pc-windows-msvc" ]]; then else cp target/"$target_name"/"$build_mode_dir"/basm-submit target/"$target_name"/"$build_mode_dir"/basm-submit-stripped objcopy --strip-all target/"$target_name"/"$build_mode_dir"/basm-submit-stripped - objcopy --remove-section .eh_frame target/"$target_name"/"$build_mode_dir"/basm-submit-stripped + objcopy --remove-section .eh_frame --remove-section .gcc_except_table --remove-section .gnu.hash target/"$target_name"/"$build_mode_dir"/basm-submit-stripped python3 scripts/static-pie-gen.py src/solution.rs "$target_name" target/"$target_name"/"$build_mode_dir"/basm-submit-stripped scripts/"$stub" "$lang_name" scripts/"$template" fi diff --git a/src/bin/codegen.rs b/src/bin/codegen.rs index 3086a395..9289009e 100644 --- a/src/bin/codegen.rs +++ b/src/bin/codegen.rs @@ -59,6 +59,7 @@ unsafe extern "win64" fn _start() -> ! { // on the 16-byte boundary BEFORE `call` instruction. // However, when called as the entrypoint by the Linux OS, // RSP will be 16-byte aligned AFTER `call` instruction. + #[cfg(not(feature = "short"))] asm!( "clc", // CF=0 (running without loader) / CF=1 (running with loader) "mov rbx, rcx", // Save PLATFORM_DATA table @@ -80,6 +81,22 @@ unsafe extern "win64" fn _start() -> ! { sym _start_rust, options(noreturn) ); + // For "short", we always assume we are running with loader on Linux, + // since "short" is only meaningful when submitting to online judges (not local test runs). + // Note that the stub will ensure that stack is aligned before calling _start. + // Also, for "short" on x86_64 Linux, we don't need PLATFORM_DATA, so we don't fabricate it. 
+ #[cfg(feature = "short")] + asm!( + "clc", // Not needed but packager wants it + "push rax", // Align stack + "lea rdi, [rip + __ehdr_start]", + "lea rsi, [rip + _DYNAMIC]", + "call {0}", + "call {1}", // This won't return since on Linux we invoke SYS_exitgroup in binary + sym loader::amd64_elf::relocate, + sym _start_rust, + options(noreturn) + ); } #[cfg(target_os = "windows")] @@ -234,11 +251,11 @@ extern "C" fn _start() { } /* We prevent inlining solution::main, since if the user allocates - * a large amount of stack memory there, it will be zero-initialized + * a large amount of stack memory there, it will be zero-initialized (or probed) * *before* we increase the stack limits if it is inlined into _start_rust. * This will cause stack overflow, thus we prevent it. */ -#[inline(never)] +#[cfg_attr(not(feature = "short"), inline(never))] fn _call_main() { solution::main(); } diff --git a/src/lib.rs b/src/lib.rs index 7e3035c5..888774ac 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +#![feature(rustc_private)] #![feature(fn_align)] #![feature(maybe_uninit_slice)] #![feature(maybe_uninit_uninit_array)] diff --git a/src/platform/io/reader.rs b/src/platform/io/reader.rs index 4e2f3442..0d811eda 100644 --- a/src/platform/io/reader.rs +++ b/src/platform/io/reader.rs @@ -46,42 +46,48 @@ impl Reader { pub fn try_refill(&mut self, readahead: usize) -> usize { /* readahead cannot exceed the buffer size */ assert!(readahead <= Self::BUF_LEN); - let end = self.off + readahead; - if end <= self.len { - /* data already available */ - } else { - /* secure space by discarding the already-consumed buffer contents at front */ - if end > Self::BUF_LEN { - let rem = self.len - self.off; - unsafe { core::ptr::copy(self.buf.as_ptr().add(self.off), self.buf.as_mut_ptr(), rem); } - self.len = rem; - self.off = 0; - } - unsafe { - /* Although the buffer currently falls short of what has been requested, - * it may still be possible that a full token (which is short) - * is 
available within the remains. Thus, we check if we can return - * without invoking read_stdio. This is crucial for cases where - * the standard input is a pipe, which includes the local testing - * console environment. */ - let mut white_pos = self.off; - while white_pos < self.len { - if self.buf[white_pos].assume_init() <= b' ' { - break; + unsafe { + let mut rem = self.len - self.off; + if rem < readahead { + /* Secure space by discarding the already-consumed buffer contents at front. + * Note that we expect `readahead` to be small (<100 bytes), so we unconditionally + * copy the contents to the front to reduce code size. When the default buffer size + * is used (which is >100K), this will not happen often and hence shouldn't affect + * performance by a noticeable amount. */ + let mut white_cnt = 0u32; + let mut j = self.off; + for i in 0..rem { + let c = self.buf[j].assume_init(); + if c <= b' ' { + white_cnt += 1; } - white_pos += 1; + *self.buf[i].assume_init_mut() = c; + j += 1; } - if white_pos == self.len { + + /* Although the buffer currently falls short of what has been requested, + * it may still be possible that a full token (which is short) + * is available within the remains. Thus, we check if we can return + * without invoking read_stdio. This is crucial for cases where + * the standard input is a pipe, which includes the local testing + * console environment. */ + if white_cnt == 0 { /* No whitespace has been found. We have to read. - * We try to read as much as possible at once. */ - self.len += services::read_stdio(0, MaybeUninit::slice_assume_init_mut(&mut self.buf[self.len..Self::BUF_LEN])); + * We try to read as much as possible at once. */ + rem += services::read_stdio(0, MaybeUninit::slice_assume_init_mut(&mut self.buf[rem..Self::BUF_LEN])); } /* Add a null-terminator, whether or not the read was nonsaturating (for SIMD-accelerated unsafe integer read routines). - This is safe since we spare 8 bytes at the end of the buffer. 
*/ - *self.buf[self.len].assume_init_mut() = 0u8; + * This is safe since we spare 8 bytes at the end of the buffer. */ + *self.buf[rem].assume_init_mut() = 0u8; + + /* Save the new data length */ + self.len = rem; + self.off = 0; + } else { + /* data already available */ } + rem } - self.len - self.off } pub fn try_consume(&mut self, bytes: usize) -> usize { let mut consumed = 0; @@ -228,6 +234,7 @@ impl Reader { buf } + #[cfg(not(feature = "short"))] fn noskip_u64(&mut self) -> u64 { const POW10: [u32; 9] = [1, 10, 100, 1_000, 10_000, 100_000, 1_000_000, 10_000_000, 100_000_000]; let mut out = 0; @@ -245,6 +252,20 @@ impl Reader { out += c; } } + #[cfg(feature = "short")] + fn noskip_u64(&mut self) -> u64 { + let mut n = 0; + loop { + let b = unsafe { self.buf[self.off].assume_init() }; + if b > 32 { + n *= 10; + n += (b - b'0') as u64; + self.off += 1; + } else { + break n; + } + } + } fn noskip_u128(&mut self) -> u128 { let mut n = 0; while self.off < self.len { diff --git a/src/platform/io/writer.rs b/src/platform/io/writer.rs index 129c993d..a32f2738 100644 --- a/src/platform/io/writer.rs +++ b/src/platform/io/writer.rs @@ -18,8 +18,10 @@ impl Drop for Writer { } } +#[cfg(not(feature = "short"))] #[repr(align(16))] struct B128([u8; 16]); +#[cfg(not(feature = "short"))] #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] #[target_feature(enable = "avx2")] unsafe fn cvt8(out: &mut B128, n: u32) -> usize { @@ -67,6 +69,7 @@ unsafe fn cvt8(out: &mut B128, n: u32) -> usize { _mm_store_si128(out.0.as_mut_ptr().cast(), ascii); offset } +#[cfg(not(feature = "short"))] #[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))] unsafe fn cvt8(out: &mut B128, mut n: u32) -> usize { let mut offset = 16; @@ -121,21 +124,30 @@ impl Writer { self.off += 1; } pub fn byte(&mut self, b: u8) { - self.try_flush(1); + self.try_flush(2); self.byte_unchecked(b); } // This function ensures an extra byte in the buffer to make sure that // println() can safely use 
`byte_unchecked`. - pub fn bytes(&mut self, s: &[u8]) { - let mut i = 0; - while i < s.len() { - let rem = s[i..].len().min(self.buf[self.off..].len()); - unsafe { MaybeUninit::slice_assume_init_mut(&mut self.buf[self.off..self.off + rem]).copy_from_slice(&s[i..i + rem]); } + #[cfg(not(feature = "short"))] + pub fn bytes(&mut self, mut s: &[u8]) { + while !s.is_empty() { + let rem = s.len().min(self.buf[self.off..].len()); + unsafe { MaybeUninit::slice_assume_init_mut(&mut self.buf[self.off..self.off + rem]).copy_from_slice(&s[..rem]); } self.off += rem; - i += rem; + s = &s[rem..]; self.try_flush(1); } } + // This function ensures an extra byte in the buffer to make sure that + // println() can safely use `byte_unchecked`. This is achieved by + // calling `self.try_flush(2)` (instead of `self.try_flush(1)`) in byte(). + #[cfg(feature = "short")] + pub fn bytes(&mut self, s: &[u8]) { + for x in s { + self.byte(*x); + } + } pub fn str(&mut self, s: &str) { self.bytes(s.as_bytes()); } @@ -159,6 +171,7 @@ impl Writer { self.u32(n as u32); } } + #[cfg(not(feature = "short"))] pub fn u32(&mut self, n: u32) { self.try_flush(11); let mut b128 = B128([0u8; 16]); @@ -180,6 +193,10 @@ impl Writer { unsafe { MaybeUninit::slice_assume_init_mut(&mut self.buf[self.off..self.off + len]).copy_from_slice(&b128.0[off..]); } self.off += len; } + #[cfg(feature = "short")] + pub fn u32(&mut self, n: u32) { + self.u64(n as u64) + } pub fn i64(&mut self, n: i64) { if n < 0 { self.byte(b'-'); @@ -188,6 +205,7 @@ impl Writer { self.u64(n as u64); } } + #[cfg(not(feature = "short"))] pub fn u64(&mut self, n: u64) { self.try_flush(21); let mut hi128 = B128([0u8; 16]); @@ -225,6 +243,24 @@ impl Writer { unsafe { MaybeUninit::slice_assume_init_mut(&mut self.buf[self.off..self.off + len]).copy_from_slice(&lo128.0[looff..]); } self.off += len; } + #[cfg(feature = "short")] + pub fn u64(&mut self, mut n: u64) { + self.try_flush(21); + let mut i = self.off; + loop { + self.buf[i].write(b'0' + (n % 
10) as u8); + n /= 10; + i += 1; + if n == 0 { break; } + } + let mut j = self.off; + self.off = i; + while j < i { + i -= 1; + unsafe { MaybeUninit::slice_assume_init_mut(&mut self.buf).swap(j, i); } + j += 1; + } + } pub fn i128(&mut self, n: i128) { if n < 0 { self.byte(b'-'); diff --git a/src/platform/loader/amd64_elf.rs b/src/platform/loader/amd64_elf.rs index ebd4efe4..f8ff2bce 100644 --- a/src/platform/loader/amd64_elf.rs +++ b/src/platform/loader/amd64_elf.rs @@ -49,6 +49,8 @@ There are currently three files licensed under GPLv2+: #![allow(clippy::cmp_null)] +use core::mem::MaybeUninit; + // Dynamic section entry types const DT_RELA: u64 = 7; const DT_RELASZ: u64 = 8; @@ -72,35 +74,37 @@ struct Elf64Rela { } -unsafe fn find_tag(mut ptr: *const Elf64Dyn, tag: u64) -> *const Elf64Dyn { - while (*ptr).d_tag != 0 { - if (*ptr).d_tag == tag { - return ptr; - } - ptr = ptr.add(1); - } - core::ptr::null() -} - pub unsafe extern "sysv64" fn relocate( addr_image_base: u64, addr_dynamic_section: u64 ) { - let ptr_dyn: *const Elf64Dyn = addr_dynamic_section as *const Elf64Dyn; - let ptr_rela = find_tag(ptr_dyn, DT_RELA); - let ptr_relasz = find_tag(ptr_dyn, DT_RELASZ); - let ptr_relaent = find_tag(ptr_dyn, DT_RELAENT); - - /* do not use .is_null() since the method itself requires relocations, at least in debug mode */ - if ptr_rela == core::ptr::null() || - ptr_relasz == core::ptr::null() || - ptr_relaent == core::ptr::null() { + let mut ptr_dyn: *const Elf64Dyn = addr_dynamic_section as *const Elf64Dyn; + let mut ptr_rela = 0; + let mut relasz = MaybeUninit::<u64>::uninit(); + let mut relaent = MaybeUninit::<u64>::uninit(); + loop { + match (*ptr_dyn).d_tag { + 0 => { break; } + DT_RELA => { ptr_rela = addr_image_base + (*ptr_dyn).d_val_or_ptr; }, + DT_RELASZ => { relasz.write((*ptr_dyn).d_val_or_ptr); }, + DT_RELAENT => { relaent.write((*ptr_dyn).d_val_or_ptr); }, + _ => () + } + ptr_dyn = ptr_dyn.add(1); + } + + /* 1) Do not use .is_null() since the method itself requires 
relocations, at least in debug mode. + * 2) When DT_RELA is present, the other entries DT_RELASZ and DT_RELAENT must exist. + * Source: https://docs.oracle.com/cd/E19683-01/817-3677/chapter6-42444/index.html + * ("This element requires the DT_RELASZ and DT_RELAENT elements also be present.") + */ + if ptr_rela == 0 { return; } + relasz.write(relasz.assume_init() + ptr_rela); - let mut j = 0; - while j < (*ptr_relasz).d_val_or_ptr { - let pst_rela = (addr_image_base + (*ptr_rela).d_val_or_ptr + j) as *mut Elf64Rela; + while ptr_rela < relasz.assume_init() { + let pst_rela = ptr_rela as *mut Elf64Rela; let ul_offset = (*pst_rela).r_offset; let ul_info = (*pst_rela).r_info; let l_addend = (*pst_rela).r_addend; @@ -114,6 +118,6 @@ pub unsafe extern "sysv64" fn relocate( /* not implemented */ panic!(); } - j += (*ptr_relaent).d_val_or_ptr; + ptr_rela += relaent.assume_init(); } } \ No newline at end of file diff --git a/src/platform/mod.rs b/src/platform/mod.rs index d72b2d3e..46a3b091 100644 --- a/src/platform/mod.rs +++ b/src/platform/mod.rs @@ -18,6 +18,7 @@ pub fn init(platform_data_by_loader: usize) { unsafe { match pd.env_id { #[cfg(not(target_arch = "wasm32"))] + #[cfg(not(feature = "short"))] services::ENV_ID_WINDOWS => { /* use OS APIs directly */ os::windows::init(); @@ -34,16 +35,24 @@ pub fn init(platform_data_by_loader: usize) { }, _ => { /* use loader services for allocation */ + #[cfg(not(feature = "short"))] os::unknown::init(); + #[cfg(feature = "short")] + unreachable!(); } } } } #[cfg(not(test))] pub fn try_exit() { - let pd = services::platform_data(); - if pd.env_id == services::ENV_ID_LINUX { - #[cfg(not(target_arch = "wasm32"))] + #[cfg(not(all(target_arch = "x86_64", feature = "short")))] { + let pd = services::platform_data(); + if pd.env_id == services::ENV_ID_LINUX { + #[cfg(not(target_arch = "wasm32"))] + unsafe { os::linux::syscall::exit_group(services::get_exit_status() as usize); } + } + } + #[cfg(all(target_arch = "x86_64", feature = 
"short"))] { unsafe { os::linux::syscall::exit_group(services::get_exit_status() as usize); } } } diff --git a/src/platform/os/linux.rs b/src/platform/os/linux.rs index 2eb2c528..b101f589 100644 --- a/src/platform/os/linux.rs +++ b/src/platform/os/linux.rs @@ -1,4 +1,4 @@ -use super::super::{allocator, services}; +use super::super::allocator; use super::super::malloc::{dlmalloc, dlmalloc_linux}; @@ -42,6 +42,60 @@ pub mod syscall { pub rlim_max: usize, } + #[cfg(target_arch = "x86_64")] + #[inline(always)] + pub unsafe fn syscall1( + call_id: usize, + arg0: usize, + ) -> usize { + let out; + asm!( + "syscall", + in("rax") call_id, + in("rdi") arg0, + lateout("rax") out, + out("rcx") _, + out("r11") _ + ); + out + } + #[cfg(target_arch = "x86")] + pub unsafe fn syscall1( + call_id: usize, + arg0: usize, + ) -> usize { + syscall(call_id, arg0, 0, 0, 0, 0, 0) + } + #[cfg(target_arch = "x86_64")] + #[inline(always)] + pub unsafe fn syscall3( + call_id: usize, + arg0: usize, + arg1: usize, + arg2: usize, + ) -> usize { + let out; + asm!( + "syscall", + in("rax") call_id, + in("rdi") arg0, + in("rsi") arg1, + in("rdx") arg2, + lateout("rax") out, + out("rcx") _, + out("r11") _ + ); + out + } + #[cfg(target_arch = "x86")] + unsafe extern "cdecl" fn syscall3( + call_id: usize, + arg0: usize, + arg1: usize, + arg2: usize, + ) -> usize { + syscall(call_id, arg0, arg1, arg2, 0, 0, 0) + } #[cfg(target_arch = "x86_64")] #[inline(always)] pub unsafe fn syscall( @@ -149,7 +203,7 @@ pub mod syscall { buf: *mut u8, count: usize ) -> usize { - syscall(id_list::READ, fd, buf as usize, count, 0, 0, 0) + syscall3(id_list::READ, fd, buf as usize, count) } #[inline(always)] pub unsafe fn write( @@ -157,13 +211,13 @@ pub mod syscall { buf: *const u8, count: usize ) -> usize { - syscall(id_list::WRITE, fd, buf as usize, count, 0, 0, 0) + syscall3(id_list::WRITE, fd, buf as usize, count) } #[inline(always)] pub unsafe fn exit_group( status: usize ) -> ! 
{ - syscall(id_list::EXIT_GROUP, status, 0, 0, 0, 0, 0); + syscall1(id_list::EXIT_GROUP, status); unreachable!() } #[inline(always)] @@ -209,6 +263,7 @@ unsafe fn dlmalloc_realloc(ptr: *mut u8, old_size: usize, old_align: usize, new_ } } +#[cfg(not(all(feature = "short", target_os = "linux")))] #[cfg(target_arch = "x86_64")] mod services_override { #[inline(always)] @@ -220,6 +275,7 @@ mod services_override { super::syscall::write(fd, buf, count) } } +#[cfg(not(all(feature = "short", target_os = "linux")))] #[cfg(target_arch = "x86")] mod services_override { #[inline(always)] @@ -241,13 +297,16 @@ pub unsafe fn init() { * by the runtime startup code (e.g., glibc). * Thus, instead of parsing the ELF section, we just invoke * the kernel APIs directly. */ - let pd = services::platform_data(); - if pd.env_flags & services::ENV_FLAGS_NATIVE != 0 { - let mut rlim: syscall::RLimit = Default::default(); - let ret = syscall::getrlimit(syscall::RLIMIT_STACK, &mut rlim); - if ret == 0 && rlim.rlim_cur < 256 * 1024 * 1024 { - rlim.rlim_cur = 256 * 1024 * 1024; - syscall::setrlimit(syscall::RLIMIT_STACK, &rlim); + #[cfg(not(feature = "short"))] { + use super::super::services; + let pd = services::platform_data(); + if pd.env_flags & services::ENV_FLAGS_NATIVE != 0 { + let mut rlim: syscall::RLimit = Default::default(); + let ret = syscall::getrlimit(syscall::RLIMIT_STACK, &mut rlim); + if ret == 0 && rlim.rlim_cur < 256 * 1024 * 1024 { + rlim.rlim_cur = 256 * 1024 * 1024; + syscall::setrlimit(syscall::RLIMIT_STACK, &rlim); + } } } @@ -257,6 +316,11 @@ pub unsafe fn init() { dlmalloc_dealloc, dlmalloc_realloc, ); - services::install_single_service(5, services_override::svc_read_stdio as usize); - services::install_single_service(6, services_override::svc_write_stdio as usize); + + /* "short" on "Linux" will use syscalls directly to reduce code size */ + #[cfg(not(all(feature = "short", target_os = "linux")))] { + use super::super::services; + services::install_single_service(5, 
services_override::svc_read_stdio as usize); + services::install_single_service(6, services_override::svc_write_stdio as usize); + } } \ No newline at end of file diff --git a/src/platform/services.rs b/src/platform/services.rs index 6325b290..ba271538 100644 --- a/src/platform/services.rs +++ b/src/platform/services.rs @@ -27,7 +27,6 @@ pub const ENV_ID_LINUX: u64 = 2; pub const ENV_ID_WASM: u64 = 3; pub const ENV_FLAGS_LINUX_STYLE_CHKSTK: u64 = 0x0001; // disables __chkstk in binaries compiled with Windows target pub const ENV_FLAGS_NATIVE: u64 = 0x0002; // indicates the binary is running without the loader -pub const ENV_FLAGS_BREAKPOINT: u64 = 0x0004; // breakpoint at entrypoint or startup routine #[repr(C, packed)] #[allow(non_snake_case)] @@ -40,66 +39,64 @@ pub struct PlatformData { pub fn_table: [usize; 7], } -#[inline(always)] pub fn install(platform_data_by_loader: usize) { unsafe { PLATFORM_DATA = platform_data_by_loader; } } -#[inline(always)] unsafe fn addr(fn_id: usize) -> usize { core::ptr::read((PLATFORM_DATA + 32 + fn_id * core::mem::size_of::<usize>()) as *mut usize) } -#[inline(always)] pub unsafe fn install_single_service(fn_id: usize, fn_ptr: usize) { core::ptr::write((PLATFORM_DATA + 32 + fn_id * core::mem::size_of::<usize>()) as *mut usize, fn_ptr) } -//#[inline(always)] pub unsafe fn alloc(size: usize, align: usize) -> *mut u8 { let fn_ptr: native_func::A = core::mem::transmute(addr(1)); fn_ptr(size, align) } -//#[inline(always)] pub unsafe fn alloc_zeroed(size: usize, align: usize) -> *mut u8 { let fn_ptr: native_func::A = core::mem::transmute(addr(2)); fn_ptr(size, align) } -//#[inline(always)] pub unsafe fn dealloc(ptr: *mut u8, size: usize, align: usize) { let fn_ptr: native_func::B = core::mem::transmute(addr(3)); fn_ptr(ptr, size, align) } -//#[inline(always)] pub unsafe fn realloc(ptr: *mut u8, old_size: usize, old_align: usize, new_size: usize) -> *mut u8 { let fn_ptr: native_func::C = core::mem::transmute(addr(4)); fn_ptr(ptr, old_size, 
old_align, new_size) } -#[inline(always)] pub fn read_stdio(fd: usize, buf: &mut [u8]) -> usize { + #[cfg(not(all(feature = "short", target_os = "linux")))] unsafe { let fn_ptr: native_func::E = core::mem::transmute(addr(5)); fn_ptr(fd, buf.as_mut_ptr(), buf.len()) } + #[cfg(all(feature = "short", target_os = "linux"))] + unsafe { + super::os::linux::syscall::read(fd, buf.as_mut_ptr(), buf.len()) + } } -#[inline(always)] pub fn write_stdio(fd: usize, buf: &[u8]) -> usize { + #[cfg(not(all(feature = "short", target_os = "linux")))] unsafe { let fn_ptr: native_func::F = core::mem::transmute(addr(6)); fn_ptr(fd, buf.as_ptr(), buf.len()) } + #[cfg(all(feature = "short", target_os = "linux"))] + unsafe { + super::os::linux::syscall::write(fd, buf.as_ptr(), buf.len()) + } } -#[inline(always)] pub fn platform_data() -> PlatformData { unsafe { let pd: *const PlatformData = PLATFORM_DATA as *const PlatformData; core::ptr::read_unaligned(pd) } } -#[inline(always)] pub fn get_exit_status() -> i32 { unsafe { EXIT_CODE } } -#[inline(always)] pub fn set_exit_status(code: i32) { unsafe { EXIT_CODE = code; } } \ No newline at end of file diff --git a/src/solution.rs b/src/solution.rs index c8e6f2df..39565a53 100644 --- a/src/solution.rs +++ b/src/solution.rs @@ -1,11 +1,8 @@ use basm::platform::io::{Reader, Writer, Print}; - -#[cfg_attr(not(debug_assertions), inline(always))] -#[cfg_attr(debug_assertions, inline(never))] pub fn main() { let mut reader: Reader = Default::default(); let mut writer: Writer = Default::default(); let a = reader.i64(); let b = reader.i64(); writer.println(a + b); -} +} \ No newline at end of file diff --git a/tests/boj_2587.in b/tests/boj_2587.in new file mode 100644 index 00000000..3fcb0955 --- /dev/null +++ b/tests/boj_2587.in @@ -0,0 +1,5 @@ +10 +40 +30 +60 +30 \ No newline at end of file diff --git a/tests/boj_2587.out b/tests/boj_2587.out new file mode 100644 index 00000000..f9e5d27b --- /dev/null +++ b/tests/boj_2587.out @@ -0,0 +1,2 @@ +34 +30 \ 
No newline at end of file diff --git a/tests/boj_2587.rs b/tests/boj_2587.rs new file mode 100644 index 00000000..02320531 --- /dev/null +++ b/tests/boj_2587.rs @@ -0,0 +1,20 @@ +use basm::platform::io::{Reader, Writer, Print}; +pub fn main() { + let mut reader = Reader::<128>::new(); + let mut writer = Writer::<128>::new(); + let mut x = [0; 5]; + let mut sum = 0; + for i in 0..5 { + x[i] = reader.u32(); + sum += x[i]; + } + for i in 0..5 { + for j in i+1..5 { + if x[i] > x[j] { + x.swap(i, j); + } + } + } + writer.println(sum / 5); + writer.println(x[2]); +} \ No newline at end of file