Skip to content

Commit 978ad33

Browse files
committed
Add support for different encodings
Add an '--encoding' flag to the CLI options to allow setting a custom encoding. When no encoding is passed, attempt to read the encoding from the BOM, and treat the input as UTF-8 if all else fails. This uses the encoding_rs_io crate.
1 parent 862f5e7 commit 978ad33

10 files changed

+231
-101
lines changed

Cargo.lock

+68-48
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ structopt = "0.3"
1818
indexmap = "1.6"
1919
chrono = "0.4"
2020
glob = "0.3.0"
21+
encoding_rs = "0.8"
22+
encoding_rs_io = "0.1"
2123
codespan-reporting = "0.11.1"
2224

2325

examples/encoding.st

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
PROGRAM ä
2+
(* Cömment *)
3+
END_PROGRAM

examples/encoding_utf_16.st

74 Bytes
Binary file not shown.

examples/encoding_win.st

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
PROGRAM ä
2+
(* Cömment *)
3+
END_PROGRAM

src/cli.rs

+23
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
// Copyright (c) 2021 Ghaith Hachem and Mathias Rieder
22
use std::path::Path;
3+
use encoding_rs::Encoding;
34
use structopt::{clap::ArgGroup, StructOpt};
45

56
#[derive(PartialEq, Debug)]
@@ -68,6 +69,14 @@ pub struct CompileParameters {
6869
)]
6970
pub target: Option<String>,
7071

72+
#[structopt(
73+
long,
74+
name = "encoding",
75+
help = "The file encoding used to read the input-files, as defined by the Encoding Standard",
76+
parse(try_from_str = parse_encoding),
77+
)]
78+
pub encoding: Option<&'static Encoding>,
79+
7180
#[structopt(
7281
name = "input-files",
7382
help = "Read input from <input-files>, may be a glob expression like 'src/**/*' or a sequence of files",
@@ -78,7 +87,12 @@ pub struct CompileParameters {
7887
pub input: Vec<String>,
7988
}
8089

90+
fn parse_encoding(encoding : &str) -> Result<&'static Encoding, String> {
91+
Encoding::for_label(encoding.as_bytes()).ok_or(format!("Unknown encoding {}", encoding))
92+
}
93+
8194
impl CompileParameters {
95+
8296
pub fn parse(args: Vec<String>) -> Result<CompileParameters, ParameterError> {
8397
CompileParameters::from_iter_safe(args)
8498
}
@@ -130,6 +144,7 @@ impl CompileParameters {
130144
#[cfg(test)]
131145
mod cli_tests {
132146
use super::{CompileParameters, FormatOption, ParameterError};
147+
use pretty_assertions::assert_eq;
133148
use structopt::clap::ErrorKind;
134149

135150
fn expect_argument_error(args: Vec<String>, expected_error_kind: ErrorKind) {
@@ -255,6 +270,14 @@ mod cli_tests {
255270
assert_eq!(parameters.output_format_or_default(), super::DEFAULT_FORMAT);
256271
}
257272

273+
#[test]
fn encoding_resolution() {
    // The label "cp1252" is expected to resolve to windows-1252.
    let via_cp_label = CompileParameters::parse(vec_of_strings!(
        "input.st",
        "--ir",
        "--encoding",
        "cp1252"
    ))
    .unwrap();
    assert_eq!(via_cp_label.encoding, Some(encoding_rs::WINDOWS_1252));

    // The label "windows-1252" is expected to resolve to the same encoding.
    let via_windows_label = CompileParameters::parse(vec_of_strings!(
        "input.st",
        "--ir",
        "--encoding",
        "windows-1252"
    ))
    .unwrap();
    assert_eq!(via_windows_label.encoding, Some(encoding_rs::WINDOWS_1252));
}
280+
258281
#[test]
259282
fn valid_output_formats() {
260283
let parameters = CompileParameters::parse(vec_of_strings!("input.st", "--ir")).unwrap();

0 commit comments

Comments
 (0)