diff --git a/.vscode/launch.json b/.vscode/launch.json index c80edf54d4..603c91a450 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -4,6 +4,7 @@ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ + { "type": "lldb", "request": "launch", @@ -38,7 +39,7 @@ "kind": "bin" } }, - "args": ["examples/MainProg.st"], + "args": ["--ir", "examples/encoding.st"], "cwd": "${workspaceFolder}" }, { diff --git a/Cargo.lock b/Cargo.lock index 28bd50d0aa..b688f5239a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "aho-corasick" -version = "0.7.15" +version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" dependencies = [ "memchr", ] @@ -39,9 +39,9 @@ checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" [[package]] name = "beef" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6736e2428df2ca2848d846c43e88745121a6654696e349ce0054a420815a7409" +checksum = "bed554bd50246729a1ec158d08aa3235d1b69d94ad120ebe187e28894787e736" [[package]] name = "bitflags" @@ -51,9 +51,9 @@ checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" [[package]] name = "cc" -version = "1.0.67" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3c69b077ad434294d3ce9f1f6143a2a4b89a8a2d54ef813d85003a4fd1137fd" +checksum = "e70cc2f62c6ce1868963827bd677764c62d07c3d9a3e1fb1177ee1a9ab199eb2" [[package]] name = "cfg-if" @@ -121,6 +121,24 @@ version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" +[[package]] +name = "encoding_rs" +version = "0.8.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80df024fbc5ac80f87dfef0d9f5209a252f2a497f7f42944cff24d8253cac065" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "encoding_rs_io" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cc3c5651fb62ab8aa3103998dade57efdd028544bd300516baa31840c252a83" +dependencies = [ + "encoding_rs", +] + [[package]] name = "fnv" version = "1.0.7" @@ -135,33 +153,33 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" [[package]] name = "hashbrown" -version = "0.9.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" [[package]] name = "heck" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" dependencies = [ "unicode-segmentation", ] [[package]] name = "hermit-abi" -version = "0.1.18" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "322f4de77956e22ed0e5032c359a0f1273f1f7f0d79bfa3b8ffbc730d7fbcc5c" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" dependencies = [ "libc", ] [[package]] name = "indexmap" -version = "1.6.2" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "824845a0bf897a9042383849b02c1bc219c2383772efcd5c6f9766fa4b81aef3" +checksum = "bc633605454125dec4b66843673f01c7df2b89479b32e0ed634e43a91cff62a5" dependencies = [ "autocfg", "hashbrown", @@ -170,7 +188,7 @@ dependencies = [ [[package]] name = "inkwell" version = "0.1.0" -source = "git+https://github.com/TheDan64/inkwell?branch=master#f768691fccb04fe262a6ccf22c215657dc08de98" +source = "git+https://github.com/TheDan64/inkwell?branch=master#41857f909dad274b5c9fd004bc5b1f9f3e37e3b8" dependencies = [ "either", "inkwell_internals", @@ -184,7 +202,7 @@ dependencies = [ [[package]] name = "inkwell_internals" version = "0.3.0" -source = "git+https://github.com/TheDan64/inkwell?branch=master#f768691fccb04fe262a6ccf22c215657dc08de98" +source = "git+https://github.com/TheDan64/inkwell?branch=master#41857f909dad274b5c9fd004bc5b1f9f3e37e3b8" dependencies = [ "proc-macro2", "quote", @@ -193,9 +211,9 @@ dependencies = [ [[package]] name = "instant" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec" +checksum = "bee0328b1209d157ef001c94dd85b4f8f64139adb0eac2659f4b08382b2f474d" dependencies = [ "cfg-if", ] @@ -208,9 +226,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.92" +version = "0.2.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56d855069fafbb9b344c0f962150cd2c1187975cb1c22c1522c240d8c4986714" +checksum = "320cfe77175da3a483efed4bc0adc1968ca050b098ce4f2f1c13a56626128790" [[package]] name = "llvm-sys" @@ -227,9 +245,9 @@ dependencies = [ [[package]] name = "lock_api" -version = "0.4.2" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd96ffd135b2fd7b973ac026d28085defbe8983df057ced3eb4f2130b0831312" +checksum = "0382880606dff6d15c9476c416d18690b72742aa7b605bb6dd6ec9030fbf07eb" dependencies = [ "scopeguard", ] @@ -260,9 +278,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.3.4" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" +checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" [[package]] name = "num-integer" @@ -285,9 +303,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.7.2" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af8b08b04175473088b46763e51ee54da5f9a164bc162f615b91bc179dbf15a3" +checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56" [[package]] name = "output_vt100" @@ -370,9 +388,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.24" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" +checksum = "5c7ed8b8c7b886ea3ed7dde405212185f423ab44682667c8c6dd14aa1d9f6612" dependencies = [ "unicode-xid", ] @@ -388,18 +406,18 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.2.5" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94341e4e44e24f6b591b59e47a8a027df12e008d73fd5672dbea9cc22f4507d9" +checksum = "5ab49abadf3f9e1c4bc499e8845e152ad87d2ad2d30371841171169e9d75feee" dependencies = [ "bitflags", ] [[package]] name = "regex" -version = "1.4.5" +version = "1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "957056ecddbeba1b26965114e191d2e8589ce74db242b6ea25fc4062427a5c19" +checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" dependencies = [ "aho-corasick", "memchr", @@ -408,9 +426,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.23" +version = "0.6.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24d5f089152e60f62d28b835fbff2cd2e8dc0baf1ac13343bef92ab7eed84548" +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" [[package]] name = "rusty" @@ -418,6 +436,8 @@ version = "0.2.0" dependencies = [ "chrono", "codespan-reporting", + "encoding_rs", + "encoding_rs_io", "glob", "indexmap", "inkwell", @@ -465,9 +485,9 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" [[package]] name = "structopt" -version = "0.3.21" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5277acd7ee46e63e5168a80734c9f6ee81b1367a7d8772a2d765df2a3705d28c" +checksum = "69b041cdcb67226aca307e6e7be44c8806423d83e018bd662360a93dabce4d71" dependencies = [ "clap", "lazy_static", @@ -476,9 +496,9 @@ dependencies = [ [[package]] name = "structopt-derive" -version = "0.4.14" +version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ba9cdfda491b814720b6b06e0cac513d922fc407582032e8706e9f137976f90" +checksum = "7813934aecf5f51a54775e00068c237de98489463968231a51746bbbc03f9c10" dependencies = [ "heck", "proc-macro-error", @@ -489,9 +509,9 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.67" +version = "1.0.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6498a9efc342871f91cc2d0d694c674368b4ceb40f62b65a7a08c3792935e702" +checksum = "1873d832550d4588c3dbc20f01361ab00bfe741048f71e3fecf145a7cc18b29c" dependencies = [ "proc-macro2", "quote", @@ -518,18 +538,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.24" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0f4a65597094d4483ddaed134f409b2cb7c1beccf25201a9f73c719254fa98e" +checksum = "93119e4feac1cbe6c798c34d3a53ea0026b0b1de6a120deef895137c0529bfe2" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.24" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7765189610d8241a44529806d6fd1f2e0a08734313a35d5b3a556f92b381f3c0" +checksum = "060d69a0afe7796bf42e9e2ff91f5ee691fb15c53d38b4b62a9a53eb23164745" dependencies = [ "proc-macro2", "quote", @@ -555,9 +575,9 @@ checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c" [[package]] name = "unicode-segmentation" -version = "1.7.1" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb0d2e7be6ae3a5fa87eed5fb451aff96f2573d2694942e40543ae0bbe19c796" +checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b" [[package]] name = "unicode-width" @@ -567,9 +587,9 @@ checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" [[package]] name = "unicode-xid" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" [[package]] name = "utf8-ranges" diff --git a/Cargo.toml b/Cargo.toml index 9ab1d76255..7866b15f28 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,8 @@ structopt = "0.3" indexmap = "1.6" chrono = "0.4" glob = "0.3.0" +encoding_rs = "0.8" +encoding_rs_io = "0.1" codespan-reporting = "0.11.1" diff --git a/examples/program_with_variables.st b/examples/program_with_variables.st index 8b5aacf931..55f8df2a49 100644 --- a/examples/program_with_variables.st +++ b/examples/program_with_variables.st @@ -1,6 +1,8 @@ PROGRAM hello VAR - x : INT; + x : INT y : BOOL; END_VAR + IF x = 2 THEN + x := 3 END_PROGRAM diff --git a/src/cli.rs b/src/cli.rs index 2d062d82ef..a91a0c5a89 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1,4 +1,5 @@ // Copyright (c) 2021 Ghaith Hachem and Mathias Rieder +use encoding_rs::Encoding; use std::path::Path; use structopt::{clap::ArgGroup, StructOpt}; @@ -68,6 +69,14 @@ pub struct CompileParameters { )] pub target: Option, + #[structopt( + long, + name = "encoding", + help = "The file encoding used to read the input-files, as defined by the Encoding Standard", + parse(try_from_str = parse_encoding), + )] + pub encoding: Option<&'static Encoding>, + #[structopt( name = "input-files", help = "Read input from , may be a glob expression like 'src/**/*' or a sequence of files", @@ -78,6 +87,10 @@ pub struct CompileParameters { pub input: Vec, } +fn parse_encoding(encoding: &str) -> Result<&'static Encoding, String> { + Encoding::for_label(encoding.as_bytes()).ok_or(format!("Unknown encoding {}", encoding)) +} + impl CompileParameters { pub fn parse(args: Vec) -> Result { CompileParameters::from_iter_safe(args) @@ -130,6 +143,7 @@ impl CompileParameters { #[cfg(test)] mod cli_tests { use super::{CompileParameters, FormatOption, ParameterError}; + use pretty_assertions::assert_eq; use structopt::clap::ErrorKind; fn expect_argument_error(args: Vec, expected_error_kind: ErrorKind) { @@ -255,6 +269,30 @@ mod cli_tests { assert_eq!(parameters.output_format_or_default(), super::DEFAULT_FORMAT); } + #[test] + fn encoding_resolution() { + let parameters = + CompileParameters::parse(vec_of_strings!("input.st", "--ir", "--encoding", "cp1252")) + .unwrap(); + assert_eq!(parameters.encoding, Some(encoding_rs::WINDOWS_1252)); + let parameters = CompileParameters::parse(vec_of_strings!( + "input.st", + "--ir", + "--encoding", + "windows-1252" + )) + .unwrap(); + assert_eq!(parameters.encoding, Some(encoding_rs::WINDOWS_1252)); + } + + #[test] + fn invalid_encoding_resolution() { + expect_argument_error( + vec_of_strings!("input.st", "--ir", "--encoding", "invalid"), + ErrorKind::ValueValidation, + ); + } + #[test] fn valid_output_formats() { let parameters = CompileParameters::parse(vec_of_strings!("input.st", "--ir")).unwrap(); diff --git a/src/compile_error.rs b/src/compile_error.rs index 2137729d1d..b2082b56b1 100644 --- a/src/compile_error.rs +++ b/src/compile_error.rs @@ -35,7 +35,10 @@ pub enum CompileError { }, #[error("Cannot read File {path:}: {reason:}")] - IoError { path: String, reason: String }, + IoReadError { path: String, reason: String }, + + #[error("Cannot write File {path:}: {reason:}")] + IoWriteError { path: String, reason: String }, } impl From for CompileError { @@ -79,9 +82,12 @@ impl CompileError { CompileError::CodeGenError { message, location } } - /// creates a CompileError:IoError with the given parameters - pub fn io_error(path: String, reason: String) -> CompileError { - CompileError::IoError { path, reason } + pub fn io_read_error(path: String, reason: String) -> CompileError { + CompileError::IoReadError { path, reason } + } + + pub fn io_write_error(path: String, reason: String) -> CompileError { + CompileError::IoWriteError { path, reason } } pub fn no_type_associated(type_name: &str, location: SourceRange) -> CompileError { diff --git a/src/lib.rs b/src/lib.rs index 630fd109c5..4600350fd6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,6 +17,7 @@ //! [`ST`]: https://en.wikipedia.org/wiki/Structured_text //! [`IEC61131-3`]: https://en.wikipedia.org/wiki/IEC_61131-3 //! [`IR`]: https://llvm.org/docs/LangRef.html +use std::fs; use std::path::Path; use ast::{PouType, SourceRange}; @@ -25,12 +26,15 @@ use codespan_reporting::files::SimpleFiles; use codespan_reporting::term::termcolor::{ColorChoice, StandardStream}; use codespan_reporting::term::{self, Chars, Styles}; use compile_error::CompileError; +use encoding_rs::Encoding; +use encoding_rs_io::DecodeReaderBytesBuilder; use index::Index; use inkwell::context::Context; use inkwell::targets::{ CodeModel, FileType, InitializationConfig, RelocMode, Target, TargetMachine, TargetTriple, }; use parser::ParsedAst; +use std::{fs::File, io::Read}; use crate::ast::CompilationUnit; mod ast; @@ -99,17 +103,35 @@ impl Diagnostic { } } -pub type Sources<'a> = [&'a dyn SourceContainer]; - /// SourceContainers offer source-code to be compiled via the load_source function. /// Furthermore it offers a location-String used when reporting diagnostics. pub trait SourceContainer { /// loads and returns the SourceEntry that contains the SourceCode and the path it was loaded from - fn load_source(&self) -> Result; + fn load_source(self, encoding: Option<&'static Encoding>) -> Result; /// returns the location of this source-container. Used when reporting diagnostics. fn get_location(&self) -> &str; } +pub struct FilePath { + pub path: String, +} + +impl SourceContainer for FilePath { + fn load_source(self, encoding: Option<&'static Encoding>) -> Result { + let mut file = File::open(&self.path).map_err(|err| err.to_string())?; + let source = create_source_code(&mut file, encoding)?; + + Ok(SourceCode { + source, + path: self.path, + }) + } + + fn get_location(&self) -> &str { + &self.path + } +} + /// The SourceCode unit is the smallest unit of compilation that can be passed to the compiler #[derive(Clone)] pub struct SourceCode { @@ -119,29 +141,37 @@ pub struct SourceCode { pub path: String, } -impl SourceCode { - /// casts the SourceCode into a SourceContainer - pub fn as_source_container(&self) -> &dyn SourceContainer { - self - } -} - /// tests can provide a SourceCode directly impl SourceContainer for SourceCode { - fn load_source(&self) -> Result { - Ok(self.clone()) + fn load_source(self, _: Option<&'static Encoding>) -> Result { + Ok(self) } fn get_location(&self) -> &str { - self.path.as_str() + &self.path } } +fn create_source_code( + reader: &mut T, + encoding: Option<&'static Encoding>, +) -> Result { + let mut buffer = String::new(); + let mut decoder = DecodeReaderBytesBuilder::new() + .encoding(encoding) + .build(reader); + decoder + .read_to_string(&mut buffer) + .map_err(|err| format!("{:}", err))?; + Ok(buffer) +} + /// /// Compiles the given source into an object file and saves it in output /// -fn compile_to_obj( - sources: &Sources, +fn compile_to_obj( + sources: Vec, + encoding: Option<&'static Encoding>, output: &str, reloc: RelocMode, triple: Option, @@ -168,7 +198,7 @@ fn compile_to_obj( .unwrap(); let c = Context::create(); - let code_generator = compile_module(&c, sources)?; + let code_generator = compile_module(&c, sources, encoding)?; machine .write_to_file(&code_generator.module, FileType::Object, Path::new(output)) .unwrap(); @@ -184,12 +214,13 @@ fn compile_to_obj( /// * `output` - the location on disk to save the output /// * `target` - an optional llvm target triple /// If not provided, the machine's triple will be used. -pub fn compile_to_static_obj( - sources: &Sources, +pub fn compile_to_static_obj( + sources: Vec, + encoding: Option<&'static Encoding>, output: &str, target: Option, ) -> Result<(), CompileError> { - compile_to_obj(sources, output, RelocMode::Default, target) + compile_to_obj(sources, encoding, output, RelocMode::Default, target) } /// Compiles a given source string to a shared position independent object and saves the output. @@ -200,12 +231,13 @@ pub fn compile_to_static_obj( /// * `output` - the location on disk to save the output /// * `target` - an optional llvm target triple /// If not provided, the machine's triple will be used. -pub fn compile_to_shared_pic_object( - sources: &Sources, +pub fn compile_to_shared_pic_object( + sources: Vec, + encoding: Option<&'static Encoding>, output: &str, target: Option, ) -> Result<(), CompileError> { - compile_to_obj(sources, output, RelocMode::PIC, target) + compile_to_obj(sources, encoding, output, RelocMode::PIC, target) } /// Compiles a given source string to a dynamic non PIC object and saves the output. @@ -216,12 +248,13 @@ pub fn compile_to_shared_pic_object( /// * `output` - the location on disk to save the output /// * `target` - an optional llvm target triple /// If not provided, the machine's triple will be used. -pub fn compile_to_shared_object( - sources: &Sources, +pub fn compile_to_shared_object( + sources: Vec, + encoding: Option<&'static Encoding>, output: &str, target: Option, ) -> Result<(), CompileError> { - compile_to_obj(sources, output, RelocMode::DynamicNoPic, target) + compile_to_obj(sources, encoding, output, RelocMode::DynamicNoPic, target) } /// @@ -231,10 +264,14 @@ pub fn compile_to_shared_object( /// /// * `sources` - the source to be compiled /// * `output` - the location on disk to save the output -pub fn compile_to_bitcode(sources: &Sources, output: &str) -> Result<(), CompileError> { +pub fn compile_to_bitcode( + sources: Vec, + encoding: Option<&'static Encoding>, + output: &str, +) -> Result<(), CompileError> { let path = Path::new(output); let c = Context::create(); - let code_generator = compile_module(&c, sources)?; + let code_generator = compile_module(&c, sources, encoding)?; code_generator.module.write_bitcode_to_path(path); Ok(()) } @@ -245,10 +282,16 @@ pub fn compile_to_bitcode(sources: &Sources, output: &str) -> Result<(), Compile /// # Arguments /// /// * `sources` - the source to be compiled -pub fn compile_to_ir(sources: &Sources) -> Result { +pub fn compile_to_ir( + sources: Vec, + encoding: Option<&'static Encoding>, + output: &str, +) -> Result<(), CompileError> { let c = Context::create(); - let code_gen = compile_module(&c, sources)?; - Ok(code_gen.module.print_to_string().to_string()) + let code_gen = compile_module(&c, sources, encoding)?; + let ir = code_gen.module.print_to_string().to_string(); + fs::write(output, ir) + .map_err(|err| CompileError::io_write_error(output.into(), err.to_string())) } /// @@ -258,18 +301,20 @@ pub fn compile_to_ir(sources: &Sources) -> Result { /// /// * `context` - the LLVM Context to be used for the compilation /// * `sources` - the source to be compiled -pub fn compile_module<'c>( +pub fn compile_module<'c, T: SourceContainer>( context: &'c Context, - sources: &Sources, + sources: Vec, + encoding: Option<&'static Encoding>, ) -> Result, CompileError> { let mut full_index = Index::new(); let mut unit = CompilationUnit::default(); // let mut diagnostics : Vec = vec![]; let mut files: SimpleFiles = SimpleFiles::new(); for container in sources { + let location: String = container.get_location().into(); let e = container - .load_source() - .map_err(|err| CompileError::io_error(err, container.get_location().to_string()))?; + .load_source(encoding) + .map_err(|err| CompileError::io_read_error(err, location.clone()))?; let (mut parse_result, diagnostics) = parse(e.source.as_str())?; ast::pre_process(&mut parse_result); @@ -277,7 +322,7 @@ pub fn compile_module<'c>( unit.import(parse_result); //log errors - let file_id = files.add(e.path.clone(), e.source.clone()); + let file_id = files.add(location, e.source.clone()); for error in diagnostics { let diag = diagnostic::Diagnostic::error() .with_message(error.get_message()) @@ -317,3 +362,46 @@ fn parse(source: &str) -> Result { //TODO : Parser should also return compile errors with sane locations parser::parse(lexer).map_err(|err| err.into()) } + +#[cfg(test)] +mod tests { + use crate::create_source_code; + + #[test] + fn windows_encoded_file_content_read() { + let expected = r"PROGRAM ä +(* Cöment *) +END_PROGRAM +"; + let mut source = &b"\x50\x52\x4f\x47\x52\x41\x4d\x20\xe4\x0a\x28\x2a\x20\x43\xf6\x6d\x65\x6e\x74\x20\x2a\x29\x0a\x45\x4e\x44\x5f\x50\x52\x4f\x47\x52\x41\x4d\x0a"[..]; + // let read = std::io::Read() + let source = create_source_code(&mut source, Some(encoding_rs::WINDOWS_1252)).unwrap(); + + assert_eq!(expected, &source); + } + + #[test] + fn utf_16_encoded_file_content_read() { + let expected = r"PROGRAM ä +(* Cömment *) +END_PROGRAM +"; + + let mut source = &b"\xff\xfe\x50\x00\x52\x00\x4f\x00\x47\x00\x52\x00\x41\x00\x4d\x00\x20\x00\xe4\x00\x0a\x00\x28\x00\x2a\x00\x20\x00\x43\x00\xf6\x00\x6d\x00\x6d\x00\x65\x00\x6e\x00\x74\x00\x20\x00\x2a\x00\x29\x00\x0a\x00\x45\x00\x4e\x00\x44\x00\x5f\x00\x50\x00\x52\x00\x4f\x00\x47\x00\x52\x00\x41\x00\x4d\x00\x0a\x00" [..]; + + let source = create_source_code(&mut source, None).unwrap(); + assert_eq!(expected, &source); + } + + #[test] + fn utf_8_encoded_file_content_read() { + let expected = r"PROGRAM ä +(* Cöment *) +END_PROGRAM +"; + + let mut source = &b"\x50\x52\x4f\x47\x52\x41\x4d\x20\xc3\xa4\x0a\x28\x2a\x20\x43\xc3\xb6\x6d\x65\x6e\x74\x20\x2a\x29\x0a\x45\x4e\x44\x5f\x50\x52\x4f\x47\x52\x41\x4d\x0a" [..]; + let source = create_source_code(&mut source, None).unwrap(); + assert_eq!(expected, &source); + } +} diff --git a/src/main.rs b/src/main.rs index 1d07fbee07..dba694c8db 100644 --- a/src/main.rs +++ b/src/main.rs @@ -20,11 +20,8 @@ use glob::glob; use rusty::{ cli::{CompileParameters, FormatOption, ParameterError}, - compile_error::CompileError, - compile_to_bitcode, compile_to_ir, compile_to_shared_object, compile_to_static_obj, SourceCode, - SourceContainer, + compile_to_bitcode, compile_to_ir, compile_to_shared_object, compile_to_static_obj, FilePath, }; -use std::fs; fn main() { let args: Vec = std::env::args().collect(); @@ -37,24 +34,6 @@ fn main() { } } -struct FilePath { - path: String, -} - -impl SourceContainer for FilePath { - fn load_source(&self) -> Result { - //why do I need to clone here :-( ??? - let path = self.get_location().to_string(); - fs::read_to_string(self.path.to_string()) - .map(move |source| SourceCode { source, path }) - .map_err(|err| format!("{:}", err)) - } - - fn get_location(&self) -> &str { - self.path.as_str() - } -} - fn create_file_paths(inputs: &[String]) -> Result, String> { let mut sources = Vec::new(); for input in inputs { @@ -72,32 +51,35 @@ fn create_file_paths(inputs: &[String]) -> Result, String> { } fn main_compile(parameters: CompileParameters) { - let file_paths = create_file_paths(¶meters.input).unwrap(); - let sources: Vec<_> = file_paths - .iter() - .map(|it| it as &dyn SourceContainer) - .collect::>(); + let sources = create_file_paths(¶meters.input).unwrap(); - let sources = sources.as_slice(); let output_filename = parameters.output_name().unwrap(); + let encoding = parameters.encoding; match parameters.output_format_or_default() { FormatOption::Static => { - compile_to_static_obj(sources, output_filename.as_str(), parameters.target).unwrap(); + compile_to_static_obj( + sources, + encoding, + output_filename.as_str(), + parameters.target, + ) + .unwrap(); } FormatOption::Shared | FormatOption::PIC => { - compile_to_shared_object(sources, output_filename.as_str(), parameters.target).unwrap(); + compile_to_shared_object( + sources, + encoding, + output_filename.as_str(), + parameters.target, + ) + .unwrap(); } FormatOption::Bitcode => { - compile_to_bitcode(sources, output_filename.as_str()).unwrap(); + compile_to_bitcode(sources, encoding, output_filename.as_str()).unwrap(); } FormatOption::IR => { - generate_ir(sources, output_filename.as_str()).unwrap(); + compile_to_ir(sources, encoding, &output_filename).unwrap(); } } } -fn generate_ir(sources: &[&dyn SourceContainer], output: &str) -> Result<(), CompileError> { - let ir = compile_to_ir(sources)?; - fs::write(output, ir).unwrap(); - Ok(()) -} diff --git a/tests/correctness/external_functions.rs b/tests/correctness/external_functions.rs index d9c7edee8d..29dccdf0d8 100644 --- a/tests/correctness/external_functions.rs +++ b/tests/correctness/external_functions.rs @@ -30,11 +30,11 @@ fn test_external_function_called() { Target::initialize_native(&InitializationConfig::default()).unwrap(); let context: Context = Context::create(); - let source = &SourceCode { + let source = SourceCode { path: "external_test.st".to_string(), source: prog.to_string(), }; - let code_gen = compile_module(&context, &[source.as_source_container()]).unwrap(); + let code_gen = compile_module(&context, vec![source], None).unwrap(); let exec_engine = code_gen .module .create_jit_execution_engine(inkwell::OptimizationLevel::None) diff --git a/tests/integration/data/encoding_utf_16.st b/tests/integration/data/encoding_utf_16.st new file mode 100644 index 0000000000..307e089c51 Binary files /dev/null and b/tests/integration/data/encoding_utf_16.st differ diff --git a/tests/integration/data/encoding_win.st b/tests/integration/data/encoding_win.st new file mode 100644 index 0000000000..023ac67ed8 --- /dev/null +++ b/tests/integration/data/encoding_win.st @@ -0,0 +1,3 @@ +PROGRAM a +(* Cömment *) +END_PROGRAM diff --git a/tests/integration/data/test_file.st b/tests/integration/data/test_file.st new file mode 100644 index 0000000000..5716ccc62e --- /dev/null +++ b/tests/integration/data/test_file.st @@ -0,0 +1,3 @@ +PROGRAM a +(* Cömment *) +END_PROGRAM diff --git a/tests/integration/external_files.rs b/tests/integration/external_files.rs new file mode 100644 index 0000000000..316dbc28a3 --- /dev/null +++ b/tests/integration/external_files.rs @@ -0,0 +1,47 @@ +// Copyright (c) 2020 Ghaith Hachem and Mathias Rieder + +use std::{env, fs, path::PathBuf}; + +use encoding_rs::Encoding; +use rusty::{FilePath, compile_to_bitcode, compile_to_ir, compile_to_shared_object, compile_to_shared_pic_object, compile_to_static_obj}; + + +fn compile_all(name : &str, encoding : Option<&'static Encoding>) { + let path = get_file(name); + let out = format!("{}.out", &path); + compile_to_ir(vec![FilePath{path : path.clone()}], encoding, &out).unwrap(); + fs::remove_file(&out).unwrap(); + compile_to_bitcode(vec![FilePath{path : path.clone()}], encoding, &out).unwrap(); + fs::remove_file(&out).unwrap(); + compile_to_shared_object(vec![FilePath{path : path.clone()}], encoding, &out, None).unwrap(); + fs::remove_file(&out).unwrap(); + compile_to_shared_pic_object(vec![FilePath{path : path.clone()}], encoding, &out, None).unwrap(); + fs::remove_file(&out).unwrap(); + compile_to_static_obj(vec![FilePath{path : path.clone()}], encoding, &out, None).unwrap(); + fs::remove_file(&out).unwrap(); + +} + +fn get_file(name : &str) -> String { + let mut data_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + data_path.push("tests"); + data_path.push("integration"); + data_path.push("data"); + data_path.push(name); + + assert!(data_path.exists()); + + data_path.display().to_string() + +} + +#[test] +fn compile_external_file() { + compile_all("test_file.st", None); +} + +#[test] +fn compile_external_file_with_encoding() { + compile_all("encoding_utf_16.st", None); + compile_all("encoding_win.st", Some(encoding_rs::WINDOWS_1252)); +} \ No newline at end of file diff --git a/tests/tests.rs b/tests/tests.rs index 35157473f7..5ff38ac195 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -18,6 +18,10 @@ mod correctness { mod sums; } +mod integration { + mod external_files; +} + #[macro_export] macro_rules! assert_almost_eq { ($left:expr, $right:expr, $prec:expr) => {{ @@ -48,7 +52,7 @@ pub fn compile(context: &Context, source: String) -> ExecutionEngine { path: "external_test.st".to_string(), source, }; - let code_gen = compile_module(context, &[source.as_source_container()]).unwrap(); + let code_gen = compile_module(context, vec![source], None).unwrap(); code_gen .module .create_jit_execution_engine(inkwell::OptimizationLevel::None)