From ca1d28f557dde384cb480a4dad2842ff8ae8b5a4 Mon Sep 17 00:00:00 2001 From: jakeroggenbuck Date: Sat, 21 Aug 2021 16:53:38 -0700 Subject: [PATCH] Get comments to be ~ --- README.md | 19 ++++--------- examples/string.ja | 8 ++++++ examples/test.ja | 6 ++-- examples/test_two.ja | 4 +-- jai/__init__.py | 32 +++++++++++---------- jai/parser.py | 2 +- src/lib.rs | 66 +++++++++++++++++++++++++++++--------------- 7 files changed, 80 insertions(+), 57 deletions(-) create mode 100644 examples/string.ja diff --git a/README.md b/README.md index fd0f947..21a1032 100644 --- a/README.md +++ b/README.md @@ -50,21 +50,14 @@ type variable = value; ## Functions ```c -// First suggestion -"returns int"; -"takes int, str"; -myfunc() { } -``` - -```c -// Second suggestion -"returns int and takes int, str"; -myfunc() { } +~ returns int ~; +~ takes int, str ~; +fn myfunc() { } ``` ## Comment ```c -"this is a comment"; +~ this is a comment ~; ``` # Types @@ -76,8 +69,8 @@ myfunc() { } # Turning source into tokens ## Source code ```c -"returns string"; -"takes str, int"; +~ returns string ~; +~ takes str, int ~; jai(name, version) { return "Name: " + name + " Version:" + version; } diff --git a/examples/string.ja b/examples/string.ja new file mode 100644 index 0000000..4645618 --- /dev/null +++ b/examples/string.ja @@ -0,0 +1,8 @@ +~ This is a string ~ +str name = "Jake"; + +~ takes str ~ +~ returns str ~ +fn my_func(name) { + return "Hello, " + name; +} diff --git a/examples/test.ja b/examples/test.ja index 6b04127..3643b38 100644 --- a/examples/test.ja +++ b/examples/test.ja @@ -1,5 +1,5 @@ -"returns string"; -"takes str, int"; -jai(name, version) { +~ returns string ~ +~ takes str, int ~ +fn jai(name, version) { return "This lang is called " + name + " and we are on version: " + version; } diff --git a/examples/test_two.ja b/examples/test_two.ja index 691a9cc..e9766d0 100644 --- a/examples/test_two.ja +++ b/examples/test_two.ja @@ -1,5 +1,5 @@ -"returns string"; -"takes str, int"; +~ returns string ~ +~ takes str, int ~ jai(name, version) { return "Name: " + name + " Version:" + version; } diff --git a/jai/__init__.py b/jai/__init__.py index f502413..97eccb4 100644 --- a/jai/__init__.py +++ b/jai/__init__.py @@ -42,23 +42,24 @@ class Tokens(Enum): At = 20 Percent = 21 Bang = 22 - BackSlash = 23 + Til = 23 + BackSlash = 24 - Arrow = 24 - Equal = 25 + Arrow = 25 + Equal = 26 - Space = 26 - Tab = 27 - Newline = 28 + Space = 27 + Tab = 28 + Newline = 29 - SingleQuote = 29 - DoubleQuote = 30 - Identifier = 31 - NumericLiteral = 32 - StringLiteral = 33 + SingleQuote = 30 + DoubleQuote = 31 + Identifier = 32 + NumericLiteral = 33 + StringLiteral = 34 - LoopExit = 34 - Return = 35 + LoopExit = 35 + Return = 36 Empty = 0xF09F @@ -68,8 +69,9 @@ class Tokens(Enum): class Settings: - PARSE_STRING = 1 - ALL = PARSE_STRING + PARSE_STRING = 0x1 + PARSE_COMMENTS = 0x10 + ALL = PARSE_STRING + PARSE_COMMENTS __version__ = "0.1.1" diff --git a/jai/parser.py b/jai/parser.py index 57b022f..c11827e 100644 --- a/jai/parser.py +++ b/jai/parser.py @@ -73,7 +73,7 @@ def __init__(self): pass def spawn_lexer(self, source: str): - return Lexer(source, Settings.PARSE_STRING) + return Lexer(source, Settings.ALL) def loop(self, source: str): lexer = self.spawn_lexer(source) diff --git a/src/lib.rs b/src/lib.rs index 27f1b5b..7a31f6b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,6 +31,7 @@ enum Tokens { At, Percent, Bang, + Til, BackSlash, Arrow, @@ -45,6 +46,7 @@ enum Tokens { Identifier, NumericLiteral, StringLiteral, + CommentLiteral, LoopExit, Return, @@ -82,23 +84,25 @@ impl Tokens { 20 => Tokens::At, 21 => Tokens::Percent, 22 => Tokens::Bang, - 23 => Tokens::BackSlash, + 23 => Tokens::Til, + 24 => Tokens::BackSlash, - 24 => Tokens::Arrow, - 25 => Tokens::Equal, + 25 => Tokens::Arrow, + 26 => Tokens::Equal, - 26 => Tokens::Space, - 27 => Tokens::Tab, - 28 => Tokens::Newline, + 27 => Tokens::Space, + 28 => Tokens::Tab, + 29 => Tokens::Newline, - 29 => Tokens::SingleQuote, - 30 => Tokens::DoubleQuote, - 31 => Tokens::Identifier, - 32 => Tokens::NumericLiteral, - 33 => Tokens::StringLiteral, + 30 => Tokens::SingleQuote, + 31 => Tokens::DoubleQuote, + 32 => Tokens::Identifier, + 33 => Tokens::NumericLiteral, + 34 => Tokens::StringLiteral, + 35 => Tokens::CommentLiteral, - 34 => Tokens::LoopExit, - 35 => Tokens::Return, + 36 => Tokens::LoopExit, + 37 => Tokens::Return, 61599 => Tokens::Empty, _ => panic!("Unknown value: {}", value), @@ -175,9 +179,8 @@ impl Token { #[pyfunction] fn is_char_symbol(ch: char) -> bool { match ch { - '[' | ']' | '{' | '}' | '(' | ')' | '.' | ',' | ':' | ';' | '=' | '\'' | '\"' | '\\' => { - true - } + '[' | ']' | '{' | '}' | '(' | ')' | '.' | ',' | ':' | ';' | '=' | '\'' | '\"' | '\\' + | '~' => true, _ => false, } } @@ -277,6 +280,7 @@ fn tokenize(part: &str) -> Token { "@" => Tokens::At, "%" => Tokens::Percent, "!" => Tokens::Bang, + "~" => Tokens::Til, "\\" => Tokens::BackSlash, "->" => Tokens::Arrow, @@ -306,6 +310,11 @@ fn tokenize(part: &str) -> Token { token = Tokens::StringLiteral; part.pop(); } + + if part.ends_with("~") { + token = Tokens::CommentLiteral; + part.pop(); + } } return Token { part, token }; @@ -324,8 +333,9 @@ struct Lexer { bitflags! { struct Settings: u32 { - const PARSE_STRING = 0b00000001; - const ALL = Self::PARSE_STRING.bits; + const PARSE_STRING = 0b1; + const PARSE_COMMENTS = 0b10; + const ALL = Self::PARSE_STRING.bits + Self::PARSE_COMMENTS.bits; } } @@ -356,15 +366,18 @@ impl Lexer { self.curr_char = self.chars[self.index]; } - fn skip_over_char_set(&mut self, ch: char) -> String { + fn skip_over_char_set(&mut self, ch: char, start_skip: bool) -> String { let mut string: String = String::new(); - self.char_forward(); + if start_skip { + self.char_forward(); + } while !(self.curr_char == ch) { string.push(self.curr_char); self.char_forward(); } // Add something at the end to identify it + // This is so the tokenize function can catch what it was skipping over string = string + &ch.to_string(); self.char_forward(); return string; @@ -391,14 +404,21 @@ impl Lexer { self.curr_char = self.chars[self.index]; self.next_char = self.chars[self.index + 1]; - if (self.settings & Settings::PARSE_STRING.bits) == Settings::PARSE_STRING.bits { + if (self.settings - Settings::PARSE_STRING.bits) != self.settings { if self.curr_char == '"' { - let skipped_over = self.skip_over_char_set('"'); + let skipped_over = self.skip_over_char_set('"', true); return Some(tokenize(&skipped_over)); } if self.curr_char == '\'' { - let skipped_over = self.skip_over_char_set('\''); + let skipped_over = self.skip_over_char_set('\'', true); + return Some(tokenize(&skipped_over)); + } + } + + if (self.settings - Settings::PARSE_COMMENTS.bits) != self.settings { + if self.curr_char == '~' { + let skipped_over = self.skip_over_char_set('~', false); return Some(tokenize(&skipped_over)); } }