Skip to content

Commit d161662

Browse files
committed
feat: headers, codeblock, code inline, rulers
- add support for parsing:
  - inline code
  - code block with language
  - horizontal ruler
- add meta functions:
  - parser::Parser.create_token
  - parser::Parser.create_paragraph
- correct paragraph parsing
- only parse headings at the beginning of the line
- skip '\r'
- correct heading number / depth
- only parse horizontal rulers if at the beginning of the line
1 parent a437a2d commit d161662

File tree

1 file changed

+119
-19
lines changed

1 file changed

+119
-19
lines changed

src/parser.rs

Lines changed: 119 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,11 @@ use std::fs;
33
use crate::token::{self, Token};
44

55
pub struct Parser {
6-
current_char: char,
7-
input: String,
8-
pos: usize,
9-
line: usize,
10-
line_pos: usize,
6+
pub current_char: char,
7+
pub input: String,
8+
pub pos: usize,
9+
pub line: usize,
10+
pub line_pos: usize,
1111
}
1212

1313
impl Parser {
@@ -51,30 +51,62 @@ impl Parser {
5151
}
5252
}
5353

54+
fn create_token(&mut self, token_kind: token::TokenKind, token_value: String) -> Token {
55+
Token {
56+
line: self.line,
57+
line_pos: self.line_pos - token_value.len(),
58+
pos: self.pos,
59+
kind: token_kind,
60+
content: token_value,
61+
}
62+
}
63+
64+
fn create_paragraph(&mut self, text: &str) -> Token {
65+
self.create_token(token::TokenKind::Paragraph, String::from(text))
66+
}
67+
5468
pub fn parse(&mut self) -> Vec<Token> {
5569
let mut res: Vec<Token> = vec![];
70+
let mut last_paragraph = String::new();
5671
while !self.at_end() && self.current_char != '\0' {
5772
let mut token_value = String::new();
5873
let mut token_kind = token::TokenKind::Unknown;
5974

6075
match self.current_char {
61-
' ' | '\t' | '\r' => {
62-
self.advance();
63-
continue;
64-
}
6576
'\n' => {
77+
if !last_paragraph.is_empty() {
78+
res.push(self.create_paragraph(&last_paragraph));
79+
last_paragraph = String::new();
80+
}
6681
self.line += 1;
6782
self.line_pos = 0;
6883
self.advance();
6984
continue;
7085
}
86+
'\r' => {
87+
self.advance();
88+
continue;
89+
}
7190
'#' => {
91+
if !(self.line_pos == 0 || self.line_pos == 1) {
92+
last_paragraph.push(self.current_char);
93+
self.advance();
94+
continue;
95+
}
96+
if !last_paragraph.is_empty() {
97+
res.push(self.create_paragraph(&last_paragraph));
98+
last_paragraph = String::new();
99+
}
100+
72101
// skip over '#' with a counter:
73-
let mut heading_id = 0;
102+
let mut heading_id = 1;
103+
self.advance();
74104
while self.current_char == '#' {
75-
heading_id += 1;
76105
self.advance();
106+
heading_id += 1;
77107
}
108+
// consume last #
109+
self.advance();
78110

79111
while !self.peek_equals('\n') {
80112
token_value.push(self.current_char);
@@ -89,24 +121,96 @@ impl Parser {
89121
5 => token::TokenKind::Heading5,
90122
6 => token::TokenKind::Heading6,
91123
_ => token::TokenKind::Paragraph,
124+
};
125+
}
126+
'`' => {
127+
if !last_paragraph.is_empty() {
128+
res.push(self.create_paragraph(&last_paragraph));
129+
last_paragraph = String::new();
130+
}
131+
if self.peek_equals('`') {
132+
self.advance();
133+
if self.peek_equals('`') {
134+
let mut code_lang = String::new();
135+
self.advance();
136+
self.advance();
137+
while self.current_char != '\n' {
138+
code_lang.push(self.current_char);
139+
self.advance();
140+
}
141+
142+
while self.current_char != '`' {
143+
token_value.push(self.current_char);
144+
self.advance();
145+
}
146+
147+
token_kind = token::TokenKind::CodeBlock(code_lang);
148+
}
149+
} else {
150+
self.advance();
151+
while self.current_char != '`' {
152+
token_value.push(self.current_char);
153+
self.advance();
154+
}
155+
token_kind = token::TokenKind::CodeInline;
156+
// consume `
157+
self.advance();
92158
}
93159
}
94160
'_' => {
95-
// consume opening '*'
161+
if !last_paragraph.is_empty() {
162+
res.push(self.create_paragraph(&last_paragraph));
163+
last_paragraph = String::new();
164+
}
165+
// consume opening '_'
96166
self.advance();
97167
token_kind = token::TokenKind::Italic;
98168
while self.current_char != '_' {
99169
token_value.push(self.current_char);
100170
self.advance();
101171
}
102-
// consume closing '*'
172+
dbg!(&token_value);
173+
// consume closing '_'
103174
self.advance();
104175
}
176+
'-' => {
177+
if self.peek_equals('-') {
178+
self.advance();
179+
if self.peek_equals('-') {
180+
if !last_paragraph.is_empty() {
181+
res.push(self.create_paragraph(&last_paragraph));
182+
last_paragraph = String::new();
183+
}
184+
res.push(self.create_token(token::TokenKind::Ruler, String::new()));
185+
self.advance();
186+
continue;
187+
}
188+
} else {
189+
last_paragraph.push(self.current_char);
190+
self.advance();
191+
}
192+
}
105193
'*' => {
106194
if self.peek_equals('*') {
195+
if !last_paragraph.is_empty() {
196+
res.push(self.create_paragraph(&last_paragraph));
197+
last_paragraph = String::new();
198+
}
107199
token_kind = token::TokenKind::Bold;
108200
// consume opening '*'
109201
self.advance();
202+
203+
// check for horizontal ruler
204+
if self.peek_equals('*') {
205+
if !last_paragraph.is_empty() {
206+
res.push(self.create_paragraph(&last_paragraph));
207+
last_paragraph = String::new();
208+
}
209+
res.push(self.create_token(token::TokenKind::Ruler, String::new()));
210+
self.advance();
211+
continue;
212+
}
213+
110214
// consume second opening '*'
111215
self.advance();
112216
while self.current_char != '*' {
@@ -118,18 +222,14 @@ impl Parser {
118222
}
119223
}
120224
_ => {
121-
// TODO: stop skipping everything else 💀
225+
last_paragraph.push(self.current_char);
122226
self.advance();
123227
continue;
124228
}
125229
}
126230

127231
if token_kind != token::TokenKind::Unknown {
128-
res.push(Token {
129-
pos: self.pos - token_value.len(),
130-
kind: token_kind,
131-
content: String::from(token_value.trim_start()),
132-
});
232+
res.push(self.create_token(token_kind, token_value))
133233
}
134234
self.advance();
135235
}

0 commit comments

Comments
 (0)