Skip to content

Commit

Permalink
gccrs: Remove unnecessary methods of Rust::Lexer
Browse files Browse the repository at this point in the history
gcc/rust/ChangeLog:

	* lex/rust-lex.cc (Lexer::build_token): Replace function call.
	(Lexer::parse_string): Likewise.
	(Lexer::parse_identifier_or_keyword): Likewise.
	(Lexer::parse_raw_string): Likewise.
	(Lexer::parse_char_or_lifetime): Likewise.
	(Lexer::get_input_codepoint_length): Deleted.
	(Lexer::peek_codepoint_input): Deleted.
	(Lexer::skip_codepoint_input): Deleted.
	* lex/rust-lex.h: Remove some methods and fields

Signed-off-by: Raiki Tamura <tamaron1203@gmail.com>
  • Loading branch information
tamaroning authored and CohenArthur committed Jul 3, 2023
1 parent b49a958 commit 5d33b88
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 83 deletions.
128 changes: 50 additions & 78 deletions gcc/rust/lex/rust-lex.cc
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,7 @@ Lexer::build_token ()
Location loc = get_current_location ();

current_char = peek_input ();
current_char32 = peek_codepoint_input ();
skip_codepoint_input ();
skip_input ();

// detect shebang
// Must be the first thing on the first line, starting with #!
Expand Down Expand Up @@ -1089,7 +1088,7 @@ Lexer::build_token ()
}

// find identifiers and keywords.
if (is_identifier_start (current_char32.value))
if (is_identifier_start (current_char.value))
return parse_identifier_or_keyword (loc);

// int and float literals
Expand Down Expand Up @@ -1998,59 +1997,56 @@ Lexer::skip_broken_string_input (Codepoint current_char)
current_column);
}

// Parses a unicode string.
// Parses a string.
TokenPtr
Lexer::parse_string (Location loc)
{
Codepoint current_char32;

std::string str;
str.reserve (16); // some sensible default

int length = 1;
current_char32 = peek_codepoint_input ();
current_char = peek_input ();

// FIXME: This fails if the input ends. How do we check for EOF?
while (current_char32.value != '"' && !current_char32.is_eof ())
while (current_char.value != '"' && !current_char.is_eof ())
{
if (current_char32.value == '\\')
if (current_char.value == '\\')
{
// parse escape
auto utf8_escape_pair = parse_utf8_escape ();
current_char32 = std::get<0> (utf8_escape_pair);
current_char = std::get<0> (utf8_escape_pair);

if (current_char32 == Codepoint (0) && std::get<2> (utf8_escape_pair))
if (current_char == Codepoint (0) && std::get<2> (utf8_escape_pair))
length = std::get<1> (utf8_escape_pair) - 1;
else
length += std::get<1> (utf8_escape_pair);

if (current_char32 != Codepoint (0)
|| !std::get<2> (utf8_escape_pair))
str += current_char32;

// required as parsing utf8 escape only changes current_char
current_char32 = peek_codepoint_input ();
if (current_char != Codepoint (0) || !std::get<2> (utf8_escape_pair))
str += current_char.as_string ();

// FIXME: should remove this but can't.
// `parse_utf8_escape` does not update `current_char` correctly.
current_char = peek_input ();
continue;
}

length += get_input_codepoint_length ();
length++;

str += current_char32;
skip_codepoint_input ();
current_char32 = peek_codepoint_input ();
str += current_char;
skip_input ();
current_char = peek_input ();
}

current_column += length;

if (current_char32.value == '"')
if (current_char.value == '"')
{
current_column++;

skip_input ();
current_char = peek_input ();
}
else if (current_char32.is_eof ())
else if (current_char.is_eof ())
{
rust_error_at (get_current_location (), "unended string literal");
return Token::make (END_OF_FILE, get_current_location ());
Expand All @@ -2072,22 +2068,22 @@ Lexer::parse_identifier_or_keyword (Location loc)
{
std::string str;
str.reserve (16); // default
str += current_char32.as_string ();
str += current_char.as_string ();

bool first_is_underscore = current_char == '_';

int length = 1;
current_char32 = peek_codepoint_input ();
current_char = peek_input ();

// loop through entire name
while (is_identifier_continue (current_char32.value))
while (is_identifier_continue (current_char.value))
{
auto s = current_char32.as_string ();
auto s = current_char.as_string ();
length++;

str += current_char32.as_string ();
skip_codepoint_input ();
current_char32 = peek_codepoint_input ();
str += current_char.as_string ();
skip_input ();
current_char = peek_input ();
}

current_column += length;
Expand Down Expand Up @@ -2141,11 +2137,11 @@ Lexer::parse_raw_string (Location loc, int initial_hash_count)

length++;
skip_input ();
Codepoint current_char32 = peek_codepoint_input ();
current_char = peek_input ();

while (!current_char32.is_eof ())
while (!current_char.is_eof ())
{
if (current_char32.value == '"')
if (current_char.value == '"')
{
bool enough_hashes = true;

Expand All @@ -2170,9 +2166,9 @@ Lexer::parse_raw_string (Location loc, int initial_hash_count)

length++;

str += current_char32;
skip_codepoint_input ();
current_char32 = peek_codepoint_input ();
str += current_char.as_string ();
skip_input ();
current_char = peek_input ();
}

current_column += length;
Expand Down Expand Up @@ -2424,29 +2420,27 @@ Lexer::parse_decimal_int_or_float (Location loc)
TokenPtr
Lexer::parse_char_or_lifetime (Location loc)
{
Codepoint current_char32;

int length = 1;

current_char32 = peek_codepoint_input ();
if (current_char32.is_eof ())
current_char = peek_input ();
if (current_char.is_eof ())
return nullptr;

// parse escaped char literal
if (current_char32.value == '\\')
if (current_char.value == '\\')
{
// parse escape
auto utf8_escape_pair = parse_utf8_escape ();
current_char32 = std::get<0> (utf8_escape_pair);
Codepoint escaped_char = std::get<0> (utf8_escape_pair);
length += std::get<1> (utf8_escape_pair);

if (peek_codepoint_input ().value != '\'')
if (peek_input ().value != '\'')
{
rust_error_at (get_current_location (), "unended character literal");
}
else
{
skip_codepoint_input ();
skip_input ();
current_char = peek_input ();
length++;
}
Expand All @@ -2455,15 +2449,16 @@ Lexer::parse_char_or_lifetime (Location loc)

loc += length - 1;

return Token::make_char (loc, current_char32);
return Token::make_char (loc, escaped_char);
}
else
{
skip_codepoint_input ();
skip_input ();

if (peek_codepoint_input ().value == '\'')
if (peek_input ().value == '\'')
{
// parse non-escaped char literal
Codepoint non_escaped_char = current_char;

// skip the ' character
skip_input ();
Expand All @@ -2474,21 +2469,21 @@ Lexer::parse_char_or_lifetime (Location loc)

loc += 2;

return Token::make_char (loc, current_char32);
return Token::make_char (loc, non_escaped_char);
}
else if (is_identifier_start (current_char32.value))
else if (is_identifier_start (current_char.value))
{
// parse lifetime name
std::string str;
str += current_char32;
str += current_char.as_string ();
length++;

current_char32 = peek_codepoint_input ();
while (is_identifier_continue (current_char32.value))
current_char = peek_input ();
while (is_identifier_continue (current_char.value))
{
str += current_char32;
skip_codepoint_input ();
current_char32 = peek_codepoint_input ();
str += current_char.as_string ();
skip_input ();
current_char = peek_input ();
length++;
}

Expand All @@ -2512,29 +2507,6 @@ Lexer::parse_char_or_lifetime (Location loc)
}
}

// TODO remove this function
// Returns the length of the codepoint at the current position.
int
Lexer::get_input_codepoint_length ()
{
return 1;
}

// TODO remove this function
// Returns the codepoint at the current position.
Codepoint
Lexer::peek_codepoint_input ()
{
return peek_input ();
}

// TODO remove this function
void
Lexer::skip_codepoint_input ()
{
skip_input ();
}

void
Lexer::split_current_token (TokenId new_left, TokenId new_right)
{
Expand Down
5 changes: 0 additions & 5 deletions gcc/rust/lex/rust-lex.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,10 +136,6 @@ class Lexer
std::pair<long, int> parse_partial_hex_escape ();
std::pair<Codepoint, int> parse_partial_unicode_escape ();

int get_input_codepoint_length ();
// Peeks the current utf-8 char
Codepoint peek_codepoint_input ();
void skip_codepoint_input ();
void skip_broken_string_input (Codepoint current_char);

TokenPtr parse_byte_char (Location loc);
Expand Down Expand Up @@ -393,7 +389,6 @@ class Lexer
int current_column;
// Current character.
Codepoint current_char;
Codepoint current_char32;
// Line map.
Linemap *line_map;

Expand Down

0 comments on commit 5d33b88

Please sign in to comment.