Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Ruby parsing for block assigns #1732

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Units/parser-ruby.r/ruby-block-assign.d/expected.tags
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Bar input.rb /^c = class Bar$/;" c
Foo input.rb /^m = module Foo$/;" m
Quux input.rb /^m += module Quux$/;" m
Zook input.rb /^c ||= class Zook$/;" c
method_a input.rb /^ x = def method_a$/;" f class:Bar
method_b input.rb /^ def method_b$/;" f class:Bar
method_c input.rb /^ def method_c$/;" f class:Bar
method_d input.rb /^ def method_d$/;" f class:Bar
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't m, c and x be reported as something (variables I guess)?

I'm not sure where this syntax is used, but apparently it does something (store the result of the last statement? not sure from experimenting…), so I guess they should ideally be reported, shouldn't they?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They could be reported as variables, but I believe this hasn't been the case thus far. Anyway, it isn't in the scope of this PR, as it just aims to fix the annoying bug where class members are reported as free-floating methods if block assignment is used.

method_e input.rb /^ def method_e$/;" f class:Bar
method_f input.rb /^ def method_f$/;" f class:Bar
43 changes: 43 additions & 0 deletions Units/parser-ruby.r/ruby-block-assign.d/input.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
m = module Foo
end

m += module Quux
end

c = class Bar
x = def method_a
if 1
else
end
end

def method_b
x = while 1 do
break
end
end

def method_c
x = if 1
else
end
end

def method_d
x += if 1
else
end
end

def method_e
x ||= if 1
else
end
end

def method_f
end
end

c ||= class Zook
end
121 changes: 91 additions & 30 deletions parsers/ruby.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,6 @@
#include "routines.h"
#include "vstring.h"

/*
* MACROS
*/
#define isIdentChar(c) (isalnum (c) || (c) == '_')

/*
* DATA DECLARATIONS
*/
Expand Down Expand Up @@ -117,31 +112,104 @@ static bool canMatch (const unsigned char** s, const char* literal,
return true;
}

static bool isIdentChar (int c)
{
return (isalnum (c) || c == '_');
}

static bool notIdentChar (int c)
{
return ! isIdentChar (c);
}

static bool operatorChar (int c)
{
return (c == '[' || c == ']' ||
c == '=' || c == '!' || c == '~' ||
c == '+' || c == '-' ||
c == '@' || c == '*' || c == '/' || c == '%' ||
c == '<' || c == '>' ||
c == '&' || c == '^' || c == '|');
}

static bool notOperatorChar (int c)
{
return ! (c == '[' || c == ']' ||
c == '=' || c == '!' || c == '~' ||
c == '+' || c == '-' ||
c == '@' || c == '*' || c == '/' || c == '%' ||
c == '<' || c == '>' ||
c == '&' || c == '^' || c == '|');
return ! operatorChar (c);
}

static bool isWhitespace (int c)
{
return c == 0 || isspace (c);
}

/*
* Advance 's' while the passed predicate is true. Returns true if
* advanced by at least one position.
*/
static bool advanceWhile (const unsigned char** s, bool (*predicate) (int))
{
const int s_length = strlen ((const char *)*s);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

size_t is better.

unsigned char this_char;
int i = 0;

for (i = 0; i < s_length; ++i)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function scans the input twice: once by strlen and once by this for.
How about replacing them with following while?

while (**s != `\0`)

{
this_char = **s;

if (! predicate (this_char))
{
return i > 0;
}

(*s)++;
}

return i > 0;
}

static bool canMatchKeyword (const unsigned char** s, const char* literal)
{
return canMatch (s, literal, notIdentChar);
}

/*
* Extends canMatch. Works similarly, but allows assignment to precede
* the keyword, as block assignment is a common Ruby idiom.
*/
static bool canMatchKeywordWithAssign (const unsigned char** s, const char* literal)
{
const unsigned char* original_pos = *s;

if (canMatchKeyword (s, literal))
{
return true;
}

if (! advanceWhile (s, isIdentChar))
{
*s = original_pos;
return false;
}

advanceWhile (s, isWhitespace);

if (! (advanceWhile (s, operatorChar) && *(*s - 1) == '='))
{
*s = original_pos;
return false;
}

advanceWhile (s, isWhitespace);

if (canMatchKeyword (s, literal))
{
return true;
}

*s = original_pos;
return false;
}

/*
* Attempts to advance 'cp' past a Ruby operator method name. Returns
* true if successful (and copies the name into 'name'), false otherwise.
Expand All @@ -167,7 +235,7 @@ static bool parseRubyOperator (vString* name, const unsigned char** cp)
{
if (canMatch (cp, RUBY_OPERATORS[i], notOperatorChar))
{
vStringCatS (name, RUBY_OPERATORS[i]);
vStringCatS (name, RUBY_OPERATORS[i]);
Copy link
Member

@masatake masatake Apr 10, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you keep this line as the orignal?

return true;
}
}
Expand Down Expand Up @@ -428,29 +496,22 @@ static void findRubyTags (void)
*
* return if <exp>
*
* FIXME: this is fooled by code such as
*
* result = if <exp>
* <a>
* else
* <b>
* end
*
* FIXME: we're also fooled if someone does something heinous such as
* FIXME: we're fooled if someone does something heinous such as
*
* puts("hello") \
* unless <exp>
*/
if (canMatchKeyword (&cp, "for") ||
canMatchKeyword (&cp, "until") ||
canMatchKeyword (&cp, "while"))

if (canMatchKeywordWithAssign (&cp, "for") ||
canMatchKeywordWithAssign (&cp, "until") ||
canMatchKeywordWithAssign (&cp, "while"))
{
expect_separator = true;
enterUnnamedScope ();
}
else if (canMatchKeyword (&cp, "case") ||
canMatchKeyword (&cp, "if") ||
canMatchKeyword (&cp, "unless"))
else if (canMatchKeywordWithAssign (&cp, "case") ||
canMatchKeywordWithAssign (&cp, "if") ||
canMatchKeywordWithAssign (&cp, "unless"))
{
enterUnnamedScope ();
}
Expand All @@ -459,15 +520,15 @@ static void findRubyTags (void)
* "module M", "class C" and "def m" should only be at the beginning
* of a line.
*/
if (canMatchKeyword (&cp, "module"))
if (canMatchKeywordWithAssign (&cp, "module"))
{
readAndEmitTag (&cp, K_MODULE);
}
else if (canMatchKeyword (&cp, "class"))
else if (canMatchKeywordWithAssign (&cp, "class"))
{
readAndEmitTag (&cp, K_CLASS);
}
else if (canMatchKeyword (&cp, "def"))
else if (canMatchKeywordWithAssign (&cp, "def"))
{
rubyKind kind = K_METHOD;
NestingLevel *nl = nestingLevelsGetCurrent (nesting);
Expand Down