diff --git a/lang/semgrep-grammars/src/semgrep-kotlin/grammar.js b/lang/semgrep-grammars/src/semgrep-kotlin/grammar.js index 17df70aa..0c3886a4 100644 --- a/lang/semgrep-grammars/src/semgrep-kotlin/grammar.js +++ b/lang/semgrep-grammars/src/semgrep-kotlin/grammar.js @@ -6,7 +6,7 @@ */ // INVARIATN: Make sure that you are merging any commits into the `semgrep` -// branch of `tree-sitter-kotlin`! This is because our version of +// branch of `tree-sitter-kotlin`! This is because our version of // `tree-sitter-kotlin` is forked from the original repository, and we // want our branch to be kept separate. @@ -40,7 +40,7 @@ module.exports = grammar(standard_grammar, { typed_metavar: $ => seq( "(", $.simple_identifier, ":", $._type, ")" - ), + ), // Statement ellipsis: '...' not followed by ';' _expression: ($, previous) => { @@ -74,13 +74,43 @@ module.exports = grammar(standard_grammar, { ); }, + // We would like to be able to parse programs which have a newline between the + // class name and the constructor: + // class Foo + // constructor Bar() { ... } + + // The problem is that the Kotlin parser inserts a semicolon after "Foo", making + // it such that we get interrupted in the middle of the class_declaration. + // To make it so we can continue, we allow everything after the class identifier + // to be a standalone statement in its own right. This way, we can parse both parts + // individually, and stitch them together at parsing time. + + // We only need to amend statements here, because the consumers of _declaration are + // only class_member_declaration, top_level_object and _statement. + // The former has `secondary_constructor`, which already looks like what we want to + // add, and the second seems to be unused. + // So we just need to fix _statement. + _statement: ($, previous) => choice( + ...previous.members, + $.partial_class_declaration, + ), + + partial_class_declaration: $ => prec.left(seq( + optional($.type_parameters), + seq(optional($.modifiers), "constructor"), + $._class_parameters, + optional(seq(":", $._delegation_specifiers)), + optional($.type_constraints), + optional($.class_body) + )), + class_parameter: ($, previous) => { return choice( previous, $.ellipsis ); }, - + deep_ellipsis: $ => seq( '<...', $._expression, '...>' ), diff --git a/lang/semgrep-grammars/src/semgrep-kotlin/test/corpus/semgrep.txt b/lang/semgrep-grammars/src/semgrep-kotlin/test/corpus/semgrep.txt index f12c8c49..b4f6687d 100644 --- a/lang/semgrep-grammars/src/semgrep-kotlin/test/corpus/semgrep.txt +++ b/lang/semgrep-grammars/src/semgrep-kotlin/test/corpus/semgrep.txt @@ -32,7 +32,6 @@ class $CLASS { (value_argument (integer_literal))))))))))) - ===================================== Typed Metavariables ===================================== @@ -54,7 +53,7 @@ val x = ($X : int) Ellipsis Metavariable ===================================== -$...X +$...X val x = 2 --- @@ -67,7 +66,7 @@ val x = 2 (integer_literal))) ===================================== -Expression Ellipsis +Expression Ellipsis ===================================== class Foo { @@ -89,7 +88,6 @@ class Foo { (statements (ellipsis))))))) - ===================================== Deep Ellipsis ===================================== @@ -132,9 +130,12 @@ class Foo { } --- + (source_file - (class_declaration (type_identifier) - (class_body (ellipsis)))) + (class_declaration + (type_identifier) + (class_body + (ellipsis)))) ===================================== Argument Ellipsis @@ -142,12 +143,18 @@ Argument Ellipsis foo(1, ..., 2) --- + (source_file - (call_expression (simple_identifier) - (call_suffix (value_arguments - (value_argument (integer_literal)) - (value_argument (ellipsis)) - (value_argument (integer_literal)))))) + (call_expression + (simple_identifier) + (call_suffix + (value_arguments + (value_argument + (integer_literal)) + (value_argument + (ellipsis)) + (value_argument + (integer_literal)))))) ===================================== Parameter Ellipsis @@ -157,12 +164,16 @@ fun foo(..., bar: String, ...) {} --- (source_file - (function_declaration (simple_identifier) - (function_value_parameters + (function_declaration + (simple_identifier) + (function_value_parameters (ellipsis) - (parameter (simple_identifier) (user_type (type_identifier))) + (parameter + (simple_identifier) + (user_type + (type_identifier))) (ellipsis)) - (function_body))) + (function_body))) ===================================== Class Parameter Ellipsis @@ -171,11 +182,17 @@ class Foo(...) : Filter { } --- + (source_file - (class_declaration (type_identifier) - (primary_constructor (class_parameter (ellipsis))) - (delegation_specifier (user_type (type_identifier))) - (class_body))) + (class_declaration + (type_identifier) + (primary_constructor + (class_parameter + (ellipsis))) + (delegation_specifier + (user_type + (type_identifier))) + (class_body))) ===================================== Statement Ellipsis @@ -187,15 +204,22 @@ fun foo() { } --- + (source_file - (function_declaration (simple_identifier) + (function_declaration + (simple_identifier) (function_value_parameters) (function_body (statements - (call_expression (simple_identifier) (call_suffix (value_arguments))) - (ellipsis) - (call_expression (simple_identifier) (call_suffix (value_arguments)))) - ))) + (call_expression + (simple_identifier) + (call_suffix + (value_arguments))) + (ellipsis) + (call_expression + (simple_identifier) + (call_suffix + (value_arguments))))))) ===================================== Statement Ellipsis and metavarible @@ -205,11 +229,17 @@ $COOKIE.setValue("") ... --- + (source_file (call_expression - (navigation_expression (simple_identifier) - (navigation_suffix (simple_identifier))) - (call_suffix (value_arguments (value_argument (string_literal))))) + (navigation_expression + (simple_identifier) + (navigation_suffix + (simple_identifier))) + (call_suffix + (value_arguments + (value_argument + (string_literal))))) (ellipsis)) ===================================== @@ -218,14 +248,97 @@ Method chaining Ellipsis obj. ... .foo().bar() --- + (source_file (call_expression (navigation_expression (call_expression (navigation_expression - (navigation_expression (simple_identifier) - (navigation_suffix (ellipsis))) - (navigation_suffix (simple_identifier))) - (call_suffix (value_arguments))) - (navigation_suffix (simple_identifier))) - (call_suffix (value_arguments)))) + (navigation_expression + (simple_identifier) + (navigation_suffix + (ellipsis))) + (navigation_suffix + (simple_identifier))) + (call_suffix + (value_arguments))) + (navigation_suffix + (simple_identifier))) + (call_suffix + (value_arguments)))) + +===================================== +Class with a newline +===================================== + +class BlahClass +constructor(arg: Int) { + private val blah2 = "foo${arg}" +} + +--- + +(source_file + (class_declaration + (type_identifier)) + (partial_class_declaration + (class_parameter + (simple_identifier) + (user_type + (type_identifier))) + (class_body + (property_declaration + (modifiers + (visibility_modifier)) + (variable_declaration + (simple_identifier)) + (string_literal + (interpolated_expression + (simple_identifier))))))) + +===================================== +Class in a class +===================================== + +class Foo { + class BlahClass + constructor(arg: Int) { + private val blah2 = "foo${arg}" + } +} + +--- + +(source_file + (class_declaration + (type_identifier) + (class_body + (class_declaration + (type_identifier)) + (secondary_constructor + (function_value_parameters + (parameter + (simple_identifier) + (user_type + (type_identifier)))) + (statements + (property_declaration + (modifiers + (visibility_modifier)) + (variable_declaration + (simple_identifier)) + (string_literal + (interpolated_expression + (simple_identifier))))))))) + +===================================== +Class without body +===================================== + +class Foo + +--- + +(source_file + (class_declaration + (type_identifier)))