Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add TOML language parser #4845

Merged
merged 4 commits into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions rewrite-toml/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
plugins {
    id("org.openrewrite.build.language-library")
}

// Dedicated configuration for the ANTLR code generator. `org.antlr.v4.Tool` ships in the
// `org.antlr:antlr4` artifact, not in `antlr4-runtime`, so it is not available on the main
// runtime classpath. Keeping it in its own configuration also keeps the generator (and its
// transitive dependencies) out of the published runtime dependencies of this module.
val antlrGeneration by configurations.creating

// Regenerates the lexer/parser/visitor sources from every grammar under src/main/antlr.
// The generated Java files are written into the source tree, under the
// org.openrewrite.toml.internal.grammar package.
tasks.register<JavaExec>("generateAntlrSources") {
    mainClass.set("org.antlr.v4.Tool")

    args = listOf(
        "-o", "src/main/java/org/openrewrite/toml/internal/grammar",
        "-package", "org.openrewrite.toml.internal.grammar",
        "-visitor"
    ) + fileTree("src/main/antlr").matching { include("**/*.g4") }.map { it.path }

    // Only the generator is needed here; using the main runtime classpath would additionally
    // require the (possibly not yet compilable) main sources to be built first.
    classpath = antlrGeneration
}

dependencies {
    implementation(project(":rewrite-core"))
    implementation("org.antlr:antlr4-runtime:4.11.1")
    implementation("io.micrometer:micrometer-core:1.9.+")

    // rewrite-test is referenced from main sources (Assertions) but must not leak to consumers.
    compileOnly(project(":rewrite-test"))

    testImplementation(project(":rewrite-test"))

    // Same version as antlr4-runtime so generated code matches the runtime it executes against.
    antlrGeneration("org.antlr:antlr4:4.11.1")
}
149 changes: 149 additions & 0 deletions rewrite-toml/src/main/antlr/TomlLexer.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/*
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
*/

// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false
// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine
// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true

// Lexer for TOML. The default mode (this section) tokenizes keys, table headers, comments and
// line breaks; values are tokenized in the dedicated modes declared further down.
lexer grammar TomlLexer;

// Spaces and tabs carry no meaning between tokens and are dropped.
WS : [ \t]+ -> skip;
// One or more consecutive line endings (LF or CRLF) collapse into a single NL token.
NL : ('\r'? '\n')+;
// '#' up to, but not including, the line terminator.
COMMENT : '#' (~[\r\n])*;
L_BRACKET : '[';
DOUBLE_L_BRACKET : '[[';
R_BRACKET : ']';
DOUBLE_R_BRACKET : ']]';
// '=' switches to SIMPLE_VALUE_MODE so that the right-hand side is lexed as a value.
EQUALS : '=' -> pushMode(SIMPLE_VALUE_MODE);
DOT : '.';
// In the default mode commas are discarded; INLINE_TABLE_MODE and ARRAY_MODE define their own
// comma rules that emit a COMMA token instead.
COMMA : ',' -> skip;

fragment DIGIT : [0-9];
fragment ALPHA : [A-Za-z];

// strings
// ESC: a backslash followed by one of " \ / b f n r t, or by a \uXXXX / \UXXXXXXXX escape.
fragment ESC : '\\' (["\\/bfnrt] | UNICODE | EX_UNICODE);
fragment UNICODE : 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT;
fragment EX_UNICODE:
'U' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
;
// Double-quoted, single-line string: escapes are recognized; a raw '"', '\' or LF ends/invalidates it.
BASIC_STRING : '"' (ESC | ~["\\\n])*? '"';
// Single-quoted, single-line string: no escape processing; may not contain "'" or LF.
LITERAL_STRING : '\'' (~['\n])*? '\'';

// keys
// Bare keys: one or more ASCII letters, digits, '-' or '_'.
UNQUOTED_KEY: (ALPHA | DIGIT | '-' | '_')+;

// Mode pushed by EQUALS. Every scalar token below pops the mode again, so exactly one value is
// lexed here. L_BRACE and ARRAY_START use mode(...) rather than pushMode(...): they replace this
// mode, so the popMode on the matching '}' / ']' returns to whatever mode was active before '='.
mode SIMPLE_VALUE_MODE;

VALUE_WS: WS -> skip;

L_BRACE : '{' -> mode(INLINE_TABLE_MODE);
// Emitted as L_BRACKET so the parser sees a single bracket token type.
ARRAY_START : L_BRACKET -> type(L_BRACKET), mode(ARRAY_MODE);

// booleans
BOOLEAN: ('true' | 'false') -> popMode;

// strings
// ML_ESC additionally allows a backslash directly followed by a line ending.
fragment ML_ESC : '\\' '\r'? '\n' | ESC;
VALUE_BASIC_STRING : BASIC_STRING -> type(BASIC_STRING), popMode;
// NOTE(review): the body excludes every unescaped '"', so a lone quotation mark inside a
// multi-line basic string is not matched by this rule — confirm whether that is intended.
ML_BASIC_STRING : '"""' (ML_ESC | ~["\\])*? '"""' -> popMode;
VALUE_LITERAL_STRING : LITERAL_STRING -> type(LITERAL_STRING), popMode;
// Any characters (including line breaks) up to the first closing ''' delimiter.
ML_LITERAL_STRING : '\'\'\'' (.)*? '\'\'\'' -> popMode;

// floating point numbers
fragment EXP : ('e' | 'E') [+-]? ZERO_PREFIXABLE_INT;
// Digits with optional single underscores between digits; leading zeros are allowed here.
fragment ZERO_PREFIXABLE_INT : DIGIT (DIGIT | '_' DIGIT)*;
fragment FRAC : '.' ZERO_PREFIXABLE_INT;
// A decimal integer followed by an exponent, or by a fraction with an optional exponent.
FLOAT : DEC_INT ( EXP | FRAC EXP?) -> popMode;
INF : [+-]? 'inf' -> popMode;
NAN : [+-]? 'nan' -> popMode;

// integers
fragment HEX_DIGIT : [A-Fa-f] | DIGIT;
fragment DIGIT_1_9 : [1-9];
fragment DIGIT_0_7 : [0-7];
fragment DIGIT_0_1 : [0-1];
// Optional sign, then either a single digit or a non-zero digit followed by more digits
// (underscores only between digits), i.e. no leading zeros on multi-digit numbers.
DEC_INT : [+-]? (DIGIT | (DIGIT_1_9 (DIGIT | '_' DIGIT)+)) -> popMode;
// Prefixed integers take no sign and use lowercase prefixes only.
HEX_INT : '0x' HEX_DIGIT (HEX_DIGIT | '_' HEX_DIGIT)* -> popMode;
OCT_INT : '0o' DIGIT_0_7 (DIGIT_0_7 | '_' DIGIT_0_7)* -> popMode;
BIN_INT : '0b' DIGIT_0_1 (DIGIT_0_1 | '_' DIGIT_0_1)* -> popMode;

// dates
// The fragments below only check the digit layout (e.g. two digits for a month); they do not
// validate ranges.
fragment YEAR : DIGIT DIGIT DIGIT DIGIT;
fragment MONTH : DIGIT DIGIT;
fragment DAY : DIGIT DIGIT;
// Separator between date and time: 'T', 't' or a single space.
fragment DELIM : 'T' | 't' | ' ';
fragment HOUR : DIGIT DIGIT;
fragment MINUTE : DIGIT DIGIT;
fragment SECOND : DIGIT DIGIT;
fragment SECFRAC : '.' DIGIT+;
fragment NUMOFFSET : ('+' | '-') HOUR ':' MINUTE;
// NOTE(review): only an uppercase 'Z' is accepted here although DELIM accepts both 'T' and 't'
// — confirm whether a lowercase 'z' offset should be supported as well.
fragment OFFSET : 'Z' | NUMOFFSET;
fragment PARTIAL_TIME : HOUR ':' MINUTE ':' SECOND SECFRAC?;
fragment FULL_DATE : YEAR '-' MONTH '-' DAY;
fragment FULL_TIME : PARTIAL_TIME OFFSET;
OFFSET_DATE_TIME : FULL_DATE DELIM FULL_TIME -> popMode;
LOCAL_DATE_TIME : FULL_DATE DELIM PARTIAL_TIME -> popMode;
LOCAL_DATE : FULL_DATE -> popMode;
LOCAL_TIME : PARTIAL_TIME -> popMode;

// Mode active between '{' and '}'. Keys, dots and commas are re-emitted under the token types
// of the default mode so the parser can use the same key rules. This mode declares no rules
// for line breaks or comments.
mode INLINE_TABLE_MODE;

INLINE_TABLE_WS : WS -> skip;
INLINE_TABLE_KEY_DOT : DOT -> type(DOT);
// Unlike the default-mode COMMA rule, this one is not skipped: it produces a COMMA token.
INLINE_TABLE_COMMA : COMMA -> type(COMMA);
// Closing brace leaves the inline table and returns to the previous mode on the mode stack.
R_BRACE : '}' -> popMode;

INLINE_TABLE_KEY_BASIC_STRING : BASIC_STRING -> type(BASIC_STRING);
INLINE_TABLE_KEY_LITERAL_STRING : LITERAL_STRING -> type(LITERAL_STRING);
INLINE_TABLE_KEY_UNQUOTED : UNQUOTED_KEY -> type(UNQUOTED_KEY);

// '=' inside an inline table lexes one value in SIMPLE_VALUE_MODE, then control returns here.
INLINE_TABLE_EQUALS: EQUALS -> type(EQUALS), pushMode(SIMPLE_VALUE_MODE);

// Mode active between '[' and ']' of an array value. Each rule matches the same text as its
// counterpart from the other modes and re-labels the token with that counterpart's type; none
// of the value rules pops the mode, so lexing stays here until ARRAY_END.
mode ARRAY_MODE;

ARRAY_WS : WS -> skip;
// Line breaks and comments are emitted as tokens inside arrays.
ARRAY_NL : NL -> type(NL);
ARRAY_COMMENT : COMMENT -> type(COMMENT);
// Unlike the default-mode COMMA rule, this one is not skipped: it produces a COMMA token.
ARRAY_COMMA : COMMA -> type(COMMA);

// Nested containers push a new mode so that their closing delimiter returns to this array.
ARRAY_INLINE_TABLE_START : L_BRACE -> type(L_BRACE), pushMode(INLINE_TABLE_MODE);
NESTED_ARRAY_START : L_BRACKET -> type(L_BRACKET), pushMode(ARRAY_MODE);
ARRAY_END : R_BRACKET -> type(R_BRACKET), popMode;

ARRAY_BOOLEAN: BOOLEAN -> type(BOOLEAN);

ARRAY_BASIC_STRING : BASIC_STRING -> type(BASIC_STRING);
ARRAY_ML_BASIC_STRING : ML_BASIC_STRING -> type(ML_BASIC_STRING);
ARRAY_LITERAL_STRING : LITERAL_STRING -> type(LITERAL_STRING);
ARRAY_ML_LITERAL_STRING : ML_LITERAL_STRING -> type(ML_LITERAL_STRING);

ARRAY_FLOAT : FLOAT -> type(FLOAT);
ARRAY_INF : INF -> type(INF);
ARRAY_NAN : NAN -> type(NAN);

ARRAY_DEC_INT : DEC_INT -> type(DEC_INT);
ARRAY_HEX_INT : HEX_INT -> type(HEX_INT);
ARRAY_OCT_INT : OCT_INT -> type(OCT_INT);
ARRAY_BIN_INT : BIN_INT -> type(BIN_INT);

ARRAY_OFFSET_DATE_TIME : OFFSET_DATE_TIME -> type(OFFSET_DATE_TIME);
ARRAY_LOCAL_DATE_TIME : LOCAL_DATE_TIME -> type(LOCAL_DATE_TIME);
ARRAY_LOCAL_DATE : LOCAL_DATE -> type(LOCAL_DATE);
ARRAY_LOCAL_TIME : LOCAL_TIME -> type(LOCAL_TIME);
137 changes: 137 additions & 0 deletions rewrite-toml/src/main/antlr/TomlParser.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
/*
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
*/

// $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false
// $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging

// Parser for TOML documents; token types come from the TomlLexer vocabulary.
parser grammar TomlParser;

options {
tokenVocab = TomlLexer;
}

// Entry rule: NL-separated expressions up to end of input. Because every expression is
// optional, empty input and blank lines are accepted.
document
: expression? (NL expression?)* EOF
;

// One logical line: a key/value pair or a table (each optionally followed by a comment),
// or a comment on its own.
expression
: keyValue comment?
| table comment?
| comment
;

// Wrapper rule around the COMMENT token.
comment
: COMMENT
;

// A single `key = value` assignment.
keyValue
: key EQUALS value
;

// A key is either a single segment or several segments joined by dots.
key
: simpleKey
| dottedKey
;

// One key segment, quoted or bare.
simpleKey
: quotedKey
| unquotedKey
;

// Bare key segment.
unquotedKey
: UNQUOTED_KEY
;

// Quoted key segment; only the single-line string forms are accepted.
quotedKey
: BASIC_STRING
| LITERAL_STRING
;

// Two or more key segments separated by DOT.
dottedKey
: simpleKey (DOT simpleKey)+
;

// Anything that may appear on the right-hand side of '=' or as an array element.
value
: string
| integer
| floatingPoint
| bool
| dateTime
| array
| inlineTable
;

// Any of the four string token kinds (basic/literal, single-line/multi-line).
string
: BASIC_STRING
| ML_BASIC_STRING
| LITERAL_STRING
| ML_LITERAL_STRING
;

// Decimal, hexadecimal, octal or binary integer token.
integer
: DEC_INT
| HEX_INT
| OCT_INT
| BIN_INT
;

// Float token, or one of the dedicated `inf` / `nan` tokens.
floatingPoint
: FLOAT
| INF
| NAN
;

// Wrapper rule around the BOOLEAN token.
bool
: BOOLEAN
;

// Any of the four date/time token kinds.
dateTime
: OFFSET_DATE_TIME
| LOCAL_DATE_TIME
| LOCAL_DATE
| LOCAL_TIME
;

// A line break, optionally preceded by a comment. Used wherever blank or comment-only lines
// are tolerated inside a larger construct. A comment that is not followed by NL does not match.
commentOrNl
: COMMENT NL
| NL
;

// A bracketed, comma-separated list of values.
//
// Alternative 1: an empty array, possibly containing only line breaks / comments.
// Alternative 2: one or more values separated by exactly one COMMA each, with an optional
// single trailing COMMA after the last value. Line breaks and comments may appear after the
// opening bracket, after each separating comma, and before the closing bracket.
//
// The optional trailing comma is deliberately placed outside the repetition: inside the loop it
// made a single-element array with a trailing comma (`[1,]`) unparseable, while allowing a
// doubled comma between elements (`[1, 2,, 3]`).
//
// The generated sources must be regenerated after changing this rule.
array
    : L_BRACKET commentOrNl* R_BRACKET
    | L_BRACKET commentOrNl* value (COMMA commentOrNl* value)* COMMA? commentOrNl* R_BRACKET
    ;

// A table header of either kind, together with the expressions that follow it.
table
: standardTable
| arrayTable
;

// `[key]` followed by any number of expressions, each optionally preceded by blank or
// comment-only lines. Since `expression` itself includes `table`, a following table header can
// be matched as part of this repetition.
standardTable
: L_BRACKET key R_BRACKET (commentOrNl* expression)*
;

// `{ key = value, ... }`.
// NOTE(review): INLINE_TABLE_MODE in the lexer declares no NL or COMMENT rule, so the
// commentOrNl* positions directly inside the braces look unreachable — confirm.
// NOTE(review): COMMA? sits inside the repetition, so a trailing comma is only accepted after
// the second or later pair, and a doubled comma between pairs is accepted — confirm intent.
inlineTable
: L_BRACE commentOrNl* R_BRACE
| L_BRACE commentOrNl* keyValue (COMMA commentOrNl* keyValue COMMA?)* commentOrNl* R_BRACE
;

// `[[key]]` followed by any number of expressions, with the same body shape as standardTable.
arrayTable
: DOUBLE_L_BRACKET key DOUBLE_R_BRACKET (commentOrNl* expression)*
;
52 changes: 52 additions & 0 deletions rewrite-toml/src/main/java/org/openrewrite/toml/Assertions.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright 2025 the original author or authors.
* <p>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p>
* https://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.openrewrite.toml;

import org.intellij.lang.annotations.Language;
import org.openrewrite.internal.lang.Nullable;
import org.openrewrite.test.SourceSpec;
import org.openrewrite.test.SourceSpecs;
import org.openrewrite.toml.tree.Toml;

import java.util.function.Consumer;

/**
 * Static factory methods that create {@link SourceSpecs} for TOML sources, each backed by
 * {@link TomlParser#builder()} and typed to {@link Toml.Document}.
 * <p>
 * This class only holds static methods and cannot be instantiated.
 */
public class Assertions {
    private Assertions() {
    }

    /**
     * Creates a spec for a TOML source given only its {@code before} text.
     *
     * @param before the TOML source text, may be {@code null}
     * @return the resulting source spec
     */
    public static SourceSpecs toml(@Language("toml") @Nullable String before) {
        return toml(before, ignored -> {
        });
    }

    /**
     * Creates a spec for a TOML source given only its {@code before} text, then lets the
     * caller customize it.
     *
     * @param before the TOML source text, may be {@code null}
     * @param spec   callback invoked with the newly created spec before it is returned
     * @return the resulting source spec
     */
    public static SourceSpecs toml(@Language("toml") @Nullable String before, Consumer<SourceSpec<Toml.Document>> spec) {
        return customized(
                new SourceSpec<>(Toml.Document.class, null, TomlParser.builder(), before, null),
                spec);
    }

    /**
     * Creates a spec for a TOML source given its {@code before} and {@code after} text.
     *
     * @param before the TOML source text, may be {@code null}
     * @param after  the text supplied as the "after" state, may be {@code null}
     * @return the resulting source spec
     */
    public static SourceSpecs toml(@Language("toml") @Nullable String before, @Language("toml") @Nullable String after) {
        return toml(before, after, ignored -> {
        });
    }

    /**
     * Creates a spec for a TOML source given its {@code before} and {@code after} text, then
     * lets the caller customize it.
     *
     * @param before the TOML source text, may be {@code null}
     * @param after  the text supplied as the "after" state, may be {@code null}
     * @param spec   callback invoked with the newly created spec before it is returned
     * @return the resulting source spec
     */
    public static SourceSpecs toml(@Language("toml") @Nullable String before, @Language("toml") @Nullable String after,
                                   Consumer<SourceSpec<Toml.Document>> spec) {
        return customized(
                new SourceSpec<>(Toml.Document.class, null, TomlParser.builder(), before, actual -> after),
                spec);
    }

    /**
     * Hands {@code sourceSpec} to the caller-provided callback and returns that same instance.
     */
    private static SourceSpecs customized(SourceSpec<Toml.Document> sourceSpec, Consumer<SourceSpec<Toml.Document>> spec) {
        spec.accept(sourceSpec);
        return sourceSpec;
    }
}
Loading
Loading