diff --git a/source/parsing/tokenizer/make_tokens.d b/source/parsing/tokenizer/make_tokens.d
index 099a3f9..acecc9c 100644
--- a/source/parsing/tokenizer/make_tokens.d
+++ b/source/parsing/tokenizer/make_tokens.d
@@ -80,7 +80,22 @@ private Token[] protoTokenize(string input)
         }
 
         TokenType tokenType = getVarietyOfLetter(symbol);
-        tokens ~= Token(tokenType, [symbol], index);
+        Token token = Token(tokenType, [symbol], index);
+        if (tokenType == TokenType.Quotation)
+        {
+            dchar last = symbol;
+            index++;
+            while (index < chars.length)
+            {
+                dchar symbol2 = chars[index];
+                token.value ~= symbol2;
+                if (symbol2 == symbol && last != '\\')
+                    break;
+                last = symbol2;
+                index++;
+            }
+        }
+        tokens ~= token;
     }
     return tokens;
 }
diff --git a/source/parsing/tokenizer/tokens.d b/source/parsing/tokenizer/tokens.d
index 932ba09..0f6d847 100644
--- a/source/parsing/tokenizer/tokens.d
+++ b/source/parsing/tokenizer/tokens.d
@@ -13,6 +13,7 @@ enum TokenType
     Letter,
     Semicolon,
     Colon,
+    Comma,
     Pipe,
     WhiteSpace,
     Equals,
@@ -69,7 +70,7 @@ bool isSingleLineComment(dchar first, dchar secound)
 {
     static foreach (const dchar[] style; validSingleLineCommentStyles)
     {
-        if (style[0] == first || style[0] == secound)
+        if (style[0] == first && style[0] == secound)
             return true;
     }
     return false;
@@ -115,13 +116,15 @@ TokenType getVarietyOfLetter(dchar symbol)
             return TokenType.Pipe;
         case '.':
             return TokenType.Period;
+        case ',':
+            return TokenType.Comma;
         default:
             break;
     }
 
     if (isDigit(symbol))
         return TokenType.Number;
-    if (isAlpha(symbol))
+    if (isAlpha(symbol) || symbol == '_')
         return TokenType.Letter;
     if (isWhite(symbol))
         return TokenType.WhiteSpace;
@@ -144,14 +147,14 @@ struct Token
     size_t startingIndex;
 }
 
-import tern.typecons.common : Nullable;
+import tern.typecons.common : Nullable, nullable;
 
 Nullable!Token nextToken(Token[] tokens, ref size_t index)
 {
     Nullable!Token found;
-    if (tokens.length >= index)
-        return found;
-    found = tokens[index++];
+    if (tokens.length <= index+1)
+        return nullable!Token(null);
+    found = tokens[++index];
     return found;
 }
diff --git a/source/parsing/treegen/astTypes.d b/source/parsing/treegen/astTypes.d
index 27bc065..015eb41 100644
--- a/source/parsing/treegen/astTypes.d
+++ b/source/parsing/treegen/astTypes.d
@@ -107,15 +107,15 @@ enum OperationVariety
 
 struct SingleArgumentOperationNodeData
 {
-    OperationVariety pperationVariety;
-    AstNode* value;
+    OperationVariety operationVariety;
+    AstNode value;
 }
 
 struct DoubleArgumentOperationNodeData
 {
-    OperationVariety pperationVariety;
-    AstNode* left;
-    AstNode* right;
+    OperationVariety operationVariety;
+    AstNode left;
+    AstNode right;
 }
 
 struct ExpressionNodeData
@@ -128,10 +128,10 @@ struct ExpressionNodeData
 struct CallNodeData
 {
     NameUnit func;
-    AstNode* args;
+    AstNode args;
 }
 
-struct AstNode
+class AstNode
 {
     AstAction action;
     union
@@ -159,26 +159,97 @@ struct AstNode
         sink("{");
         switch (action)
         {
-        case AstAction.Keyword:
-            sink(keywordNodeData.to!string);
-            break;
-        case AstAction.TokenHolder:
-            sink(tokenBeingHeld.to!string);
-            break;
-        case AstAction.Expression:
-            sink(expressionNodeData.components.to!string);
-            break;
-        default: break;
+            case AstAction.Keyword:
+                sink(keywordNodeData.to!string);
+                break;
+            case AstAction.TokenHolder:
+                sink(tokenBeingHeld.to!string);
+                break;
+            case AstAction.Expression:
+                sink(expressionNodeData.components.to!string);
+                break;
+            case AstAction.NamedUnit:
+                sink(namedUnit.names.to!string);
+                break;
+            case AstAction.Call:
+                sink(callNodeData.func.names.to!string);
+                sink("(\n");
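+                // to!string on the argument node recurses through this same toString.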
+                sink(callNodeData.args.to!string);
+                sink("\n)");
+                break;
+            case AstAction.LiteralUnit:
+                sink(literalUnitCompenents.to!string);
+                break;
+            case AstAction.DoubleArgumentOperation:
+                sink(doubleArgumentOperationNodeData.operationVariety.to!string);
+                sink(", ");
+                sink(doubleArgumentOperationNodeData.left.to!string);
+                sink(", ");
+                sink(doubleArgumentOperationNodeData.right.to!string);
+                break;
+            default:
+                break;
         }
         sink("}");
     }
+
+    void tree(size_t tabCount)
+    {
+        import std.stdio;
+        import std.conv;
+
+        foreach (i; 0 .. tabCount)
+            write("| ");
+
+        switch (action)
+        {
+            case AstAction.Call:
+                writeln(callNodeData.func.to!string ~ ":");
+                callNodeData.args.tree(tabCount + 1);
+                break;
+            case AstAction.DoubleArgumentOperation:
+                writeln(doubleArgumentOperationNodeData.operationVariety.to!string ~ ":");
+                doubleArgumentOperationNodeData.left.tree(tabCount + 1);
+                doubleArgumentOperationNodeData.right.tree(tabCount + 1);
+                break;
+            case AstAction.Expression:
+                writeln("Result of expression with " ~ expressionNodeData.components.length.to!string ~ " components:");
+                foreach (subnode; expressionNodeData.components)
+                {
+                    subnode.tree(tabCount + 1);
+                }
+                break;
+            default:
+                writeln(this.to!string);
+                break;
+        }
+    }
 }
 
-struct ScopeParsingMode{
-    bool allowDefiningObjects;
-    bool allowDefiningFunctions;
-    bool allowVariableDefinitions;
-    bool allowInlineVariableAssignments;
-    bool hasProperties;
-    bool isCommaSeperated;
-}
\ No newline at end of file
+// struct ScopeParsingMode{
+//     bool allowDefiningObjects;
+//     bool allowDefiningFunctions;
+//     bool allowVariableDefinitions;
+//     bool allowInlineVariableAssignments;
+//     bool hasProperties;
+//     bool isCommaSeperated;
+// }
+import std.container.array;
+
+Nullable!AstNode nextNonWhiteNode(Array!AstNode nodes, ref size_t index)
+{
+    Nullable!AstNode found;
+    while (nodes.length > index)
+    {
+        import parsing.tokenizer.tokens;
+
+        AstNode node = nodes[index++];
+        if (node.action == AstAction.TokenHolder &&
+            (node.tokenBeingHeld.tokenVariety == TokenType.WhiteSpace
+                || node.tokenBeingHeld.tokenVariety == TokenType.Comment))
+            continue;
+        found = node;
+        break;
+    }
+    return found;
+}
diff --git a/source/parsing/treegen/expressionParser.d b/source/parsing/treegen/expressionParser.d
index 73e2b3b..4173059 100644
--- a/source/parsing/treegen/expressionParser.d
+++ b/source/parsing/treegen/expressionParser.d
@@ -1,47 +1,133 @@
 module parsing.treegen.expressionParser;
+import tern.typecons.common : Nullable, nullable;
 import parsing.treegen.astTypes;
 import parsing.tokenizer.tokens;
-import parsing.tokenizer.make_tokens;
-import tern.typecons.common : Nullable;
+import parsing.treegen.tokenRelationships;
 import errors;
+import std.stdio;
+import std.container.array;
+// Group letter.letter.letter chains into NamedUnit nodes
+// Group parenthesis into AstNode.Expression nodes to be parsed separately
+private AstNode[] phaseOne(Token[] tokens)
+{
+    AstNode[] ret;
+    AstNode[] parenthesisStack;
+    bool isLastTokenWhite = false;
+    for (size_t index = 0; index < tokens.length; index++)
+    {
+        Token token = tokens[index];
+        if (token.tokenVariety == TokenType.OpenBraces)
+        {
+            AstNode newExpression = new AstNode();
+            newExpression.action = AstAction.Expression;
+            newExpression.expressionNodeData = ExpressionNodeData(
+                token.value[0],
+                braceOpenToBraceClose[token.value[0]],
+                []
+            );
+            parenthesisStack ~= newExpression;
+            continue;
+        }
+        if (token.tokenVariety == TokenType.CloseBraces)
+        {
+
+            if (parenthesisStack.length == 0)
+                throw new SyntaxError("Parenthesis closed but never opened");
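+            // Close the innermost expression: verify the closer, pop it, and
+            // attach it to its parent expression (or the root list).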
never opened"); -// Line types; -///// 1. Variable declairation. -///// NameUnit followed by NameUnit + AstNode node = parenthesisStack[$ - 1]; -enum LineType{ - Declaration, - Expression -} + if (node.expressionNodeData.closer != token.value[0]) + throw new SyntaxError("Parenthesis not closed with correct token"); + parenthesisStack.length--; -import std.stdio; -void parseLine(Token[] tokens, ScopeParsingMode mode) -{ - size_t index = 0; - Nullable!Token firstToken = tokens.nextNonWhiteToken(index); - if (firstToken.ptr == null) - throw new SyntaxError("Expected a statement"); - // Nullable!LineType; - // Determine line type - if (mode.allowVariableDefinitions){ - + if (parenthesisStack.length == 0) + ret ~= node; + else + parenthesisStack[$ - 1].expressionNodeData.components ~= node; + continue; + } + AstNode tokenToBeParsedLater = new AstNode(); + if (token.tokenVariety == TokenType.Letter){ + tokenToBeParsedLater.action = AstAction.NamedUnit; + tokenToBeParsedLater.namedUnit = tokens.genNameUnit(index); + index--; + }else if(token.tokenVariety == TokenType.Number){ + tokenToBeParsedLater.action = AstAction.LiteralUnit; + tokenToBeParsedLater.literalUnitCompenents = [token]; + } + else if(token.tokenVariety != TokenType.Comment){ + bool isWhite = token.tokenVariety == TokenType.WhiteSpace; + if (isWhite && isLastTokenWhite) continue; + isLastTokenWhite = isWhite; + + tokenToBeParsedLater.action = AstAction.TokenHolder; + tokenToBeParsedLater.tokenBeingHeld = token; + } + + if (parenthesisStack.length == 0) + ret ~= tokenToBeParsedLater; + else + parenthesisStack[$ - 1].expressionNodeData.components ~= tokenToBeParsedLater; } + return ret; +} + +private void operatorPairingPhase(Array!AstNode nodes){ + +} +// Handle function calls and operators +private void phaseTwo(Array!AstNode nodes){ + for (size_t index = 0; index < nodes.length; index++){ + AstNode node = nodes[index]; + if (node.action == AstAction.NamedUnit && index+1 < nodes.length && nodes[index+1].action == AstAction.Expression){ + AstNode functionCall = new AstNode(); + AstNode args = nodes[index+1]; + + Array!AstNode components; + components~=args.expressionNodeData.components; + phaseTwo(components); + scanAndMergeOperators(components); + args.expressionNodeData.components.length = components.data.length; + args.expressionNodeData.components[0..$] = components.data[0..$]; + + + functionCall.action = AstAction.Call; + functionCall.callNodeData = CallNodeData( + node.namedUnit, + args + ); + nodes[index] = functionCall; + nodes.linearRemove(nodes[index+1..index+2]); + } + else if (node.action == AstAction.Expression){ + Array!AstNode components; + components~=node.expressionNodeData.components; + phaseTwo(components); + scanAndMergeOperators(components); + node.expressionNodeData.components.length = components.data.length; + node.expressionNodeData.components[0..$] = components.data[0..$]; + } + } } +import parsing.treegen.treeGenUtils; +import parsing.treegen.tokenRelationships; unittest { - parseLine(tokenizeText("int x = 4;"), ScopeParsingMode( - false, - false, - true, - true, - false, - false - )); + + import parsing.tokenizer.make_tokens; + AstNode[] phaseOneNodes = phaseOne("math.sqrt(3*5+6*7/2)*3".tokenizeText); + + Array!AstNode nodes; + nodes~=phaseOneNodes; + phaseTwo(nodes); + scanAndMergeOperators(nodes); + nodes[0].tree(0); + } \ No newline at end of file diff --git a/source/parsing/treegen/scopeParser.d b/source/parsing/treegen/scopeParser.d index c16899a..25978c3 100644 --- 
--- a/source/parsing/treegen/scopeParser.d
+++ b/source/parsing/treegen/scopeParser.d
@@ -1,2 +1,63 @@
 module parsing.treegen.scopeParser;
+import parsing.tokenizer.tokens;
+import parsing.treegen.tokenRelationships;
+enum LineVariety
+{
+    SimpleExpression,
+    IfStatementWithScope,
+    IfStatementWithoutScope,
+    DeclarationLine,
+    DeclarationAndAssignment
+}
+
+struct LineVarietyAndLength
+{
+    LineVariety lineVariety;
+    size_t length;
+}
+
+LineVarietyAndLength getLineVarietyAndLength(Token[] tokens, size_t index)
+{
+    size_t temp_index = index;
+
+    static foreach (i, func; [
+            IfStatementWithScope,
+            IfStatementWithoutScope,
+            DeclarationLine,
+            DeclarationAndAssignment
+        ])
+    {
+        if (func.matchesToken(tokens, temp_index))
+            return LineVarietyAndLength(
+                [
+                    LineVariety.IfStatementWithScope,
+                    LineVariety.IfStatementWithoutScope,
+                    LineVariety.DeclarationLine,
+                    LineVariety.DeclarationAndAssignment
+                ][i], temp_index - index
+            );
+        temp_index = index;
+    }
+
+    return LineVarietyAndLength(LineVariety.SimpleExpression, -1);
+}
+import std.stdio;
+void parseLine(Token[] tokens, ref size_t index)
+{
+    LineVarietyAndLength lineVariety = tokens.getLineVarietyAndLength(index);
+}
+
+unittest
+{
+    import std.stdio;
+    import parsing.tokenizer.make_tokens;
+
+    // assert(LineVariety.IfStatementWithoutScope == getLineVariety("if (hello) world;".tokenizeText));
+    // assert(LineVariety.IfStatementWithScope == getLineVariety("if (hello) {wo\n rl\nd};".tokenizeText));
+    size_t i = 0;
+    // getLineVarietyAndLength("int x = 4;".tokenizeText, 0).writeln;
+    // parseLine("int x = 4;".tokenizeText, i);
+    // DeclarationLine.matchesToken()
+
+}
diff --git a/source/parsing/treegen/tokenRelationships.d b/source/parsing/treegen/tokenRelationships.d
index 5673c5b..499a311 100644
--- a/source/parsing/treegen/tokenRelationships.d
+++ b/source/parsing/treegen/tokenRelationships.d
@@ -1,29 +1,279 @@
 module parsing.treegen.tokenRelationships;
 import parsing.tokenizer.tokens;
 import parsing.treegen.astTypes;
+import parsing.treegen.treeGenUtils;
+import tern.typecons.common : Nullable;
 
-enum OperatorOrder{
+enum TokenGrepMethod
+{
+    Glob,
+    Whitespace,
+    MatchesTokens,
+    MatchesTokenType,
+    Scope,
+    ConditionWithCertainReturnType,
+    NameUnit,
+    PossibleCommaSeperated
+}
+
+struct TokenGrepPacket
+{
+    TokenGrepMethod method;
+    union
+    {
+        Token[] tokens;
+        TokenGrepPacket[] packets;
+    }
+}
+
+TokenGrepPacket TokenGrepPacketToken(TokenGrepMethod method, Token[] list)
+{
+    TokenGrepPacket ret;
+    ret.method = method;
+    ret.tokens = list;
+    return ret;
+}
+
+TokenGrepPacket TokenGrepPacketRec(TokenGrepMethod method, TokenGrepPacket[] list)
+{
+    TokenGrepPacket ret;
+    ret.method = method;
+    ret.packets = list;
+    return ret;
+}
+
+const TokenGrepPacket[] IfStatementWithoutScope = [
+    TokenGrepPacketToken(TokenGrepMethod.MatchesTokens, [
+        Token(TokenType.Letter, "if".makeUnicodeString)
+    ]),
+    TokenGrepPacketToken(TokenGrepMethod.MatchesTokens, [
+        Token(TokenType.OpenBraces, ['('])
+    ]),
+    TokenGrepPacketToken(TokenGrepMethod.Glob, []),
+    TokenGrepPacketToken(TokenGrepMethod.MatchesTokens, [
+        Token(TokenType.CloseBraces, [')'])
+    ]),
+    TokenGrepPacketToken(TokenGrepMethod.Glob, []),
+    TokenGrepPacketToken(TokenGrepMethod.MatchesTokens, [
+        Token(TokenType.Semicolon, [';'])
+    ]),
+];
+const TokenGrepPacket[] IfStatementWithScope = [
+    TokenGrepPacketToken(TokenGrepMethod.MatchesTokens, [
+        Token(TokenType.Letter, "if".makeUnicodeString)
+    ]),
+    TokenGrepPacketToken(TokenGrepMethod.MatchesTokens, [
+        Token(TokenType.OpenBraces, ['('])
+    ]),
+    TokenGrepPacketToken(TokenGrepMethod.Glob, []),
+    TokenGrepPacketToken(TokenGrepMethod.MatchesTokens, [
+        Token(TokenType.CloseBraces, [')'])
+    ]),
+    TokenGrepPacketToken(TokenGrepMethod.MatchesTokens, [
+        Token(TokenType.OpenBraces, ['{'])
+    ]),
+    TokenGrepPacketToken(TokenGrepMethod.Glob, []),
+    TokenGrepPacketToken(TokenGrepMethod.MatchesTokens, [
+        Token(TokenType.CloseBraces, ['}'])
+    ]),
+];
+
+// int x, y, z;
+const TokenGrepPacket[] DeclarationLine = [
+    TokenGrepPacketToken(TokenGrepMethod.NameUnit, []),
+    TokenGrepPacketRec(TokenGrepMethod.PossibleCommaSeperated, [
+        TokenGrepPacketToken(TokenGrepMethod.MatchesTokenType, [
+            Token(TokenType.Letter, [])
+        ])
+    ]),
+    TokenGrepPacketToken(TokenGrepMethod.MatchesTokenType, [
+        Token(TokenType.Semicolon, [])
+    ])
+];
+// int x, y, z = [1, 2, 3];
+const TokenGrepPacket[] DeclarationAndAssignment = [
+    TokenGrepPacketToken(TokenGrepMethod.NameUnit, []),
+    TokenGrepPacketRec(TokenGrepMethod.PossibleCommaSeperated, [
+        TokenGrepPacketToken(TokenGrepMethod.MatchesTokenType, [
+            Token(TokenType.Letter, [])
+        ])
+    ]),
+    TokenGrepPacketToken(TokenGrepMethod.MatchesTokenType, [
+        Token(TokenType.Equals, [])
+    ]),
+    TokenGrepPacketToken(TokenGrepMethod.Glob, []),
+    TokenGrepPacketToken(TokenGrepMethod.MatchesTokenType, [
+        Token(TokenType.Semicolon, [])
+    ])
+];
+
+bool matchesToken(in TokenGrepPacket[] testWith, Token[] tokens)
+{
+    size_t index = 0;
+    return matchesToken(testWith, tokens, index);
+}
+
+import std.stdio;
+
+bool matchesToken(in TokenGrepPacket[] testWith, Token[] tokens, ref size_t index)
+{
+    foreach (testIndex, packet; testWith)
+    {
+        switch (packet.method)
+        {
+            case TokenGrepMethod.NameUnit:
+                if (index >= tokens.length)
+                    return false;
+                NameUnit name = genNameUnit(tokens, index);
+                if (name.names.length == 0)
+                    return false;
+                break;
+            case TokenGrepMethod.MatchesTokenType:
+                Nullable!Token potential = tokens.nextNonWhiteToken(index);
+                if (potential.ptr == null)
+                    return false;
+                Token token = potential;
+                bool doRet = true;
+
+                foreach (const(Token) potentialMatch; packet.tokens)
+                {
+                    if (potentialMatch.tokenVariety == token.tokenVariety)
+                        doRet = false;
+                }
+                if (doRet)
+                    return false;
+                break;
+            case TokenGrepMethod.MatchesTokens:
+                foreach (const(Token) testToken; packet.tokens)
+                {
+                    Nullable!Token tokenNullable = tokens.nextNonWhiteToken(index);
+                    if (tokenNullable.ptr == null)
+                        return false;
+                    Token token = tokenNullable;
+                    if (token.tokenVariety != testToken.tokenVariety || token.value != testToken.value)
+                        return false;
+                }
+                break;
+            case TokenGrepMethod.PossibleCommaSeperated:
+                if (index >= tokens.length)
+                    return false;
+                Token[][] tstack;
+                Token[] currentGroup;
+                size_t maxComma = 0;
+                foreach (secountIndex, token; tokens[index .. $])
+                {
+                    if (token.tokenVariety == TokenType.Comma)
+                    {
+                        maxComma = secountIndex + 1;
+                        tstack ~= currentGroup;
+                        currentGroup = new Token[0];
+                        continue;
+                    }
+                    currentGroup ~= token;
+                }
+                size_t searchExtent;
+                tstack ~= currentGroup;
+                foreach (Token[] tokenGroup; tstack)
+                {
+                    searchExtent = 0;
+
+                    if (!matchesToken(packet.packets, tokenGroup, searchExtent))
+                        return false;
+                }
+                index += maxComma + searchExtent;
+
+                break;
+
+            case TokenGrepMethod.Glob:
+                if (testWith[testIndex + 1 .. $].matchesToken(tokens[index .. $]))
+                    return true;
+                int braceDeph = 0;
+                while (true)
+                {
+                    Nullable!Token tokenNullable = tokens.nextToken(index);
+                    if (tokenNullable.ptr == null)
+                        return false;
+                    Token token = tokenNullable;
+                    if (token.tokenVariety == TokenType.OpenBraces)
+                        braceDeph += 1;
+                    else if (token.tokenVariety == TokenType.CloseBraces && braceDeph != 0)
+                        braceDeph -= 1;
+                    else if (braceDeph == 0)
+                    {
+                        size_t index_inc;
+                        if (testWith[testIndex + 1 .. $].matchesToken(tokens[index .. $], index_inc))
+                        {
+                            index += index_inc;
+                            return true;
+                        }
+                    }
+                }
+                break;
+            default:
+                assert(0, "Not implemented");
+
+        }
+    }
+
+    return true;
+}
+
+unittest
+{
+    import parsing.tokenizer.make_tokens;
+
+    assert(DeclarationLine.matchesToken(
+            tokenizeText("mod.type.submod x,r,q,a, A_variable \n\r\t ;")
+    ));
+    assert(DeclarationLine.matchesToken(tokenizeText("mod.type.submod x, a, e ,y;")));
+    assert(!DeclarationLine.matchesToken(tokenizeText(";mod.type x;")));
+    assert(!DeclarationLine.matchesToken(tokenizeText("123 mod.type x;")));
+    assert(!DeclarationLine.matchesToken(tokenizeText("mod.type x = 5;")));
+    assert(DeclarationAndAssignment.matchesToken(
+            tokenizeText("mod.type x, y, z , o = someFunc();")
+    ));
+    assert(!DeclarationAndAssignment.matchesToken(tokenizeText("someFunc();")));
+    assert(!DeclarationLine.matchesToken(tokenizeText("someFunc();")));
+    assert(IfStatementWithoutScope.matchesToken(tokenizeText("if (hello) testText;")));
+    assert(IfStatementWithoutScope.matchesToken(tokenizeText("if (hello) v = ()=>print(1235);")));
+    assert(IfStatementWithScope.matchesToken(tokenizeText("if (hello){}")));
+    assert(IfStatementWithScope.matchesToken(tokenizeText("if (hello world){}")));
+    assert(IfStatementWithScope.matchesToken(
+        tokenizeText(
+            "if (hello world){\n\n\r if(Some possible nested code) still works;}")
+    ));
+    assert(
+        DeclarationAndAssignment.matchesToken(tokenizeText("int x = 4;"))
+    );
+}
+
+enum OperatorOrder
+{
     LeftToRight,
     RightToLeft
 }
+
 struct OperatorPrecedenceLayer
 {
-    OperatorOrder order;
+    OperatorOrder order;
     OperationPrecedenceEntry[] layer;
 }
 
-struct OperationPrecedenceEntry{
+struct OperationPrecedenceEntry
+{
     OperationVariety operation;
     // These tokens are just the template used for
     // determining what is parsed in what order.
-    
+
     // TokenType of Operator is the operator to match to.
     // TokenType of Filler is an expression (or equivelent)
     const(Token[]) tokens;
 }
-private Token OPR(dchar o){
-    return Token(TokenType.Operator, [o]);
+
+private Token OPR(dchar o)
+{
+    return Token(o != '=' ? TokenType.Operator : TokenType.Equals, [o]);
 }
 
 // https://en.cppreference.com/w/c/language/operator_precedence
@@ -32,69 +282,257 @@ private Token OPR(dchar o){
 // of each layer they are read left to right, or right to left.
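+// NOTE: only the multiplicative and additive layers are active below; the
+// remaining layers are commented out for now.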
 const OperatorPrecedenceLayer[] operatorPrecedence = [
+    // OperatorPrecedenceLayer(OperatorOrder.LeftToRight, [
+    //         // TODO: Unary
+    //     ]),
+    // OperatorPrecedenceLayer(OperatorOrder.LeftToRight, [
+    //         OperationPrecedenceEntry(OperationVariety.PreIncrement, [
+    //             OPR('+'), OPR('+'), Token(TokenType.Filler)
+    //         ]),
+    //         OperationPrecedenceEntry(OperationVariety.PreDecrement, [
+    //             OPR('-'), OPR('-'), Token(TokenType.Filler)
+    //         ]),
+    //         OperationPrecedenceEntry(OperationVariety.PostIncrement, [
+    //             Token(TokenType.Filler), OPR('+'), OPR('+')
+    //         ]),
+    //         OperationPrecedenceEntry(OperationVariety.PostDecrement, [
+    //             Token(TokenType.Filler), OPR('-'), OPR('-')
+    //         ]),
+
+    //         OperationPrecedenceEntry(OperationVariety.LogicalNot, [
+    //             OPR('!'), Token(TokenType.Filler)
+    //         ]),
+    //         OperationPrecedenceEntry(OperationVariety.BitwiseNot, [
+    //             OPR('~'), Token(TokenType.Filler)
+    //         ]),
+    //     ]),
     OperatorPrecedenceLayer(OperatorOrder.LeftToRight, [
-        // TODO: Unary
-    ]),
-    OperatorPrecedenceLayer(OperatorOrder.LeftToRight, [
-        OperationPrecedenceEntry(OperationVariety.PreIncrement, [OPR('+'), OPR('+'), Token(TokenType.Filler)]),
-        OperationPrecedenceEntry(OperationVariety.PreDecrement, [OPR('-'), OPR('-'), Token(TokenType.Filler)]),
-        OperationPrecedenceEntry(OperationVariety.PostIncrement, [Token(TokenType.Filler), OPR('+'), OPR('+')]),
-        OperationPrecedenceEntry(OperationVariety.PostDecrement, [Token(TokenType.Filler), OPR('-'), OPR('-')]),
-
-        OperationPrecedenceEntry(OperationVariety.LogicalNot, [OPR('!'), Token(TokenType.Filler)]),
-        OperationPrecedenceEntry(OperationVariety.BitwiseNot, [OPR('~'), Token(TokenType.Filler)]),
-    ]),
-    OperatorPrecedenceLayer(OperatorOrder.LeftToRight, [
-        OperationPrecedenceEntry(OperationVariety.Multiply,[Token(TokenType.Filler), OPR('*'), Token(TokenType.Filler)]),
-        OperationPrecedenceEntry(OperationVariety.Divide, [Token(TokenType.Filler), OPR('/'), Token(TokenType.Filler)]),
-        OperationPrecedenceEntry(OperationVariety.Mod, [Token(TokenType.Filler), OPR('%'), Token(TokenType.Filler)]),
-    ]),
-    OperatorPrecedenceLayer(OperatorOrder.LeftToRight, [
-        OperationPrecedenceEntry(OperationVariety.Add, [Token(TokenType.Filler), OPR('+'), Token(TokenType.Filler)]),
-        OperationPrecedenceEntry(OperationVariety.Substract, [Token(TokenType.Filler), OPR('-'), Token(TokenType.Filler)]),
-    ]),
-    OperatorPrecedenceLayer(OperatorOrder.LeftToRight, [
-        OperationPrecedenceEntry(OperationVariety.BitshiftLeftSigned, [Token(TokenType.Filler), OPR('<'), OPR('<'), Token(TokenType.Filler)]),
-        OperationPrecedenceEntry(OperationVariety.BitshiftRightSigned, [Token(TokenType.Filler), OPR('>'), OPR('>'), Token(TokenType.Filler)]),
-    ]),
-    OperatorPrecedenceLayer(OperatorOrder.LeftToRight, [
-        OperationPrecedenceEntry(OperationVariety.LessThanEq, [Token(TokenType.Filler), OPR('<'), OPR('='), Token(TokenType.Filler)]),
-        OperationPrecedenceEntry(OperationVariety.GreaterThanEq, [Token(TokenType.Filler), OPR('>'), OPR('='), Token(TokenType.Filler)]),
-        OperationPrecedenceEntry(OperationVariety.LessThan, [Token(TokenType.Filler), OPR('<'), Token(TokenType.Filler)]),
-        OperationPrecedenceEntry(OperationVariety.GreaterThan, [Token(TokenType.Filler), OPR('>'), Token(TokenType.Filler)]),
-    ]),
-    OperatorPrecedenceLayer(OperatorOrder.LeftToRight, [
-        OperationPrecedenceEntry(OperationVariety.NotEqualTo, [Token(TokenType.Filler), OPR('!'), OPR('='), Token(TokenType.Filler)]),
-        OperationPrecedenceEntry(OperationVariety.EqualTo ,[Token(TokenType.Filler), OPR('='), OPR('='), Token(TokenType.Filler)]),
-    ]),
-    OperatorPrecedenceLayer(OperatorOrder.LeftToRight, [
-        OperationPrecedenceEntry(OperationVariety.BitwiseAnd, [Token(TokenType.Filler), OPR('&'), Token(TokenType.Filler)]),
-    ]),
-    OperatorPrecedenceLayer(OperatorOrder.LeftToRight, [
-        OperationPrecedenceEntry(OperationVariety.BitwiseXor, [Token(TokenType.Filler), OPR('^'), Token(TokenType.Filler)]),
-    ]),
-    OperatorPrecedenceLayer(OperatorOrder.LeftToRight, [
-        OperationPrecedenceEntry(OperationVariety.BitwiseOr, [Token(TokenType.Filler), OPR('|'), Token(TokenType.Filler)]),
-    ]),
-    OperatorPrecedenceLayer(OperatorOrder.LeftToRight, [
-        OperationPrecedenceEntry(OperationVariety.LogicalAnd, [Token(TokenType.Filler), OPR('&'), OPR('&'), Token(TokenType.Filler)]),
-    ]),
+        OperationPrecedenceEntry(OperationVariety.Multiply, [
+            Token(TokenType.Filler), OPR('*'), Token(TokenType.Filler)
+        ]),
+        OperationPrecedenceEntry(OperationVariety.Divide, [
+            Token(TokenType.Filler), OPR('/'), Token(TokenType.Filler)
+        ]),
+        OperationPrecedenceEntry(OperationVariety.Mod, [
+            Token(TokenType.Filler), OPR('%'), Token(TokenType.Filler)
+        ]),
+    ]),
     OperatorPrecedenceLayer(OperatorOrder.LeftToRight, [
-        OperationPrecedenceEntry(OperationVariety.LogicalOr, [Token(TokenType.Filler), OPR('|'), OPR('|'), Token(TokenType.Filler)]),
-    ]),
-    OperatorPrecedenceLayer(OperatorOrder.RightToLeft, [
-        OperationPrecedenceEntry(OperationVariety.Assignment, [Token(TokenType.Filler), OPR('='), Token(TokenType.Filler)]), // asignment
-        OperationPrecedenceEntry(OperationVariety.AddEq, [Token(TokenType.Filler), OPR('+'), OPR('='), Token(TokenType.Filler)]),
-        OperationPrecedenceEntry(OperationVariety.SubstractEq, [Token(TokenType.Filler), OPR('-'), OPR('='), Token(TokenType.Filler)]),
-        OperationPrecedenceEntry(OperationVariety.MultiplyEq, [Token(TokenType.Filler), OPR('*'), OPR('='), Token(TokenType.Filler)]),
-        OperationPrecedenceEntry(OperationVariety.DivideEq, [Token(TokenType.Filler), OPR('/'), OPR('='), Token(TokenType.Filler)]),
-        OperationPrecedenceEntry(OperationVariety.ModEq, [Token(TokenType.Filler), OPR('%'), OPR('='), Token(TokenType.Filler)]),
-
-        OperationPrecedenceEntry(OperationVariety.BitwiseAndEq, [Token(TokenType.Filler), OPR('&'), OPR('='), Token(TokenType.Filler)]),
-        OperationPrecedenceEntry(OperationVariety.BitwiseXorEq, [Token(TokenType.Filler), OPR('^'), OPR('='), Token(TokenType.Filler)]),
-        OperationPrecedenceEntry(OperationVariety.BitwiseOrEq, [Token(TokenType.Filler), OPR('|'), OPR('='), Token(TokenType.Filler)]),
-        OperationPrecedenceEntry(OperationVariety.BitwiseNotEq, [Token(TokenType.Filler), OPR('~'), OPR('='), Token(TokenType.Filler)]),
-    ])
-
-
-];
\ No newline at end of file
+        OperationPrecedenceEntry(OperationVariety.Add, [
+            Token(TokenType.Filler), OPR('+'), Token(TokenType.Filler)
+        ]),
+        OperationPrecedenceEntry(OperationVariety.Substract, [
+            Token(TokenType.Filler), OPR('-'), Token(TokenType.Filler)
+        ]),
+    ]),
+    // OperatorPrecedenceLayer(OperatorOrder.LeftToRight, [
+    //         OperationPrecedenceEntry(OperationVariety.BitshiftLeftSigned, [
+    //             Token(TokenType.Filler), OPR('<'), OPR('<'),
+    //             Token(TokenType.Filler)
+    //         ]),
+    //         OperationPrecedenceEntry(OperationVariety.BitshiftRightSigned, [
+    //             Token(TokenType.Filler), OPR('>'), OPR('>'),
+    //             Token(TokenType.Filler)
+    //         ]),
+    //     ]),
+    // OperatorPrecedenceLayer(OperatorOrder.LeftToRight, [
+    //         OperationPrecedenceEntry(OperationVariety.LessThanEq, [
+    //             Token(TokenType.Filler), OPR('<'), OPR('='),
+    //             Token(TokenType.Filler)
+    //         ]),
+    //         OperationPrecedenceEntry(OperationVariety.GreaterThanEq, [
+    //             Token(TokenType.Filler), OPR('>'), OPR('='),
+    //             Token(TokenType.Filler)
+    //         ]),
+    //         OperationPrecedenceEntry(OperationVariety.LessThan, [
+    //             Token(TokenType.Filler), OPR('<'), Token(TokenType.Filler)
+    //         ]),
+    //         OperationPrecedenceEntry(OperationVariety.GreaterThan, [
+    //             Token(TokenType.Filler), OPR('>'), Token(TokenType.Filler)
+    //         ]),
+    //     ]),
+    // OperatorPrecedenceLayer(OperatorOrder.LeftToRight, [
+    //         OperationPrecedenceEntry(OperationVariety.NotEqualTo, [
+    //             Token(TokenType.Filler), OPR('!'), OPR('='),
+    //             Token(TokenType.Filler)
+    //         ]),
+    //         OperationPrecedenceEntry(OperationVariety.EqualTo, [
+    //             Token(TokenType.Filler), OPR('='), OPR('='),
+    //             Token(TokenType.Filler)
+    //         ]),
+    //     ]),
+    // OperatorPrecedenceLayer(OperatorOrder.LeftToRight, [
+    //         OperationPrecedenceEntry(OperationVariety.BitwiseAnd, [
+    //             Token(TokenType.Filler), OPR('&'), Token(TokenType.Filler)
+    //         ]),
+    //     ]),
+    // OperatorPrecedenceLayer(OperatorOrder.LeftToRight, [
+    //         OperationPrecedenceEntry(OperationVariety.BitwiseXor, [
+    //             Token(TokenType.Filler), OPR('^'), Token(TokenType.Filler)
+    //         ]),
+    //     ]),
+    // OperatorPrecedenceLayer(OperatorOrder.LeftToRight, [
+    //         OperationPrecedenceEntry(OperationVariety.BitwiseOr, [
+    //             Token(TokenType.Filler), OPR('|'), Token(TokenType.Filler)
+    //         ]),
+    //     ]),
+    // OperatorPrecedenceLayer(OperatorOrder.LeftToRight, [
+    //         OperationPrecedenceEntry(OperationVariety.LogicalAnd, [
+    //             Token(TokenType.Filler), OPR('&'), OPR('&'),
+    //             Token(TokenType.Filler)
+    //         ]),
+    //     ]),
+    // OperatorPrecedenceLayer(OperatorOrder.LeftToRight, [
+    //         OperationPrecedenceEntry(OperationVariety.LogicalOr, [
+    //             Token(TokenType.Filler), OPR('|'), OPR('|'),
+    //             Token(TokenType.Filler)
+    //         ]),
+    //     ]),
+    // OperatorPrecedenceLayer(OperatorOrder.RightToLeft, [
+    //         OperationPrecedenceEntry(OperationVariety.Assignment, [
+    //             Token(TokenType.Filler), OPR('='), Token(TokenType.Filler)
+    //         ]), // assignment
+    //         OperationPrecedenceEntry(OperationVariety.AddEq, [
+    //             Token(TokenType.Filler), OPR('+'), OPR('='),
+    //             Token(TokenType.Filler)
+    //         ]),
+    //         OperationPrecedenceEntry(OperationVariety.SubstractEq, [
+    //             Token(TokenType.Filler), OPR('-'), OPR('='),
+    //             Token(TokenType.Filler)
+    //         ]),
+    //         OperationPrecedenceEntry(OperationVariety.MultiplyEq, [
+    //             Token(TokenType.Filler), OPR('*'), OPR('='),
+    //             Token(TokenType.Filler)
+    //         ]),
+    //         OperationPrecedenceEntry(OperationVariety.DivideEq, [
+    //             Token(TokenType.Filler), OPR('/'), OPR('='),
+    //             Token(TokenType.Filler)
+    //         ]),
+    //         OperationPrecedenceEntry(OperationVariety.ModEq, [
+    //             Token(TokenType.Filler), OPR('%'), OPR('='),
+    //             Token(TokenType.Filler)
+    //         ]),
+
+    //         OperationPrecedenceEntry(OperationVariety.BitwiseAndEq, [
+    //             Token(TokenType.Filler), OPR('&'), OPR('='),
+    //             Token(TokenType.Filler)
+    //         ]),
+    //         OperationPrecedenceEntry(OperationVariety.BitwiseXorEq, [
+    //             Token(TokenType.Filler), OPR('^'), OPR('='),
+    //             Token(TokenType.Filler)
+    //         ]),
+    //         OperationPrecedenceEntry(OperationVariety.BitwiseOrEq, [
+    //             Token(TokenType.Filler), OPR('|'), OPR('='),
+    //             Token(TokenType.Filler)
+    //         ]),
+    //         OperationPrecedenceEntry(OperationVariety.BitwiseNotEq, [
+    //             Token(TokenType.Filler), OPR('~'), OPR('='),
+    //             Token(TokenType.Filler)
+    //         ]),
+    //     ])
+
+];
+import std.container.array;
+
+private bool testAstEntry(const(OperationPrecedenceEntry) entry, AstNode[] nodes)
+{
+    if (entry.tokens.length > nodes.length)
+        return false;
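+    // Compare the entry's token template element-by-element against the nodes
+    // at the same offsets.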
+    for (size_t index = 0; index < entry.tokens.length; index++)
+    {
+        switch (entry.tokens[index].tokenVariety)
+        {
+            case TokenType.Filler:
+                AstNode node = nodes[index];
+                if (node.action == AstAction.TokenHolder || node.action == AstAction.Keyword || node.action == AstAction
+                    .Scope)
+                    return false;
+                break;
+            case TokenType.Operator:
+                AstNode node = nodes[index];
+                if (node.action != AstAction.TokenHolder)
+                    return false;
+                Token token = node.tokenBeingHeld;
+                if (token.tokenVariety != TokenType.Equals && token.tokenVariety != TokenType.Operator)
+                    return false;
+                if (token.value != entry.tokens[index].value)
+                    return false;
+                break;
+            default:
+                // entry.tokens[index].writeln;
+                assert(0);
+
+        }
+    }
+    return true;
+}
+
+private void merge(const(OperationPrecedenceEntry) entry, ref Array!AstNode nodes, size_t startIndex)
+{
+    AstNode[] nodeData;
+    for (size_t index = 0; index < entry.tokens.length; index++)
+    {
+        switch (entry.tokens[index].tokenVariety)
+        {
+            case TokenType.Filler:
+                nodeData ~= nodes[startIndex + index];
+                break;
+            case TokenType.Operator:
+                break;
+            default:
+                assert(0);
+        }
+    }
+    AstNode oprNode = new AstNode();
+    oprNode.action = AstAction.DoubleArgumentOperation;
+    if (nodeData.length == 0)
+        assert(0);
+    if (nodeData.length == 1)
+    {
+        oprNode.action = AstAction.SingleArgumentOperation;
+        oprNode.singleArgumentOperationNodeData = SingleArgumentOperationNodeData(
+            entry.operation,
+            nodeData[0],
+        );
+    }
+    if (nodeData.length == 2)
+        oprNode.doubleArgumentOperationNodeData = DoubleArgumentOperationNodeData(
+            entry.operation,
+            nodeData[0],
+            nodeData[1]
+        );
+
+    nodes[startIndex] = oprNode;
+    nodes.linearRemove(nodes[startIndex + 1 .. startIndex + entry.tokens.length]);
+
+}
+
+void scanAndMergeOperators(Array!AstNode nodes)
+{
+    // OperatorOrder order;
+    auto data = nodes.data;
+    static foreach (layer; operatorPrecedence)
+    {
+        static if (layer.order == OperatorOrder.LeftToRight)
+        {
+            for (size_t index = 0; index < nodes.length; index++)
+            {
+                foreach (entry; layer.layer)
+                {
+                    if (entry.testAstEntry(data[index .. $]))
+                        entry.merge(nodes, index);
+                }
+
+            }
+        }
+        static if (layer.order == OperatorOrder.RightToLeft){
+            for (size_t index = nodes.length; index != -1; index--){
+                foreach (entry; layer.layer)
+                {
+                    if (entry.testAstEntry(data[index .. $]))
+                        entry.merge(nodes, index);
+                }
+            }
+        }
+    }
+}
diff --git a/source/parsing/treegen/treeGenUtils.d b/source/parsing/treegen/treeGenUtils.d
index f806bf8..dcb2a93 100644
--- a/source/parsing/treegen/treeGenUtils.d
+++ b/source/parsing/treegen/treeGenUtils.d
@@ -4,11 +4,11 @@ import parsing.treegen.astTypes;
 import parsing.tokenizer.tokens;
 import tern.typecons.common : Nullable, nullable;
 
-
 NameUnit genNameUnit(Token[] tokens, ref size_t index)
 {
     NameUnit ret;
     Nullable!Token tokenNullable = tokens.nextNonWhiteToken(index);
+    index--;
     Token token;
 
     // An attempt to generate a name at an EOF
@@ -16,71 +16,34 @@ NameUnit genNameUnit(Token[] tokens, ref size_t index)
         return ret;
     token = tokenNullable;
 
-    while (token.tokenVariety == TokenType.Letter || token.tokenVariety == TokenType.Period)
+    while (token.tokenVariety == TokenType.Letter || token.tokenVariety == TokenType.Number || token.tokenVariety == TokenType.Period)
     {
-
-        if (token.tokenVariety == TokenType.Period)
-            continue;
-
-        ret.names ~= token.value;
+
+        if (token.tokenVariety != TokenType.Period)
+            ret.names ~= token.value;
+
         tokenNullable = tokens.nextToken(index);
         // We hit an EOF
         if (tokenNullable.ptr == null)
            return ret;
+
         token = tokenNullable;
+
     }
     return ret;
 }
 
-// First step of the AST gen process. Puts the tokens into
-// AstNode objects and extracts parenthesis into deeper
-// levels of nesting so that later they can be recursivly parsed
-AstNode[] parenthesisExtract(Token[] tokens)
+unittest
 {
-    AstNode[] ret;
-    AstNode[] parenthesisStack;
-    foreach (Token token; tokens)
-    {
-        if (token.tokenVariety == TokenType.OpenBraces)
-        {
-            AstNode newExpression;
-            newExpression.action = AstAction.Expression;
-            newExpression.expressionNodeData = ExpressionNodeData(
-                token.value[0],
-                braceOpenToBraceClose[token.value[0]],
-                []
-            );
-            parenthesisStack ~= newExpression;
-            continue;
-        }
-        if (token.tokenVariety == TokenType.CloseBraces)
-        {
-            if (parenthesisStack.length == 0)
-                throw new SyntaxError("Parenthesis closed but never opened");
-
-            AstNode node = parenthesisStack[$ - 1];
-
-            if (node.expressionNodeData.closer != token.value[0])
-                throw new SyntaxError("Parenthesis not closed with correct token");
-
-            parenthesisStack.length--;
-
-            if (parenthesisStack.length == 0)
-                ret ~= node;
-            else
-                parenthesisStack[$ - 1].expressionNodeData.components ~= node;
-            continue;
-        }
-
-        AstNode tokenToBeParsedLater;
-        tokenToBeParsedLater.action = AstAction.TokenHolder;
-        tokenToBeParsedLater.tokenBeingHeld = token;
-        if (parenthesisStack.length == 0)
-            ret ~= tokenToBeParsedLater;
-        else
-            parenthesisStack[$ - 1].expressionNodeData.components ~= tokenToBeParsedLater;
-    }
-    return ret;
-}
\ No newline at end of file
+    import parsing.tokenizer.make_tokens;
+
+    size_t s = 0;
+    assert("int x = 4;".tokenizeText.genNameUnit(s).names == ["int".makeUnicodeString]);
+    s = 0;
+    assert("std.int x = 4;".tokenizeText.genNameUnit(s).names == [
+        "std".makeUnicodeString,
+        "int".makeUnicodeString
+    ]);
+}