Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
cetio committed Apr 21, 2024
2 parents 389b66d + 2de62dd commit b602cc8
Show file tree
Hide file tree
Showing 7 changed files with 823 additions and 186 deletions.
17 changes: 16 additions & 1 deletion source/parsing/tokenizer/make_tokens.d
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,22 @@ private Token[] protoTokenize(string input)

}
TokenType tokenType = getVarietyOfLetter(symbol);
tokens ~= Token(tokenType, [symbol], index);
Token token = Token(tokenType, [symbol], index);
if (tokenType == TokenType.Quotation)
{
dchar last = symbol;
index++;
while (index < chars.length)
{
dchar symbol2 = chars[index];
token.value ~= symbol2;
if (symbol2 == symbol && last != '\\')
break;
last = symbol2;
index++;
}
}
tokens ~= token;
}
return tokens;
}
Expand Down
15 changes: 9 additions & 6 deletions source/parsing/tokenizer/tokens.d
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ enum TokenType
Letter,
Semicolon,
Colon,
Comma,
Pipe,
WhiteSpace,
Equals,
Expand Down Expand Up @@ -69,7 +70,7 @@ bool isSingleLineComment(dchar first, dchar secound)
{
static foreach (const dchar[] style; validSingleLineCommentStyles)
{
if (style[0] == first || style[0] == secound)
if (style[0] == first && style[0] == secound)
return true;
}
return false;
Expand Down Expand Up @@ -115,13 +116,15 @@ TokenType getVarietyOfLetter(dchar symbol)
return TokenType.Pipe;
case '.':
return TokenType.Period;
case ',':
return TokenType.Comma;
default:
break;
}

if (isDigit(symbol))
return TokenType.Number;
if (isAlpha(symbol))
if (isAlpha(symbol) || symbol == '_')
return TokenType.Letter;
if (isWhite(symbol))
return TokenType.WhiteSpace;
Expand All @@ -144,14 +147,14 @@ struct Token
size_t startingIndex;
}

import tern.typecons.common : Nullable;
import tern.typecons.common : Nullable, nullable;

Nullable!Token nextToken(Token[] tokens, ref size_t index)
{
Nullable!Token found;
if (tokens.length >= index)
return found;
found = tokens[index++];
if (tokens.length <= index+1)
return nullable!Token(null);
found = tokens[++index];
return found;
}

Expand Down
121 changes: 96 additions & 25 deletions source/parsing/treegen/astTypes.d
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,15 @@ enum OperationVariety

struct SingleArgumentOperationNodeData
{
OperationVariety pperationVariety;
AstNode* value;
OperationVariety operationVariety;
AstNode value;
}

struct DoubleArgumentOperationNodeData
{
OperationVariety pperationVariety;
AstNode* left;
AstNode* right;
OperationVariety operationVariety;
AstNode left;
AstNode right;
}

struct ExpressionNodeData
Expand All @@ -128,10 +128,10 @@ struct ExpressionNodeData
struct CallNodeData
{
NameUnit func;
AstNode* args;
AstNode args;
}

struct AstNode
class AstNode
{
AstAction action;
union
Expand Down Expand Up @@ -159,26 +159,97 @@ struct AstNode
sink("{");
switch (action)
{
case AstAction.Keyword:
sink(keywordNodeData.to!string);
break;
case AstAction.TokenHolder:
sink(tokenBeingHeld.to!string);
break;
case AstAction.Expression:
sink(expressionNodeData.components.to!string);
break;
default: break;
case AstAction.Keyword:
sink(keywordNodeData.to!string);
break;
case AstAction.TokenHolder:
sink(tokenBeingHeld.to!string);
break;
case AstAction.Expression:
sink(expressionNodeData.components.to!string);
break;
case AstAction.NamedUnit:
sink(namedUnit.names.to!string);
break;
case AstAction.Call:
sink(callNodeData.func.names.to!string);
sink("(\n");
sink(callNodeData.args.to!string);
sink("\n)");
break;
case AstAction.LiteralUnit:
sink(literalUnitCompenents.to!string);
break;
case AstAction.DoubleArgumentOperation:
sink(doubleArgumentOperationNodeData.operationVariety.to!string);
sink(", ");
sink(doubleArgumentOperationNodeData.left.to!string);
sink(", ");
sink(doubleArgumentOperationNodeData.right.to!string);
break;
default:
break;
}
sink("}");
}

/// Writes this node, and recursively its children, to stdout as an
/// indented tree.
///
/// Params:
///     tabCount = nesting depth of this node; one "| " marker is written
///                per level before the node's own line.
void tree(size_t tabCount)
{
    import std.stdio;
    import std.conv;

    // Indentation prefix: one marker per nesting level.
    for (size_t depth = 0; depth < tabCount; depth++)
        write("| ");

    const size_t childDepth = tabCount + 1;

    if (action == AstAction.Call)
    {
        // Header line is the called name; the argument node is the only child.
        writeln(callNodeData.func.to!string, ":");
        callNodeData.args.tree(childDepth);
    }
    else if (action == AstAction.DoubleArgumentOperation)
    {
        // Header line is the operator kind; left operand first, then right.
        writeln(doubleArgumentOperationNodeData.operationVariety.to!string, ":");
        doubleArgumentOperationNodeData.left.tree(childDepth);
        doubleArgumentOperationNodeData.right.tree(childDepth);
    }
    else if (action == AstAction.Expression)
    {
        writeln("Result of expression with ",
            expressionNodeData.components.length.to!string,
            " components:");
        foreach (child; expressionNodeData.components)
            child.tree(childDepth);
    }
    else
    {
        // Every other node kind is printed on a single line via its
        // string conversion.
        writeln(this.to!string);
    }
}
}

struct ScopeParsingMode{
bool allowDefiningObjects;
bool allowDefiningFunctions;
bool allowVariableDefinitions;
bool allowInlineVariableAssignments;
bool hasProperties;
bool isCommaSeperated;
}
// struct ScopeParsingMode{
// bool allowDefiningObjects;
// bool allowDefiningFunctions;
// bool allowVariableDefinitions;
// bool allowInlineVariableAssignments;
// bool hasProperties;
// bool isCommaSeperated;
// }
import std.container.array;

/// Scans `nodes` from `index` onward for the first node that is not a
/// token holder carrying a WhiteSpace or Comment token.
///
/// Params:
///     nodes = the node sequence to scan.
///     index = position to start from; it is advanced past every node
///             examined, including the one returned.
///
/// Returns: the node found, or a default-initialized `Nullable!AstNode`
///          when the end of `nodes` is reached first.
Nullable!AstNode nextNonWhiteNode(Array!AstNode nodes, ref size_t index)
{
    import parsing.tokenizer.tokens;

    Nullable!AstNode result;
    for (; index < nodes.length;)
    {
        AstNode candidate = nodes[index];
        index++;

        // Only token holders can be skipped; the held token is inspected
        // only when the node really is a token holder.
        bool skippable = false;
        if (candidate.action == AstAction.TokenHolder)
        {
            const variety = candidate.tokenBeingHeld.tokenVariety;
            skippable = variety == TokenType.WhiteSpace
                || variety == TokenType.Comment;
        }

        if (!skippable)
        {
            result = candidate;
            break;
        }
    }
    return result;
}
142 changes: 114 additions & 28 deletions source/parsing/treegen/expressionParser.d
Original file line number Diff line number Diff line change
@@ -1,47 +1,133 @@
module parsing.treegen.expressionParser;

import tern.typecons.common : Nullable, nullable;
import parsing.treegen.astTypes;
import parsing.tokenizer.tokens;
import parsing.tokenizer.make_tokens;
import tern.typecons.common : Nullable;
import parsing.treegen.tokenRelationships;
import errors;
import std.stdio;
import std.container.array;

// Group letters.letters.letters into NamedUnits
// Group parentheses into AstNode.Expressions to be parsed separately
private AstNode[] phaseOne(Token[] tokens)
{
AstNode[] ret;
AstNode[] parenthesisStack;
bool isLastTokenWhite = false;
for (size_t index = 0; index < tokens.length; index++)
{
Token token = tokens[index];
if (token.tokenVariety == TokenType.OpenBraces)
{
AstNode newExpression = new AstNode();
newExpression.action = AstAction.Expression;
newExpression.expressionNodeData = ExpressionNodeData(
token.value[0],
braceOpenToBraceClose[token.value[0]],
[]
);
parenthesisStack ~= newExpression;
continue;
}
if (token.tokenVariety == TokenType.CloseBraces)
{

if (parenthesisStack.length == 0)
throw new SyntaxError("Parenthesis closed but never opened");

// Line types;
///// 1. Variable declairation.
///// NameUnit followed by NameUnit
AstNode node = parenthesisStack[$ - 1];

enum LineType{
Declaration,
Expression
}
if (node.expressionNodeData.closer != token.value[0])
throw new SyntaxError("Parenthesis not closed with correct token");

parenthesisStack.length--;

import std.stdio;
void parseLine(Token[] tokens, ScopeParsingMode mode)
{
size_t index = 0;
Nullable!Token firstToken = tokens.nextNonWhiteToken(index);
if (firstToken.ptr == null)
throw new SyntaxError("Expected a statement");
// Nullable!LineType;
// Determine line type
if (mode.allowVariableDefinitions){

if (parenthesisStack.length == 0)
ret ~= node;
else
parenthesisStack[$ - 1].expressionNodeData.components ~= node;
continue;
}
AstNode tokenToBeParsedLater = new AstNode();
if (token.tokenVariety == TokenType.Letter){
tokenToBeParsedLater.action = AstAction.NamedUnit;
tokenToBeParsedLater.namedUnit = tokens.genNameUnit(index);
index--;
}else if(token.tokenVariety == TokenType.Number){
tokenToBeParsedLater.action = AstAction.LiteralUnit;
tokenToBeParsedLater.literalUnitCompenents = [token];
}
else if(token.tokenVariety != TokenType.Comment){
bool isWhite = token.tokenVariety == TokenType.WhiteSpace;
if (isWhite && isLastTokenWhite) continue;
isLastTokenWhite = isWhite;

tokenToBeParsedLater.action = AstAction.TokenHolder;
tokenToBeParsedLater.tokenBeingHeld = token;
}

if (parenthesisStack.length == 0)
ret ~= tokenToBeParsedLater;
else
parenthesisStack[$ - 1].expressionNodeData.components ~= tokenToBeParsedLater;
}
return ret;
}

// Placeholder: the body is currently empty, so calling this does nothing
// with `nodes`.
private void operatorPairingPhase(Array!AstNode nodes){

}


/// Handle function calls and operators.
///
/// Walks `nodes` in order. A NamedUnit node immediately followed by an
/// Expression node is replaced by a single Call node (the Expression
/// becomes its argument node and is removed from `nodes`). Any other
/// Expression node has its components processed in place. In both cases
/// the components are run through `phaseTwo` recursively and then through
/// `scanAndMergeOperators`.
///
/// Params:
///     nodes = the node sequence to rewrite; it is modified in place.
private void phaseTwo(Array!AstNode nodes)
{
    // Runs phaseTwo and operator merging over the components of an
    // expression node, then writes the (possibly shorter) result back
    // into that node.
    void processComponents(AstNode expression)
    {
        Array!AstNode children;
        children ~= expression.expressionNodeData.components;
        phaseTwo(children);
        scanAndMergeOperators(children);
        expression.expressionNodeData.components.length = children.data.length;
        expression.expressionNodeData.components[0 .. $] = children.data[0 .. $];
    }

    for (size_t i = 0; i < nodes.length; i++)
    {
        AstNode current = nodes[i];
        const size_t following = i + 1;

        const bool startsCall = current.action == AstAction.NamedUnit
            && following < nodes.length
            && nodes[following].action == AstAction.Expression;

        if (startsCall)
        {
            AstNode call = new AstNode();
            AstNode argumentList = nodes[following];

            processComponents(argumentList);

            call.action = AstAction.Call;
            call.callNodeData = CallNodeData(
                current.namedUnit,
                argumentList
            );

            // The call node takes the name's slot; the argument expression
            // now lives inside the call, so drop it from the sequence.
            nodes[i] = call;
            nodes.linearRemove(nodes[following .. following + 1]);
            continue;
        }

        if (current.action == AstAction.Expression)
            processComponents(current);
    }
}

import parsing.treegen.treeGenUtils;

import parsing.treegen.tokenRelationships;
unittest
{
parseLine(tokenizeText("int x = 4;"), ScopeParsingMode(
false,
false,
true,
true,
false,
false
));

import parsing.tokenizer.make_tokens;
AstNode[] phaseOneNodes = phaseOne("math.sqrt(3*5+6*7/2)*3".tokenizeText);

Array!AstNode nodes;
nodes~=phaseOneNodes;
phaseTwo(nodes);
scanAndMergeOperators(nodes);
nodes[0].tree(0);

}
Loading

0 comments on commit b602cc8

Please sign in to comment.