From cf7a605431f3f960f914295441348bc29e050073 Mon Sep 17 00:00:00 2001 From: Nebojsa Obradovic Date: Fri, 31 Jan 2020 14:57:21 +0100 Subject: [PATCH] Initial commit --- CMakeLists.txt | 19 +++ README.md | 32 +++++ astnode.cpp | 11 ++ astnode.h | 30 +++++ binaryoperator.cpp | 31 +++++ binaryoperator.h | 48 +++++++ interpreter.cpp | 36 ++++++ interpreter.h | 36 ++++++ lexer.cpp | 51 ++++++++ lexer.h | 45 +++++++ main.cpp | 58 +++++++++ number.cpp | 10 ++ number.h | 40 ++++++ parser.cpp | 94 ++++++++++++++ parser.h | 87 +++++++++++++ tests/CMakeLists.txt | 62 +++++++++ tests/main.cpp | 9 ++ tests/tst_simpletest.h | 282 +++++++++++++++++++++++++++++++++++++++++ token.h | 43 +++++++ 19 files changed, 1024 insertions(+) create mode 100644 CMakeLists.txt create mode 100644 README.md create mode 100644 astnode.cpp create mode 100644 astnode.h create mode 100644 binaryoperator.cpp create mode 100644 binaryoperator.h create mode 100644 interpreter.cpp create mode 100644 interpreter.h create mode 100644 lexer.cpp create mode 100644 lexer.h create mode 100644 main.cpp create mode 100644 number.cpp create mode 100644 number.h create mode 100644 parser.cpp create mode 100644 parser.h create mode 100644 tests/CMakeLists.txt create mode 100644 tests/main.cpp create mode 100644 tests/tst_simpletest.h create mode 100644 token.h diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..e6012f4 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 3.5) + +project(Parser LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# Create a list with all .cpp source files +set( project_sources + main.cpp + parser.cpp + interpreter.cpp + lexer.cpp + astnode.cpp + number.cpp + binaryoperator.cpp +) + +add_executable(Parser ${project_sources}) diff --git a/README.md b/README.md new file mode 100644 index 0000000..887a5c0 --- /dev/null +++ b/README.md @@ -0,0 +1,32 @@ +# Basic calculator parser +A simple tool to 
parse single-digit calculator syntax using only the **+**, **-**, **\*** and **/** operators. Parentheses **(**, **)** are allowed as well. +## Usage +The *Parser* tool will try to parse any expression that comes from standard input: +```bash +$ echo "2+2" | Parser +4 +$ Parser < expression.txt +44.8 +``` +## Building code +Use cmake to build the code. For example: +```bash +$ cd Parser +$ mkdir bin +$ cd bin +$ cmake .. +... +$ make +$ echo "1+1" | ./Parser +2 +``` +## Unit tests +The project also has unit tests; build them in a similar way: +```bash +$ cd Parser +$ mkdir test-bin +$ cd test-bin +$ cmake ../tests +... +$ make +``` diff --git a/astnode.cpp b/astnode.cpp new file mode 100644 index 0000000..cb9ae4f --- /dev/null +++ b/astnode.cpp @@ -0,0 +1,11 @@ +#include "astnode.h" + +AstNode::AstNode() +{ + +} + +AstNode::~AstNode() +{ + +} diff --git a/astnode.h b/astnode.h new file mode 100644 index 0000000..6ee0c63 --- /dev/null +++ b/astnode.h @@ -0,0 +1,30 @@ +#ifndef ASTNODE_H +#define ASTNODE_H + +/** + * @brief The AstNode class represents a base object in the AST + * + * The AstNode should be used to represent one node in an Abstract Syntax Tree (AST). This class + * should be used to represent an AST tree for grammar defined in \see Parser class. There are + * two possible nodes: binary operator (+,-,* and /) and a digit (number) \see BinaryOperator and + * \see Number. + */ +class AstNode +{ +public: + enum AstNodeType { + ANT_BINARY_OPERATOR, + ANT_NUMBER + }; + + AstNode(); + virtual ~AstNode() = 0; + + /** + * @brief type returns type of the AST node. 
+ */ + virtual AstNodeType type() const = 0; + +}; + +#endif // ASTNODE_H diff --git a/binaryoperator.cpp b/binaryoperator.cpp new file mode 100644 index 0000000..0f9555a --- /dev/null +++ b/binaryoperator.cpp @@ -0,0 +1,31 @@ +#include "binaryoperator.h" + + +BinaryOperator::BinaryOperator(const AstNode *left, const Token &op, const AstNode *right) + : _left(left), _right(right), _operator(op) +{ + if (left == nullptr || right == nullptr) { + throw "Invalid params."; + } + if (left == right) { // Compare the pointers themselves: one node on both sides would be deleted twice. + throw "Left and right are the same."; + } + + switch(_operator.type()) { + case Token::TT_PLUS: + case Token::TT_MINUS: + case Token::TT_MULTIPLY: + case Token::TT_DIVIDE: + // Allowed operators. + break; + default: + throw "Invalid operator."; + } +} + +BinaryOperator::~BinaryOperator() +{ + delete _left; + delete _right; + _left = _right = nullptr; +} diff --git a/binaryoperator.h b/binaryoperator.h new file mode 100644 index 0000000..3291c21 --- /dev/null +++ b/binaryoperator.h @@ -0,0 +1,48 @@ +#ifndef BINARYOPERATOR_H +#define BINARYOPERATOR_H + +#include "astnode.h" +#include "token.h" + +/** + * @brief The BinaryOperator class represents a binary operator node in AST. + * + * The BinaryOperator must have its left and right AstNode and the operator which represents the + * operation performed between them. + * The BinaryOperator will take ownership of the 'left' and 'right' objects and delete them on + * destruction. + * + * Usage: + * BinaryOperator(leftNode, Token(Token::TT_PLUS, '+'), rightNode); + */ +class BinaryOperator : public AstNode +{ +public: + /** + * @brief BinaryOperator Creates BinaryOperator object. + * @param left The left node in an AST tree. Must be non-null and distinct from 'right'. + * @param op The binary operator. Can only be Token::TT_PLUS,...,Token::TT_DIVIDE + * otherwise the constructor throws. + * @param right The right node in an AST tree. Must be non-null and distinct from 'left'. + */ + BinaryOperator(const AstNode *left, const Token &op, const AstNode *right); + ~BinaryOperator(); + + // Getters. 
+ const AstNode &left() const { return *_left; } + const AstNode &right() const { return *_right; } + Token::TokenType op() const { return _operator.type(); } + + // AstNode interface + /** + * @return Returns AstNode::ANT_BINARY_OPERATOR. + */ + AstNodeType type() const override { return AstNode::ANT_BINARY_OPERATOR; } + +private: + const AstNode * _left; + const AstNode * _right; + const Token _operator; +}; + +#endif // BINARYOPERATOR_H diff --git a/interpreter.cpp b/interpreter.cpp new file mode 100644 index 0000000..aed9650 --- /dev/null +++ b/interpreter.cpp @@ -0,0 +1,36 @@ +#include "interpreter.h" + +#include "astnode.h" +#include "number.h" +#include "binaryoperator.h" + +double Interpreter::interpret(const AstNode &node) +{ + return this->visit(node); +} + +double Interpreter::visit(const AstNode &node) +{ + switch (node.type()) { + case AstNode::ANT_NUMBER: + return static_cast<const Number &>(node).value() - '0'; // value() is the digit character; subtracting '0' yields 0..9. + case AstNode::ANT_BINARY_OPERATOR: + { + const BinaryOperator &op = static_cast<const BinaryOperator &>(node); // type() identified the node as a BinaryOperator. + switch (op.op()) { + case Token::TT_PLUS: + return this->visit(op.left()) + this->visit(op.right()); + case Token::TT_MINUS: + return this->visit(op.left()) - this->visit(op.right()); + case Token::TT_MULTIPLY: + return this->visit(op.left()) * this->visit(op.right()); + case Token::TT_DIVIDE: + return this->visit(op.left()) / this->visit(op.right()); + default: + throw "Something went wrong. Unimplemented operator."; + } + } + default: + throw "Something went wrong. Unimplemented node."; + } +} diff --git a/interpreter.h b/interpreter.h new file mode 100644 index 0000000..8a72308 --- /dev/null +++ b/interpreter.h @@ -0,0 +1,36 @@ +#ifndef INTERPRETER_H +#define INTERPRETER_H + +class AstNode; + +/// +/// \brief The Interpreter class represents a post-order interpreter object for a given AST. +/// +/// This class represents one post-order interpreter for a given AST. For more \see AstNode. 
+/// +/// Usage: +/// Interpreter i; +/// double result = i.interpret(rootNode); +class Interpreter +{ +public: + Interpreter() {}; + + /** + * @brief interpret Will interpret the value of an AST by giving the root of the tree. + * @param node The tree root node. + * @return Value for a given AST tree. + */ + double interpret(const AstNode &node); + +private: + /** + * @brief visit Visitor pattern implementation for a given tree node. Goes in post-order + * traversal. + * @param node AST node. + * @return Value for the whole subtree. + */ + double visit(const AstNode &node); +}; + +#endif // INTERPRETER_H diff --git a/lexer.cpp b/lexer.cpp new file mode 100644 index 0000000..6ecbcbb --- /dev/null +++ b/lexer.cpp @@ -0,0 +1,51 @@ +#include "lexer.h" + +#include "token.h" + +Lexer::Lexer(std::stringstream &stringStream) + : _stringStream(stringStream) +{ + +} + +Token Lexer::getNextToken() +{ + char tokenChar; + + if (!(_stringStream >> tokenChar)) { + return Token(Token::TT_EOF, '\0'); + } + + // We only expect one-character tokens such as digits, parentheses and operators. + // Operator >> skips all whitespace and yields the next non-whitespace character. + + switch (tokenChar) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return Token(Token::TT_NUMBER, tokenChar); + case '+': + return Token(Token::TT_PLUS, tokenChar); + case '-': + return Token(Token::TT_MINUS, tokenChar); + case '*': + return Token(Token::TT_MULTIPLY, tokenChar); + case '/': + return Token(Token::TT_DIVIDE, tokenChar); + case '(': + return Token(Token::TT_LEFT_PARENTHESIS, tokenChar); + case ')': + return Token(Token::TT_RIGHT_PARENTHESIS, tokenChar); + + default: + throw "Invalid character."; // TODO: add more details. 
+ } + +} diff --git a/lexer.h b/lexer.h new file mode 100644 index 0000000..1c94d28 --- /dev/null +++ b/lexer.h @@ -0,0 +1,45 @@ +#ifndef LEXER_H +#define LEXER_H + +#include +#include + +class Token; + +/** + * @brief The Lexer class represent a basic lexer wrapper for a calculator syntax \see Parser + * + * The Lexer object wraps tokenization parsing and will return a next token for every character the + * Lexer reads from an input stream. Whitespaces are ignored. Each token represent one character + * since there are no multicharacter tokens \see Token. + * + * Usage: + * Lexer lexer(inputStream); + * Token token; + * while ((token = lexer.getNextToken).type() != Token::TT_EOF) { + * ... + * } + * + */ +class Lexer +{ +public: + /** + * @brief Lexer Creates the Lexer object with given input stream. + * @param stringStream Input string stream. + */ + Lexer(std::stringstream &stringStream); + + /** + * @brief getNextToken Returns next token from an input stream. Token will be EOF if end of + * stream is reached. + */ + Token getNextToken(); + +private: + Lexer(const Lexer &); + + std::stringstream & _stringStream; +}; + +#endif // LEXER_H diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..740baf6 --- /dev/null +++ b/main.cpp @@ -0,0 +1,58 @@ +#include +#include +#include + +#include "lexer.h" +#include "parser.h" +#include "interpreter.h" +#include "astnode.h" + +/* + * Example of an test function: + * +double test(const std::string &string) { + std::stringstream stream(string); + Lexer lexer(stream); + Parser parser(lexer); + Interpreter interpreter; + auto node = parser.parse(); + return interpreter.interpret(*node); +} +*/ + +int main() +{ + std::stringstream stream; + std::string input; + int length = 0; + + while (std::cin >> input) { + stream << input; + length += input.size(); + } + + if (length == 0) { + std::cout + << "Input empty. 
Provide input on the standard input like: echo \"2+2\" | Parser" + << std::endl; + return 0; + } + + try { + Lexer lexer(stream); + Parser parser(lexer); + Interpreter interpreter; + + auto node = parser.parse(); + + std::cout << interpreter.interpret(*node) << std::endl; + + delete node; + } catch (char const *reason) { + std::cerr << reason << std::endl; + } catch (...) { + std::cerr << "Something realy wrong happened." << std::endl; + } + + return 0; +} diff --git a/number.cpp b/number.cpp new file mode 100644 index 0000000..ace5bfd --- /dev/null +++ b/number.cpp @@ -0,0 +1,10 @@ +#include "number.h" + +Number::Number(const Token &token) + : _token(token) +{ + if (this->_token.type() != Token::TT_NUMBER || + this->_token.value() < '0' || this->_token.value() > '9') { + throw "Invalid number (digit) token"; + } +} diff --git a/number.h b/number.h new file mode 100644 index 0000000..d4f8e3c --- /dev/null +++ b/number.h @@ -0,0 +1,40 @@ +#ifndef NUMBER_H +#define NUMBER_H + +#include "astnode.h" +#include "token.h" + +/** + * @brief The Number class represents a digit (number) node in an AST + * + * The node must be construced with a valid digit value token otherwise will crash during + * construction. \see Token. + * + * Usage: + * Number numThree(Token(Token::TT_NUMBER, '3')); + * int value = numTree.value() - '0'; + * + */ +class Number : public AstNode +{ +public: + /** + * @brief Number Creates the Number object for a valid Token::TT_NUMBER token. Will crash + * otherwise. + * @param token + */ + Number(const Token &token); + + /** + * @brief value Returns original token value. 
+ */ + char value() const { return _token.value(); } + + // AstNode interface + AstNodeType type() const override { return AstNode::ANT_NUMBER; } + +private: + const Token _token; +}; + +#endif // NUMBER_H diff --git a/parser.cpp b/parser.cpp new file mode 100644 index 0000000..4c1fe73 --- /dev/null +++ b/parser.cpp @@ -0,0 +1,94 @@ +#include "parser.h" + +#include "lexer.h" +#include "number.h" +#include "binaryoperator.h" + +Parser::Parser(Lexer &lexer) + : _lexer(lexer), _currentToken(lexer.getNextToken()) +{ + +} + +const AstNode *Parser::parse() +{ + auto rootNode = this->expression(); + // Check if there is an unconsumed token; if so, it's a syntax error. + if (this->_currentToken.type() != Token::TT_EOF) { + throw "Invalid syntax"; + } + + return rootNode; +} + +const AstNode *Parser::expression() +{ + // From grammar: + // expression := term {(PLUS|MINUS) term} + + auto node = this->term(); + + while (this->_currentToken.type() == Token::TT_PLUS || + this->_currentToken.type() == Token::TT_MINUS) { + + const Token token = this->_currentToken; + this->eat(this->_currentToken.type()); // Eat '+' or '-' token. + + node = new BinaryOperator(node, token, this->term()); + } + + return node; +} + +const AstNode *Parser::term() +{ + // From grammar: + // term := factor {(MULTIPLY|DIVIDE) factor} + + auto node = this->factor(); + + while (this->_currentToken.type() == Token::TT_MULTIPLY || + this->_currentToken.type() == Token::TT_DIVIDE) { + + const Token token = this->_currentToken; + this->eat(this->_currentToken.type()); // Eat '*' or '/' token. + + node = new BinaryOperator(node, token, this->factor()); + } + + return node; +} + +const AstNode *Parser::factor() +{ + // From grammar: + // factor := (NUMBER|LEFT_PARENTHESIS expression RIGHT_PARENTHESIS) + + if (this->_currentToken.type() == Token::TT_NUMBER) { + const Token token = this->_currentToken; + this->eat(Token::TT_NUMBER); + + if (this->_currentToken.type() == Token::TT_NUMBER) { + // Two digits in a row: a multi-digit number, which this grammar rejects. 
+ throw "Invalid syntax"; + } + + return new Number(token); + } else if (this->_currentToken.type() == Token::TT_LEFT_PARENTHESIS) { + this->eat(Token::TT_LEFT_PARENTHESIS); + auto node = this->expression(); + this->eat(Token::TT_RIGHT_PARENTHESIS); + return node; + } else { + throw "Something went wrong!"; // Unexpected situation. API misuse. + } +} + +void Parser::eat(Token::TokenType type) +{ + if (_currentToken.type() == type) { + this->_currentToken = this->_lexer.getNextToken(); + } else { + throw "Invalid syntax."; // TODO: Add more details about the error. + } +} diff --git a/parser.h b/parser.h new file mode 100644 index 0000000..16c80dd --- /dev/null +++ b/parser.h @@ -0,0 +1,87 @@ +#ifndef PARSER_H +#define PARSER_H + +#include "token.h" + +class Lexer; +class AstNode; + +/** + * @brief The Parser class represents an implementation of a simple calculator's expression parser + * + * The Parser class will for a given lexer produce a valid Abstract Syntax Tree (AST). It is a + * one-time-use object and can be destroyed after usage. It implements the following grammar: + * + * expression := term {(PLUS|MINUS) term} + * term := factor {(MULTIPLY|DIVIDE) factor} + * factor := (NUMBER|LEFT_PARENTHESIS expression RIGHT_PARENTHESIS) + * + * and produces an AST if the input has correct grammar. + * + * Usage: + * const std::string input = " (2 + 3) * 4 "; + * std::stringstream stream(input); + * + * Lexer lexer(stream); + * Parser parser(lexer); + * Interpreter interpreter; + * + * try { + * auto node = parser.parse(); + * double value = interpreter.interpret(*node); + * delete node; + * } catch(...) { + * .... + * } + * + * \see Interpreter + * \see Lexer + * \see AstNode + */ +class Parser +{ +public: + /** + * @brief Parser Creates the parser object for a given Lexer. + * @param lexer A valid Lexer object. + */ + Parser(Lexer &lexer); + + /** + * @brief parse Parses everything it finds on the input stream using the given lexer. + * @return Root node of the AST parsed from the input stream; throws if syntax or grammar is + * incorrect. 
+ */ + const AstNode *parse(); + +private: + /** + * @brief expression Parse an expression from input stream. + * @return A AST node of an expression if syntax and grammar is ok. + */ + const AstNode *expression(); + + /** + * @brief term Parse a term from input stream. + * @return A AST node of a term if syntax and grammar is ok. + */ + const AstNode *term(); + + /** + * @brief factor Parse a factor from input stream. + * @return A AST node of a factor if syntax and grammar is ok. + */ + const AstNode *factor(); + + /** + * @brief eat 'Eats' a token if given token is the same as the read one and reads next. Crashes + * otherwise indicating the grammar is wrong. + * @param type Expected token on the stream. + */ + void eat(Token::TokenType type); + + Lexer & _lexer; + Token _currentToken; +}; + +#endif // PARSER_H diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..7692435 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,62 @@ +cmake_minimum_required(VERSION 3.5) + +project(simpletest LANGUAGES CXX) + +add_definitions(-DGTEST_LANGUAGE_CXX11) +set(CMAKE_CXX_STANDARD 11) + +find_package(Threads REQUIRED) + +if ($ENV{GOOGLETEST_DIR}) + set(GOOGLETEST_DIR $ENV{GOOGLETEST_DIR}) +else () + if (NOT "/opt/local/src/googletest" STREQUAL "") + message(WARNING "Using googletest src dir specified at Qt Creator wizard") + endif () + set(GOOGLETEST_DIR "/opt/local/src/") +endif () +if (EXISTS ${GOOGLETEST_DIR}) + set(GTestSrc ${GOOGLETEST_DIR}/googletest) + set(GMockSrc ${GOOGLETEST_DIR}/googlemock) +elseif (UNIX AND EXISTS /usr/src/gtest) + set(GTestSrc /usr/src/gtest) + message(WARNING "Using gtest from system") + if (EXISTS /usr/src/gmock) + set(GMockSrc /usr/src/gmock) + endif () +else () + message( FATAL_ERROR "No googletest src dir found - set GOOGLETEST_DIR to enable!") +endif () + +set(GTestFiles ${GTestSrc}/src/gtest-all.cc) +set(GTestIncludes ${GTestSrc} ${GTestSrc}/include) +if (NOT ${GMockSrc} STREQUAL "") + list(APPEND 
GTestFiles ${GMockSrc}/src/gmock-all.cc) + list(APPEND GTestIncludes ${GMockSrc} ${GMockSrc}/include) +endif () + +set(ParserFiles ../astnode.h + ../interpreter.cpp + ../lexer.h + ../number.h + ../binaryoperator.cpp + ../interpreter.h + ../parser.cpp + ../token.h + ../astnode.cpp + ../binaryoperator.h + ../lexer.cpp + ../number.cpp + ../parser.h) +set(ParserIncludes ../) + + +include_directories(${GTestIncludes}) +include_directories(${ParserIncludes}) + +add_executable(simpletest main.cpp tst_simpletest.h + ${GTestFiles} + ${ParserFiles}) +add_test(NAME simpletest COMMAND simpletest) +target_link_libraries(simpletest PRIVATE Threads::Threads) + diff --git a/tests/main.cpp b/tests/main.cpp new file mode 100644 index 0000000..933a68c --- /dev/null +++ b/tests/main.cpp @@ -0,0 +1,9 @@ +#include "tst_simpletest.h" + +#include + +int main(int argc, char *argv[]) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tests/tst_simpletest.h b/tests/tst_simpletest.h new file mode 100644 index 0000000..a5c5f97 --- /dev/null +++ b/tests/tst_simpletest.h @@ -0,0 +1,282 @@ +#ifndef TST_SIMPLETEST_H +#define TST_SIMPLETEST_H + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +using namespace testing; + +TEST(ParserTests, simpletest) +{ + EXPECT_EQ(1, 1); + ASSERT_THAT(0, Eq(0)); +} + +TEST(ParserTests, NumberConstruction) +{ + Number *number; + Token validToken(Token::TT_NUMBER, '0'); + + // Number is valid only for characters between '0' and '9' + for (int i = 0; i < 10; ++i) { + number = new Number(Token(Token::TT_NUMBER, '0' + i)); + ASSERT_EQ(number->value(), '0' + i); + ASSERT_EQ(number->type(), AstNode::ANT_NUMBER); + delete number; + } + + // Check for invalid Numbers + ASSERT_ANY_THROW(new Number(Token(Token::TT_NUMBER, '0' - 1))); + ASSERT_ANY_THROW(new Number(Token(Token::TT_NUMBER, '9' + 1))); + + for (int i = Token::TT_PLUS; i <= Token::TT_EOF; ++i) { + ASSERT_ANY_THROW(new 
Number(Token(static_cast(i), '0'))); + } +} + +TEST(ParserTest, BinaryOperatorConstruction) +{ + BinaryOperator *bOperator; + auto genNum = [](int num) { + return new Number(Token(Token::TT_NUMBER, '0' + num)); + }; + + // Binary operator should be valid for any binary operator token. + Token::TokenType validOperators[] = + { Token::TT_PLUS, Token::TT_MINUS, Token::TT_MULTIPLY, Token::TT_DIVIDE }; + char operatorChars[] = { '+', '-', '*', '/' }; + + for (unsigned int i = 0; i < sizeof(validOperators) / sizeof(validOperators[0]); ++i) { + const Number *n1 = genNum(1); + const Number *n2 = genNum(2); + bOperator = new BinaryOperator(n1, + Token(validOperators[i], operatorChars[i]), + n2); + + ASSERT_EQ(n1, &bOperator->left()); + ASSERT_EQ(n2, &bOperator->right()); + ASSERT_EQ(validOperators[i], bOperator->op()); + ASSERT_EQ(AstNode::ANT_BINARY_OPERATOR, bOperator->type()); + + delete bOperator; + } + + // Check for invalid operators. + ASSERT_ANY_THROW(new BinaryOperator(genNum(1), Token(Token::TT_NUMBER, '0'), genNum(2))); + ASSERT_ANY_THROW(new BinaryOperator(genNum(1), Token(Token::TT_NUMBER, '0'), genNum(1))); + ASSERT_ANY_THROW(new BinaryOperator(genNum(1), Token(Token::TT_LEFT_PARENTHESIS, '('), genNum(2))); + ASSERT_ANY_THROW(new BinaryOperator(genNum(1), Token(Token::TT_RIGHT_PARENTHESIS, ')'), genNum(2))); +} + +TEST(ParserTest, Lexer) +{ + Lexer *lexer; + + // Simple behavior. 
+ std::stringstream stream(std::string("1+1")); + lexer = new Lexer(stream); + + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_NUMBER, '1')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_PLUS, '+')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_NUMBER, '1')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_EOF, '\0')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_EOF, '\0')); + + delete lexer; + + // White spaces should be ignored + stream = std::stringstream(std::string(" 1 + 1 ")); + lexer = new Lexer(stream); + + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_NUMBER, '1')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_PLUS, '+')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_NUMBER, '1')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_EOF, '\0')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_EOF, '\0')); + + delete lexer; + + // Allowed chars + stream = std::stringstream(std::string("0123456789()+-*/")); + lexer = new Lexer(stream); + + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_NUMBER, '0')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_NUMBER, '1')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_NUMBER, '2')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_NUMBER, '3')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_NUMBER, '4')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_NUMBER, '5')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_NUMBER, '6')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_NUMBER, '7')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_NUMBER, '8')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_NUMBER, '9')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_LEFT_PARENTHESIS, '(')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_RIGHT_PARENTHESIS, ')')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_PLUS, '+')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_MINUS, '-')); + ASSERT_EQ(lexer->getNextToken(), 
Token(Token::TT_MULTIPLY, '*')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_DIVIDE, '/')); + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_EOF, '\0')); + + delete lexer; + + // Invalid char + stream = std::stringstream(std::string("2 % 3")); + lexer = new Lexer(stream); + + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_NUMBER, '2')); + ASSERT_ANY_THROW(lexer->getNextToken()); + + delete lexer; + + // Empty input + stream = std::stringstream(std::string("")); + lexer = new Lexer(stream); + + ASSERT_EQ(lexer->getNextToken(), Token(Token::TT_EOF, '\0')); + + delete lexer; +} + +TEST(ParserTest, ParserAst) +{ + Lexer *lexer; + Parser *parser; + + { + // Simple behavior with valid gramatics. + // + + // 1 1 + std::stringstream stream(std::string("1+1")); + lexer = new Lexer(stream); + parser = new Parser(*lexer); + + const AstNode *rootNode = parser->parse(); + ASSERT_EQ(rootNode->type(), AstNode::ANT_BINARY_OPERATOR); + const BinaryOperator *bOperatorNode = static_cast(rootNode); + ASSERT_EQ(static_cast(bOperatorNode->left()).value(), '1'); + ASSERT_EQ(static_cast(bOperatorNode->right()).value(), '1'); + + delete rootNode; + delete parser; + delete lexer; + } + { + // Simple behavior with valid gramatics. 
+ // + + // 1 + + // 2 3 + std::stringstream stream(std::string("1+(2*3)")); + lexer = new Lexer(stream); + parser = new Parser(*lexer); + + const AstNode *rootNode = parser->parse(); + ASSERT_EQ(rootNode->type(), AstNode::ANT_BINARY_OPERATOR); + const BinaryOperator *bOperatorNode = static_cast(rootNode); + ASSERT_EQ(bOperatorNode->op(), Token::TT_PLUS); + ASSERT_EQ(static_cast(bOperatorNode->left()).value(), '1'); + ASSERT_EQ(bOperatorNode->right().type(), AstNode::ANT_BINARY_OPERATOR); + + const BinaryOperator *bOperatorNode2 = + static_cast(&bOperatorNode->right()); + ASSERT_EQ(bOperatorNode2->op(), Token::TT_MULTIPLY); + ASSERT_EQ(static_cast(bOperatorNode2->left()).value(), '2'); + ASSERT_EQ(static_cast(bOperatorNode2->right()).value(), '3'); + + + delete rootNode; + delete parser; + delete lexer; + } + + { + // Simple behavior with valid gramatics. + // * + // + 3 + // 1 2 + std::stringstream stream(std::string("(1+2)*3")); + lexer = new Lexer(stream); + parser = new Parser(*lexer); + + const AstNode *rootNode = parser->parse(); + ASSERT_EQ(rootNode->type(), AstNode::ANT_BINARY_OPERATOR); + const BinaryOperator *bOperatorNode = static_cast(rootNode); + ASSERT_EQ(bOperatorNode->op(), Token::TT_MULTIPLY); + ASSERT_EQ(static_cast(bOperatorNode->right()).value(), '3'); + ASSERT_EQ(bOperatorNode->left().type(), AstNode::ANT_BINARY_OPERATOR); + + const BinaryOperator *bOperatorNode2 = + static_cast(&bOperatorNode->left()); + ASSERT_EQ(bOperatorNode2->op(), Token::TT_PLUS); + ASSERT_EQ(static_cast(bOperatorNode2->left()).value(), '1'); + ASSERT_EQ(static_cast(bOperatorNode2->right()).value(), '2'); + + + delete rootNode; + delete parser; + delete lexer; + } +} + +TEST(ParserTest, Interpreter) +{ + auto calculate = [](const std::string &string) { + Lexer *lexer; + Parser *parser; + Interpreter interpreter; + + std::stringstream stream(string); + lexer = new Lexer(stream); + parser = new Parser(*lexer); + + const AstNode *rootNode = parser->parse(); + double result = 
interpreter.interpret(*rootNode); + + delete rootNode; + delete parser; + delete lexer; + + return result; + }; + + // Simple behavior cases with valid syntax & gramatic. + ASSERT_EQ(calculate("1+1"), 2.); + ASSERT_EQ(calculate("0+1+2+3+4+5+6+7+8+9"), 45.); + ASSERT_EQ(calculate("(1)"), 1.); + ASSERT_EQ(calculate("1+2*3"), 7.); + ASSERT_EQ(calculate("1+(2*3)"), 7.); + ASSERT_EQ(calculate("(1+2)*3"), 9.); + ASSERT_EQ(calculate("((1+1)*(1+1+1))*3"), 18.); + ASSERT_EQ(calculate(" ( (1 + 1) * ( 1 + 1 +1) )*3 "), 18.); + ASSERT_EQ(calculate("5/2"), 2.5); + ASSERT_EQ(calculate("1-1"), 0.); + ASSERT_EQ(calculate("1-9"), -8.); + ASSERT_EQ(calculate("5/0"), std::numeric_limits::infinity()); + ASSERT_EQ(calculate("(2+2+(3-1)+4) * 9"), 90.); + + // Invalid syntax or gramatic. + ASSERT_ANY_THROW(calculate("10+1")); + ASSERT_ANY_THROW(calculate("-1")); + ASSERT_ANY_THROW(calculate(" ( ")); + ASSERT_ANY_THROW(calculate("((1)")); + ASSERT_ANY_THROW(calculate("1+aaa")); + ASSERT_ANY_THROW(calculate("+")); + ASSERT_ANY_THROW(calculate("1+2% 5")); + ASSERT_ANY_THROW(calculate("1+()")); + ASSERT_ANY_THROW(calculate("() + 1 ")); + ASSERT_ANY_THROW(calculate("1+1 ) ")); + ASSERT_ANY_THROW(calculate("1-1 ) ")); + ASSERT_ANY_THROW(calculate("1*1 ) ")); + ASSERT_ANY_THROW(calculate("1/1 ) ")); +} + +#endif // TST_SIMPLETEST_H diff --git a/token.h b/token.h new file mode 100644 index 0000000..9b18084 --- /dev/null +++ b/token.h @@ -0,0 +1,43 @@ +#ifndef TOKEN_H +#define TOKEN_H + +/** + * @brief The Token class represent wraper around token concept for the calculator syntax + * + * Token class object represent what one lexer has read from an input. In its essence it is a pair + * of interpreted value and actual value read from input. In a case of the calculatro syntax valid + * tokens can be one of the following characters 0123456789+*-/() and EOF character. 
+ * \see Lexer + */ +class Token +{ +public: + enum TokenType { + TT_NUMBER, + TT_PLUS, + TT_MINUS, + TT_MULTIPLY, + TT_DIVIDE, + TT_LEFT_PARENTHESIS, + TT_RIGHT_PARENTHESIS, + TT_EOF, + }; + + Token(TokenType type, char value) + : _type(type), _value(value) {} + + TokenType type() const { return _type; } + char value() const { return _value; } + +private: + Token(); + + TokenType _type; + char _value; +}; + +inline bool operator==(const Token &left, const Token& right) { + return left.type() == right.type() && left.value() == right.value(); +}; + +#endif // TOKEN_H