diff --git a/grammar.ebnf b/grammar.ebnf index 1198841..162a2f5 100644 --- a/grammar.ebnf +++ b/grammar.ebnf @@ -32,9 +32,8 @@ string ::= ('\'' {all_characters} '\'' | '\"' {all_characters} '\"' assignop ::= '=' | '+=' | '-=' | '*=' | '/=' | '|=' | '&=' | '^=' | '%=' | '**=' | '<<=' | '>>=' | '>>>='; logicop ::= '&&' | '||' | '^^' | 'and' | 'or' | 'xor'; compop ::= '==' | '!=' | '<' | '<=' | '>' | '>=' | 'is' | '!is'; -mathop ::= '+' | '-' | '*' | '/' | '' | '**'; +mathop ::= '+' | '-' | '*' | '/' | '%' | '**'; bitop ::= '&' | '|' | '^' | '<<' | '>>' | '>>>'; -exprop ::= mathop | compop | logicop | bitop; primtype ::= 'void' | 'int' | 'int8' | 'int16' | 'int32' | 'int64' | 'uint' | 'uint8' | 'uint16' | @@ -42,8 +41,8 @@ primtype ::= 'void' | 'int' | 'int8' | 'int16' | 'int32' | 'int64' | datatype ::= (identifier | primtype | 'auto'); scope ::= ['::'] {identifier '::'} [identifier ['<' type {',' type} '>'] '::']; type ::= ['const'] scope datatype ['<' type {',' type} '>'] { ('[' ']') | ('@' ['const']) }; -# Condition is defined further below due to a circular dependency: condition->expr->exprterm->initlist->assign -assign ::= condition [ assignop assign ]; +# ternary is defined further below due to a circular dependency: ternary->expr->exprterm->initlist->assign +assign ::= ternary [ assignop assign ]; initlist ::= '{' [assign | initlist] {',' [assign | initlist]} '}'; exprpreop ::= '-' | '+' | '!' | '++' | '--' | '~' | '@'; arglist ::= '(' [identifier ':'] assign {',' [identifier ':'] assign} ')'; @@ -59,8 +58,8 @@ lambda ::= 'function' '(' [[type typemod] identifier {',' [type typ exprvalue ::= 'void' | constructcall | funccall | varaccess | cast | literal | '(' assign ')' | lambda; exprpostop ::= ('.' (funccall | identifier)) | ('[' [identifier ':'] assign {',' [identifier ':' assign} ']') | arglist | '++' | '--'; exprterm ::= ([type '='] initlist) | ({exprpreop} exprvalue {exprpostop}); -expr ::= exprterm {exprop exprterm}; -condition ::= expr ['?' assign : assign]; +expr ::= exprterm {(mathop | compop | logicop | bitop) exprterm}; +ternary ::= expr ['?' assign : assign]; return ::= 'return' [assign] ';'; exprstat ::= [assign] ';'; diff --git a/src/CoreData/Operators.hpp b/src/CoreData/Operators.hpp new file mode 100644 index 0000000..bf41426 --- /dev/null +++ b/src/CoreData/Operators.hpp @@ -0,0 +1,59 @@ +#ifndef MALACHSCRIPT_OPERATORS_HPP +#define MALACHSCRIPT_OPERATORS_HPP + +#include + +namespace MalachScript { + enum class AssignmentOperator : uint8_t { + Assignment, // = + AdditionAssignment, // += + SubtractionAssignment, // -= + MultiplicationAssignment, // *= + DivisionAssignment, // /= + BitwiseOrAssignment, // |= + BitwiseAndAssignment, // &= + BitwiseXorAssignment, // ^= + ModuloAssignment, // %= + ExponentiationAssignment, // **= + BitwiseLeftShiftAssignment, // <<= + BitwiseRightShiftAssignment, // >>= + ArithmeticRightShiftAssignment, // >>>= + }; + + enum class LogicOperator : uint8_t { + LogicalAnd, // &&, and + LogicalOr, // ||, or + LogicalXor, // ^^, xor + }; + + enum class ComparisonOperator : uint8_t { + Equality, // == + Inequality, // != + LessThan, // < + LessThanEquals, // <= + GreaterThan, // > + GreaterThanEquals, // >= + Identity, // is + InverseIdentity, // !is + }; + + enum class MathOperator : uint8_t { + Addition, + Subtraction, + Multiplication, + Division, + Modulo, + Exponentiation, + }; + + enum class BitOperator : uint8_t { + BitwiseAnd, + BitwiseOr, + BitwiseXor, + BitwiseLeftShift, + BitwiseRightShift, + ArithmeticRightShift, + }; +} + +#endif // MALACHSCRIPT_OPERATORS_HPP diff --git a/src/Parser/Expressions/ParsedExpression.hpp b/src/Parser/Expressions/ParsedExpression.hpp deleted file mode 100644 index 64caa60..0000000 --- a/src/Parser/Expressions/ParsedExpression.hpp +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef MALACHSCRIPT_PARSEDEXPRESSION_HPP -#define MALACHSCRIPT_PARSEDEXPRESSION_HPP - -namespace MalachScript::Parser { - class ParsedExpression {}; -} - -#endif // MALACHSCRIPT_PARSEDEXPRESSION_HPP diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index e152326..7089f49 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -11,6 +11,8 @@ #define EXPECT_TOKEN(token, kind) \ if (token->GetKind() != LexTokenKind::kind) { \ LogError(Diagnostics::DiagnosticType::UnexpectedToken, token->GetSpan()); \ + } else { \ + PROGRESS_TOKEN(token); \ } namespace MalachScript::Parser { @@ -20,18 +22,20 @@ namespace MalachScript::Parser { std::vector statements; statements.reserve(32); size_t current = 0; + const auto* currentToken = _firstToken; while (true) { - while (_currentToken->GetKind() == LexTokenKind::Whitespace) { - _currentToken = _currentToken->GetNext().get(); + while (currentToken->GetKind() == LexTokenKind::Whitespace) { + currentToken = currentToken->GetNext().get(); } - if (_currentToken->GetKind() == LexTokenKind::EndOfFile) { + if (currentToken->GetKind() == LexTokenKind::EndOfFile) { break; } const ParsedStatement* statement; - auto result = ParseClass(statement) || ParseFunc(statement) || ParseNamespace(statement); + auto result = ParseClass(statement, currentToken) || ParseFunc(statement, currentToken) || + ParseNamespace(statement, currentToken); if (!result) { // TODO: Log error - PROGRESS_TOKEN(_currentToken); + PROGRESS_TOKEN(currentToken); continue; } statements.push_back(statement); @@ -44,8 +48,8 @@ namespace MalachScript::Parser { } return new ParsedScriptStatement(TextSpan(0, end), statements); } - bool Parser::ParseClass(const ParsedStatement*& out) { - const auto* current = _currentToken; + bool Parser::ParseClass(const ParsedStatement*& out, const LexToken*& currentToken) { + const auto* current = currentToken; auto start = current->GetSpan().GetStart(); bool lookingForClass = true; while (lookingForClass) { @@ -62,7 +66,7 @@ namespace MalachScript::Parser { // After class keyword, an identifier should always follow, if it doesn't, log an error. Identifier identifier; if (!ParseIdentifier(identifier, current)) { - LogError(Diagnostics::DiagnosticType::UnexpectedToken, _currentToken->GetSpan()); + LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); } PROGRESS_TOKEN(current); std::vector inherits; @@ -77,14 +81,14 @@ namespace MalachScript::Parser { case LexTokenKind::ColonSymbol: { PROGRESS_TOKEN(current); Identifier id; - if (!ParseIdentifier(id, _currentToken)) { - LogError(Diagnostics::DiagnosticType::UnexpectedToken, _currentToken->GetSpan()); + if (!ParseIdentifier(id, current)) { + LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); } inherits.push_back(id); while (current->GetKind() == LexTokenKind::CommaSymbol) { PROGRESS_TOKEN(current); - if (!ParseIdentifier(id, _currentToken)) { - LogError(Diagnostics::DiagnosticType::UnexpectedToken, _currentToken->GetSpan()); + if (!ParseIdentifier(id, current)) { + LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); } inherits.push_back(id); PROGRESS_TOKEN(current); @@ -105,123 +109,124 @@ namespace MalachScript::Parser { } const ParsedStatement* statement = nullptr; // TODO: Sort by complexity - if (!ParseVirtProp(statement, current) && !ParseFunc(statement) && !ParseVar(statement) && - !ParseFuncDef(statement)) { + if (!ParseVirtProp(statement, current) && !ParseFunc(statement, current) && + !ParseVar(statement, current) && !ParseFuncDef(statement, current)) { LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); break; - } else { - body.push_back(statement); } + body.push_back(statement); } break; } default: throw; } out = new ParsedClassStatement(TextSpan(start, current->GetSpan().GetEnd()), identifier, inherits, body); - _currentToken = current; + currentToken = current; return true; } - bool Parser::ParseTypeDef(const ParsedStatement*& out) { - if (_currentToken->GetKind() != LexTokenKind::TypedefKeyword) { + bool Parser::ParseTypeDef(const ParsedStatement*& out, const LexToken*& currentToken) { + const auto* current = currentToken; + if (current->GetKind() != LexTokenKind::TypedefKeyword) { return false; } - auto start = _currentToken->GetSpan().GetStart(); - PROGRESS_TOKEN(_currentToken); + auto start = current->GetSpan().GetStart(); + PROGRESS_TOKEN(current); Identifier defineFrom; - if (!ParsePrimType(defineFrom, _currentToken) && !ParseIdentifier(defineFrom, _currentToken)) { - LogError(Diagnostics::DiagnosticType::UnexpectedToken, _currentToken->GetSpan()); + if (!ParsePrimType(defineFrom, current) && !ParseIdentifier(defineFrom, current)) { + LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); } - PROGRESS_TOKEN(_currentToken); + PROGRESS_TOKEN(current); Identifier defineTo; - if (!ParseIdentifier(defineTo, _currentToken)) { - LogError(Diagnostics::DiagnosticType::UnexpectedToken, _currentToken->GetSpan()); + if (!ParseIdentifier(defineTo, current)) { + LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); } - PROGRESS_TOKEN(_currentToken); - EXPECT_TOKEN(_currentToken, SemicolonSymbol); - PROGRESS_TOKEN(_currentToken); - out = new ParsedTypeDefStatement(TextSpan(start, _currentToken->GetSpan().GetEnd()), defineTo, defineFrom); + PROGRESS_TOKEN(current); + EXPECT_TOKEN(current, SemicolonSymbol); + out = new ParsedTypeDefStatement(TextSpan(start, current->GetSpan().GetEnd()), defineTo, defineFrom); return true; } - bool Parser::ParseNamespace(const ParsedStatement*& out) { - if (_currentToken->GetKind() != LexTokenKind::NamespaceKeyword) { + bool Parser::ParseNamespace(const ParsedStatement*& out, const LexToken*& currentToken) { + const auto* current = currentToken; + if (current->GetKind() != LexTokenKind::NamespaceKeyword) { return false; } - auto start = _currentToken->GetSpan().GetStart(); - PROGRESS_TOKEN(_currentToken); + auto start = current->GetSpan().GetStart(); + PROGRESS_TOKEN(current); Identifier identifier; - if (!ParseIdentifier(identifier, _currentToken)) { - LogError(Diagnostics::DiagnosticType::UnexpectedToken, _currentToken->GetSpan()); + if (!ParseIdentifier(identifier, current)) { + LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); } const auto* script = ParseScript(); - auto end = _currentToken->GetSpan().GetEnd(); - PROGRESS_TOKEN(_currentToken); + auto end = current->GetSpan().GetEnd(); + PROGRESS_TOKEN(current); out = new ParsedNamespaceStatement(TextSpan(start, end), identifier, script); + currentToken = current; return true; } - bool Parser::ParseFunc(const ParsedStatement*& out) { - auto start = _currentToken->GetSpan().GetStart(); - const auto* token = _currentToken; + bool Parser::ParseFunc(const ParsedStatement*& out, const LexToken*& currentToken) { + const auto* current = currentToken; + auto start = current->GetSpan().GetStart(); bool isShared = false; bool isExternal = false; bool modifiers = true; while (modifiers) { - switch (token->GetKind()) { + switch (current->GetKind()) { case LexTokenKind::SharedKeyword: isShared = true; - PROGRESS_TOKEN(token); + PROGRESS_TOKEN(current); continue; case LexTokenKind::ExternalKeyword: isExternal = true; - PROGRESS_TOKEN(token); + PROGRESS_TOKEN(current); continue; default: modifiers = false; break; } } AccessModifier accessModifier = AccessModifier::Public; - if (token->GetKind() == LexTokenKind::PrivateKeyword) { + if (current->GetKind() == LexTokenKind::PrivateKeyword) { accessModifier = AccessModifier::Private; - PROGRESS_TOKEN(token); - } else if (token->GetKind() == LexTokenKind::ProtectedKeyword) { + PROGRESS_TOKEN(current); + } else if (current->GetKind() == LexTokenKind::ProtectedKeyword) { accessModifier = AccessModifier::Protected; - PROGRESS_TOKEN(token); + PROGRESS_TOKEN(current); } const ParsedStatement* typeStatement = nullptr; bool returnsReference = false; - if (token->GetKind() == LexTokenKind::TildeSymbol) { + if (current->GetKind() == LexTokenKind::TildeSymbol) { // TODO: Handle destructor throw std::logic_error("not implemented"); - } else if (ParseType(typeStatement, token)) { - if (token->GetKind() == LexTokenKind::AmpersandSymbol) { + } else if (ParseType(typeStatement, current)) { + if (current->GetKind() == LexTokenKind::AmpersandSymbol) { returnsReference = true; - PROGRESS_TOKEN(token); + PROGRESS_TOKEN(current); } } Identifier identifier; - if (!ParseIdentifier(identifier, token)) { + if (!ParseIdentifier(identifier, current)) { return false; } - PROGRESS_TOKEN(token); + PROGRESS_TOKEN(current); const ParsedStatement* paramList = nullptr; - if (!ParseParamList(paramList, token)) { + if (!ParseParamList(paramList, current)) { return false; } - _currentToken = token; bool isConst = false; - if (_currentToken->GetKind() == LexTokenKind::ConstKeyword) { + if (current->GetKind() == LexTokenKind::ConstKeyword) { isConst = true; - PROGRESS_TOKEN(_currentToken); + PROGRESS_TOKEN(current); } FuncAttr funcAttr = FuncAttr::None; - ParseFuncAttr(funcAttr, _currentToken); + ParseFuncAttr(funcAttr, current); const ParsedStatement* statblock = nullptr; - if (_currentToken->GetKind() != LexTokenKind::SemicolonSymbol) { + if (current->GetKind() != LexTokenKind::SemicolonSymbol) { // TODO: Parse stat block. throw std::logic_error("not implemented"); } - out = new ParsedFuncStatement(TextSpan(start, _currentToken->GetSpan().GetEnd()), isShared, isExternal, + out = new ParsedFuncStatement(TextSpan(start, current->GetSpan().GetEnd()), isShared, isExternal, accessModifier, typeStatement, returnsReference, identifier, paramList, isConst, funcAttr, statblock); + currentToken = current; return true; } @@ -355,7 +360,7 @@ namespace MalachScript::Parser { const ParsedStatement* typeStatement = nullptr; TypeMod typeMod = TypeMod::None; Identifier identifier; - const ParsedExpression* defaultExpression = nullptr; + const ParsedStatement* defaultExpression = nullptr; if (!ParseType(typeStatement, currentToken)) { LogError(Diagnostics::DiagnosticType::UnexpectedToken, currentToken->GetSpan()); @@ -410,8 +415,8 @@ namespace MalachScript::Parser { } bool Parser::ParseVirtProp([[maybe_unused]] const ParsedStatement*& out, const LexToken*& currentToken) { - AccessModifier access = AccessModifier::Public; const auto* current = currentToken; + AccessModifier access = AccessModifier::Public; if (current->GetKind() == LexTokenKind::PrivateKeyword) { access = AccessModifier::Private; PROGRESS_TOKEN(current); @@ -456,11 +461,9 @@ namespace MalachScript::Parser { } ParseFuncAttr(getAttr, current); if (current->GetKind() != LexTokenKind::SemicolonSymbol) { - // TODO: Parse stat block. - // if (ParseStatBlock(getStatement, current)){ - // - // } - this->LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); + if (!ParseStatBlock(getStatement, current)) { + this->LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); + } } PROGRESS_TOKEN(current); @@ -477,11 +480,9 @@ namespace MalachScript::Parser { } ParseFuncAttr(setAttr, current); if (current->GetKind() != LexTokenKind::SemicolonSymbol) { - // TODO: Parse stat block. - // if (ParseStatBlock(setStatement, current)){ - // - // } - this->LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); + if (!ParseStatBlock(setStatement, current)) { + this->LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); + } } PROGRESS_TOKEN(current); @@ -507,8 +508,108 @@ namespace MalachScript::Parser { return true; } - bool Parser::ParseVar([[maybe_unused]] const ParsedStatement*& out) { return false; } - bool Parser::ParseFuncDef([[maybe_unused]] const ParsedStatement*& out) { return false; } + bool Parser::ParseIfStatement([[maybe_unused]] const ParsedStatement*& out, const LexToken*& currentToken) { + const auto* current = currentToken; + if (current->GetKind() != LexTokenKind::IfKeyword) { + return false; + } + PROGRESS_TOKEN(current); + EXPECT_TOKEN(current, OpenParenthesisSymbol); + const ParsedStatement* condition = nullptr; + if (!ParseAssign(condition, current)) { + LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); + return false; + } + EXPECT_TOKEN(current, CloseParenthesisSymbol); + const ParsedStatement* body = nullptr; + if (!ParseStatement(body, current)) { + LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); + return false; + } + const ParsedStatement* elseStatement = nullptr; + if (current->GetKind() == LexTokenKind::ElseKeyword) { + PROGRESS_TOKEN(current); + if (!ParseStatement(elseStatement, current)) { + LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); + } + } + out = new ParsedIfStatement(TextSpan(currentToken->GetSpan().GetStart(), current->GetSpan().GetEnd()), + condition, body, elseStatement); + currentToken = current; + return true; + } + + bool Parser::ParseStatement(const ParsedStatement*& out, const LexToken*& currentToken) { + // TODO: All the other statements. + return ParseIfStatement(out, currentToken); + } + + bool Parser::ParseVar([[maybe_unused]] const ParsedStatement*& out, const LexToken*& currentToken) { + const auto* current = currentToken; + AccessModifier access = AccessModifier::Public; + if (current->GetKind() == LexTokenKind::PrivateKeyword) { + access = AccessModifier::Private; + PROGRESS_TOKEN(current); + } else if (current->GetKind() == LexTokenKind::ProtectedKeyword) { + access = AccessModifier::Protected; + PROGRESS_TOKEN(current); + } + const ParsedStatement* typeStatement = nullptr; + if (!ParseType(typeStatement, current)) { + return false; + } + Identifier identifier; + if (!ParseIdentifier(identifier, current)) { + delete typeStatement; + return false; + } + PROGRESS_TOKEN(current); + // TODO: Default values + // TODO: Creating multiple vars in a single line (int a, b, c) + if (current->GetKind() == LexTokenKind::SemicolonSymbol) { + PROGRESS_TOKEN(current); + } else { + LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); + } + out = new ParsedVarStatement(TextSpan(currentToken->GetSpan().GetStart(), current->GetSpan().GetEnd()), access, + typeStatement, identifier); + currentToken = current; + return true; + } + + bool Parser::ParseStatBlock(const ParsedStatement*& out, const LexToken*& currentToken) { + const auto* current = currentToken; + if (current->GetKind() != LexTokenKind::OpenCurlyParenthesisSymbol) { + return false; + } + std::vector statements; + while (true) { + if (current->GetKind() == LexTokenKind::CloseCurlyParenthesisSymbol) { + break; + } + const ParsedStatement* stat = nullptr; + if (ParseVar(stat, current) || ParseStatement(stat, current)) { + statements.push_back(stat); + } else { + break; + } + } + if (current->GetKind() == LexTokenKind::CloseCurlyParenthesisSymbol) { + PROGRESS_TOKEN(current); + } else { + LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); + } + + out = new ParsedStatBlockStatement(TextSpan(currentToken->GetSpan().GetStart(), current->GetSpan().GetEnd()), + statements); + currentToken = current; + return true; + } + + bool Parser::ParseFuncDef([[maybe_unused]] const ParsedStatement*& out, + [[maybe_unused]] const LexToken*& currentToken) { + return false; + } bool Parser::ParsePrimType(Identifier& out, const LexToken*& token) { switch (token->GetKind()) { case LexTokenKind::VoidKeyword: out = PrimitiveTypes::VoidName(); return true; @@ -549,4 +650,107 @@ namespace MalachScript::Parser { default: typeMod = TypeMod::RefInOut; return true; } } -} + bool Parser::ParseAssign(const ParsedStatement*& out, const LexToken*& currentToken) { + const auto* current = currentToken; + const ParsedStatement* leftHand = nullptr; + if (!ParseTernary(leftHand, current)) { + return false; + } + AssignmentOperator op; + if (!ParseAssignOp(op, current)) { + out = leftHand; + currentToken = current; + return true; + } + PROGRESS_TOKEN(current); + const ParsedStatement* rightHand = nullptr; + if (!ParseAssign(rightHand, current)) { + LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); + out = leftHand; + currentToken = current; + return true; + } + out = new ParsedBinaryStatement( + TextSpan(currentToken->GetSpan().GetStart(), current->GetSpan().GetEnd()), leftHand, op, rightHand); + currentToken = current; + return true; + } + + bool Parser::ParseTernary(const ParsedStatement*& out, const LexToken*& currentToken) { + // TODO: implement ternary. + return ParseExpr(out, currentToken); + } + + bool Parser::ParseExpr(const ParsedStatement*& out, const LexToken*& currentToken) { + const auto* current = currentToken; + const ParsedStatement* leftHand = nullptr; + if (!ParseExprTerm(leftHand, current)) { + return false; + } + MathOperator mathOp; + if (ParseMathOp(mathOp, current)) { + PROGRESS_TOKEN(current); + const ParsedStatement* rightHand = nullptr; + if (!ParseExprTerm(rightHand, current)) { + LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); + out = leftHand; + currentToken = current; + return true; + } + out = new ParsedBinaryStatement( + TextSpan(currentToken->GetSpan().GetStart(), current->GetSpan().GetEnd()), leftHand, mathOp, rightHand); + return true; + } + ComparisonOperator compOp; + if (ParseCompOp(compOp, current)) { + PROGRESS_TOKEN(current); + const ParsedStatement* rightHand = nullptr; + if (!ParseExprTerm(rightHand, current)) { + LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); + out = leftHand; + currentToken = current; + return true; + } + out = new ParsedBinaryStatement( + TextSpan(currentToken->GetSpan().GetStart(), current->GetSpan().GetEnd()), leftHand, compOp, rightHand); + return true; + } + LogicOperator logicOp; + if (ParseLogicOp(logicOp, current)) { + PROGRESS_TOKEN(current); + const ParsedStatement* rightHand = nullptr; + if (!ParseExprTerm(rightHand, current)) { + LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); + out = leftHand; + currentToken = current; + return true; + } + out = new ParsedBinaryStatement( + TextSpan(currentToken->GetSpan().GetStart(), current->GetSpan().GetEnd()), leftHand, logicOp, + rightHand); + return true; + } + BitOperator bitOp; + if (ParseBitOp(bitOp, current)) { + PROGRESS_TOKEN(current); + const ParsedStatement* rightHand = nullptr; + if (!ParseExprTerm(rightHand, current)) { + LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); + out = leftHand; + currentToken = current; + return true; + } + out = new ParsedBinaryStatement( + TextSpan(currentToken->GetSpan().GetStart(), current->GetSpan().GetEnd()), leftHand, bitOp, rightHand); + return true; + } + out = leftHand; + currentToken = current; + return true; + } + + bool Parser::ParseExprTerm([[maybe_unused]] const ParsedStatement*& out, + [[maybe_unused]] const LexToken*& currentToken) { + return false; + } +} \ No newline at end of file diff --git a/src/Parser/Parser.hpp b/src/Parser/Parser.hpp index aa10d73..71a0f99 100644 --- a/src/Parser/Parser.hpp +++ b/src/Parser/Parser.hpp @@ -1,50 +1,172 @@ #ifndef MALACHSCRIPT_PARSER_HPP #define MALACHSCRIPT_PARSER_HPP +#include "../CoreData/Operators.hpp" #include "../Diagnostics/Diagnostics.hpp" #include "Lexer/LexToken.hpp" #include "Statements/ParsedStatement.hpp" + namespace MalachScript::Parser { class Parser { public: Parser(std::u8string_view scriptName, const LexToken* firstToken, Diagnostics::Diagnostics* diagnostics) - : _scriptName(scriptName), _diagnostics(diagnostics), _currentToken(firstToken) {} + : _scriptName(scriptName), _diagnostics(diagnostics), _firstToken(firstToken) {} const ParsedScriptStatement* Parse(); private: std::u8string_view _scriptName; Diagnostics::Diagnostics* _diagnostics; - const LexToken* _currentToken; + const LexToken* _firstToken; inline void LogError(Diagnostics::DiagnosticType type, const TextSpan& span) { _diagnostics->LogError(type, _scriptName, span); } - const ParsedScriptStatement* ParseScript(); - bool ParseClass(const ParsedStatement*& out); - bool ParseTypeDef(const ParsedStatement*& out); - bool ParseNamespace(const ParsedStatement*& out); - bool ParseFunc(const ParsedStatement*& out); + ///////////////////////////////////////////////////////////////////////////////////////// + // Underlying functions are laid out in the order they are defined in the grammar.ebnf // + ///////////////////////////////////////////////////////////////////////////////////////// - bool ParseType(const ParsedStatement*& out, const LexToken*& currentToken); - bool ParseScope(std::vector& out, const LexToken*& currentToken); - bool ParseFuncAttr(FuncAttr& out, const LexToken*& currentToken); - bool ParseParamList(const ParsedStatement*& out, const LexToken*& currentToken); - bool ParseTypeMod(TypeMod& typeMod, const LexToken*& currentToken); - bool ParseDataType(Identifier& out, const LexToken*& currentToken); - - bool ParseVirtProp(const ParsedStatement*& out, const LexToken*& currentToken); - bool ParseVar(const ParsedStatement*& out); - bool ParseFuncDef(const ParsedStatement*& out); - - bool ParsePrimType(Identifier& out, const LexToken*& currentToken); - static bool ParseIdentifier(Identifier& out, const LexToken* token) { + static inline bool ParseIdentifier(Identifier& out, const LexToken* token) { if (token->GetKind() != LexTokenKind::Identifier) { return false; } out = reinterpret_cast(token)->GetValue(); return true; } + + static inline bool ParseAssignOp(AssignmentOperator& op, const LexToken*& token) { + switch (token->GetKind()) { + case LexTokenKind::EqualsSymbol: op = AssignmentOperator::Assignment; return true; + case LexTokenKind::PlusEqualsSymbol: op = AssignmentOperator::AdditionAssignment; return true; + case LexTokenKind::MinusEqualsSymbol: op = AssignmentOperator::SubtractionAssignment; return true; + case LexTokenKind::StarEqualsSymbol: op = AssignmentOperator::MultiplicationAssignment; return true; + case LexTokenKind::SlashEqualsSymbol: op = AssignmentOperator::DivisionAssignment; return true; + case LexTokenKind::VerticalLineEqualsSymbol: op = AssignmentOperator::BitwiseOrAssignment; return true; + case LexTokenKind::AmpersandEqualsSymbol: op = AssignmentOperator::BitwiseAndAssignment; return true; + case LexTokenKind::CaretEqualsSymbol: op = AssignmentOperator::BitwiseXorAssignment; return true; + case LexTokenKind::PercentEqualsSymbol: op = AssignmentOperator::ModuloAssignment; return true; + case LexTokenKind::StarStarEqualsSymbol: op = AssignmentOperator::ExponentiationAssignment; return true; + case LexTokenKind::LessThanLessThanEqualsSymbol: + op = AssignmentOperator::BitwiseLeftShiftAssignment; + return true; + case LexTokenKind::GreaterThanGreaterThanEqualsSymbol: + op = AssignmentOperator::BitwiseRightShiftAssignment; + return true; + case LexTokenKind::GreaterThanGreaterThanGreaterThanEqualsSymbol: + op = AssignmentOperator::ArithmeticRightShiftAssignment; + return true; + default: return false; + } + } + + static inline bool ParseLogicOp(LogicOperator& op, const LexToken*& token) { + switch (token->GetKind()) { + case LexTokenKind::AmpersandAmpersandSymbol: + case LexTokenKind::AndKeyword: op = LogicOperator::LogicalAnd; return true; + case LexTokenKind::VerticalLineVerticalLineSymbol: + case LexTokenKind::OrKeyword: op = LogicOperator::LogicalOr; return true; + case LexTokenKind::CaretCaretSymbol: + case LexTokenKind::XorKeyword: op = LogicOperator::LogicalXor; return true; + default: return false; + } + } + + static inline bool ParseCompOp(ComparisonOperator& op, const LexToken*& token) { + switch (token->GetKind()) { + case LexTokenKind::EqualsEqualsSymbol: op = ComparisonOperator::Equality; return true; + case LexTokenKind::ExclamationMarkEqualsSymbol: op = ComparisonOperator::Inequality; return true; + case LexTokenKind::LessThanSymbol: op = ComparisonOperator::LessThan; return true; + case LexTokenKind::LessThanEqualsSymbol: op = ComparisonOperator::LessThanEquals; return true; + case LexTokenKind::GreaterThanSymbol: op = ComparisonOperator::GreaterThan; return true; + case LexTokenKind::GreaterThanEqualsSymbol: op = ComparisonOperator::GreaterThanEquals; return true; + case LexTokenKind::IsKeyword: op = ComparisonOperator::Identity; return true; + case LexTokenKind::ExclamationMarkIsSymbol: op = ComparisonOperator::InverseIdentity; return true; + default: return false; + } + } + + static inline bool ParseMathOp(MathOperator& op, const LexToken*& token) { + switch (token->GetKind()) { + case LexTokenKind::PlusSymbol: op = MathOperator::Addition; return true; + case LexTokenKind::MinusSymbol: op = MathOperator::Subtraction; return true; + case LexTokenKind::StarSymbol: op = MathOperator::Multiplication; return true; + case LexTokenKind::SlashSymbol: op = MathOperator::Division; return true; + case LexTokenKind::PercentSymbol: op = MathOperator::Modulo; return true; + case LexTokenKind::StarStarSymbol: op = MathOperator::Exponentiation; return true; + default: return false; + } + } + + static inline bool ParseBitOp(BitOperator& op, const LexToken*& token) { + switch (token->GetKind()) { + case LexTokenKind::AmpersandSymbol: op = BitOperator::BitwiseAnd; return true; + case LexTokenKind::VerticalLineSymbol: op = BitOperator::BitwiseOr; return true; + case LexTokenKind::CaretSymbol: op = BitOperator::BitwiseXor; return true; + case LexTokenKind::LessThanLessThanSymbol: op = BitOperator::BitwiseLeftShift; return true; + case LexTokenKind::GreaterThanGreaterThanSymbol: op = BitOperator::BitwiseRightShift; return true; + case LexTokenKind::GreaterThanGreaterThanGreaterThanSymbol: + op = BitOperator::ArithmeticRightShift; + return true; + default: return false; + } + } + + bool ParsePrimType(Identifier& out, const LexToken*& currentToken); + bool ParseDataType(Identifier& out, const LexToken*& currentToken); + bool ParseScope(std::vector& out, const LexToken*& currentToken); + bool ParseType(const ParsedStatement*& out, const LexToken*& currentToken); + bool ParseAssign(const ParsedStatement*& out, const LexToken*& currentToken); + // InitList + // ExprPreOp + // ArgList + // FuncCall + // ConstructCall + // VarAccess + // Cast + // Literal + bool ParseTypeMod(TypeMod& typeMod, const LexToken*& currentToken); + // Lambda + + // ExprValue + // ExprPostOp + bool ParseExprTerm(const ParsedStatement*& out, const LexToken*& currentToken); + bool ParseExpr(const ParsedStatement*& out, const LexToken*& currentToken); + bool ParseTernary(const ParsedStatement*& out, const LexToken*& currentToken); + + // Return + // ExprStat + // Continue + // Break + + bool ParseIfStatement(const ParsedStatement*& out, const LexToken*& currentToken); + // For + // While + // DoWhile + // Try + // Case + // Switch + + bool ParseStatement(const ParsedStatement*& out, const LexToken*& currentToken); + bool ParseVar(const ParsedStatement*& out, const LexToken*& currentToken); + bool ParseStatBlock(const ParsedStatement*& out, const LexToken*& currentToken); + + bool ParseFuncAttr(FuncAttr& out, const LexToken*& currentToken); + bool ParseParamList(const ParsedStatement*& out, const LexToken*& currentToken); + + bool ParseVirtProp(const ParsedStatement*& out, const LexToken*& currentToken); + bool ParseFunc(const ParsedStatement*& out, const LexToken*& currentToken); + bool ParseFuncDef(const ParsedStatement*& out, const LexToken*& currentToken); + bool ParseClass(const ParsedStatement*& out, const LexToken*& currentToken); + + // Mixin + // Enum + // Import + bool ParseTypeDef(const ParsedStatement*& out, const LexToken*& currentToken); + + // InterfaceMethod + // Interface + bool ParseNamespace(const ParsedStatement*& out, const LexToken*& currentToken); + const ParsedScriptStatement* ParseScript(); }; } diff --git a/src/Parser/Statements/ParsedStatement.hpp b/src/Parser/Statements/ParsedStatement.hpp index 7362c28..b1e9685 100644 --- a/src/Parser/Statements/ParsedStatement.hpp +++ b/src/Parser/Statements/ParsedStatement.hpp @@ -7,7 +7,6 @@ #include "../../CoreData/FuncAttr.hpp" #include "../../CoreData/TypeMod.hpp" #include "../../TextSpan.hpp" -#include "../Expressions/ParsedExpression.hpp" #include "ParsedStatementKind.hpp" namespace MalachScript::Parser { class ParsedStatement { @@ -114,11 +113,11 @@ namespace MalachScript::Parser { std::unique_ptr _typeStatement = nullptr; TypeMod _typeMod = TypeMod::None; Identifier _identifier; - std::unique_ptr _defaultExpression = nullptr; + std::unique_ptr _defaultExpression = nullptr; public: ParsedParameter(const ParsedTypeStatement* typeStatement, TypeMod typeMod, const Identifier& identifier, - const ParsedExpression* defaultExpression) + const ParsedStatement* defaultExpression) : _typeStatement(typeStatement), _typeMod(typeMod), _identifier(identifier), _defaultExpression(defaultExpression){}; @@ -135,10 +134,10 @@ namespace MalachScript::Parser { [[nodiscard]] Identifier& GetIdentifier() noexcept { return _identifier; } [[nodiscard]] const Identifier& GetIdentifier() const noexcept { return _identifier; } - [[nodiscard]] std::unique_ptr& GetDefaultExpression() noexcept { + [[nodiscard]] std::unique_ptr& GetDefaultExpression() noexcept { return _defaultExpression; } - [[nodiscard]] const std::unique_ptr& GetDefaultExpression() const noexcept { + [[nodiscard]] const std::unique_ptr& GetDefaultExpression() const noexcept { return _defaultExpression; } }; @@ -246,6 +245,68 @@ namespace MalachScript::Parser { FuncAttr _setAttr = FuncAttr::None; std::unique_ptr _setStatement; }; + + class ParsedStatBlockStatement : public ParsedStatementImpl { + public: + ParsedStatBlockStatement(const TextSpan& span, const std::vector& statements) + : ParsedStatementImpl(span), _statements(statements.size()) { + for (size_t i = 0; i < statements.size(); i++) { + _statements[i] = std::unique_ptr(statements[i]); + } + } + + [[nodiscard]] const std::vector>& GetStatements() const noexcept { + return _statements; + } + + private: + std::vector> _statements; + }; + + class ParsedVarStatement : public ParsedStatementImpl { + public: + ParsedVarStatement(const TextSpan& span, AccessModifier access, const ParsedStatement* typeStatement, + Identifier identifier) + : ParsedStatementImpl(span), _access(access), _typeStatement(typeStatement), _identifier(identifier) {} + + [[nodiscard]] AccessModifier GetAccess() const noexcept { return _access; } + [[nodiscard]] const std::unique_ptr& GetTypeStatement() const noexcept { + return _typeStatement; + } + [[nodiscard]] const Identifier& GetIdentifier() const noexcept { return _identifier; } + + private: + AccessModifier _access; + std::unique_ptr _typeStatement; + Identifier _identifier; + }; + + class ParsedIfStatement : public ParsedStatementImpl { + public: + ParsedIfStatement(const TextSpan& span, const ParsedStatement* condition, const ParsedStatement* body, + const ParsedStatement* elseStatement) + : ParsedStatementImpl(span), _condition(condition), _body(body), _elseStatement(elseStatement) {} + + private: + std::unique_ptr _condition; + std::unique_ptr _body; + std::unique_ptr _elseStatement; + }; + + template + class ParsedBinaryStatement : public ParsedStatementImpl { + public: + ParsedBinaryStatement(const TextSpan& span, const ParsedStatement* leftHand, TOperator op, + const ParsedStatement* rightHand) + : ParsedStatementImpl(span), _leftHand(leftHand), _operator(op), _rightHand(rightHand) {} + + [[nodiscard]] inline size_t GetOperatorType() const noexcept { return typeid(TOperator).hash_code(); } + + private: + std::unique_ptr _leftHand; + TOperator _operator; + std::unique_ptr _rightHand; + }; } #endif // MALACHSCRIPT_PARSEDSTATEMENT_HPP diff --git a/src/Parser/Statements/ParsedStatementKind.hpp b/src/Parser/Statements/ParsedStatementKind.hpp index d96b9b6..95f168f 100644 --- a/src/Parser/Statements/ParsedStatementKind.hpp +++ b/src/Parser/Statements/ParsedStatementKind.hpp @@ -12,6 +12,10 @@ namespace MalachScript::Parser { ParamList, Func, VirtProp, + StatBlock, + If, + Assign, + BinaryExpression }; }