#include #include #include "Parser.hpp" #include "ParsedStatements/ParsedStatement.hpp" #include "UnaryOperatorKind.hpp" #include "BinaryOperatorKind.hpp" #include "TypedVariableIdentifier.hpp" #include "ParsedExpressions/ParsedTableExpression.hpp" namespace Porygon::Parser { ParsedScriptStatement *Parser::Parse() { vector statements; while (this->_position < this->_tokens.size()) { auto next = this->Next(); if (next->GetKind() == TokenKind::EndOfFile) { break; } statements.push_back(this->ParseStatement(next)); } return new ParsedScriptStatement(statements); } const IToken *Parser::Peek() { return this->_tokens[_position]; } const IToken *Parser::PeekAt(int offset) { return this->_tokens[_position + offset]; } const IToken *Parser::Next() { this->_position++; return this->_tokens[_position - 1]; } ParsedStatement *Parser::ParseStatement(const IToken *current) { auto currentKind = current->GetKind(); switch (currentKind) { case TokenKind::LocalKeyword: return this->ParseVariableAssignment(current); case TokenKind::FunctionKeyword: return this->ParseFunctionDeclaration(current); case TokenKind::ReturnKeyword: return this->ParseReturnStatement(current); case TokenKind::IfKeyword: return this->ParseIfStatement(current); default: break; } if (this->Peek()->GetKind() == TokenKind::AssignmentToken) { return ParseVariableAssignment(current); } auto expression = this->ParseExpression(current); auto expKind = expression->GetKind(); if ((expKind == ParsedExpressionKind::Indexer || expKind == ParsedExpressionKind::PeriodIndexer) && this->Peek()->GetKind() == TokenKind::AssignmentToken) { return this->ParseIndexAssignment(expression); } return new ParsedExpressionStatement(expression); } ParsedStatement *Parser::ParseVariableAssignment(const IToken *current) { bool isLocal = false; const IToken *identifier; if (current->GetKind() == TokenKind::LocalKeyword) { isLocal = true; identifier = this->Next(); } else { identifier = current; } auto assignmentToken = this->Next(); auto expression = this->ParseExpression(this->Next()); if (identifier->GetKind() != TokenKind::Identifier) { this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, identifier->GetStartPosition(), identifier->GetLength()); return new ParsedBadStatement(identifier->GetStartPosition(), identifier->GetLength()); } if (assignmentToken->GetKind() != TokenKind::AssignmentToken) { this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, identifier->GetStartPosition(), identifier->GetLength()); return new ParsedBadStatement(identifier->GetStartPosition(), identifier->GetLength()); } auto start = current->GetStartPosition(); return new ParsedAssignmentStatement(isLocal, ((IdentifierToken *) identifier)->GetValue(), expression, start, expression->GetEndPosition() - start); } ParsedStatement *Parser::ParseIndexAssignment(ParsedExpression *indexer) { this->Next(); // Consume assignment token auto valueExpression = this->ParseExpression(this->Next()); auto start = indexer->GetStartPosition(); return new ParsedIndexAssignmentStatement(indexer, valueExpression, start, valueExpression->GetEndPosition() - start); } ParsedStatement * Parser::ParseBlock(const vector &endTokens, const vector &openStatements) { auto statements = openStatements; auto start = this->_position; while (this->_position < this->_tokens.size()) { auto next = this->Next(); auto nextKind = next->GetKind(); if (std::find(endTokens.begin(), endTokens.end(), nextKind) != endTokens.end()) { break; } if (nextKind == TokenKind::EndOfFile) { this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, next->GetStartPosition(), next->GetLength()); break; } statements.push_back(this->ParseStatement(next)); } if (statements.empty()) { return new ParsedBlockStatement(statements, start); } return new ParsedBlockStatement(statements); } ParsedStatement *Parser::ParseFunctionDeclaration(const IToken *current) { auto functionIdentifierToken = this->Next(); auto openParenthesis = this->Next(); vector parameters; bool hasErrors = false; if (functionIdentifierToken->GetKind() != TokenKind::Identifier) { this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, functionIdentifierToken->GetStartPosition(), functionIdentifierToken->GetLength()); hasErrors = true; } if (openParenthesis->GetKind() != TokenKind::OpenParenthesis && !hasErrors) { this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, openParenthesis->GetStartPosition(), openParenthesis->GetLength()); hasErrors = true; } while (this->_position < this->_tokens.size()) { auto type = this->Next(); if (type->GetKind() == TokenKind::CloseParenthesis) { break; } auto identifier = this->Next(); auto next = this->Next(); if (type->GetKind() != TokenKind::Identifier && !hasErrors) { this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, type->GetStartPosition(), type->GetLength()); hasErrors = true; continue; } if (identifier->GetKind() != TokenKind::Identifier && !hasErrors) { this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, identifier->GetStartPosition(), identifier->GetLength()); hasErrors = true; continue; } if (type->GetKind() != TokenKind::Identifier || identifier->GetKind() != TokenKind::Identifier) { this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, type->GetStartPosition(), type->GetLength()); hasErrors = true; continue; } auto typeToken = (IdentifierToken *) type; auto identifierToken = (IdentifierToken *) identifier; parameters.push_back(new TypedVariableIdentifier(typeToken->GetValue(), identifierToken->GetValue())); auto nextKind = next->GetKind(); if (nextKind == TokenKind::CloseParenthesis || nextKind == TokenKind::EndOfFile) { break; } else if (nextKind != TokenKind::CommaToken && !hasErrors) { this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, next->GetStartPosition(), next->GetLength()); hasErrors = true; } } auto block = this->ParseBlock({TokenKind::EndKeyword}); auto start = current->GetStartPosition(); if (hasErrors) { return new ParsedBadStatement(start, block->GetEndPosition() - start); } if (block->GetKind() == ParsedStatementKind::Bad) { return new ParsedBadStatement(start, block->GetEndPosition() - start); } auto functionIdentifier = ((IdentifierToken *) functionIdentifierToken)->GetValue(); return new ParsedFunctionDeclarationStatement(HashedString(functionIdentifier), parameters, (ParsedBlockStatement *) block, start, block->GetEndPosition() - start); } ParsedStatement *Parser::ParseReturnStatement(const IToken *current) { //TODO: if next token is on a different line, don't parse it as return expression. auto expression = this->ParseExpression(this->Next()); auto start = current->GetStartPosition(); return new ParsedReturnStatement(expression, start, expression->GetEndPosition() - start); } ParsedStatement *Parser::ParseIfStatement(const IToken *current) { auto condition = this->ParseExpression(this->Next()); auto next = this->Next(); if (next->GetKind() != TokenKind::ThenKeyword) { this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, next->GetStartPosition(), next->GetLength()); return new ParsedBadStatement(next->GetStartPosition(), next->GetLength()); } auto block = this->ParseBlock({TokenKind::EndKeyword, TokenKind::ElseKeyword, TokenKind::ElseIfKeyword}); auto closeToken = this->PeekAt(-1); auto start = current->GetStartPosition(); if (closeToken->GetKind() == TokenKind::ElseIfKeyword) { auto elseIfStatement = this->ParseIfStatement(closeToken); return new ParsedConditionalStatement(condition, block, elseIfStatement, start, elseIfStatement->GetEndPosition() - start); } else if (closeToken->GetKind() == TokenKind::ElseKeyword) { auto elseStatement = this->ParseBlock({TokenKind::EndKeyword}); return new ParsedConditionalStatement(condition, block, elseStatement, start, elseStatement->GetEndPosition() - start); } return new ParsedConditionalStatement(condition, block, start, block->GetEndPosition() - start); } ParsedExpression *Parser::ParseExpression(const IToken *current) { auto expression = this->ParseBinaryExpression(current, OperatorPrecedence::No); auto peekKind = this->Peek()->GetKind(); while (peekKind == TokenKind::OpenParenthesis || peekKind == TokenKind::OpenSquareBracket || peekKind == TokenKind::PeriodToken) { if (peekKind == TokenKind::OpenParenthesis) { expression = this->ParseFunctionCallExpression(expression); } else if (peekKind == TokenKind::OpenSquareBracket) { expression = this->ParseIndexExpression(expression); } else { expression = this->ParsePeriodIndexExpression(expression); } if (this->_position >= this->_tokens.size()) break; peekKind = this->Peek()->GetKind(); } return expression; } OperatorPrecedence GetUnaryPrecedence(TokenKind kind) { switch (kind) { case TokenKind::PlusToken: case TokenKind::MinusToken: case TokenKind::NotKeyword: return OperatorPrecedence::Unary; default: return OperatorPrecedence::No; } } UnaryOperatorKind GetUnaryOperatorKind(TokenKind kind) { switch (kind) { case TokenKind::PlusToken: return UnaryOperatorKind::Identity; case TokenKind::MinusToken: return UnaryOperatorKind::Negation; case TokenKind::NotKeyword: return UnaryOperatorKind::LogicalNegation; default: // This should never trigger, so throw. throw; } } BinaryOperatorKind GetBinaryOperatorKind(TokenKind kind) { switch (kind) { // Math operators case TokenKind::PlusToken: return BinaryOperatorKind::Addition; case TokenKind::MinusToken: return BinaryOperatorKind::Subtraction; case TokenKind::StarToken: return BinaryOperatorKind::Multiplication; case TokenKind::SlashToken: return BinaryOperatorKind::Division; // Equality operators case TokenKind::EqualityToken: return BinaryOperatorKind::Equality; case TokenKind::InequalityToken: return BinaryOperatorKind::Inequality; case TokenKind::Less: return BinaryOperatorKind::Less; case TokenKind::LessEquals: return BinaryOperatorKind::LessOrEquals; case TokenKind::Greater: return BinaryOperatorKind::Greater; case TokenKind::GreaterEquals: return BinaryOperatorKind::GreaterOrEquals; // logical operators case TokenKind::AndKeyword: return BinaryOperatorKind::LogicalAnd; case TokenKind::OrKeyword: return BinaryOperatorKind::LogicalOr; default: // This should never trigger, so throw. throw; } } OperatorPrecedence GetBinaryPrecedence(TokenKind kind) { switch (kind) { // Math case TokenKind::PlusToken: return OperatorPrecedence::Additive; case TokenKind::MinusToken: return OperatorPrecedence::Additive; case TokenKind::StarToken: return OperatorPrecedence::Multiplication; case TokenKind::SlashToken: return OperatorPrecedence::Multiplication; // Equality case TokenKind::EqualityToken: return OperatorPrecedence::Equality; case TokenKind::InequalityToken: return OperatorPrecedence::Equality; case TokenKind::Less: return OperatorPrecedence::Equality; case TokenKind::LessEquals: return OperatorPrecedence::Equality; case TokenKind::Greater: return OperatorPrecedence::Equality; case TokenKind::GreaterEquals: return OperatorPrecedence::Equality; // Logical case TokenKind::AndKeyword: return OperatorPrecedence::LogicalAnd; case TokenKind::OrKeyword: return OperatorPrecedence::LogicalOr; default: return OperatorPrecedence::No; } } ParsedExpression *Parser::ParseBinaryExpression(const IToken *current, OperatorPrecedence parentPrecedence) { OperatorPrecedence unaryPrecedence = GetUnaryPrecedence(current->GetKind()); ParsedExpression *left; if (unaryPrecedence != OperatorPrecedence::No && unaryPrecedence >= parentPrecedence) { UnaryOperatorKind operatorKind = GetUnaryOperatorKind(current->GetKind()); auto next = this->Next(); auto operand = this->ParseBinaryExpression(next, unaryPrecedence); auto startPos = current->GetStartPosition(); left = new UnaryExpression(operatorKind, operand, startPos, operand->GetEndPosition() - startPos); } else { left = this->ParsePrimaryExpression(current); } while (true) { auto next = this->Peek(); OperatorPrecedence binaryPrecedence = GetBinaryPrecedence(next->GetKind()); if (binaryPrecedence == OperatorPrecedence::No || binaryPrecedence <= parentPrecedence) { break; } auto operatorKind = GetBinaryOperatorKind(next->GetKind()); this->Next(); auto right = this->ParseBinaryExpression(this->Next(), binaryPrecedence); auto startPos = left->GetStartPosition(); left = new BinaryExpression(operatorKind, left, right, startPos, right->GetEndPosition() - startPos); } return left; } ParsedExpression *Parser::ParsePrimaryExpression(const IToken *current) { switch (current->GetKind()) { case TokenKind::Integer: return new LiteralIntegerExpression((IntegerToken *) current); case TokenKind::Float: return new LiteralFloatExpression((FloatToken *) current); case TokenKind::String: return new LiteralStringExpression((StringToken *) current); case TokenKind::TrueKeyword: return new LiteralBoolExpression(current); case TokenKind::FalseKeyword: return new LiteralBoolExpression(current); case TokenKind::Identifier: return new VariableExpression((IdentifierToken *) current); case TokenKind::OpenParenthesis: return this->ParseParenthesizedExpression(current); case TokenKind::OpenCurlyBracket: return this->ParseTableExpression(current); // If we find a bad token here, we should have already logged it in the lexer, so don't log another error. case TokenKind::BadToken: return new BadExpression(current->GetStartPosition(), current->GetLength()); default: this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, current->GetStartPosition(), current->GetLength()); return new BadExpression(current->GetStartPosition(), current->GetLength()); } } ParsedExpression *Parser::ParseParenthesizedExpression(const IToken *current) { auto next = this->Next(); auto expression = this->ParseExpression(next); auto closeToken = this->Next(); if (closeToken->GetKind() != TokenKind::CloseParenthesis) { this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, closeToken->GetStartPosition(), closeToken->GetLength()); return new BadExpression(closeToken->GetStartPosition(), closeToken->GetLength()); } auto start = current->GetStartPosition(); return new ParenthesizedExpression(expression, start, closeToken->GetEndPosition() - start); } ParsedExpression *Parser::ParseFunctionCallExpression(ParsedExpression *functionExpression) { this->Next(); // consume the open parenthesis vector parameters; auto peeked = this->Peek(); auto peekedKind = peeked->GetKind(); if (peekedKind == TokenKind::CloseParenthesis) { this->Next(); } else { while (peekedKind != TokenKind::CloseParenthesis) { if (peekedKind == TokenKind::EndOfFile) { this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, peeked->GetStartPosition(), peeked->GetLength()); return new BadExpression(peeked->GetStartPosition(), peeked->GetLength()); } parameters.push_back(this->ParseExpression(this->Next())); peeked = this->Next(); peekedKind = peeked->GetKind(); if (peekedKind != TokenKind::CloseParenthesis && peekedKind != TokenKind::CommaToken) { this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, peeked->GetStartPosition(), peeked->GetLength()); return new BadExpression(peeked->GetStartPosition(), peeked->GetLength()); } } } auto start = functionExpression->GetStartPosition(); return new FunctionCallExpression(functionExpression, parameters, start, peeked->GetEndPosition() - start); } ParsedExpression *Parser::ParseIndexExpression(ParsedExpression *indexingExpression) { this->Next(); // consume '[' token auto indexExpression = this->ParseExpression(this->Next()); auto closeBracket = this->Next(); if (closeBracket->GetKind() != TokenKind::CloseSquareBracket) { this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, closeBracket->GetStartPosition(), closeBracket->GetLength()); return new BadExpression(closeBracket->GetStartPosition(), closeBracket->GetLength()); } auto start = indexingExpression->GetStartPosition(); return new IndexExpression(indexingExpression, indexExpression, start, closeBracket->GetEndPosition() - start); } ParsedExpression *Parser::ParsePeriodIndexExpression(ParsedExpression *indexingExpression) { this->Next(); // consume '.' token auto identifier = this->Next(); if (identifier->GetKind() != TokenKind::Identifier) { this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, identifier->GetStartPosition(), identifier->GetLength()); return new BadExpression(indexingExpression->GetStartPosition(), identifier->GetEndPosition() - indexingExpression->GetStartPosition()); } auto start = indexingExpression->GetStartPosition(); return new PeriodIndexExpression(indexingExpression, ((IdentifierToken *) identifier)->GetValue(), start, identifier->GetEndPosition() - start); } ParsedExpression *Parser::ParseTableExpression(const IToken *current) { if (this->Peek()->GetKind() == TokenKind::CloseCurlyBracket) { this->Next(); auto start = current->GetStartPosition(); return new ParsedNumericalTableExpression({}, start, this->Peek()->GetEndPosition() - start); } auto start = current->GetStartPosition(); auto firstItem = this->ParseStatement(this->Next()); // If the first item is an expression, and is followed by a comma, we're dealing with a simple {1, 2, 3} kind of array if (firstItem->GetKind() == ParsedStatementKind::Expression && (this->Peek()->GetKind() == TokenKind::CommaToken)) { auto expr = ((ParsedExpressionStatement *) firstItem)->GetExpression(); auto expressions = vector{expr}; auto n = this->Next(); // consume the comma bool hasErrors = false; while (n->GetKind() != TokenKind::CloseCurlyBracket) { auto expression = this->ParseExpression(this->Next()); expressions.push_back(expression); n = this->Next(); if (n->GetKind() != TokenKind::CommaToken && n->GetKind() != TokenKind::CloseCurlyBracket && !hasErrors) { this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, n->GetStartPosition(), n->GetLength()); hasErrors = true; } } if (hasErrors) { return new BadExpression(start, n->GetEndPosition() - start); } return new ParsedNumericalTableExpression(expressions, start, n->GetEndPosition() - start); } // Otherwise we have a more complex table, which can be defined by a block else { auto block = (ParsedBlockStatement *) this->ParseBlock({TokenKind::CloseCurlyBracket}, {firstItem}); auto closeToken = this->PeekAt(-1); return new ParsedTableExpression(block, start, closeToken->GetEndPosition() - start); } } }