PorygonLang/src/Parser/Parser.cpp

559 lines
27 KiB
C++

#include <algorithm>
#include <stdexcept>
#include <utility>
#include "Parser.hpp"
#include "ParsedStatements/ParsedStatement.hpp"
#include "UnaryOperatorKind.hpp"
#include "BinaryOperatorKind.hpp"
#include "TypedVariableIdentifier.hpp"
#include "ParsedExpressions/ParsedTableExpression.hpp"
namespace Porygon::Parser {
/// Parses the token list into a script statement.
/// Tokens are consumed one at a time and turned into statements until either
/// the token list is exhausted or an EndOfFile token has been consumed.
/// @return A newly allocated ParsedScriptStatement holding every parsed statement.
ParsedScriptStatement *Parser::Parse() {
    vector<const ParsedStatement *> parsed;
    while (_position < _tokens.size()) {
        const IToken *token = Next();
        const bool reachedEnd = token->GetKind() == TokenKind::EndOfFile;
        if (reachedEnd) {
            break;
        }
        parsed.push_back(ParseStatement(token));
    }
    return new ParsedScriptStatement(parsed);
}
/// Returns the token at the current position without consuming it.
/// No bounds check is performed on the position.
const IToken *Parser::Peek() {
    const auto index = this->_position;
    return this->_tokens[index];
}
/// Returns the token `offset` places away from the current position without
/// consuming anything. A negative offset looks at already consumed tokens.
/// No bounds check is performed on the resulting index.
const IToken *Parser::PeekAt(int offset) {
    const auto index = this->_position + offset;
    return this->_tokens[index];
}
/// Consumes and returns the token at the current position, advancing the
/// position by one. No bounds check is performed.
const IToken *Parser::Next() {
    return this->_tokens[this->_position++];
}
/// Parses a single statement.
/// @param current The first token of the statement; it has already been consumed.
/// @return The statement produced by the matching specialised parser, or an
///         expression statement when nothing more specific applies.
ParsedStatement *Parser::ParseStatement(const IToken *current) {
    // Statements introduced by a keyword each have their own parser.
    const auto kind = current->GetKind();
    if (kind == TokenKind::LocalKeyword) {
        return ParseVariableAssignment(current);
    }
    if (kind == TokenKind::FunctionKeyword) {
        return ParseFunctionDeclaration(current);
    }
    if (kind == TokenKind::ReturnKeyword) {
        return ParseReturnStatement(current);
    }
    if (kind == TokenKind::IfKeyword) {
        return ParseIfStatement(current);
    }
    if (kind == TokenKind::ForKeyword) {
        return ParseForStatement(current);
    }

    // A token directly followed by the assignment token is a plain variable assignment.
    if (Peek()->GetKind() == TokenKind::AssignmentToken) {
        return ParseVariableAssignment(current);
    }

    // Anything else is an expression; an indexer followed by the assignment
    // token turns into an index assignment.
    auto parsed = ParseExpression(current);
    const auto parsedKind = parsed->GetKind();
    const bool isIndexer = parsedKind == ParsedExpressionKind::Indexer ||
                           parsedKind == ParsedExpressionKind::PeriodIndexer;
    if (isIndexer && Peek()->GetKind() == TokenKind::AssignmentToken) {
        return ParseIndexAssignment(parsed);
    }
    return new ParsedExpressionStatement(parsed);
}
/// Parses `[local] <identifier> = <expression>`.
/// @param current The `local` keyword token, or the identifier token itself
///                when the assignment is not local. It has already been consumed.
/// @return A ParsedAssignmentStatement on success. When the identifier or the
///         assignment token is not of the expected kind, an UnexpectedToken
///         diagnostic is logged for the offending token and a ParsedBadStatement
///         covering that token is returned.
ParsedStatement *Parser::ParseVariableAssignment(const IToken *current) {
    const bool isLocal = current->GetKind() == TokenKind::LocalKeyword;
    // For a local assignment the identifier follows the keyword; otherwise
    // the token handed to us is the identifier.
    const IToken *identifier = isLocal ? this->Next() : current;
    auto assignmentToken = this->Next();
    // The value expression is parsed before validation so the tokens are
    // consumed even when the statement turns out to be malformed.
    // NOTE(review): on the error paths below the parsed expression is not
    // released; ownership rules for ParsedExpression are not visible here.
    auto expression = this->ParseExpression(this->Next());

    if (identifier->GetKind() != TokenKind::Identifier) {
        this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                                identifier->GetStartPosition(), identifier->GetLength());
        return new ParsedBadStatement(identifier->GetStartPosition(), identifier->GetLength());
    }
    if (assignmentToken->GetKind() != TokenKind::AssignmentToken) {
        // Report the token that actually is unexpected (the one found where
        // '=' should be), not the identifier preceding it.
        this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                                assignmentToken->GetStartPosition(),
                                                assignmentToken->GetLength());
        return new ParsedBadStatement(assignmentToken->GetStartPosition(), assignmentToken->GetLength());
    }

    auto start = current->GetStartPosition();
    return new ParsedAssignmentStatement(isLocal, ((IdentifierToken *) identifier)->GetValue(), expression, start,
                                         expression->GetEndPosition() - start);
}
/// Parses the right-hand side of `<indexer> = <expression>`.
/// @param indexer The already parsed index expression being assigned to.
/// @return A ParsedIndexAssignmentStatement spanning from the start of the
///         indexer to the end of the value expression.
ParsedStatement *Parser::ParseIndexAssignment(ParsedExpression *indexer) {
    this->Next(); // skip the assignment token
    const auto firstValueToken = this->Next();
    auto value = this->ParseExpression(firstValueToken);
    const auto begin = indexer->GetStartPosition();
    const auto length = value->GetEndPosition() - begin;
    return new ParsedIndexAssignmentStatement(indexer, value, begin, length);
}
/// Parses statements until one of the given end tokens has been consumed.
/// @param endTokens      Token kinds that terminate the block. The terminating
///                       token is consumed.
/// @param openStatements Statements that were already parsed and belong at the
///                       front of the block.
/// @return A new ParsedBlockStatement. When it holds no statements, the token
///         index at which parsing began is passed to the constructor as well.
///         Hitting EndOfFile before an end token logs an UnexpectedToken
///         diagnostic and stops parsing.
ParsedStatement *
Parser::ParseBlock(const vector<TokenKind> &endTokens, const vector<const ParsedStatement *> &openStatements) {
    vector<const ParsedStatement *> body = openStatements;
    const auto blockStart = this->_position;
    while (this->_position < this->_tokens.size()) {
        const auto token = this->Next();
        const auto kind = token->GetKind();
        const bool closesBlock = std::any_of(endTokens.begin(), endTokens.end(),
                                             [kind](TokenKind candidate) { return candidate == kind; });
        if (closesBlock) {
            break;
        }
        if (kind == TokenKind::EndOfFile) {
            this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                                    token->GetStartPosition(), token->GetLength());
            break;
        }
        body.push_back(this->ParseStatement(token));
    }
    if (!body.empty()) {
        return new ParsedBlockStatement(body);
    }
    return new ParsedBlockStatement(body, blockStart);
}
/// Parses `function <name>(<type> <identifier>, ...) <block> end`.
/// @param current The `function` keyword token; it has already been consumed.
/// @return A ParsedFunctionDeclarationStatement on success, or a
///         ParsedBadStatement spanning the whole declaration when the header
///         or the body is malformed. Within the header only the first problem
///         is normally reported as an UnexpectedToken diagnostic.
ParsedStatement *Parser::ParseFunctionDeclaration(const IToken *current) {
    // NOTE(review): the name and '(' tokens are read unconditionally, as in
    // the original; only the parameter loop below is guarded against running
    // past the end of the input.
    auto functionIdentifierToken = this->Next();
    auto openParenthesis = this->Next();
    vector<TypedVariableIdentifier *> parameters;
    bool hasErrors = false;
    if (functionIdentifierToken->GetKind() != TokenKind::Identifier) {
        this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                                functionIdentifierToken->GetStartPosition(),
                                                functionIdentifierToken->GetLength());
        hasErrors = true;
    }
    if (openParenthesis->GetKind() != TokenKind::OpenParenthesis && !hasErrors) {
        this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                                openParenthesis->GetStartPosition(), openParenthesis->GetLength());
        hasErrors = true;
    }

    // Parameter list: repeated `<type> <identifier>` pairs separated by commas.
    while (this->_position < this->_tokens.size()) {
        auto type = this->Next();
        if (type->GetKind() == TokenKind::CloseParenthesis) {
            break;
        }
        // Never read beyond the EndOfFile token: the two Next() calls below
        // would otherwise index past the end of the token list.
        if (type->GetKind() == TokenKind::EndOfFile) {
            if (!hasErrors) {
                this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                                        type->GetStartPosition(), type->GetLength());
            }
            hasErrors = true;
            break;
        }
        auto identifier = this->Next();
        if (identifier->GetKind() == TokenKind::EndOfFile) {
            if (!hasErrors) {
                this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                                        identifier->GetStartPosition(), identifier->GetLength());
            }
            hasErrors = true;
            break;
        }
        auto next = this->Next();
        if (type->GetKind() != TokenKind::Identifier && !hasErrors) {
            this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                                    type->GetStartPosition(), type->GetLength());
            hasErrors = true;
            continue;
        }
        if (identifier->GetKind() != TokenKind::Identifier && !hasErrors) {
            this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                                    identifier->GetStartPosition(), identifier->GetLength());
            hasErrors = true;
            continue;
        }
        // Reached with a malformed pair only when an earlier error was already
        // recorded; the pair is still reported and skipped.
        if (type->GetKind() != TokenKind::Identifier || identifier->GetKind() != TokenKind::Identifier) {
            this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                                    type->GetStartPosition(), type->GetLength());
            hasErrors = true;
            continue;
        }
        auto typeToken = (IdentifierToken *) type;
        auto identifierToken = (IdentifierToken *) identifier;
        parameters.push_back(new TypedVariableIdentifier(typeToken->GetValue(), identifierToken->GetValue()));

        auto nextKind = next->GetKind();
        if (nextKind == TokenKind::CloseParenthesis || nextKind == TokenKind::EndOfFile) {
            break;
        } else if (nextKind != TokenKind::CommaToken && !hasErrors) {
            this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                                    next->GetStartPosition(), next->GetLength());
            hasErrors = true;
        }
    }

    // The body is parsed even after header errors so its tokens are consumed.
    auto block = this->ParseBlock({TokenKind::EndKeyword});
    auto start = current->GetStartPosition();
    if (hasErrors) {
        return new ParsedBadStatement(start, block->GetEndPosition() - start);
    }
    if (block->GetKind() == ParsedStatementKind::Bad) {
        return new ParsedBadStatement(start, block->GetEndPosition() - start);
    }
    auto functionIdentifier = ((IdentifierToken *) functionIdentifierToken)->GetValue();
    return new ParsedFunctionDeclarationStatement(HashedString(functionIdentifier), parameters,
                                                  (ParsedBlockStatement *) block, start,
                                                  block->GetEndPosition() - start);
}
/// Parses a `return` statement.
/// A return value is only parsed when the following token starts on the same
/// line as the `return` keyword; otherwise the statement carries no expression.
/// @param current The `return` keyword token; it has already been consumed.
ParsedStatement *Parser::ParseReturnStatement(const IToken *current) {
    const auto begin = current->GetStartPosition();
    const auto keywordLine = this->ScriptData->Diagnostics->GetLineFromPosition(begin);
    const auto followingLine =
            this->ScriptData->Diagnostics->GetLineFromPosition(this->Peek()->GetStartPosition());
    if (keywordLine == followingLine) {
        auto value = this->ParseExpression(this->Next());
        return new ParsedReturnStatement(value, begin, value->GetEndPosition() - begin);
    }
    return new ParsedReturnStatement(nullptr, begin, current->GetLength());
}
/// Parses `if <condition> then <block> [elseif ... | else <block>] end`.
/// @param current The `if` (or `elseif`) keyword token; it has already been consumed.
/// @return A ParsedConditionalStatement, or a ParsedBadStatement when the
///         condition is not followed by `then` (an UnexpectedToken diagnostic
///         is logged for the token found instead).
ParsedStatement *Parser::ParseIfStatement(const IToken *current) {
    auto condition = this->ParseExpression(this->Next());
    const auto thenToken = this->Next();
    if (thenToken->GetKind() != TokenKind::ThenKeyword) {
        this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                                thenToken->GetStartPosition(), thenToken->GetLength());
        return new ParsedBadStatement(thenToken->GetStartPosition(), thenToken->GetLength());
    }

    auto body = this->ParseBlock({TokenKind::EndKeyword, TokenKind::ElseKeyword, TokenKind::ElseIfKeyword});
    // The token that terminated the block was consumed by ParseBlock, so it
    // sits one position behind the cursor.
    const auto terminator = this->PeekAt(-1);
    const auto begin = current->GetStartPosition();

    switch (terminator->GetKind()) {
        case TokenKind::ElseIfKeyword: {
            // An `elseif` is parsed as a nested if statement in the else slot.
            auto chained = this->ParseIfStatement(terminator);
            return new ParsedConditionalStatement(condition, body, chained, begin,
                                                  chained->GetEndPosition() - begin);
        }
        case TokenKind::ElseKeyword: {
            auto alternative = this->ParseBlock({TokenKind::EndKeyword});
            return new ParsedConditionalStatement(condition, body, alternative, begin,
                                                  alternative->GetEndPosition() - begin);
        }
        default:
            return new ParsedConditionalStatement(condition, body, begin, body->GetEndPosition() - begin);
    }
}
/// Parses a `for` statement by dispatching to the numeric or generic variant.
/// The token after the `for` keyword is consumed as the loop variable; when it
/// is directly followed by the assignment token the loop is numeric.
/// @param current The `for` keyword token. It is not used; the loop variable
///                token is what gets handed to the specialised parsers.
ParsedStatement *Parser::ParseForStatement(const IToken *current) {
    const auto loopVariable = this->Next();
    const bool isNumeric = this->Peek()->GetKind() == TokenKind::AssignmentToken;
    return isNumeric ? ParseNumericForStatement(loopVariable)
                     : ParseGenericForStatement(loopVariable);
}
/// Parses `<identifier> = <start>, <end>[, <step>] do <block> end`.
/// @param current The loop variable token; it has already been consumed and
///                the assignment token is next in the stream.
/// @return A ParsedNumericalForStatement on success. When the loop variable is
///         not an identifier, the comma is missing or `do` is missing, the
///         first such problem is logged as an UnexpectedToken diagnostic and a
///         ParsedBadStatement spanning the whole loop is returned.
ParsedStatement *Parser::ParseNumericForStatement(const IToken *current) {
    bool hasErrors = false;
    // The token must really be an identifier before it may be treated as an
    // IdentifierToken further down.
    if (current->GetKind() != TokenKind::Identifier) {
        hasErrors = true;
        this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                                current->GetStartPosition(), current->GetLength());
    }
    this->Next(); // consume assignment token
    auto start = this->ParseExpression(this->Next());
    auto comma = this->Next(); // consume comma token
    if (comma->GetKind() != TokenKind::CommaToken) {
        if (!hasErrors) {
            this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                                    comma->GetStartPosition(), comma->GetLength());
        }
        hasErrors = true;
    }
    auto end = this->ParseExpression(this->Next());
    // The step is optional and introduced by a second comma.
    ParsedExpression *step = nullptr;
    if (this->Peek()->GetKind() == TokenKind::CommaToken) {
        this->Next();
        step = this->ParseExpression(this->Next());
    }
    auto doToken = this->Next();
    if (doToken->GetKind() != TokenKind::DoKeyword && !hasErrors) {
        hasErrors = true;
        this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                                doToken->GetStartPosition(), doToken->GetLength());
    }
    // The body is parsed even after errors so its tokens are consumed.
    auto block = this->ParseBlock({TokenKind::EndKeyword});
    auto startPos = current->GetStartPosition();
    if (hasErrors) {
        return new ParsedBadStatement(startPos, block->GetEndPosition() - startPos);
    }
    // Safe: the kind was verified above, otherwise hasErrors would be set.
    auto identifier = (IdentifierToken *) current;
    return new ParsedNumericalForStatement(identifier->GetValue(), start, end, step, block, startPos,
                                           block->GetEndPosition() - startPos);
}
/// Placeholder for generic `for` loops, which are not implemented yet.
/// Instead of handing a null statement back to callers (which store it in
/// statement lists and call methods on it), the loop variable token is
/// reported as unexpected and a bad statement covering it is returned.
/// @param current The loop variable token; it has already been consumed.
// TODO: implement generic for loop parsing.
ParsedStatement *Parser::ParseGenericForStatement(const IToken *current) {
    this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                            current->GetStartPosition(), current->GetLength());
    return new ParsedBadStatement(current->GetStartPosition(), current->GetLength());
}
/////////////////
// Expressions //
/////////////////
/// Parses a complete expression.
/// @param current The first token of the expression; it has already been consumed.
/// @return The parsed expression. Function calls and indexers are bound to the
///         operand they follow (see ParseBinaryExpression), so `1 + f(2)` is
///         an addition whose right-hand side is a call.
ParsedExpression *Parser::ParseExpression(const IToken *current) {
    return this->ParseBinaryExpression(current, OperatorPrecedence::No);
}

/// Returns the precedence of `kind` when used as a unary operator, or
/// OperatorPrecedence::No when it is not a unary operator.
OperatorPrecedence GetUnaryPrecedence(TokenKind kind) {
    switch (kind) {
        case TokenKind::PlusToken:
        case TokenKind::MinusToken:
        case TokenKind::NotKeyword:
            return OperatorPrecedence::Unary;
        default:
            return OperatorPrecedence::No;
    }
}

/// Maps a token kind to the unary operator it denotes.
/// @throws std::logic_error when `kind` is not a unary operator. Callers are
///         expected to check GetUnaryPrecedence first, so this indicates a
///         programming error.
UnaryOperatorKind GetUnaryOperatorKind(TokenKind kind) {
    switch (kind) {
        case TokenKind::PlusToken:
            return UnaryOperatorKind::Identity;
        case TokenKind::MinusToken:
            return UnaryOperatorKind::Negation;
        case TokenKind::NotKeyword:
            return UnaryOperatorKind::LogicalNegation;
        default:
            // A bare `throw;` without an active exception would terminate the
            // process; throw a real exception instead.
            throw std::logic_error("GetUnaryOperatorKind: token kind is not a unary operator");
    }
}

/// Maps a token kind to the binary operator it denotes.
/// @throws std::logic_error when `kind` is not a binary operator. Callers are
///         expected to check GetBinaryPrecedence first, so this indicates a
///         programming error.
BinaryOperatorKind GetBinaryOperatorKind(TokenKind kind) {
    switch (kind) {
        // Math operators
        case TokenKind::PlusToken:
            return BinaryOperatorKind::Addition;
        case TokenKind::MinusToken:
            return BinaryOperatorKind::Subtraction;
        case TokenKind::StarToken:
            return BinaryOperatorKind::Multiplication;
        case TokenKind::SlashToken:
            return BinaryOperatorKind::Division;
        // Equality operators
        case TokenKind::EqualityToken:
            return BinaryOperatorKind::Equality;
        case TokenKind::InequalityToken:
            return BinaryOperatorKind::Inequality;
        case TokenKind::Less:
            return BinaryOperatorKind::Less;
        case TokenKind::LessEquals:
            return BinaryOperatorKind::LessOrEquals;
        case TokenKind::Greater:
            return BinaryOperatorKind::Greater;
        case TokenKind::GreaterEquals:
            return BinaryOperatorKind::GreaterOrEquals;
        // Logical operators
        case TokenKind::AndKeyword:
            return BinaryOperatorKind::LogicalAnd;
        case TokenKind::OrKeyword:
            return BinaryOperatorKind::LogicalOr;
        default:
            // A bare `throw;` without an active exception would terminate the
            // process; throw a real exception instead.
            throw std::logic_error("GetBinaryOperatorKind: token kind is not a binary operator");
    }
}

/// Returns the precedence of `kind` when used as a binary operator, or
/// OperatorPrecedence::No when it is not a binary operator.
OperatorPrecedence GetBinaryPrecedence(TokenKind kind) {
    switch (kind) {
        // Math
        case TokenKind::PlusToken:
        case TokenKind::MinusToken:
            return OperatorPrecedence::Additive;
        case TokenKind::StarToken:
        case TokenKind::SlashToken:
            return OperatorPrecedence::Multiplication;
        // Equality and comparison
        case TokenKind::EqualityToken:
        case TokenKind::InequalityToken:
        case TokenKind::Less:
        case TokenKind::LessEquals:
        case TokenKind::Greater:
        case TokenKind::GreaterEquals:
            return OperatorPrecedence::Equality;
        // Logical
        case TokenKind::AndKeyword:
            return OperatorPrecedence::LogicalAnd;
        case TokenKind::OrKeyword:
            return OperatorPrecedence::LogicalOr;
        default:
            return OperatorPrecedence::No;
    }
}

/// Parses a unary/binary expression using precedence climbing.
/// @param current          The first token of the (sub)expression; already consumed.
/// @param parentPrecedence Precedence of the operator this expression is an
///                         operand of. Binary operators that do not bind
///                         tighter than this are left for the caller.
/// @return The parsed expression tree.
ParsedExpression *Parser::ParseBinaryExpression(const IToken *current, OperatorPrecedence parentPrecedence) {
    OperatorPrecedence unaryPrecedence = GetUnaryPrecedence(current->GetKind());
    ParsedExpression *left;
    if (unaryPrecedence != OperatorPrecedence::No && unaryPrecedence >= parentPrecedence) {
        UnaryOperatorKind operatorKind = GetUnaryOperatorKind(current->GetKind());
        auto next = this->Next();
        auto operand = this->ParseBinaryExpression(next, unaryPrecedence);
        auto startPos = current->GetStartPosition();
        left = new UnaryExpression(operatorKind, operand, startPos, operand->GetEndPosition() - startPos);
    } else {
        left = this->ParsePrimaryExpression(current);
        // Calls and indexers bind tighter than any operator, so they are
        // attached to the primary operand right here rather than to the
        // finished binary expression. The position check covers postfix
        // parsers that consumed the final token while reporting an error.
        while (this->_position < this->_tokens.size()) {
            auto postfixKind = this->Peek()->GetKind();
            if (postfixKind == TokenKind::OpenParenthesis) {
                left = this->ParseFunctionCallExpression(left);
            } else if (postfixKind == TokenKind::OpenSquareBracket) {
                left = this->ParseIndexExpression(left);
            } else if (postfixKind == TokenKind::PeriodToken) {
                left = this->ParsePeriodIndexExpression(left);
            } else {
                break;
            }
        }
    }
    // Fold in binary operators that bind tighter than the parent operator.
    while (this->_position < this->_tokens.size()) {
        auto next = this->Peek();
        OperatorPrecedence binaryPrecedence = GetBinaryPrecedence(next->GetKind());
        if (binaryPrecedence == OperatorPrecedence::No || binaryPrecedence <= parentPrecedence) {
            break;
        }
        auto operatorKind = GetBinaryOperatorKind(next->GetKind());
        this->Next(); // consume the operator
        auto right = this->ParseBinaryExpression(this->Next(), binaryPrecedence);
        auto startPos = left->GetStartPosition();
        left = new BinaryExpression(operatorKind, left, right, startPos, right->GetEndPosition() - startPos);
    }
    return left;
}
/// Parses a primary expression: a literal, a variable, a parenthesized
/// expression or a table.
/// @param current The token to turn into an expression; already consumed.
/// @return The matching expression node, or a BadExpression covering the token
///         when it cannot start an expression.
ParsedExpression *Parser::ParsePrimaryExpression(const IToken *current) {
    const auto kind = current->GetKind();
    switch (kind) {
        case TokenKind::Integer:
            return new LiteralIntegerExpression((IntegerToken *) current);
        case TokenKind::Float:
            return new LiteralFloatExpression((FloatToken *) current);
        case TokenKind::String:
            return new LiteralStringExpression((StringToken *) current);
        case TokenKind::TrueKeyword:
        case TokenKind::FalseKeyword:
            return new LiteralBoolExpression(current);
        case TokenKind::Identifier:
            return new VariableExpression((IdentifierToken *) current);
        case TokenKind::OpenParenthesis:
            return this->ParseParenthesizedExpression(current);
        case TokenKind::OpenCurlyBracket:
            return this->ParseTableExpression(current);
        case TokenKind::BadToken:
            // A bad token should already have been reported by the lexer, so
            // no second diagnostic is logged for it.
            break;
        default:
            this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                                    current->GetStartPosition(), current->GetLength());
            break;
    }
    return new BadExpression(current->GetStartPosition(), current->GetLength());
}
/// Parses `( <expression> )`.
/// @param current The opening parenthesis token; already consumed.
/// @return A ParenthesizedExpression spanning both parentheses, or a
///         BadExpression covering the token found where the closing
///         parenthesis was expected (an UnexpectedToken diagnostic is logged).
ParsedExpression *Parser::ParseParenthesizedExpression(const IToken *current) {
    const auto firstInner = this->Next();
    auto inner = this->ParseExpression(firstInner);
    const auto closing = this->Next();
    if (closing->GetKind() == TokenKind::CloseParenthesis) {
        const auto begin = current->GetStartPosition();
        return new ParenthesizedExpression(inner, begin, closing->GetEndPosition() - begin);
    }
    this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                            closing->GetStartPosition(), closing->GetLength());
    return new BadExpression(closing->GetStartPosition(), closing->GetLength());
}
/// Parses the argument list of a function call.
/// @param functionExpression The expression being called; the opening
///                           parenthesis is the next token in the stream.
/// @return A FunctionCallExpression spanning from the start of the called
///         expression to the closing parenthesis, or a BadExpression covering
///         the offending token when the argument list is malformed (an
///         UnexpectedToken diagnostic is logged).
ParsedExpression *Parser::ParseFunctionCallExpression(ParsedExpression *functionExpression) {
    this->Next(); // skip the open parenthesis
    vector<const ParsedExpression *> arguments;
    auto token = this->Peek();
    auto kind = token->GetKind();
    if (kind == TokenKind::CloseParenthesis) {
        // Empty argument list.
        this->Next();
    } else {
        // At least one argument: parse `<expression>` followed by ',' or ')'.
        do {
            if (kind == TokenKind::EndOfFile) {
                this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                                        token->GetStartPosition(), token->GetLength());
                return new BadExpression(token->GetStartPosition(), token->GetLength());
            }
            arguments.push_back(this->ParseExpression(this->Next()));
            token = this->Next();
            kind = token->GetKind();
            const bool isSeparatorOrEnd =
                    kind == TokenKind::CloseParenthesis || kind == TokenKind::CommaToken;
            if (!isSeparatorOrEnd) {
                this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                                        token->GetStartPosition(), token->GetLength());
                return new BadExpression(token->GetStartPosition(), token->GetLength());
            }
        } while (kind != TokenKind::CloseParenthesis);
    }
    const auto begin = functionExpression->GetStartPosition();
    return new FunctionCallExpression(functionExpression, arguments, begin, token->GetEndPosition() - begin);
}
/// Parses `[ <expression> ]` applied to an already parsed expression.
/// @param indexingExpression The expression being indexed; the opening square
///                           bracket is the next token in the stream.
/// @return An IndexExpression spanning from the indexed expression to the
///         closing bracket, or a BadExpression covering the token found where
///         the closing bracket was expected (an UnexpectedToken diagnostic is
///         logged).
ParsedExpression *Parser::ParseIndexExpression(ParsedExpression *indexingExpression) {
    this->Next(); // skip '['
    auto key = this->ParseExpression(this->Next());
    const auto closing = this->Next();
    if (closing->GetKind() == TokenKind::CloseSquareBracket) {
        const auto begin = indexingExpression->GetStartPosition();
        return new IndexExpression(indexingExpression, key, begin, closing->GetEndPosition() - begin);
    }
    this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                            closing->GetStartPosition(), closing->GetLength());
    return new BadExpression(closing->GetStartPosition(), closing->GetLength());
}
/// Parses `.<identifier>` applied to an already parsed expression.
/// @param indexingExpression The expression being indexed; the period is the
///                           next token in the stream.
/// @return A PeriodIndexExpression spanning from the indexed expression to the
///         identifier, or a BadExpression over the same span when the token
///         after the period is not an identifier (an UnexpectedToken
///         diagnostic is logged for that token).
ParsedExpression *Parser::ParsePeriodIndexExpression(ParsedExpression *indexingExpression) {
    this->Next(); // skip '.'
    const auto member = this->Next();
    if (member->GetKind() == TokenKind::Identifier) {
        const auto begin = indexingExpression->GetStartPosition();
        return new PeriodIndexExpression(indexingExpression, ((IdentifierToken *) member)->GetValue(), begin,
                                         member->GetEndPosition() - begin);
    }
    this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                            member->GetStartPosition(), member->GetLength());
    return new BadExpression(indexingExpression->GetStartPosition(),
                             member->GetEndPosition() - indexingExpression->GetStartPosition());
}
/// Parses a table expression.
/// Three forms are recognised: the empty table `{}`, a list of
/// comma-separated expressions such as `{1, 2, 3}`, and a table whose contents
/// are parsed as a block of statements.
/// @param current The opening curly bracket token; already consumed.
/// @return A ParsedNumericalTableExpression for the first two forms, a
///         ParsedTableExpression for the block form, or a BadExpression when a
///         list is malformed (the first problem is logged as an
///         UnexpectedToken diagnostic).
ParsedExpression *Parser::ParseTableExpression(const IToken *current) {
    auto start = current->GetStartPosition();
    if (this->Peek()->GetKind() == TokenKind::CloseCurlyBracket) {
        // The table ends at the closing bracket itself, so measure against the
        // consumed token rather than whatever follows it.
        auto closeToken = this->Next();
        return new ParsedNumericalTableExpression({}, start, closeToken->GetEndPosition() - start);
    }
    auto firstItem = this->ParseStatement(this->Next());
    // If the first item is an expression, and is followed by a comma, we're dealing with a simple {1, 2, 3} kind of array
    if (firstItem->GetKind() == ParsedStatementKind::Expression &&
        (this->Peek()->GetKind() == TokenKind::CommaToken)) {
        // Take the expression out of its wrapping statement and discard the wrapper.
        auto statement = ((ParsedExpressionStatement *) firstItem);
        auto expr = statement->GetExpression();
        statement->NullifyExpression();
        delete statement;
        auto expressions = vector<const ParsedExpression *>{expr};
        auto n = this->Next(); // consume the comma
        bool hasErrors = false;
        while (n->GetKind() != TokenKind::CloseCurlyBracket) {
            // The end of the input was consumed as a separator: an error has
            // already been recorded for it, and reading on would run past the
            // token list.
            if (n->GetKind() == TokenKind::EndOfFile) {
                break;
            }
            // Likewise, do not consume the end of the input as an item.
            auto upcoming = this->Peek();
            if (upcoming->GetKind() == TokenKind::EndOfFile) {
                if (!hasErrors) {
                    this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                                            upcoming->GetStartPosition(), upcoming->GetLength());
                }
                hasErrors = true;
                n = upcoming;
                break;
            }
            auto expression = this->ParseExpression(this->Next());
            expressions.push_back(expression);
            n = this->Next();
            if (n->GetKind() != TokenKind::CommaToken && n->GetKind() != TokenKind::CloseCurlyBracket &&
                !hasErrors) {
                this->ScriptData->Diagnostics->LogError(Diagnostics::DiagnosticCode::UnexpectedToken,
                                                        n->GetStartPosition(), n->GetLength());
                hasErrors = true;
            }
        }
        if (hasErrors) {
            return new BadExpression(start, n->GetEndPosition() - start);
        }
        return new ParsedNumericalTableExpression(expressions, start, n->GetEndPosition() - start);
    }
    // Otherwise we have a more complex table, which can be defined by a block
    else {
        auto block = (ParsedBlockStatement *) this->ParseBlock({TokenKind::CloseCurlyBracket}, {firstItem});
        // ParseBlock consumed the closing bracket, so it sits one position behind the cursor.
        auto closeToken = this->PeekAt(-1);
        return new ParsedTableExpression(block, start, closeToken->GetEndPosition() - start);
    }
}
}