PorygonLang/src/Parser/Parser.cpp

409 lines
19 KiB
C++
Raw Normal View History

#include <utility>
#include <algorithm>
#include <stdexcept>
#include "Parser.hpp"
#include "ParsedStatements/ParsedStatement.hpp"
#include "UnaryOperatorKind.hpp"
#include "BinaryOperatorKind.hpp"
#include "TypedVariableIdentifier.hpp"
#include "ParsedExpressions/ParsedTableExpression.hpp"
2019-05-20 15:45:03 +00:00
// Parses the complete token list into a single script statement.
// Tokens are pulled one at a time; each token that is not EndOfFile is handed
// to ParseStatement as the first token of a statement. Parsing ends at the
// first EndOfFile token, or once the position reaches the end of the list.
// Returns a newly allocated ParsedScriptStatement holding the parsed statements.
ParsedScriptStatement* Parser::Parse() {
    vector<const ParsedStatement*> parsedStatements;
    for (;;) {
        if (this->_position >= this->_tokens.size()) {
            break;
        }
        auto token = this->Next();
        if (token->GetKind() == TokenKind::EndOfFile) {
            break;
        }
        parsedStatements.push_back(this->ParseStatement(token));
    }
    return new ParsedScriptStatement(parsedStatements);
}
2019-05-26 12:20:40 +00:00
// Returns the token at the current position without consuming it.
// If the position has already moved past the last token, the last token is
// returned instead of indexing outside the token list.
// NOTE(review): assumes the token list is non-empty (presumably the lexer always
// emits a trailing EndOfFile token) — confirm.
IToken *Parser::Peek() {
    if (this->_position >= this->_tokens.size()) {
        return this->_tokens.back();
    }
    return this->_tokens[this->_position];
}
// Returns the token `offset` positions away from the current position without
// consuming anything. Negative offsets look backwards (PeekAt(-1) is the token
// most recently returned by Next()).
// The resulting index is clamped to the valid range of the token list, so an
// offset pointing before the first or after the last token yields the first or
// last token respectively instead of reading out of bounds.
// NOTE(review): assumes the token list is non-empty — confirm.
IToken *Parser::PeekAt(int offset) {
    // Signed arithmetic so that a negative offset cannot wrap around.
    const long long tokenCount = static_cast<long long>(this->_tokens.size());
    long long index = static_cast<long long>(this->_position) + offset;
    if (index >= tokenCount) {
        index = tokenCount - 1;
    }
    if (index < 0) {
        index = 0;
    }
    return this->_tokens[index];
}
2019-05-26 12:20:40 +00:00
// Consumes and returns the token at the current position.
// Once every token has been consumed the position is no longer advanced and the
// last token is returned again, so callers never index past the token list.
// The position still reaches `_tokens.size()` after the final token is consumed,
// which keeps the `_position < _tokens.size()` loop conditions used by the
// parsing functions working as before.
// NOTE(review): assumes the token list is non-empty (presumably it always ends
// with an EndOfFile token) — confirm.
IToken *Parser::Next() {
    if (this->_position >= this->_tokens.size()) {
        return this->_tokens.back();
    }
    return this->_tokens[this->_position++];
}
2019-05-20 15:45:03 +00:00
// Parses one statement whose first (already consumed) token is `current`.
// Keyword tokens select the matching statement parser. Otherwise, when the
// upcoming token is an assignment token the statement is parsed as an
// assignment, and in every other case it is parsed as an expression statement.
ParsedStatement* Parser::ParseStatement(IToken* current){
    const auto kind = current->GetKind();
    if (kind == TokenKind::LocalKeyword) {
        return this->ParseAssignment(current);
    }
    if (kind == TokenKind::FunctionKeyword) {
        return this->ParseFunctionDeclaration(current);
    }
    if (kind == TokenKind::ReturnKeyword) {
        return this->ParseReturnStatement(current);
    }
    if (kind == TokenKind::IfKeyword) {
        return this->ParseIfStatement(current);
    }

    // No keyword: "<something> = ..." is an assignment, anything else an expression.
    const bool followedByAssignment = this->Peek()->GetKind() == TokenKind::AssignmentToken;
    if (followedByAssignment) {
        return ParseAssignment(current);
    }
    return new ParsedExpressionStatement(this->ParseExpression(current));
}
2019-05-26 12:20:40 +00:00
// Parses an assignment of the form `[local] identifier = expression`.
// `current` is the already consumed first token: either the `local` keyword or
// the identifier itself.
// The right-hand side is parsed before the identifier and assignment tokens are
// validated, so its tokens are consumed even when the statement is malformed.
// Returns a ParsedAssignmentStatement on success. If the identifier or the
// assignment token has the wrong kind, an UnexpectedToken diagnostic is logged
// for that token and a ParsedBadStatement spanning it is returned.
// NOTE(review): the already parsed expression is not released on the error
// paths — confirm who is expected to own it.
ParsedStatement *Parser::ParseAssignment(IToken *current) {
    const bool isLocal = current->GetKind() == TokenKind::LocalKeyword;
    IToken* identifier = isLocal ? this->Next() : current;
    auto assignmentToken = this->Next();
    auto expression = this->ParseExpression(this->Next());

    if (identifier->GetKind() != TokenKind::Identifier){
        this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, identifier->GetStartPosition(), identifier->GetLength());
        return new ParsedBadStatement(identifier->GetStartPosition(), identifier->GetLength());
    }
    if (assignmentToken->GetKind() != TokenKind::AssignmentToken){
        // Report the token that is actually wrong, not the (valid) identifier before it.
        this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, assignmentToken->GetStartPosition(), assignmentToken->GetLength());
        return new ParsedBadStatement(assignmentToken->GetStartPosition(), assignmentToken->GetLength());
    }

    auto start = current->GetStartPosition();
    return new ParsedAssignmentStatement(isLocal, ((IdentifierToken*)identifier)->Value, expression, start, expression->GetEndPosition() - start);
}
// Parses statements until one of `endTokens` is consumed.
// `openStatements` are statements that already belong to the block; the newly
// parsed statements are appended after them.
// Reaching EndOfFile before an end token logs an UnexpectedToken diagnostic and
// stops the block. The terminating token is consumed in both cases.
// An empty block is constructed with the token position at which the block
// started; a non-empty block is constructed from its statements alone.
ParsedStatement *Parser::ParseBlock(const vector<TokenKind>& endTokens, const vector<const ParsedStatement*>& openStatements) {
    vector<const ParsedStatement*> blockStatements(openStatements);
    auto blockStart = this->_position;

    while (this->_position < this->_tokens.size()){
        auto token = this->Next();
        auto kind = token->GetKind();

        const bool closesBlock = std::find(endTokens.begin(), endTokens.end(), kind) != endTokens.end();
        if (closesBlock){
            break;
        }
        if (kind == TokenKind::EndOfFile){
            this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, token->GetStartPosition(), token->GetLength());
            break;
        }
        blockStatements.push_back(this->ParseStatement(token));
    }

    if (!blockStatements.empty()){
        return new ParsedBlockStatement(blockStatements);
    }
    return new ParsedBlockStatement(blockStatements, blockStart);
}
// Parses `function name(type identifier, ...) <block> end`.
// `current` is the already consumed `function` keyword token.
// Parameters are read as (type, identifier, separator) token triples until a
// closing parenthesis is found. The body is always parsed, even after an error,
// so the tokens of the function are consumed either way.
// Returns a ParsedFunctionDeclarationStatement on success, or a
// ParsedBadStatement spanning the declaration when a diagnostic was logged
// here or the parsed block is of kind Bad.
// Running into EndOfFile inside the parameter list is reported as an
// UnexpectedToken and ends the parameter list, instead of continuing to pull
// tokens past the end of the token list.
// NOTE(review): parameters collected before an error are not released on the
// error paths — confirm who is expected to own them.
ParsedStatement *Parser::ParseFunctionDeclaration(IToken *current) {
    auto functionIdentifierToken = this->Next();
    auto openParenthesis = this->Next();
    vector<TypedVariableIdentifier*> parameters;
    bool hasErrors = false;

    // Logs an UnexpectedToken diagnostic covering the given token.
    auto reportUnexpected = [this](IToken* token){
        this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, token->GetStartPosition(), token->GetLength());
    };

    if (functionIdentifierToken->GetKind() != TokenKind::Identifier){
        reportUnexpected(functionIdentifierToken);
        hasErrors = true;
    }
    if (openParenthesis->GetKind() != TokenKind::OpenParenthesis && !hasErrors){
        reportUnexpected(openParenthesis);
        hasErrors = true;
    }

    while (this->_position < this->_tokens.size()){
        auto type = this->Next();
        auto typeKind = type->GetKind();
        if (typeKind == TokenKind::CloseParenthesis){
            break;
        }
        if (typeKind == TokenKind::EndOfFile){
            // Unterminated parameter list: stop here rather than reading further tokens.
            if (!hasErrors){
                reportUnexpected(type);
            }
            hasErrors = true;
            break;
        }

        auto identifier = this->Next();
        auto identifierKind = identifier->GetKind();
        if (identifierKind == TokenKind::EndOfFile){
            if (!hasErrors){
                reportUnexpected(identifier);
            }
            hasErrors = true;
            break;
        }
        auto next = this->Next();

        const bool typeIsValid = typeKind == TokenKind::Identifier;
        const bool identifierIsValid = identifierKind == TokenKind::Identifier;
        if (!typeIsValid || !identifierIsValid){
            // The first error reports the offending token. Once an error has been
            // seen, every further invalid pair is reported at its type token.
            // NOTE(review): the other checks in this function only report the
            // first error; confirm the repeated reporting here is intended.
            if (!hasErrors && typeIsValid){
                reportUnexpected(identifier);
            } else{
                reportUnexpected(type);
            }
            hasErrors = true;
            continue;
        }

        auto typeToken = (IdentifierToken*)type;
        auto identifierToken = (IdentifierToken*)identifier;
        parameters.push_back(new TypedVariableIdentifier(HashedString(typeToken->Value), HashedString(identifierToken->Value)));

        auto nextKind = next->GetKind();
        if (nextKind == TokenKind::CloseParenthesis){
            break;
        }
        if (nextKind == TokenKind::EndOfFile){
            // The list was never closed; the block parser below will not see
            // this token any more, so it has to be reported here.
            if (!hasErrors){
                reportUnexpected(next);
            }
            hasErrors = true;
            break;
        }
        if (nextKind != TokenKind::CommaToken && !hasErrors){
            reportUnexpected(next);
            hasErrors = true;
        }
    }

    auto block = this->ParseBlock({TokenKind::EndKeyword});
    auto start = current->GetStartPosition();
    if (hasErrors || block->GetKind() == ParsedStatementKind::Bad){
        return new ParsedBadStatement(start, block->GetEndPosition() - start);
    }
    // No errors were logged, so the name token is known to be an identifier.
    auto functionIdentifier = ((IdentifierToken*) functionIdentifierToken)->Value;
    return new ParsedFunctionDeclarationStatement(HashedString(functionIdentifier), parameters, (ParsedBlockStatement*)block, start, block->GetEndPosition() - start);
}
2019-06-07 13:23:13 +00:00
// Parses `return <expression>`; `current` is the already consumed `return`
// keyword token. The statement spans from the keyword to the end of the
// returned expression.
ParsedStatement* Parser::ParseReturnStatement(IToken* current){
    //TODO: if next token is on a different line, don't parse it as return expression.
    auto firstExpressionToken = this->Next();
    auto returnValue = this->ParseExpression(firstExpressionToken);
    auto start = current->GetStartPosition();
    auto length = returnValue->GetEndPosition() - start;
    return new ParsedReturnStatement(returnValue, start, length);
}
2019-06-09 10:48:14 +00:00
// Parses `if <condition> then <block>` followed by `end`, `else <block> end`,
// or `elseif ...`. `current` is the already consumed `if` (or `elseif`) token.
// A missing `then` logs an UnexpectedToken diagnostic and yields a
// ParsedBadStatement for the offending token.
// The token that terminated the block decides how the statement continues:
// `elseif` recurses with that token as the start of the nested conditional,
// `else` parses one more block up to `end`, anything else closes the statement.
ParsedStatement* Parser::ParseIfStatement(IToken* current){
    auto condition = this->ParseExpression(this->Next());

    auto thenToken = this->Next();
    if (thenToken->GetKind() != TokenKind::ThenKeyword){
        this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, thenToken->GetStartPosition(), thenToken->GetLength());
        return new ParsedBadStatement(thenToken->GetStartPosition(), thenToken->GetLength());
    }

    auto block = this->ParseBlock({TokenKind::EndKeyword, TokenKind::ElseKeyword, TokenKind::ElseIfKeyword});
    // The block parser consumed its terminator, so it is the previous token.
    auto closeToken = this->PeekAt(-1);
    auto start = current->GetStartPosition();

    switch (closeToken->GetKind()){
        case TokenKind::ElseIfKeyword: {
            auto elseIfStatement = this->ParseIfStatement(closeToken);
            return new ParsedConditionalStatement(condition, block, elseIfStatement, start, elseIfStatement->GetEndPosition() - start);
        }
        case TokenKind::ElseKeyword: {
            auto elseStatement = this->ParseBlock({TokenKind::EndKeyword});
            return new ParsedConditionalStatement(condition, block, elseStatement, start, elseStatement->GetEndPosition() - start);
        }
        default:
            return new ParsedConditionalStatement(condition, block, start, block->GetEndPosition() - start);
    }
}
2019-05-20 15:45:03 +00:00
// Parses a full expression starting at the already consumed token `current`.
// A binary expression is parsed first; afterwards any number of trailing
// function calls `(...)` and index operations `[...]` are applied to it, each
// one wrapping the expression parsed so far.
// A trailing period token is not supported yet. It is left unconsumed and ends
// the expression, because looping on it without consuming anything would never
// terminate.
ParsedExpression* Parser::ParseExpression(IToken* current){
    auto expression = this->ParseBinaryExpression(current, OperatorPrecedence::No);
    // Only peek while tokens remain, so the token list is never read past its end.
    while (this->_position < this->_tokens.size()){
        auto peekKind = this->Peek()->GetKind();
        if (peekKind == TokenKind::OpenParenthesis){
            expression = this->ParseFunctionCallExpression(expression);
        } else if (peekKind == TokenKind::OpenSquareBracket){
            expression = this->ParseIndexExpression(expression);
        } else {
            //TODO: index period expression
            break;
        }
    }
    return expression;
}
// Returns OperatorPrecedence::Unary when `kind` is a token that can act as a
// unary operator (plus, minus, `not`), and OperatorPrecedence::No otherwise.
OperatorPrecedence GetUnaryPrecedence(TokenKind kind){
    const bool isUnaryOperator = kind == TokenKind::PlusToken
                              || kind == TokenKind::MinusToken
                              || kind == TokenKind::NotKeyword;
    if (isUnaryOperator){
        return OperatorPrecedence::Unary;
    }
    return OperatorPrecedence::No;
}
// Maps a unary operator token kind to its UnaryOperatorKind.
// Throws std::logic_error for any other token kind. A bare `throw;` is not used
// here because, with no exception being handled, it calls std::terminate
// instead of throwing.
UnaryOperatorKind GetUnaryOperatorKind(TokenKind kind){
    switch (kind){
        case TokenKind::PlusToken: return UnaryOperatorKind::Identity;
        case TokenKind::MinusToken: return UnaryOperatorKind::Negation;
        case TokenKind::NotKeyword: return UnaryOperatorKind::LogicalNegation;
        default: // This should never trigger, so throw.
            throw std::logic_error("GetUnaryOperatorKind: token kind is not a unary operator");
    }
}
// Maps a binary operator token kind to its BinaryOperatorKind.
// Throws std::logic_error for any other token kind. A bare `throw;` is not used
// here because, with no exception being handled, it calls std::terminate
// instead of throwing.
BinaryOperatorKind GetBinaryOperatorKind(TokenKind kind){
    switch (kind){
        // Math operators
        case TokenKind::PlusToken: return BinaryOperatorKind::Addition;
        case TokenKind::MinusToken: return BinaryOperatorKind::Subtraction;
        case TokenKind::StarToken: return BinaryOperatorKind::Multiplication;
        case TokenKind::SlashToken: return BinaryOperatorKind::Division;
        // Equality and comparison operators
        case TokenKind::EqualityToken: return BinaryOperatorKind::Equality;
        case TokenKind::InequalityToken: return BinaryOperatorKind::Inequality;
        case TokenKind::Less: return BinaryOperatorKind::Less;
        case TokenKind::LessEquals: return BinaryOperatorKind::LessOrEquals;
        case TokenKind::Greater: return BinaryOperatorKind::Greater;
        case TokenKind::GreaterEquals: return BinaryOperatorKind::GreaterOrEquals;
        // Logical operators
        case TokenKind::AndKeyword: return BinaryOperatorKind::LogicalAnd;
        case TokenKind::OrKeyword: return BinaryOperatorKind::LogicalOr;
        default: // This should never trigger, so throw.
            throw std::logic_error("GetBinaryOperatorKind: token kind is not a binary operator");
    }
}
// Returns the precedence of `kind` when used as a binary operator, or
// OperatorPrecedence::No when the token is not a binary operator.
// Comparison operators share the precedence of the equality operators.
OperatorPrecedence GetBinaryPrecedence(TokenKind kind){
    switch (kind){
        // Math
        case TokenKind::PlusToken:
        case TokenKind::MinusToken:
            return OperatorPrecedence::Additive;
        case TokenKind::StarToken:
        case TokenKind::SlashToken:
            return OperatorPrecedence::Multiplication;
        // Equality and comparison
        case TokenKind::EqualityToken:
        case TokenKind::InequalityToken:
        case TokenKind::Less:
        case TokenKind::LessEquals:
        case TokenKind::Greater:
        case TokenKind::GreaterEquals:
            return OperatorPrecedence::Equality;
        // Logical
        case TokenKind::AndKeyword:
            return OperatorPrecedence::LogicalAnd;
        case TokenKind::OrKeyword:
            return OperatorPrecedence::LogicalOr;
        default:
            return OperatorPrecedence::No;
    }
}
// Parses a unary/binary expression by precedence climbing.
// `current` is the already consumed first token of the expression and
// `parentPrecedence` is the precedence of the operator this expression is an
// operand of (OperatorPrecedence::No at the top level).
// A leading unary operator binds its operand with unary precedence. Afterwards
// binary operators are folded into `left` for as long as the upcoming operator
// binds tighter than `parentPrecedence`; an operator of equal precedence ends
// the operand, which makes operators of the same precedence group to the left.
ParsedExpression* Parser::ParseBinaryExpression(IToken* current, OperatorPrecedence parentPrecedence){
    OperatorPrecedence unaryPrecedence = GetUnaryPrecedence(current->GetKind());
    ParsedExpression* left;
    if (unaryPrecedence != OperatorPrecedence::No && unaryPrecedence >= parentPrecedence){
        UnaryOperatorKind operatorKind = GetUnaryOperatorKind(current->GetKind());
        auto next = this->Next();
        auto operand = this->ParseBinaryExpression(next, unaryPrecedence);
        auto startPos = current->GetStartPosition();
        left = new UnaryExpression(operatorKind, operand, startPos, operand->GetEndPosition() - startPos);
    } else{
        left = this->ParsePrimaryExpression(current);
    }
    // Stop once all tokens are consumed, so the peek below never reads past the
    // end of the token list (this happens when `current` was the final token).
    while (this->_position < this->_tokens.size()){
        auto next = this->Peek();
        OperatorPrecedence binaryPrecedence = GetBinaryPrecedence(next->GetKind());
        if (binaryPrecedence == OperatorPrecedence::No || binaryPrecedence <= parentPrecedence){
            break;
        }
        auto operatorKind = GetBinaryOperatorKind(next->GetKind());
        this->Next(); // consume the operator token that was peeked
        auto right = this->ParseBinaryExpression(this->Next(), binaryPrecedence);
        auto startPos = left->GetStartPosition();
        left = new BinaryExpression(operatorKind, left, right, startPos, right->GetEndPosition() - startPos);
    }
    return left;
}
// Parses the smallest expression unit that starts at the already consumed
// token `current`: a literal, a variable, a parenthesized expression or a
// table expression.
// Any other token produces a BadExpression covering that token. An
// UnexpectedToken diagnostic is logged for it, except for BadToken.
ParsedExpression *Parser::ParsePrimaryExpression(IToken *current) {
    const auto kind = current->GetKind();
    switch (kind){
        case TokenKind::Integer:
            return new LiteralIntegerExpression((IntegerToken*)current);
        case TokenKind::Float:
            return new LiteralFloatExpression((FloatToken*)current);
        case TokenKind::String:
            return new LiteralStringExpression((StringToken*)current);
        case TokenKind::TrueKeyword:
        case TokenKind::FalseKeyword:
            return new LiteralBoolExpression(current);
        case TokenKind::Identifier:
            return new VariableExpression((IdentifierToken*)current);
        case TokenKind::OpenParenthesis:
            return this->ParseParenthesizedExpression(current);
        case TokenKind::OpenCurlyBracket:
            return this->ParseTableExpression(current);
        default:
            break;
    }
    // If we find a bad token here, we should have already logged it in the lexer, so don't log another error.
    if (kind != TokenKind::BadToken){
        this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, current->GetStartPosition(), current->GetLength());
    }
    return new BadExpression(current->GetStartPosition(), current->GetLength());
}
// Parses `( <expression> )`; `current` is the already consumed opening
// parenthesis. The resulting expression spans from the opening to the closing
// parenthesis.
// If the token after the inner expression is not a closing parenthesis, an
// UnexpectedToken diagnostic is logged and a BadExpression covering that token
// is returned.
ParsedExpression *Parser::ParseParenthesizedExpression(IToken *current) {
    auto inner = this->ParseExpression(this->Next());
    auto closing = this->Next();
    if (closing->GetKind() == TokenKind::CloseParenthesis){
        auto start = current->GetStartPosition();
        return new ParenthesizedExpression(inner, start, closing->GetEndPosition() - start);
    }
    this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, closing->GetStartPosition(), closing->GetLength());
    return new BadExpression(closing->GetStartPosition(), closing->GetLength());
}
2019-06-01 12:56:28 +00:00
// Parses the argument list of a call; the upcoming token is the opening
// parenthesis and `functionExpression` is the expression being called.
// Arguments are expressions separated by commas and closed by `)`. The
// resulting call spans from the start of `functionExpression` to the closing
// parenthesis.
// An UnexpectedToken diagnostic is logged and a BadExpression for the offending
// token is returned when EndOfFile is reached before the list is closed, or
// when an argument is followed by something other than `,` or `)`.
// NOTE(review): arguments parsed before an error are not released on the error
// paths — confirm who is expected to own them.
ParsedExpression *Parser::ParseFunctionCallExpression(ParsedExpression* functionExpression) {
    this->Next(); // consume the open parenthesis
    vector<const ParsedExpression*> parameters;
    auto peeked = this->Peek();
    auto peekedKind = peeked->GetKind();
    if (peekedKind == TokenKind::CloseParenthesis){
        this->Next();
    } else{
        while (peekedKind != TokenKind::CloseParenthesis){
            // Look at the token that starts the next argument. After the first
            // argument `peeked` holds the consumed separator, so it cannot be
            // used to detect the end of the file.
            auto upcoming = this->Peek();
            if (upcoming->GetKind() == TokenKind::EndOfFile){
                this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, upcoming->GetStartPosition(), upcoming->GetLength());
                return new BadExpression(upcoming->GetStartPosition(), upcoming->GetLength());
            }
            parameters.push_back(this->ParseExpression(this->Next()));
            if (this->_position >= this->_tokens.size()){
                // The argument parser consumed the final token; presumably it has
                // already reported it, so just give up without reading further.
                return new BadExpression(upcoming->GetStartPosition(), upcoming->GetLength());
            }
            peeked = this->Next();
            peekedKind = peeked->GetKind();
            if (peekedKind != TokenKind::CloseParenthesis && peekedKind != TokenKind::CommaToken){
                this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, peeked->GetStartPosition(), peeked->GetLength());
                return new BadExpression(peeked->GetStartPosition(), peeked->GetLength());
            }
        }
    }
    // `peeked` is the closing parenthesis at this point.
    auto start = functionExpression->GetStartPosition();
    return new FunctionCallExpression(functionExpression, parameters, start, peeked->GetEndPosition() - start);
}
// Parses `[ <expression> ]` applied to `indexingExpression`; the upcoming token
// is the opening square bracket. The result spans from the start of
// `indexingExpression` to the closing bracket.
// If the index expression is not followed by a closing square bracket, an
// UnexpectedToken diagnostic is logged and a BadExpression covering that token
// is returned.
ParsedExpression* Parser::ParseIndexExpression(ParsedExpression* indexingExpression){
    this->Next(); // consume '[' token
    auto firstIndexToken = this->Next();
    auto indexExpression = this->ParseExpression(firstIndexToken);
    auto closing = this->Next();
    if (closing->GetKind() == TokenKind::CloseSquareBracket){
        auto start = indexingExpression->GetStartPosition();
        return new IndexExpression(indexingExpression, indexExpression, start, closing->GetEndPosition() - start);
    }
    this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, closing->GetStartPosition(), closing->GetLength());
    return new BadExpression(closing->GetStartPosition(), closing->GetLength());
}
2019-05-20 15:45:03 +00:00
2019-06-09 18:15:09 +00:00
// Parses a table expression; `current` is the already consumed `{`.
// Three forms are handled:
//  - `{}`: an empty numerical table;
//  - `{a, b, c}`: a numerical table, chosen when the first item is an
//    expression statement followed by a comma;
//  - anything else: a table defined by a block of statements up to `}`.
// The resulting expression spans from `{` to the closing `}`.
// For the numerical form, an unexpected separator or reaching the end of the
// tokens before `}` results in a BadExpression; an UnexpectedToken diagnostic
// is logged for the first such problem found here.
ParsedExpression* Parser::ParseTableExpression(IToken* current){
    auto start = current->GetStartPosition();

    // Logs an UnexpectedToken diagnostic covering the given token.
    auto reportUnexpected = [this](IToken* token){
        this->ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedToken, token->GetStartPosition(), token->GetLength());
    };
    // True once every token, including the final one, has been consumed.
    auto outOfTokens = [this](){
        return this->_position >= this->_tokens.size();
    };

    if (this->Peek()->GetKind() == TokenKind::CloseCurlyBracket){
        // Measure up to the closing bracket itself, not the token that follows it.
        auto closeToken = this->Next();
        return new ParsedNumericalTableExpression({}, start, closeToken->GetEndPosition() - start);
    }

    auto firstToken = this->Next();
    if (firstToken->GetKind() == TokenKind::EndOfFile){
        // `{` was the last real token; there is nothing to parse as a first item.
        reportUnexpected(firstToken);
        return new BadExpression(start, firstToken->GetEndPosition() - start);
    }
    auto firstItem = this->ParseStatement(firstToken);

    // If the first item is an expression, and is followed by a comma, we're dealing with a simple {1, 2, 3} kind of array
    if (firstItem->GetKind() == ParsedStatementKind::Expression &&
        !outOfTokens() && this->Peek()->GetKind() == TokenKind::CommaToken){
        auto expr = ((ParsedExpressionStatement*)firstItem)->GetExpression();
        auto expressions = vector<const ParsedExpression*>{expr};
        auto n = this->Next(); // consume the comma
        bool hasErrors = false;
        while (n->GetKind() != TokenKind::CloseCurlyBracket){
            if (outOfTokens()){
                // Nothing left to read; any problem was reported when the last token was consumed.
                hasErrors = true;
                break;
            }
            if (this->Peek()->GetKind() == TokenKind::EndOfFile){
                // The table was never closed.
                if (!hasErrors){
                    reportUnexpected(this->Peek());
                }
                hasErrors = true;
                break;
            }
            auto expression = this->ParseExpression(this->Next());
            expressions.push_back(expression);
            if (outOfTokens()){
                // The element parser consumed the final token; presumably it has
                // already reported it, so stop without reading further.
                hasErrors = true;
                break;
            }
            n = this->Next();
            if (n->GetKind() != TokenKind::CommaToken && n->GetKind() != TokenKind::CloseCurlyBracket && !hasErrors){
                reportUnexpected(n);
                hasErrors = true;
            }
        }
        if (hasErrors){
            return new BadExpression(start, n->GetEndPosition() - start);
        }
        return new ParsedNumericalTableExpression(expressions, start, n->GetEndPosition() - start);
    }
    // Otherwise we have a more complex table, which can be defined by a block
    else {
        auto block = (ParsedBlockStatement*)this->ParseBlock({TokenKind::CloseCurlyBracket}, {firstItem});
        // The block parser consumed its terminator, so it is the previous token.
        auto closeToken = this->PeekAt(-1);
        return new ParsedTableExpression(block, start, closeToken->GetEndPosition() - start);
    }
}