Upsilon/Upsilon/Parser/Parser.cs

604 lines
25 KiB
C#

using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
namespace Upsilon.Parser
{
public class Parser
{
/// <summary>The token stream this parser walks over.</summary>
private readonly ImmutableArray<SyntaxToken> _tokens;
/// <summary>Receives every problem reported while parsing.</summary>
private readonly Diagnostics _diagnostics;
/// <summary>Index into <see cref="_tokens"/> of the token currently under the cursor.</summary>
private int _position;

/// <summary>
/// Creates a parser over an already lexed token stream.
/// </summary>
/// <param name="tokens">Tokens to parse.</param>
/// <param name="diagnostics">Sink for parse errors.</param>
private Parser(ImmutableArray<SyntaxToken> tokens, Diagnostics diagnostics)
{
    this._diagnostics = diagnostics;
    this._tokens = tokens;
}
/// <summary>
/// Lexes <paramref name="text"/> and parses the resulting tokens into the root block of a script.
/// </summary>
/// <param name="text">Source text to parse.</param>
/// <param name="diagnostics">Sink handed to both the lexer and the parser.</param>
/// <param name="saveComments">Forwarded unchanged to the lexer.</param>
/// <returns>The block statement that makes up the whole script.</returns>
public static BlockStatementSyntax Parse(string text, Diagnostics diagnostics, bool saveComments)
{
    var parser = new Parser(Lexer.Lex(text, diagnostics, saveComments), diagnostics);
    var script = parser.ParseScriptSyntax();
    return (BlockStatementSyntax) script;
}
/// <summary>The token at the cursor.</summary>
private SyntaxToken Current
{
    get { return Get(0); }
}

/// <summary>The token directly after the cursor.</summary>
private SyntaxToken Next
{
    get { return Get(1); }
}

/// <summary>
/// Looks at the token <paramref name="offset"/> positions past the cursor without consuming anything.
/// </summary>
/// <param name="offset">Distance from the current position.</param>
/// <returns>
/// The requested token, or a freshly created end-of-file token carrying the span of the last
/// real token when the requested position lies beyond the end of the stream.
/// </returns>
private SyntaxToken Get(int offset)
{
    var index = _position + offset;
    if (index < _tokens.Length)
    {
        return _tokens[index];
    }

    return new SyntaxToken(SyntaxKind.EndOfFile, _tokens.Last().Span, null);
}
/// <summary>
/// Consumes the token at the cursor and advances by one position.
/// </summary>
/// <returns>The token that was at the cursor before advancing.</returns>
private SyntaxToken NextToken()
{
    var consumed = Get(0);
    _position += 1;
    return consumed;
}
/// <summary>
/// Consumes the current token when it has the expected kind.
/// </summary>
/// <param name="kind">The kind the current token is required to have.</param>
/// <returns>
/// The consumed token on a match. On a mismatch a diagnostic is logged, nothing is consumed,
/// and a placeholder token of the requested kind at the current span is returned.
/// </returns>
private SyntaxToken MatchToken(SyntaxKind kind)
{
    var token = Current;
    if (token.Kind != kind)
    {
        _diagnostics.LogBadCharacter(token.Span, kind, token.Kind);
        return new SyntaxToken(kind, token.Span, null);
    }

    return NextToken();
}
/// <summary>
/// Parses the whole token stream as one block and then requires the end-of-file token.
/// </summary>
/// <returns>The block holding every top-level statement.</returns>
private StatementSyntax ParseScriptSyntax()
{
    SyntaxKind[] terminators = {SyntaxKind.EndOfFile};
    var script = ParseBlockStatement(terminators);
    MatchToken(SyntaxKind.EndOfFile);
    return script;
}
/// <summary>
/// Parses a single statement, choosing the statement form from the current token and,
/// where that is not enough, the token after it.
/// </summary>
/// <returns>The parsed statement; anything not recognised is parsed as an expression statement.</returns>
private StatementSyntax ParseStatement()
{
    var following = Next.Kind;
    switch (Current.Kind)
    {
        case SyntaxKind.Identifier:
            // `name = ...`
            if (following == SyntaxKind.Equals)
                return ParseAssignmentExpression();
            break;

        case SyntaxKind.LocalKeyword:
            // `local name ...`, `local function ...`, `local coroutine ...`
            if (following == SyntaxKind.Identifier)
                return ParseAssignmentExpression();
            if (following == SyntaxKind.FunctionKeyword)
                return ParseFunctionAssignmentStatement(SyntaxKind.FunctionKeyword);
            if (following == SyntaxKind.CoroutineKeyword)
                return ParseFunctionAssignmentStatement(SyntaxKind.CoroutineKeyword);
            break;

        case SyntaxKind.IfKeyword:
            return ParseIfStatement(SyntaxKind.IfKeyword);

        case SyntaxKind.ReturnKeyword:
            return ParseReturnStatement();

        case SyntaxKind.FunctionKeyword:
            // A keyword directly followed by `(` is an anonymous function expression instead.
            if (following != SyntaxKind.OpenParenthesis)
                return ParseFunctionAssignmentStatement(SyntaxKind.FunctionKeyword);
            break;

        case SyntaxKind.CoroutineKeyword:
            if (following != SyntaxKind.OpenParenthesis)
                return ParseFunctionAssignmentStatement(SyntaxKind.CoroutineKeyword);
            break;

        case SyntaxKind.ForKeyword:
            return ParseForStatement();

        case SyntaxKind.WhileKeyword:
            return ParseWhileStatement();

        case SyntaxKind.BreakKeyword:
            return new BreakStatementSyntax(NextToken());

        case SyntaxKind.YieldKeyword:
            return ParseYieldStatement();
    }

    return ParseExpressionStatement();
}
/// <summary>
/// Parses statements until one of <paramref name="endTokens"/> or the end of the file is reached.
/// The terminating token itself is left for the caller to consume.
/// </summary>
/// <param name="endTokens">Token kinds that close this block.</param>
/// <returns>A block statement containing the statements parsed so far.</returns>
private StatementSyntax ParseBlockStatement(SyntaxKind[] endTokens)
{
    var builder = ImmutableArray.CreateBuilder<StatementSyntax>();
    SyntaxToken previous = null;
    while (true)
    {
        var token = Current;
        if (endTokens.Contains(token.Kind) || token.Kind == SyntaxKind.EndOfFile)
        {
            break;
        }

        // The last statement did not move the cursor; stop instead of spinning forever.
        if (previous == token)
        {
            break;
        }

        previous = token;
        builder.Add(ParseStatement());
    }

    return new BlockStatementSyntax(builder.ToImmutable());
}
/// <summary>
/// Parses an <c>if</c> (or, when recursing, an <c>elseif</c>) statement including any
/// trailing <c>elseif</c> / <c>else</c> branches and the closing <c>end</c>.
/// </summary>
/// <param name="requiredToken">
/// The keyword that opens this branch: <see cref="SyntaxKind.IfKeyword"/> for the first branch,
/// <see cref="SyntaxKind.ElseIfKeyword"/> for chained branches.
/// </param>
/// <returns>The parsed if statement.</returns>
/// <remarks>
/// Malformed input is reported through diagnostics rather than exceptions: a missing
/// <c>end</c> is handled by <see cref="MatchToken"/> exactly like the loop statements do, and the
/// condition is parsed as a plain expression so an accidental <c>=</c> after an index expression
/// surfaces as a missing <c>then</c> instead of an invalid cast.
/// </remarks>
private StatementSyntax ParseIfStatement(SyntaxKind requiredToken)
{
    var ifToken = MatchToken(requiredToken);
    // Parse the condition directly as an expression. Routing it through ParseExpressionStatement
    // could hand back a table assignment statement, which is not an ExpressionStatementSyntax.
    var condition = new ExpressionStatementSyntax(ParseExpression());
    var thenToken = MatchToken(SyntaxKind.ThenKeyword);
    var block = (BlockStatementSyntax) ParseBlockStatement(
        new[] {SyntaxKind.EndKeyword, SyntaxKind.ElseIfKeyword, SyntaxKind.ElseKeyword});

    switch (Current.Kind)
    {
        case SyntaxKind.ElseIfKeyword:
        {
            // The chained branch consumes the rest of the statement, including the final `end`.
            var nextElseIf =
                new ElseIfStatementSyntax((IfStatementSyntax) ParseIfStatement(SyntaxKind.ElseIfKeyword));
            return new IfStatementSyntax(ifToken, condition, thenToken, block, nextElseIf);
        }
        case SyntaxKind.ElseKeyword:
        {
            var elseToken = MatchToken(SyntaxKind.ElseKeyword);
            var elseBlock = ParseBlockStatement(new[] {SyntaxKind.EndKeyword});
            var endEndToken = MatchToken(SyntaxKind.EndKeyword);
            var elseStatement = new ElseStatementSyntax(elseToken, (BlockStatementSyntax) elseBlock, endEndToken);
            return new IfStatementSyntax(ifToken, condition, thenToken, block, elseStatement);
        }
        default:
        {
            // Either a real `end`, or the block stopped early (end of file / no progress).
            // In the latter case MatchToken logs the problem and supplies a placeholder token.
            var endToken = MatchToken(SyntaxKind.EndKeyword);
            return new IfStatementSyntax(ifToken, condition, thenToken, block, endToken);
        }
    }
}
/// <summary>
/// Parses a <c>for</c> loop. After the keyword, an <c>=</c> in second position selects the
/// numeric form; everything else is treated as the generic (<c>in</c>) form.
/// </summary>
/// <returns>The parsed loop statement.</returns>
private StatementSyntax ParseForStatement()
{
    var forToken = MatchToken(SyntaxKind.ForKeyword);
    return Next.Kind == SyntaxKind.Equals
        ? ParseNumericForStatement(forToken)
        : ParseGenericForStatement(forToken);
}
/// <summary>
/// Parses the remainder of a numeric for loop:
/// <c>identifier = start, stop [, step] do block end</c>.
/// </summary>
/// <param name="forToken">The already consumed <c>for</c> keyword.</param>
/// <returns>The parsed numeric for statement.</returns>
private StatementSyntax ParseNumericForStatement(SyntaxToken forToken)
{
    // MatchToken returns a plain placeholder SyntaxToken when the identifier is missing;
    // a hard cast would throw, so substitute an empty identifier (the error is already logged).
    var identifier = MatchToken(SyntaxKind.Identifier) as IdentifierToken
                     ?? new IdentifierToken(string.Empty, Current.Span);
    var equals = MatchToken(SyntaxKind.Equals);
    var v1 = ParseExpression();
    var comma1 = MatchToken(SyntaxKind.Comma);
    var v2 = ParseExpression();

    // The third value is optional; both stay null when it is absent.
    SyntaxToken comma2 = null;
    ExpressionSyntax v3 = null;
    if (Current.Kind == SyntaxKind.Comma)
    {
        comma2 = MatchToken(SyntaxKind.Comma);
        v3 = ParseExpression();
    }

    var doToken = MatchToken(SyntaxKind.DoKeyword);
    var block = ParseBlockStatement(new[] {SyntaxKind.EndKeyword});
    var endToken = MatchToken(SyntaxKind.EndKeyword);
    return new NumericForStatementSyntax(forToken, identifier, equals, v1, comma1,
        v2, comma2, v3, doToken, block, endToken);
}
/// <summary>
/// Parses the remainder of a generic for loop:
/// <c>identifier {, identifier} in expression do block end</c>.
/// </summary>
/// <param name="forToken">The already consumed <c>for</c> keyword.</param>
/// <returns>The parsed generic for statement.</returns>
private StatementSyntax ParseGenericForStatement(SyntaxToken forToken)
{
    var arr = ImmutableArray.CreateBuilder<IdentifierToken>();
    SyntaxToken previous = null;
    while (true)
    {
        // Neither an identifier nor a comma was consumed last time round: the input is
        // malformed (already reported), so stop rather than loop forever.
        if (Current == previous)
            break;
        previous = Current;

        // MatchToken yields a plain placeholder token on a mismatch; a hard cast to
        // IdentifierToken would throw, so fall back to an empty identifier instead.
        var identifier = MatchToken(SyntaxKind.Identifier) as IdentifierToken
                         ?? new IdentifierToken(string.Empty, Current.Span);
        arr.Add(identifier);
        if (Current.Kind == SyntaxKind.InKeyword || Current.Kind == SyntaxKind.EndOfFile)
            break;
        MatchToken(SyntaxKind.Comma);
    }

    var inKeyword = MatchToken(SyntaxKind.InKeyword);
    var enumerableExpression = ParseExpression();
    var doKeyword = MatchToken(SyntaxKind.DoKeyword);
    var block = ParseBlockStatement(new[] {SyntaxKind.EndKeyword});
    var endKeyword = MatchToken(SyntaxKind.EndKeyword);
    return new GenericForStatementSyntax(forToken, arr.ToImmutable(), inKeyword, enumerableExpression,
        doKeyword, (BlockStatementSyntax) block, endKeyword);
}
/// <summary>
/// Parses a while loop: <c>while expression do block end</c>.
/// </summary>
/// <returns>The parsed while statement.</returns>
private StatementSyntax ParseWhileStatement()
{
    var keyword = MatchToken(SyntaxKind.WhileKeyword);
    var condition = ParseExpression();
    var doKeyword = MatchToken(SyntaxKind.DoKeyword);
    SyntaxKind[] terminators = {SyntaxKind.EndKeyword};
    var body = ParseBlockStatement(terminators);
    var end = MatchToken(SyntaxKind.EndKeyword);
    return new WhileStatementSyntax(keyword, condition, doKeyword, body, end);
}
/// <summary>
/// Parses an anonymous function or coroutine: <c>keyword ( parameters ) block end</c>.
/// </summary>
/// <param name="openKeyword">
/// The keyword that opens the expression; <see cref="SyntaxKind.CoroutineKeyword"/> marks the
/// result as a coroutine.
/// </param>
/// <returns>The parsed function expression.</returns>
private ExpressionSyntax ParseFunctionExpression(SyntaxKind openKeyword)
{
    var functionToken = MatchToken(openKeyword);
    return ParseFunctionSignatureAndBody(functionToken, openKeyword == SyntaxKind.CoroutineKeyword);
}

/// <summary>
/// Parses a named function or coroutine declaration:
/// <c>[local] keyword identifier ( parameters ) block end</c>.
/// </summary>
/// <param name="identifyingKeyword">
/// The keyword expected after the optional <c>local</c>; <see cref="SyntaxKind.CoroutineKeyword"/>
/// marks the function as a coroutine.
/// </param>
/// <returns>
/// A function assignment statement. Its comment data is taken from the <c>local</c> token when
/// present and non-null, otherwise from the function keyword token.
/// </returns>
private StatementSyntax ParseFunctionAssignmentStatement(SyntaxKind identifyingKeyword)
{
    SyntaxToken localToken = null;
    string[] commentData = null;
    if (Current.Kind == SyntaxKind.LocalKeyword)
    {
        localToken = NextToken();
        commentData = localToken.CommentData;
    }

    var functionToken = MatchToken(identifyingKeyword);
    if (commentData == null)
    {
        commentData = functionToken.CommentData;
    }

    // A missing name has already been reported by MatchToken; use an empty identifier rather
    // than casting the placeholder token, which would throw.
    var identifier = MatchToken(SyntaxKind.Identifier) as IdentifierToken
                     ?? new IdentifierToken(string.Empty, Current.Span);
    var functionExpression =
        ParseFunctionSignatureAndBody(functionToken, identifyingKeyword == SyntaxKind.CoroutineKeyword);
    return new FunctionAssignmentStatementSyntax(localToken, identifier, functionExpression)
    {
        CommentData = commentData
    };
}

/// <summary>
/// Parses everything after the function keyword (and name, if any): the parenthesised
/// parameter list, the body block, and the closing <c>end</c>.
/// </summary>
private FunctionExpressionSyntax ParseFunctionSignatureAndBody(SyntaxToken functionToken, bool isCoroutine)
{
    var openParenthesis = MatchToken(SyntaxKind.OpenParenthesis);
    var parameters = ParseParameterList();
    var closeParenthesis = MatchToken(SyntaxKind.CloseParenthesis);
    var block = ParseBlockStatement(new[] {SyntaxKind.EndKeyword});
    var endToken = MatchToken(SyntaxKind.EndKeyword);
    return new FunctionExpressionSyntax(functionToken, openParenthesis,
        parameters, closeParenthesis, (BlockStatementSyntax) block, endToken, isCoroutine);
}

/// <summary>
/// Parses the parameters between the parentheses of a function. Each parameter is either a
/// single identifier, or two consecutive identifiers where the first is passed to
/// <see cref="ParameterToken"/> as its first argument and the second as the name.
/// Stops at <c>)</c>, at end of file, or as soon as an iteration consumes nothing.
/// </summary>
private ImmutableArray<ParameterToken> ParseParameterList()
{
    var parameters = ImmutableArray.CreateBuilder<ParameterToken>();
    SyntaxToken previous = null;
    while (Current.Kind != SyntaxKind.CloseParenthesis && Current.Kind != SyntaxKind.EndOfFile)
    {
        // No token was consumed by the previous iteration; bail out instead of spinning.
        if (Current == previous)
        {
            break;
        }

        previous = Current;

        // On a mismatch MatchToken returns a plain placeholder token; replace it with an
        // empty identifier so the (already reported) error does not become a cast exception.
        var firstToken = MatchToken(SyntaxKind.Identifier) as IdentifierToken
                         ?? new IdentifierToken(string.Empty, Current.Span);
        if (Current.Kind == SyntaxKind.Identifier)
        {
            parameters.Add(new ParameterToken(firstToken, (IdentifierToken) NextToken()));
        }
        else
        {
            parameters.Add(new ParameterToken(null, firstToken));
        }

        if (Current.Kind == SyntaxKind.Comma)
            NextToken();
    }

    return parameters.ToImmutable();
}
/// <summary>
/// Parses an expression used as a statement. When the expression is an index or full-stop
/// index expression and is followed by <c>=</c>, it is parsed as a table assignment instead.
/// </summary>
/// <returns>Either a table assignment statement or an expression statement.</returns>
private StatementSyntax ParseExpressionStatement()
{
    var expression = ParseExpression();
    var isIndexer = expression.Kind == SyntaxKind.IndexExpression ||
                    expression.Kind == SyntaxKind.FullStopIndexExpression;
    if (isIndexer && Current.Kind == SyntaxKind.Equals)
    {
        return ParseTableAssignmentExpression(expression);
    }

    return new ExpressionStatementSyntax(expression);
}
/// <summary>
/// Parses a return statement with an optional value.
/// </summary>
/// <returns>
/// The return statement; its expression is <c>null</c> when the keyword is followed by a line
/// break or by a token that ends the enclosing block.
/// </returns>
private StatementSyntax ParseReturnStatement()
{
    var returnToken = (ReturnSyntaxToken) MatchToken(SyntaxKind.ReturnKeyword);

    // `else`, `elseif` and end of file close the surrounding block just like `end` does and can
    // never start an expression. Trying to parse one would log a spurious error and consume
    // the token the enclosing statement still needs.
    var kind = Current.Kind;
    var blockEnds = kind == SyntaxKind.EndKeyword ||
                    kind == SyntaxKind.ElseKeyword ||
                    kind == SyntaxKind.ElseIfKeyword ||
                    kind == SyntaxKind.EndOfFile;

    ExpressionSyntax expression = null;
    if (!blockEnds && !returnToken.FollowedByLineBreak)
        expression = ParseExpression();
    return new ReturnStatementSyntax(returnToken, expression);
}
/// <summary>
/// Parses a yield statement: the <c>yield</c> keyword followed by an expression.
/// </summary>
/// <returns>The parsed yield statement.</returns>
private StatementSyntax ParseYieldStatement()
{
    var keyword = MatchToken(SyntaxKind.YieldKeyword);
    var value = ParseExpression();
    return new YieldStatementSyntax(keyword, value);
}
/// <summary>
/// Parses a full expression: an anonymous function or coroutine when the current keyword is
/// directly followed by <c>(</c>, otherwise a binary expression. Trailing index, call and
/// member accesses are then applied to the result.
/// </summary>
/// <returns>The parsed expression.</returns>
private ExpressionSyntax ParseExpression()
{
    ExpressionSyntax expression;
    var kind = Current.Kind;
    var opensFunction = kind == SyntaxKind.FunctionKeyword || kind == SyntaxKind.CoroutineKeyword;
    if (opensFunction && Next.Kind == SyntaxKind.OpenParenthesis)
    {
        // Pass the keyword that is actually present, so a coroutine matches its own keyword
        // and is flagged as a coroutine rather than being parsed as a plain function.
        expression = ParseFunctionExpression(kind);
    }
    else
    {
        expression = ParseBinaryExpression();
    }

    return HandleComplexExpression(expression);
}
/// <summary>
/// Repeatedly wraps <paramref name="baseExpression"/> in index (<c>[</c>), call (<c>(</c>) and
/// member (<c>.</c>) expressions for as long as such a token follows.
/// </summary>
/// <param name="baseExpression">The expression the postfix operations apply to.</param>
/// <returns>The outermost expression after all postfix operations have been applied.</returns>
private ExpressionSyntax HandleComplexExpression(ExpressionSyntax baseExpression)
{
    SyntaxToken previous = null;
    while (true)
    {
        var token = Current;
        var kind = token.Kind;
        var isPostfix = kind == SyntaxKind.OpenBracket ||
                        kind == SyntaxKind.OpenParenthesis ||
                        kind == SyntaxKind.FullStop;
        if (!isPostfix)
            return baseExpression;

        // Same token as last time means nothing was consumed; stop to avoid looping forever.
        if (token == previous)
            return baseExpression;
        previous = token;

        switch (kind)
        {
            case SyntaxKind.OpenBracket:
                baseExpression = ParseIndexExpression(baseExpression);
                break;
            case SyntaxKind.OpenParenthesis:
                baseExpression = ParseFunctionCallExpression(baseExpression);
                break;
            default:
                baseExpression = ParseFullStopIndexExpression(baseExpression);
                break;
        }
    }
}
/// <summary>
/// Parses an assignment: <c>[local] target = expression</c>, or the multi-assignment form
/// <c>[local] a, b, ... = expression</c>.
/// </summary>
/// <returns>
/// An assignment statement, a multi-assignment statement, or — when a multi-assignment starts
/// with something other than a plain variable — an expression statement wrapping a bad
/// expression (an error is logged in that case).
/// </returns>
private StatementSyntax ParseAssignmentExpression()
{
    string[] commentData = null;
    SyntaxToken localKeyword = null;
    if (Current.Kind == SyntaxKind.LocalKeyword)
    {
        localKeyword = MatchToken(SyntaxKind.LocalKeyword);
        commentData = localKeyword.CommentData;
    }

    var identifier = ParseExpression();
    // Without comment data from `local`, fall back to the comment data on the variable's identifier.
    if (commentData == null && identifier.Kind == SyntaxKind.VariableExpression)
    {
        var variable = (VariableExpressionSyntax) identifier;
        commentData = variable.Identifier.CommentData;
    }

    if (Current.Kind == SyntaxKind.Comma)
    {
        if (identifier.Kind != SyntaxKind.VariableExpression)
        {
            _diagnostics.LogError("Only identifiers can be used for a multi assignment statement.", identifier.Span);
            return new ExpressionStatementSyntax(new BadExpressionSyntax(identifier.Span));
        }

        var cast = (VariableExpressionSyntax) identifier;
        var ls = new List<IdentifierToken>() {cast.Identifier};
        while (Current.Kind == SyntaxKind.Comma)
        {
            NextToken();
            // MatchToken returns a plain placeholder token when no identifier follows the comma;
            // casting it would throw, so use an empty identifier (the error is already logged).
            var name = MatchToken(SyntaxKind.Identifier) as IdentifierToken
                       ?? new IdentifierToken(string.Empty, Current.Span);
            ls.Add(name);
        }

        var assignmentTokenMulti = MatchToken(SyntaxKind.Equals);
        var expressionMulti = ParseExpression();
        return new MultiAssignmentStatementSyntax(localKeyword, ls, assignmentTokenMulti, expressionMulti);
    }

    var assignmentToken = MatchToken(SyntaxKind.Equals);
    var expression = ParseExpression();
    return new AssignmentStatementSyntax(localKeyword, identifier, assignmentToken, expression)
    {
        CommentData = commentData
    };
}
/// <summary>
/// Parses the <c>= expression</c> part of an assignment whose target has already been parsed.
/// </summary>
/// <param name="tableExpression">The already parsed assignment target.</param>
/// <returns>The table assignment statement.</returns>
private StatementSyntax ParseTableAssignmentExpression(ExpressionSyntax tableExpression)
{
    var equalsToken = MatchToken(SyntaxKind.Equals);
    var value = ParseExpression();
    return new TableAssigmentStatementSyntax(tableExpression, equalsToken, value);
}
/// <summary>
/// Parses unary and binary operator expressions by precedence climbing.
/// </summary>
/// <param name="parentPrecedence">
/// Precedence of the operator this call is an operand of. A binary operator is only consumed
/// here when its precedence is strictly higher.
/// </param>
/// <returns>The parsed expression tree.</returns>
private ExpressionSyntax ParseBinaryExpression(SyntaxKindPrecedence.Precedence parentPrecedence = SyntaxKindPrecedence.Precedence.None)
{
    ExpressionSyntax left;
    var unaryPrecedence = Current.Kind.UnaryOperatorPrecedence();
    var startsWithUnary = unaryPrecedence != SyntaxKindPrecedence.Precedence.None &&
                          unaryPrecedence >= parentPrecedence;
    if (!startsWithUnary)
    {
        left = ParsePrimaryExpression();
    }
    else
    {
        var unaryOperator = NextToken();
        left = new UnaryExpressionSyntax(unaryOperator, ParseBinaryExpression(unaryPrecedence));
    }

    left = HandleComplexExpression(left);

    // Keep folding operators into `left` while they bind tighter than the parent operator.
    for (var precedence = Current.Kind.BinaryOperatorPrecedence();
        precedence != SyntaxKindPrecedence.Precedence.None && precedence > parentPrecedence;
        precedence = Current.Kind.BinaryOperatorPrecedence())
    {
        var binaryOperator = NextToken();
        var right = HandleComplexExpression(ParseBinaryExpression(precedence));
        left = new BinaryExpressionSyntax(left, binaryOperator, right);
    }

    return left;
}
/// <summary>
/// Parses the smallest expression unit, selected by the kind of the current token:
/// parenthesised expression, number, boolean, string, variable, table or nil.
/// </summary>
/// <returns>
/// The parsed expression. For any other token an error is logged, the token is consumed, and
/// a bad expression covering that token's span is returned.
/// </returns>
private ExpressionSyntax ParsePrimaryExpression()
{
    switch (Current.Kind)
    {
        case SyntaxKind.OpenParenthesis:
            return ParseParenthesizedExpression();
        case SyntaxKind.Number:
            return ParseNumber();
        case SyntaxKind.TrueKeyword:
        case SyntaxKind.FalseKeyword:
            return ParseBoolean();
        case SyntaxKind.String:
            return ParseString();
        case SyntaxKind.Identifier:
            return ParseVariableExpression();
        case SyntaxKind.OpenBrace:
            return ParseTable();
        case SyntaxKind.NilKeyword:
        {
            var nilToken = MatchToken(SyntaxKind.NilKeyword);
            return new LiteralExpressionSyntax(nilToken, null);
        }
        default:
        {
            _diagnostics.LogBadCharacter(Current.Span);
            var bad = new BadExpressionSyntax(Current.Span);
            // Skip the offending token so the caller keeps making progress.
            NextToken();
            return bad;
        }
    }
}
/// <summary>
/// Parses a variable reference from an identifier token.
/// </summary>
/// <returns>A variable expression wrapping the matched identifier.</returns>
private ExpressionSyntax ParseVariableExpression()
{
    var matched = MatchToken(SyntaxKind.Identifier);
    return new VariableExpressionSyntax((IdentifierToken) matched);
}
/// <summary>
/// Parses a call's argument list: <c>( [expression {, expression}] )</c>.
/// </summary>
/// <param name="expression">The already parsed expression being called.</param>
/// <returns>The function call expression.</returns>
private ExpressionSyntax ParseFunctionCallExpression(ExpressionSyntax expression)
{
    var open = MatchToken(SyntaxKind.OpenParenthesis);
    var arguments = ImmutableArray.CreateBuilder<ExpressionSyntax>();
    SyntaxToken previous = null;
    while (true)
    {
        var kind = Current.Kind;
        if (kind == SyntaxKind.CloseParenthesis || kind == SyntaxKind.EndOfFile)
        {
            break;
        }

        // Stop when the previous argument did not consume any token.
        if (Current == previous)
        {
            break;
        }

        previous = Current;
        arguments.Add(ParseExpression());
        if (Current.Kind == SyntaxKind.Comma)
        {
            NextToken();
        }
    }

    var close = MatchToken(SyntaxKind.CloseParenthesis);
    return new FunctionCallExpressionSyntax(expression, open, arguments.ToImmutable(), close);
}
/// <summary>
/// Parses a bracketed index: <c>[ expression ]</c>.
/// </summary>
/// <param name="expression">The already parsed expression being indexed.</param>
/// <returns>The index expression.</returns>
private ExpressionSyntax ParseIndexExpression(ExpressionSyntax expression)
{
    var open = MatchToken(SyntaxKind.OpenBracket);
    var key = ParseExpression();
    var close = MatchToken(SyntaxKind.CloseBracket);
    return new IndexExpressionSyntax(expression, open, key, close);
}
/// <summary>
/// Parses a member access: <c>. identifier</c>.
/// </summary>
/// <param name="expression">The already parsed expression being accessed.</param>
/// <returns>
/// The full-stop index expression. When no identifier follows the full stop, an error is
/// logged and the expression is built with an empty identifier.
/// </returns>
private ExpressionSyntax ParseFullStopIndexExpression(ExpressionSyntax expression)
{
    var fullStop = MatchToken(SyntaxKind.FullStop);
    if (Current.Kind != SyntaxKind.Identifier)
    {
        _diagnostics.LogBadCharacter(expression.Span, SyntaxKind.Identifier);
        // Still hand back an index expression, just with an empty identifier, so that
        // tooling has a node to work with.
        var empty = new IdentifierToken(string.Empty, fullStop.Span);
        return new FullStopIndexExpressionSyntax(expression, fullStop, empty);
    }

    var member = NextToken();
    return new FullStopIndexExpressionSyntax(expression, fullStop, (IdentifierToken) member);
}
/// <summary>
/// Parses an expression wrapped in parentheses: <c>( expression )</c>.
/// </summary>
/// <returns>The parenthesised expression.</returns>
private ExpressionSyntax ParseParenthesizedExpression()
{
    var open = MatchToken(SyntaxKind.OpenParenthesis);
    var inner = ParseExpression();
    var close = MatchToken(SyntaxKind.CloseParenthesis);
    return new ParenthesizedExpressionSyntax(open, inner, close);
}
/// <summary>
/// Parses a number literal; the literal's value is taken from the token.
/// </summary>
/// <returns>The literal expression.</returns>
private ExpressionSyntax ParseNumber()
{
    var token = MatchToken(SyntaxKind.Number);
    var value = token.Value;
    return new LiteralExpressionSyntax(token, value);
}
/// <summary>
/// Parses a boolean literal. A <c>true</c> keyword yields a literal holding <c>true</c>;
/// otherwise a <c>false</c> keyword is required and the literal holds <c>false</c>.
/// </summary>
/// <returns>The literal expression.</returns>
private ExpressionSyntax ParseBoolean()
{
    var isTrue = Current.Kind == SyntaxKind.TrueKeyword;
    SyntaxToken token;
    if (isTrue)
    {
        token = MatchToken(SyntaxKind.TrueKeyword);
    }
    else
    {
        token = MatchToken(SyntaxKind.FalseKeyword);
    }

    return new LiteralExpressionSyntax(token, isTrue);
}
/// <summary>
/// Parses a string literal; the literal's value is taken from the token.
/// </summary>
/// <returns>The literal expression.</returns>
private ExpressionSyntax ParseString()
{
    var token = MatchToken(SyntaxKind.String);
    var value = token.Value;
    return new LiteralExpressionSyntax(token, value);
}
/// <summary>
/// Parses a table constructor: <c>{ [entry {, entry}] }</c>. Each entry is parsed as a statement.
/// </summary>
/// <returns>The table expression containing the parsed entries.</returns>
private ExpressionSyntax ParseTable()
{
    var open = MatchToken(SyntaxKind.OpenBrace);
    var entries = ImmutableArray.CreateBuilder<SyntaxNode>();
    var separatorSeen = true;
    SyntaxToken previous = null;

    // Continue only while entries are separated by commas and the closing brace is not reached.
    while (Current.Kind != SyntaxKind.CloseBrace && separatorSeen)
    {
        // The previous entry consumed nothing; stop instead of looping forever.
        if (Current == previous)
        {
            break;
        }

        previous = Current;
        SyntaxNode entry = ParseStatement();
        entries.Add(entry);

        separatorSeen = Current.Kind == SyntaxKind.Comma;
        if (separatorSeen)
        {
            NextToken();
        }
    }

    var close = MatchToken(SyntaxKind.CloseBrace);
    return new TableExpressionSyntax(open, entries.ToImmutable(), close);
}
}
}