using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;

namespace Upsilon.Parser
{
    /// <summary>
    /// Recursive-descent parser that turns the token stream produced by <c>Lexer.Lex</c>
    /// into a syntax tree rooted at a <c>BlockStatementSyntax</c>.
    /// Unexpected tokens are reported through the supplied <c>Diagnostics</c> instance and
    /// replaced by fabricated tokens so that a tree is still produced for malformed input.
    /// </summary>
    public class Parser
    {
        private readonly ImmutableArray<SyntaxToken> _tokens;
        private readonly Diagnostics _diagnostics;

        // Index of the token currently being inspected.
        private int _position;

        private Parser(ImmutableArray<SyntaxToken> tokens, Diagnostics diagnostics)
        {
            _tokens = tokens;
            _diagnostics = diagnostics;
        }

        /// <summary>
        /// Lexes and parses <paramref name="text"/> into a block of statements.
        /// </summary>
        /// <param name="text">The script source.</param>
        /// <param name="diagnostics">Sink handed to the lexer and used by the parser for error reports.</param>
        /// <param name="saveComments">Forwarded unchanged to <c>Lexer.Lex</c>.</param>
        /// <returns>The parsed script as a <c>BlockStatementSyntax</c>.</returns>
        public static BlockStatementSyntax Parse(string text, Diagnostics diagnostics, bool saveComments)
        {
            var tokens = Lexer.Lex(text, diagnostics, saveComments);
            return (BlockStatementSyntax) new Parser(tokens, diagnostics).ParseScriptSyntax();
        }

        private SyntaxToken Current => Get(0);
        private SyntaxToken Next => Get(1);

        /// <summary>
        /// Returns the token <paramref name="offset"/> positions ahead of the cursor, or a
        /// freshly created end-of-file token (carrying the span of the last real token) when
        /// that position lies past the end of the token array.
        /// </summary>
        private SyntaxToken Get(int offset)
        {
            var index = _position + offset;
            if (index >= _tokens.Length)
            {
                return new SyntaxToken(SyntaxKind.EndOfFile, _tokens.Last().Span, null);
            }

            return _tokens[index];
        }

        /// <summary>Returns the current token and advances the cursor by one.</summary>
        private SyntaxToken NextToken()
        {
            var current = Current;
            _position++;
            return current;
        }

        /// <summary>
        /// Consumes and returns the current token when it is of the requested kind. Otherwise a
        /// diagnostic is logged, the cursor is NOT advanced, and a fabricated token of the
        /// requested kind is returned.
        /// </summary>
        private SyntaxToken MatchToken(SyntaxKind kind)
        {
            if (Current.Kind == kind)
            {
                return NextToken();
            }

            _diagnostics.LogBadCharacter(Current.Span, kind, Current.Kind);
            return new SyntaxToken(kind, Current.Span, null);
        }

        /// <summary>
        /// Identifier-typed counterpart of <see cref="MatchToken"/>. On a mismatch the fabricated
        /// token is an empty <c>IdentifierToken</c> (rather than a plain <c>SyntaxToken</c>), so
        /// callers never have to cast a fabricated token and malformed input cannot trigger an
        /// <see cref="InvalidCastException"/>.
        /// </summary>
        private IdentifierToken MatchIdentifier()
        {
            if (Current.Kind == SyntaxKind.Identifier)
            {
                return (IdentifierToken) NextToken();
            }

            _diagnostics.LogBadCharacter(Current.Span, SyntaxKind.Identifier, Current.Kind);
            return new IdentifierToken(string.Empty, Current.Span);
        }

        /// <summary>Parses statements until end of file and then consumes the end-of-file token.</summary>
        private StatementSyntax ParseScriptSyntax()
        {
            var statement = ParseBlockStatement(new[] {SyntaxKind.EndOfFile});
            MatchToken(SyntaxKind.EndOfFile);
            return statement;
        }

        /// <summary>
        /// Dispatches on the current token (and one token of lookahead) to the matching statement
        /// parser; anything unrecognised is parsed as an expression statement.
        /// </summary>
        private StatementSyntax ParseStatement()
        {
            switch (Current.Kind)
            {
                case SyntaxKind.Identifier:
                    if (Next.Kind == SyntaxKind.Equals)
                    {
                        return ParseAssignmentExpression();
                    }
                    break;

                case SyntaxKind.LocalKeyword:
                    if (Next.Kind == SyntaxKind.Identifier)
                    {
                        return ParseAssignmentExpression();
                    }
                    if (Next.Kind == SyntaxKind.FunctionKeyword)
                    {
                        return ParseFunctionAssignmentStatement(SyntaxKind.FunctionKeyword);
                    }
                    if (Next.Kind == SyntaxKind.CoroutineKeyword)
                    {
                        return ParseFunctionAssignmentStatement(SyntaxKind.CoroutineKeyword);
                    }
                    break;

                case SyntaxKind.IfKeyword:
                    return ParseIfStatement(SyntaxKind.IfKeyword);

                case SyntaxKind.ReturnKeyword:
                    return ParseReturnStatement();

                case SyntaxKind.FunctionKeyword:
                case SyntaxKind.CoroutineKeyword:
                    // "function(" / "coroutine(" is an anonymous function expression and is left
                    // to the expression parser; anything else is a named function assignment.
                    if (Next.Kind != SyntaxKind.OpenParenthesis)
                    {
                        return ParseFunctionAssignmentStatement(Current.Kind);
                    }
                    break;

                case SyntaxKind.ForKeyword:
                    return ParseForStatement();

                case SyntaxKind.WhileKeyword:
                    return ParseWhileStatement();

                case SyntaxKind.BreakKeyword:
                    return new BreakStatementSyntax(NextToken());

                case SyntaxKind.YieldKeyword:
                    return ParseYieldStatement();
            }

            return ParseExpressionStatement();
        }

        /// <summary>
        /// Parses statements until one of <paramref name="endTokens"/> or end of file is reached.
        /// The terminating token itself is not consumed.
        /// </summary>
        private StatementSyntax ParseBlockStatement(SyntaxKind[] endTokens)
        {
            var statements = ImmutableArray.CreateBuilder<StatementSyntax>();
            var lastPosition = -1;
            while (!endTokens.Contains(Current.Kind) && Current.Kind != SyntaxKind.EndOfFile)
            {
                // Safety net: stop if the previous iteration consumed nothing.
                if (_position == lastPosition)
                {
                    break;
                }

                lastPosition = _position;
                statements.Add(ParseStatement());
            }

            return new BlockStatementSyntax(statements.ToImmutable());
        }

        /// <summary>
        /// Parses an <c>if</c> (or, recursively, <c>elseif</c>) statement including any
        /// <c>elseif</c>/<c>else</c> continuation.
        /// </summary>
        /// <param name="requiredToken">The keyword that opens this branch (if or elseif).</param>
        private StatementSyntax ParseIfStatement(SyntaxKind requiredToken)
        {
            var ifToken = MatchToken(requiredToken);
            // The condition is a plain expression. Building the statement wrapper directly
            // (instead of casting the result of ParseExpressionStatement) avoids an invalid cast
            // when the condition is an index expression followed by '='.
            var condition = new ExpressionStatementSyntax(ParseExpression());
            var thenToken = MatchToken(SyntaxKind.ThenKeyword);
            var block = (BlockStatementSyntax) ParseBlockStatement(new[]
            {
                SyntaxKind.EndKeyword, SyntaxKind.ElseIfKeyword, SyntaxKind.ElseKeyword
            });

            switch (Current.Kind)
            {
                case SyntaxKind.ElseIfKeyword:
                {
                    var nextElseIf = new ElseIfStatementSyntax(
                        (IfStatementSyntax) ParseIfStatement(SyntaxKind.ElseIfKeyword));
                    return new IfStatementSyntax(ifToken, condition, thenToken, block, nextElseIf);
                }
                case SyntaxKind.ElseKeyword:
                {
                    var elseToken = MatchToken(SyntaxKind.ElseKeyword);
                    var elseBlock = ParseBlockStatement(new[] {SyntaxKind.EndKeyword});
                    var endEndToken = MatchToken(SyntaxKind.EndKeyword);
                    var elseStatement = new ElseStatementSyntax(elseToken, (BlockStatementSyntax) elseBlock,
                        endEndToken);
                    return new IfStatementSyntax(ifToken, condition, thenToken, block, elseStatement);
                }
                default:
                {
                    // Either the closing 'end', or a truncated/malformed script. In the latter
                    // case MatchToken logs a diagnostic and fabricates the token, so the parser
                    // reports the problem instead of throwing.
                    var endToken = MatchToken(SyntaxKind.EndKeyword);
                    return new IfStatementSyntax(ifToken, condition, thenToken, block, endToken);
                }
            }
        }

        /// <summary>
        /// Parses a <c>for</c> loop, choosing the numeric form when the loop variable is directly
        /// followed by '=' and the generic (<c>in</c>) form otherwise.
        /// </summary>
        private StatementSyntax ParseForStatement()
        {
            var forToken = MatchToken(SyntaxKind.ForKeyword);
            if (Next.Kind == SyntaxKind.Equals)
            {
                return ParseNumericForStatement(forToken);
            }

            return ParseGenericForStatement(forToken);
        }

        /// <summary>
        /// Parses <c>identifier = v1, v2 [, v3] do ... end</c>; the third expression and its comma
        /// are passed as null when absent.
        /// </summary>
        private StatementSyntax ParseNumericForStatement(SyntaxToken forToken)
        {
            var identifier = MatchIdentifier();
            var equals = MatchToken(SyntaxKind.Equals);
            var v1 = ParseExpression();
            var comma1 = MatchToken(SyntaxKind.Comma);
            var v2 = ParseExpression();
            SyntaxToken comma2 = null;
            ExpressionSyntax v3 = null;
            if (Current.Kind == SyntaxKind.Comma)
            {
                comma2 = MatchToken(SyntaxKind.Comma);
                v3 = ParseExpression();
            }

            var doToken = MatchToken(SyntaxKind.DoKeyword);
            var block = ParseBlockStatement(new[] {SyntaxKind.EndKeyword});
            var endToken = MatchToken(SyntaxKind.EndKeyword);
            return new NumericForStatementSyntax(forToken, identifier, equals, v1, comma1, v2, comma2, v3,
                doToken, block, endToken);
        }

        /// <summary>
        /// Parses <c>identifier {, identifier} in expression do ... end</c>.
        /// </summary>
        private StatementSyntax ParseGenericForStatement(SyntaxToken forToken)
        {
            var variables = ImmutableArray.CreateBuilder<IdentifierToken>();
            while (true)
            {
                var startPosition = _position;
                variables.Add(MatchIdentifier());
                if (Current.Kind == SyntaxKind.InKeyword || Current.Kind == SyntaxKind.EndOfFile)
                {
                    break;
                }

                MatchToken(SyntaxKind.Comma);
                // Neither the identifier nor the comma matched: diagnostics have been logged,
                // and looping again would never make progress.
                if (_position == startPosition)
                {
                    break;
                }
            }

            var inKeyword = MatchToken(SyntaxKind.InKeyword);
            var enumerableExpression = ParseExpression();
            var doKeyword = MatchToken(SyntaxKind.DoKeyword);
            var block = ParseBlockStatement(new[] {SyntaxKind.EndKeyword});
            var endKeyword = MatchToken(SyntaxKind.EndKeyword);
            return new GenericForStatementSyntax(forToken, variables.ToImmutable(), inKeyword,
                enumerableExpression, doKeyword, (BlockStatementSyntax) block, endKeyword);
        }

        /// <summary>Parses <c>while expression do ... end</c>.</summary>
        private StatementSyntax ParseWhileStatement()
        {
            var whileToken = MatchToken(SyntaxKind.WhileKeyword);
            var expression = ParseExpression();
            var doToken = MatchToken(SyntaxKind.DoKeyword);
            var block = ParseBlockStatement(new[] {SyntaxKind.EndKeyword});
            var endKeyword = MatchToken(SyntaxKind.EndKeyword);
            return new WhileStatementSyntax(whileToken, expression, doToken, block, endKeyword);
        }

        /// <summary>
        /// Parses the parameters between the parentheses of a function definition. A parameter is
        /// either one identifier, or two consecutive identifiers, in which case the first is
        /// passed as the first <c>ParameterToken</c> argument and the second as the second.
        /// Parsing stops at ')', at end of file, or as soon as an iteration consumes no tokens.
        /// </summary>
        private ImmutableArray<ParameterToken> ParseParameterList()
        {
            var parameters = ImmutableArray.CreateBuilder<ParameterToken>();
            var lastPosition = -1;
            while (Current.Kind != SyntaxKind.CloseParenthesis && Current.Kind != SyntaxKind.EndOfFile)
            {
                if (_position == lastPosition)
                {
                    break;
                }

                lastPosition = _position;
                var firstToken = MatchIdentifier();
                if (Current.Kind == SyntaxKind.Identifier)
                {
                    parameters.Add(new ParameterToken(firstToken, (IdentifierToken) NextToken()));
                }
                else
                {
                    parameters.Add(new ParameterToken(null, firstToken));
                }

                if (Current.Kind == SyntaxKind.Comma)
                {
                    NextToken();
                }
            }

            return parameters.ToImmutable();
        }

        /// <summary>
        /// Parses <c>( parameters ) block end</c> and wraps it, together with the already consumed
        /// opening keyword, in a <c>FunctionExpressionSyntax</c>.
        /// </summary>
        private FunctionExpressionSyntax ParseFunctionSignatureAndBody(SyntaxToken keywordToken, bool isCoroutine)
        {
            var openParenthesis = MatchToken(SyntaxKind.OpenParenthesis);
            var parameters = ParseParameterList();
            var closeParenthesis = MatchToken(SyntaxKind.CloseParenthesis);
            var block = ParseBlockStatement(new[] {SyntaxKind.EndKeyword});
            var endToken = MatchToken(SyntaxKind.EndKeyword);
            return new FunctionExpressionSyntax(keywordToken, openParenthesis, parameters, closeParenthesis,
                (BlockStatementSyntax) block, endToken, isCoroutine);
        }

        /// <summary>Parses an anonymous function or coroutine expression.</summary>
        /// <param name="openKeyword">The keyword that opens the expression (function or coroutine).</param>
        private ExpressionSyntax ParseFunctionExpression(SyntaxKind openKeyword)
        {
            var functionToken = MatchToken(openKeyword);
            return ParseFunctionSignatureAndBody(functionToken, openKeyword == SyntaxKind.CoroutineKeyword);
        }

        /// <summary>
        /// Parses <c>[local] function|coroutine name(parameters) ... end</c>. Comment data is taken
        /// from the <c>local</c> token when present and non-null, otherwise from the keyword token.
        /// </summary>
        private StatementSyntax ParseFunctionAssignmentStatement(SyntaxKind identifyingKeyword)
        {
            SyntaxToken localToken = null;
            string[] commentData = null;
            if (Current.Kind == SyntaxKind.LocalKeyword)
            {
                localToken = NextToken();
                commentData = localToken.CommentData;
            }

            var functionToken = MatchToken(identifyingKeyword);
            if (commentData == null)
            {
                commentData = functionToken.CommentData;
            }

            var identifier = MatchIdentifier();
            var functionExpression = ParseFunctionSignatureAndBody(functionToken,
                identifyingKeyword == SyntaxKind.CoroutineKeyword);
            return new FunctionAssignmentStatementSyntax(localToken, identifier, functionExpression)
            {
                CommentData = commentData
            };
        }

        /// <summary>
        /// Parses an expression; when it is an index or full-stop index expression directly
        /// followed by '=', the statement is parsed as a table assignment instead.
        /// </summary>
        private StatementSyntax ParseExpressionStatement()
        {
            var expression = ParseExpression();
            var isIndexer = expression.Kind == SyntaxKind.IndexExpression ||
                            expression.Kind == SyntaxKind.FullStopIndexExpression;
            if (isIndexer && Current.Kind == SyntaxKind.Equals)
            {
                return ParseTableAssignmentExpression(expression);
            }

            return new ExpressionStatementSyntax(expression);
        }

        /// <summary>
        /// Parses <c>return [expression]</c>. No expression is parsed when the next token is
        /// <c>end</c> or when the return token reports that it is followed by a line break.
        /// </summary>
        private StatementSyntax ParseReturnStatement()
        {
            var returnToken = (ReturnSyntaxToken) MatchToken(SyntaxKind.ReturnKeyword);
            ExpressionSyntax expression = null;
            if (Current.Kind != SyntaxKind.EndKeyword && !returnToken.FollowedByLineBreak)
            {
                expression = ParseExpression();
            }

            return new ReturnStatementSyntax(returnToken, expression);
        }

        /// <summary>Parses <c>yield expression</c>.</summary>
        private StatementSyntax ParseYieldStatement()
        {
            var yieldToken = MatchToken(SyntaxKind.YieldKeyword);
            var expression = ParseExpression();
            return new YieldStatementSyntax(yieldToken, expression);
        }

        /// <summary>
        /// Parses an anonymous function/coroutine expression or a binary expression, followed by
        /// any trailing index, call or member-access suffixes.
        /// </summary>
        private ExpressionSyntax ParseExpression()
        {
            ExpressionSyntax expression;
            if (Current.Kind == SyntaxKind.FunctionKeyword && Next.Kind == SyntaxKind.OpenParenthesis)
            {
                expression = ParseFunctionExpression(SyntaxKind.FunctionKeyword);
            }
            else if (Current.Kind == SyntaxKind.CoroutineKeyword && Next.Kind == SyntaxKind.OpenParenthesis)
            {
                // Must be parsed with the coroutine keyword: matching against the function keyword
                // would log a diagnostic, leave the coroutine token unconsumed and lose the
                // coroutine flag on the resulting node.
                expression = ParseFunctionExpression(SyntaxKind.CoroutineKeyword);
            }
            else
            {
                expression = ParseBinaryExpression();
            }

            return HandleComplexExpression(expression);
        }

        /// <summary>
        /// Repeatedly wraps <paramref name="baseExpression"/> in index (<c>[...]</c>), call
        /// (<c>(...)</c>) and member (<c>.name</c>) expressions for as long as such a suffix follows.
        /// </summary>
        private ExpressionSyntax HandleComplexExpression(ExpressionSyntax baseExpression)
        {
            var lastPosition = -1;
            while (Current.Kind == SyntaxKind.OpenBracket || Current.Kind == SyntaxKind.OpenParenthesis ||
                   Current.Kind == SyntaxKind.FullStop)
            {
                if (_position == lastPosition)
                {
                    break;
                }

                lastPosition = _position;
                switch (Current.Kind)
                {
                    case SyntaxKind.OpenBracket:
                        baseExpression = ParseIndexExpression(baseExpression);
                        break;
                    case SyntaxKind.OpenParenthesis:
                        baseExpression = ParseFunctionCallExpression(baseExpression);
                        break;
                    case SyntaxKind.FullStop:
                        baseExpression = ParseFullStopIndexExpression(baseExpression);
                        break;
                }
            }

            return baseExpression;
        }

        /// <summary>
        /// Parses <c>[local] target = expression</c> or the multi-assignment form
        /// <c>[local] a, b, ... = expression</c>. Comment data comes from the <c>local</c> token
        /// when present and non-null, otherwise from the target identifier.
        /// </summary>
        private StatementSyntax ParseAssignmentExpression()
        {
            string[] commentData = null;
            SyntaxToken localKeyword = null;
            if (Current.Kind == SyntaxKind.LocalKeyword)
            {
                localKeyword = MatchToken(SyntaxKind.LocalKeyword);
                commentData = localKeyword.CommentData;
            }

            var identifier = ParseExpression();
            if (commentData == null && identifier.Kind == SyntaxKind.VariableExpression)
            {
                var variable = (VariableExpressionSyntax) identifier;
                commentData = variable.Identifier.CommentData;
            }

            if (Current.Kind == SyntaxKind.Comma)
            {
                if (identifier.Kind != SyntaxKind.VariableExpression)
                {
                    _diagnostics.LogError("Only identifiers can be used for a multi assignment statement.",
                        identifier.Span);
                    return new ExpressionStatementSyntax(new BadExpressionSyntax(identifier.Span));
                }

                var firstVariable = (VariableExpressionSyntax) identifier;
                var targets = new List<IdentifierToken> {firstVariable.Identifier};
                while (Current.Kind == SyntaxKind.Comma)
                {
                    NextToken();
                    targets.Add(MatchIdentifier());
                }

                var assignmentTokenMulti = MatchToken(SyntaxKind.Equals);
                var expressionMulti = ParseExpression();
                return new MultiAssignmentStatementSyntax(localKeyword, targets, assignmentTokenMulti,
                    expressionMulti);
            }

            var assignmentToken = MatchToken(SyntaxKind.Equals);
            var expression = ParseExpression();
            return new AssignmentStatementSyntax(localKeyword, identifier, assignmentToken, expression)
            {
                CommentData = commentData
            };
        }

        /// <summary>Parses <c>= expression</c> following an already parsed index expression.</summary>
        private StatementSyntax ParseTableAssignmentExpression(ExpressionSyntax tableExpression)
        {
            var assignmentToken = MatchToken(SyntaxKind.Equals);
            var expression = ParseExpression();
            return new TableAssigmentStatementSyntax(tableExpression, assignmentToken, expression);
        }

        /// <summary>
        /// Precedence-climbing parser for unary and binary operators.
        /// </summary>
        /// <param name="parentPrecedence">
        /// Precedence of the enclosing operator; binary operators that do not bind tighter than
        /// this are left for the caller.
        /// </param>
        private ExpressionSyntax ParseBinaryExpression(
            SyntaxKindPrecedence.Precedence parentPrecedence = SyntaxKindPrecedence.Precedence.None)
        {
            ExpressionSyntax left;
            var unaryOperatorPrecedence = Current.Kind.UnaryOperatorPrecedence();
            if (unaryOperatorPrecedence != SyntaxKindPrecedence.Precedence.None &&
                unaryOperatorPrecedence >= parentPrecedence)
            {
                var operatorToken = NextToken();
                var operand = ParseBinaryExpression(unaryOperatorPrecedence);
                left = new UnaryExpressionSyntax(operatorToken, operand);
            }
            else
            {
                left = ParsePrimaryExpression();
            }

            left = HandleComplexExpression(left);
            while (true)
            {
                var precedence = Current.Kind.BinaryOperatorPrecedence();
                if (precedence == SyntaxKindPrecedence.Precedence.None || precedence <= parentPrecedence)
                {
                    break;
                }

                var op = NextToken();
                var right = ParseBinaryExpression(precedence);
                right = HandleComplexExpression(right);
                left = new BinaryExpressionSyntax(left, op, right);
            }

            return left;
        }

        /// <summary>
        /// Parses a parenthesised expression, literal, variable or table. Any other token is
        /// reported, consumed, and represented by a <c>BadExpressionSyntax</c>.
        /// </summary>
        private ExpressionSyntax ParsePrimaryExpression()
        {
            switch (Current.Kind)
            {
                case SyntaxKind.OpenParenthesis:
                    return ParseParenthesizedExpression();
                case SyntaxKind.Number:
                    return ParseNumber();
                case SyntaxKind.TrueKeyword:
                case SyntaxKind.FalseKeyword:
                    return ParseBoolean();
                case SyntaxKind.String:
                    return ParseString();
                case SyntaxKind.Identifier:
                    return ParseVariableExpression();
                case SyntaxKind.OpenBrace:
                    return ParseTable();
                case SyntaxKind.NilKeyword:
                {
                    var nilToken = MatchToken(SyntaxKind.NilKeyword);
                    return new LiteralExpressionSyntax(nilToken, null);
                }
                default:
                {
                    _diagnostics.LogBadCharacter(Current.Span);
                    var badExpression = new BadExpressionSyntax(Current.Span);
                    // Consume the offending token so the caller always makes progress.
                    NextToken();
                    return badExpression;
                }
            }
        }

        /// <summary>Parses a single identifier as a variable reference.</summary>
        private ExpressionSyntax ParseVariableExpression()
        {
            return new VariableExpressionSyntax(MatchIdentifier());
        }

        /// <summary>
        /// Parses the argument list of a call on <paramref name="expression"/>. A comma after an
        /// argument is consumed when present but is not required.
        /// </summary>
        private ExpressionSyntax ParseFunctionCallExpression(ExpressionSyntax expression)
        {
            var openParenthesis = MatchToken(SyntaxKind.OpenParenthesis);
            var parameters = ImmutableArray.CreateBuilder<ExpressionSyntax>();
            var lastPosition = -1;
            while (Current.Kind != SyntaxKind.CloseParenthesis && Current.Kind != SyntaxKind.EndOfFile)
            {
                if (_position == lastPosition)
                {
                    break;
                }

                lastPosition = _position;
                parameters.Add(ParseExpression());
                if (Current.Kind == SyntaxKind.Comma)
                {
                    NextToken();
                }
            }

            var closeParenthesis = MatchToken(SyntaxKind.CloseParenthesis);
            return new FunctionCallExpressionSyntax(expression, openParenthesis, parameters.ToImmutable(),
                closeParenthesis);
        }

        /// <summary>Parses <c>[ expression ]</c> applied to <paramref name="expression"/>.</summary>
        private ExpressionSyntax ParseIndexExpression(ExpressionSyntax expression)
        {
            var openBracket = MatchToken(SyntaxKind.OpenBracket);
            var index = ParseExpression();
            var closeBracket = MatchToken(SyntaxKind.CloseBracket);
            return new IndexExpressionSyntax(expression, openBracket, index, closeBracket);
        }

        /// <summary>Parses <c>.identifier</c> applied to <paramref name="expression"/>.</summary>
        private ExpressionSyntax ParseFullStopIndexExpression(ExpressionSyntax expression)
        {
            var fullStop = MatchToken(SyntaxKind.FullStop);
            if (Current.Kind == SyntaxKind.Identifier)
            {
                var index = NextToken();
                return new FullStopIndexExpressionSyntax(expression, fullStop, (IdentifierToken) index);
            }

            _diagnostics.LogBadCharacter(expression.Span, SyntaxKind.Identifier);
            // We'll still want to return an index expression, but we just express that the
            // identifier is empty. This is helpful for tools, etc.
            return new FullStopIndexExpressionSyntax(expression, fullStop,
                new IdentifierToken(string.Empty, fullStop.Span));
        }

        /// <summary>Parses <c>( expression )</c>.</summary>
        private ExpressionSyntax ParseParenthesizedExpression()
        {
            var l = MatchToken(SyntaxKind.OpenParenthesis);
            var e = ParseExpression();
            var r = MatchToken(SyntaxKind.CloseParenthesis);
            return new ParenthesizedExpressionSyntax(l, e, r);
        }

        /// <summary>Parses a number literal, using the token's value as the literal value.</summary>
        private ExpressionSyntax ParseNumber()
        {
            var numberToken = MatchToken(SyntaxKind.Number);
            return new LiteralExpressionSyntax(numberToken, numberToken.Value);
        }

        /// <summary>Parses a <c>true</c> or <c>false</c> keyword as a boolean literal.</summary>
        private ExpressionSyntax ParseBoolean()
        {
            var isTrue = Current.Kind == SyntaxKind.TrueKeyword;
            var token = MatchToken(isTrue ? SyntaxKind.TrueKeyword : SyntaxKind.FalseKeyword);
            return new LiteralExpressionSyntax(token, isTrue);
        }

        /// <summary>Parses a string literal, using the token's value as the literal value.</summary>
        private ExpressionSyntax ParseString()
        {
            var stringToken = MatchToken(SyntaxKind.String);
            return new LiteralExpressionSyntax(stringToken, stringToken.Value);
        }

        /// <summary>
        /// Parses a table constructor <c>{ item, item, ... }</c>. Each item is parsed as a full
        /// statement; parsing of items stops at '}' or after the first item that is not followed
        /// by a comma.
        /// </summary>
        private ExpressionSyntax ParseTable()
        {
            var openBrace = MatchToken(SyntaxKind.OpenBrace);
            var items = ImmutableArray.CreateBuilder<SyntaxNode>();
            var lastCommaFound = true;
            var lastPosition = -1;
            while (Current.Kind != SyntaxKind.CloseBrace)
            {
                if (!lastCommaFound || _position == lastPosition)
                {
                    break;
                }

                lastPosition = _position;
                SyntaxNode node = ParseStatement();
                items.Add(node);
                lastCommaFound = Current.Kind == SyntaxKind.Comma;
                if (lastCommaFound)
                {
                    NextToken();
                }
            }

            var closeBrace = MatchToken(SyntaxKind.CloseBrace);
            return new TableExpressionSyntax(openBrace, items.ToImmutable(), closeBrace);
        }
    }
}