From c4c3f650746cda9d694333f702b8611f3bc5abc8 Mon Sep 17 00:00:00 2001 From: Deukhoofd Date: Tue, 30 Apr 2019 15:28:43 +0200 Subject: [PATCH] Faster number lexing, better handling of exceptions in C# code --- .../ScriptMethodInfoFunction.cs | 3 +- Upsilon/Exceptions/EvaluationException.cs | 15 ++- Upsilon/Parser/IdentifierToken.cs | 2 +- Upsilon/Parser/Lexer.cs | 114 ++++++++++++------ Upsilon/Parser/Parser.cs | 4 +- Upsilon/Parser/SyntaxToken.cs | 4 +- .../BasicFunctionsTests.cs | 7 +- 7 files changed, 96 insertions(+), 53 deletions(-) diff --git a/Upsilon/BaseTypes/ScriptFunction/ScriptMethodInfoFunction.cs b/Upsilon/BaseTypes/ScriptFunction/ScriptMethodInfoFunction.cs index 795b73d..89bb701 100644 --- a/Upsilon/BaseTypes/ScriptFunction/ScriptMethodInfoFunction.cs +++ b/Upsilon/BaseTypes/ScriptFunction/ScriptMethodInfoFunction.cs @@ -159,8 +159,7 @@ namespace Upsilon.BaseTypes.ScriptFunction { Exception exception = e; if (e.InnerException != null) exception = e.InnerException; - throw new EvaluationException(state.Script.FileName, - "An Exception occured while executing a C# function:\n" + exception, span, state.Stacktrace); + throw new EvaluationException(state.Script.FileName, exception, span, state.Stacktrace); } return result; } diff --git a/Upsilon/Exceptions/EvaluationException.cs b/Upsilon/Exceptions/EvaluationException.cs index e20ec2e..8d4f307 100644 --- a/Upsilon/Exceptions/EvaluationException.cs +++ b/Upsilon/Exceptions/EvaluationException.cs @@ -14,10 +14,19 @@ namespace Upsilon.Exceptions public EvaluationException(string fileName, string message, TextSpan span, Stacktrace stacktrace = null) { - _stacktrace = stacktrace; - FileName = fileName; + _stacktrace = stacktrace; + FileName = fileName; ErrorMessage = message; - Span = span; + Span = span; + } + + public EvaluationException(string fileName, Exception inner, TextSpan span, Stacktrace stacktrace = null) + : base("An exception occured in called C# code. See the inner exception for more details.", inner) + { + _stacktrace = stacktrace; + FileName = fileName; + ErrorMessage = Message; + Span = span; } public override string ToString() diff --git a/Upsilon/Parser/IdentifierToken.cs b/Upsilon/Parser/IdentifierToken.cs index 85ccdb7..668aced 100644 --- a/Upsilon/Parser/IdentifierToken.cs +++ b/Upsilon/Parser/IdentifierToken.cs @@ -6,7 +6,7 @@ namespace Upsilon.Parser public class IdentifierToken : SyntaxToken { public IdentifierToken(string name, TextSpan position) - : base(SyntaxKind.Identifier, position, name, null) + : base(SyntaxKind.Identifier, position, null) { Name = name; } diff --git a/Upsilon/Parser/Lexer.cs b/Upsilon/Parser/Lexer.cs index d1bbe60..dde0f26 100644 --- a/Upsilon/Parser/Lexer.cs +++ b/Upsilon/Parser/Lexer.cs @@ -88,53 +88,53 @@ namespace Upsilon.Parser switch (Current) { case '\0': - return new SyntaxToken(SyntaxKind.EndOfFile, new TextSpan(_linePosition, _position, _linePosition, _position + 1), "\0", null); + return new SyntaxToken(SyntaxKind.EndOfFile, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); case ' ': case '\t': case '\r': - return new SyntaxToken(SyntaxKind.WhiteSpace, new TextSpan(_linePosition, _position, _linePosition, _position + 1), Current.ToString(), null); + return new SyntaxToken(SyntaxKind.WhiteSpace, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); case '\n': { _linePosition++; var pos = _position; _position = -1; - return new SyntaxToken(SyntaxKind.WhiteSpace, new TextSpan(_linePosition, pos, _linePosition, pos + 1), "\n", null); + return new SyntaxToken(SyntaxKind.WhiteSpace, new TextSpan(_linePosition, pos, _linePosition, pos + 1), null); } case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return LexNumber(); case '+': - return new SyntaxToken(SyntaxKind.Plus, new TextSpan(_linePosition, _position, _linePosition, _position + 1), "+", null); + return new SyntaxToken(SyntaxKind.Plus, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); case '-': if (Next == '-') { _position++; return LexComments(); } - return new SyntaxToken(SyntaxKind.Minus, new TextSpan(_linePosition, _position, _linePosition, _position + 1), "-", null); + return new SyntaxToken(SyntaxKind.Minus, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); case '*': - return new SyntaxToken(SyntaxKind.Star, new TextSpan(_linePosition, _position, _linePosition, _position + 1), "*", null); + return new SyntaxToken(SyntaxKind.Star, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); case '/': - return new SyntaxToken(SyntaxKind.Slash, new TextSpan(_linePosition, _position, _linePosition, _position + 1), "/", null); + return new SyntaxToken(SyntaxKind.Slash, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); case '(': - return new SyntaxToken(SyntaxKind.OpenParenthesis, new TextSpan(_linePosition, _position, _linePosition, _position + 1), "(", null); + return new SyntaxToken(SyntaxKind.OpenParenthesis, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); case ')': - return new SyntaxToken(SyntaxKind.CloseParenthesis, new TextSpan(_linePosition, _position, _linePosition, _position + 1), ")", null); + return new SyntaxToken(SyntaxKind.CloseParenthesis, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); case '{': - return new SyntaxToken(SyntaxKind.OpenBrace, new TextSpan(_linePosition, _position, _linePosition, _position + 1), "{", null); + return new SyntaxToken(SyntaxKind.OpenBrace, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); case '}': - return new SyntaxToken(SyntaxKind.CloseBrace, new TextSpan(_linePosition, _position, _linePosition, _position + 1), "}", null); + return new SyntaxToken(SyntaxKind.CloseBrace, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); case '[': - return new SyntaxToken(SyntaxKind.OpenBracket, new TextSpan(_linePosition, _position, _linePosition, _position + 1), "[", null); + return new SyntaxToken(SyntaxKind.OpenBracket, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); case ']': - return new SyntaxToken(SyntaxKind.CloseBracket, new TextSpan(_linePosition, _position, _linePosition, _position + 1), "]", null); + return new SyntaxToken(SyntaxKind.CloseBracket, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); case '.': - return new SyntaxToken(SyntaxKind.FullStop, new TextSpan(_linePosition, _position, _linePosition, _position + 1), ".", null); + return new SyntaxToken(SyntaxKind.FullStop, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); case ',': - return new SyntaxToken(SyntaxKind.Comma, new TextSpan(_linePosition, _position, _linePosition, _position + 1), ",", null); + return new SyntaxToken(SyntaxKind.Comma, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); case '#': - return new SyntaxToken(SyntaxKind.PoundSign, new TextSpan(_linePosition, _position, _linePosition, _position + 1), "#", null); + return new SyntaxToken(SyntaxKind.PoundSign, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); case '%': - return new SyntaxToken(SyntaxKind.PercentSign, new TextSpan(_linePosition, _position, _linePosition, _position + 1), "%", null); + return new SyntaxToken(SyntaxKind.PercentSign, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); case '^': - return new SyntaxToken(SyntaxKind.RoofSign, new TextSpan(_linePosition, _position, _linePosition, _position + 1), "^", null); + return new SyntaxToken(SyntaxKind.RoofSign, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); case '"': case '\'': return LexString(Current); @@ -142,35 +142,35 @@ namespace Upsilon.Parser if (Next == '=') { _position++; - return new SyntaxToken(SyntaxKind.EqualsEquals, new TextSpan(_linePosition, _position - 1, _linePosition, _position + 1), "==", null); + return new SyntaxToken(SyntaxKind.EqualsEquals, new TextSpan(_linePosition, _position - 1, _linePosition, _position + 1), null); } - return new SyntaxToken(SyntaxKind.Equals, new TextSpan(_linePosition, _position, _linePosition, _position + 1), "=", null); + return new SyntaxToken(SyntaxKind.Equals, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); case '~': if (Next == '=') { _position++; - return new SyntaxToken(SyntaxKind.TildeEquals, new TextSpan(_linePosition, _position - 1, _linePosition, _position + 1), "~=", null); + return new SyntaxToken(SyntaxKind.TildeEquals, new TextSpan(_linePosition, _position - 1, _linePosition, _position + 1), null); } - return new SyntaxToken(SyntaxKind.Tilde, new TextSpan(_linePosition, _position, _linePosition, _position + 1), "~", null); + return new SyntaxToken(SyntaxKind.Tilde, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); case '<': if (Next == '=') { _position++; - return new SyntaxToken(SyntaxKind.LessEquals, new TextSpan(_linePosition, _position - 1, _linePosition, _position + 1), "<=", null); + return new SyntaxToken(SyntaxKind.LessEquals, new TextSpan(_linePosition, _position - 1, _linePosition, _position + 1), null); } - return new SyntaxToken(SyntaxKind.Less, new TextSpan(_linePosition, _position, _linePosition, _position + 1), "<", null); + return new SyntaxToken(SyntaxKind.Less, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); case '>': if (Next == '=') { _position++; - return new SyntaxToken(SyntaxKind.GreaterEquals, new TextSpan(_linePosition, _position - 1, _linePosition, _position + 1), ">=", null); + return new SyntaxToken(SyntaxKind.GreaterEquals, new TextSpan(_linePosition, _position - 1, _linePosition, _position + 1), null); } - return new SyntaxToken(SyntaxKind.Greater, new TextSpan(_linePosition, _position, _linePosition, _position + 1), ">", null); + return new SyntaxToken(SyntaxKind.Greater, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); default: if (char.IsLetter(Current) || Current == '_') return LexIdentifierOrKeyword(); _diagnostics.LogBadCharacter(new TextSpan(_linePosition, _position, _linePosition, _position + 1), SyntaxKind.Identifier); - return new SyntaxToken(SyntaxKind.BadToken, new TextSpan(_linePosition, _position, _linePosition, _position + 1), "", null); + return new SyntaxToken(SyntaxKind.BadToken, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); } } @@ -178,30 +178,65 @@ namespace Upsilon.Parser { var start = _position; var hasDecimalPoint = false; - var numStr = new StringBuilder(); - numStr.Append(Current); - while (char.IsDigit(Next) || Next == '.' || Next == '_') + long num = ParseChar(Current); + double floatNum = 0f; + var decimalPlace = 0; + char c = Next; + while (char.IsDigit(c) || c == '.' || c == '_') { - if (Next == '.') + if (c == '.') { if (hasDecimalPoint) { _diagnostics.LogBadCharacter(new TextSpan(_linePosition, _position, _linePosition, _position + 1), SyntaxKind.Number); - return new SyntaxToken(SyntaxKind.BadToken, new TextSpan(_linePosition, _position, _linePosition, _position + 1), "", null); + return new SyntaxToken(SyntaxKind.BadToken, new TextSpan(_linePosition, _position, _linePosition, _position + 1), null); } hasDecimalPoint = true; + floatNum = Convert.ToDouble(num); + decimalPlace++; + } + else if (c != '_') + { + var parsed = ParseChar(c); + if (hasDecimalPoint) + { + floatNum += parsed / Math.Pow(10, decimalPlace); + decimalPlace++; + } + else + { + num *= 10; + num += parsed; + } } - if (Next != '_') - numStr.Append(Next); _position++; + c = Next; } object o; if (hasDecimalPoint) - o = double.Parse(numStr.ToString()); + o = floatNum; else - o = long.Parse(numStr.ToString()); - return new SyntaxToken(SyntaxKind.Number, new TextSpan(_linePosition, start, _linePosition, _position + 1), numStr.ToString(), o); + o = num; + return new SyntaxToken(SyntaxKind.Number, new TextSpan(_linePosition, start, _linePosition, _position + 1), o); + } + + private static byte ParseChar(char c) + { + switch (c) + { + case '0': return 0; + case '1': return 1; + case '2': return 2; + case '3': return 3; + case '4': return 4; + case '5': return 5; + case '6': return 6; + case '7': return 7; + case '8': return 8; + case '9': return 9; + default: throw new ArgumentException($"Expected digit, got {c}."); + } } private SyntaxToken LexString(char current) @@ -230,8 +265,7 @@ namespace Upsilon.Parser } var res = sb.ToString(); - return new SyntaxToken(SyntaxKind.String, new TextSpan(_linePosition, start, _linePosition, _position + 1), - $"\"{res}\"", res); + return new SyntaxToken(SyntaxKind.String, new TextSpan(_linePosition, start, _linePosition, _position + 1), res); } private SyntaxToken LexIdentifierOrKeyword() @@ -257,7 +291,7 @@ namespace Upsilon.Parser { return new ReturnSyntaxToken(new TextSpan(_linePosition, start, _linePosition, _position + 1), Next == Environment.NewLine[0]); } - return new SyntaxToken(kind, new TextSpan(_linePosition, start, _linePosition, _position + 1), str, null); + return new SyntaxToken(kind, new TextSpan(_linePosition, start, _linePosition, _position + 1), null); } private SyntaxToken LexComments() diff --git a/Upsilon/Parser/Parser.cs b/Upsilon/Parser/Parser.cs index 45fb5d5..0a39e88 100644 --- a/Upsilon/Parser/Parser.cs +++ b/Upsilon/Parser/Parser.cs @@ -29,7 +29,7 @@ namespace Upsilon.Parser private SyntaxToken Get(int offset) { if (_position + offset >= _tokens.Length) - return new SyntaxToken(SyntaxKind.EndOfFile, _tokens.Last().Span, "\0", null); + return new SyntaxToken(SyntaxKind.EndOfFile, _tokens.Last().Span, null); else { return _tokens[_position + offset]; @@ -49,7 +49,7 @@ namespace Upsilon.Parser return NextToken(); _diagnostics.LogBadCharacter(Current.Span, kind, Current.Kind); - return new SyntaxToken(kind, Current.Span, "", null); + return new SyntaxToken(kind, Current.Span, null); } private StatementSyntax ParseScriptSyntax() diff --git a/Upsilon/Parser/SyntaxToken.cs b/Upsilon/Parser/SyntaxToken.cs index 9b37483..be584ee 100644 --- a/Upsilon/Parser/SyntaxToken.cs +++ b/Upsilon/Parser/SyntaxToken.cs @@ -5,7 +5,7 @@ namespace Upsilon.Parser { public class SyntaxToken : SyntaxNode { - public SyntaxToken(SyntaxKind kind, TextSpan position, string text, object value) + public SyntaxToken(SyntaxKind kind, TextSpan position, object value) { Kind = kind; Span = position; @@ -34,7 +34,7 @@ namespace Upsilon.Parser public bool FollowedByLineBreak { get; } public ReturnSyntaxToken(TextSpan position, bool followedByLineBreak) - : base(SyntaxKind.ReturnKeyword, position, "return", null) + : base(SyntaxKind.ReturnKeyword, position, null) { FollowedByLineBreak = followedByLineBreak; } diff --git a/UpsilonTests/StandardLibraryTests/BasicFunctionsTests.cs b/UpsilonTests/StandardLibraryTests/BasicFunctionsTests.cs index 2fc8571..27d9247 100644 --- a/UpsilonTests/StandardLibraryTests/BasicFunctionsTests.cs +++ b/UpsilonTests/StandardLibraryTests/BasicFunctionsTests.cs @@ -1,5 +1,6 @@ using System; using Upsilon; +using Upsilon.Exceptions; using Xunit; namespace UpsilonTests.StandardLibraryTests @@ -14,14 +15,14 @@ namespace UpsilonTests.StandardLibraryTests public void AssertTest() { Executor.EvaluateScript("assert(true)", Options); - Assert.Throws(() => Executor.EvaluateScript("assert(false)", Options)); + Assert.Throws(() => Executor.EvaluateScript("assert(false)", Options)); } [Fact] public void Error() { - var e = Assert.Throws(() => Executor.EvaluateScript(@"error(""test_error"")", Options)); - Assert.Equal("test_error", e.Message); + var e = Assert.Throws(() => Executor.EvaluateScript(@"error(""test_error"")", Options)); + Assert.Equal("test_error", e.InnerException.Message); } [Fact]