285 lines
13 KiB
C#
285 lines
13 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.Collections.Immutable;
|
|
using System.Text;
|
|
using Upsilon.Text;
|
|
|
|
namespace Upsilon.Parser
|
|
{
|
|
public class Lexer
|
|
{
|
|
/// <summary>
/// When true, the text of comment tokens is collected so it can be attached
/// to the following function/local/identifier token.
/// </summary>
private bool SaveComments { get; }

/// <summary>The source text, split into lines on '\n'.</summary>
private readonly string[] _text;

/// <summary>Sink used to report bad characters found while lexing.</summary>
private readonly Diagnostics _diagnostics;

/// <summary>Index into <see cref="_text"/> of the line currently being read.</summary>
private int _linePosition;

/// <summary>Index of the current character within the current line.</summary>
private int _position;

/// <summary>
/// Creates a lexer over <paramref name="text"/>. Instances are only created
/// through the static <c>Lex</c> entry point.
/// </summary>
/// <param name="text">Source text to tokenize; split into lines on '\n'.</param>
/// <param name="diagnostics">Receives diagnostics for bad characters.</param>
/// <param name="saveComments">Whether comment text should be retained.</param>
private Lexer(string text, Diagnostics diagnostics, bool saveComments)
{
    _text = text.Split('\n');
    _diagnostics = diagnostics;
    SaveComments = saveComments;
}
|
|
|
|
/// <summary>
/// Tokenizes <paramref name="text"/> and returns the resulting tokens.
/// </summary>
/// <param name="text">Source text to tokenize.</param>
/// <param name="diagnostics">Receives diagnostics produced while lexing.</param>
/// <param name="saveComments">Whether comment text should be retained and attached to tokens.</param>
/// <returns>The lexed tokens; the last one is the end-of-file token.</returns>
public static ImmutableArray<SyntaxToken> Lex(string text, Diagnostics diagnostics, bool saveComments)
    => new Lexer(text, diagnostics, saveComments).Lex();
|
|
|
|
/// <summary>
/// The character at the current line/column. Yields '\0' once every line has
/// been consumed, and a synthetic '\n' when the column is at or past the end
/// of the current line (the real '\n' characters were removed by the split).
/// </summary>
private char Current
{
    get
    {
        if (_linePosition >= _text.Length)
        {
            return '\0';
        }

        var line = _text[_linePosition];
        return _position < line.Length ? line[_position] : '\n';
    }
}
|
|
|
|
/// <summary>
/// One-character lookahead on the current line. Yields '\0' once every line
/// has been consumed, and a synthetic '\n' when the lookahead column is at or
/// past the end of the current line.
/// </summary>
private char Next
{
    get
    {
        if (_linePosition >= _text.Length)
        {
            return '\0';
        }

        var line = _text[_linePosition];
        var lookahead = _position + 1;
        return lookahead < line.Length ? line[lookahead] : '\n';
    }
}
|
|
|
|
/// <summary>
/// Comment texts seen since the last emitted token; handed to the next
/// function/local/identifier token and then discarded.
/// </summary>
private readonly List<string> _activeComments = new List<string>();

/// <summary>
/// Runs the lexer to the end of the input. Whitespace and comment tokens are
/// not emitted; the end-of-file token is the last element of the result.
/// </summary>
private ImmutableArray<SyntaxToken> Lex()
{
    var tokens = ImmutableArray.CreateBuilder<SyntaxToken>();
    for (;;)
    {
        var token = LexNext();
        var kind = token.Kind;

        if (kind == SyntaxKind.Comment)
        {
            if (SaveComments)
            {
                _activeComments.Add(token.Value.ToString());
            }

            // LexComments has already stepped past the comment, so the
            // position is deliberately not advanced here.
            continue;
        }

        if (kind == SyntaxKind.WhiteSpace)
        {
            _position++;
            continue;
        }

        var takesComments = kind == SyntaxKind.FunctionKeyword
                            || kind == SyntaxKind.LocalKeyword
                            || kind == SyntaxKind.Identifier;
        if (takesComments)
        {
            token.CommentData = _activeComments.ToArray();
        }

        // Pending comments are dropped after every emitted token, whether or
        // not that token received them.
        _activeComments.Clear();
        tokens.Add(token);

        if (kind == SyntaxKind.EndOfFile)
        {
            break;
        }

        _position++;
    }

    return tokens.ToImmutable();
}
|
|
|
|
/// <summary>
/// Lexes the token that starts at the current position.
/// </summary>
/// <remarks>
/// On return the position is left on the LAST character of the token (the
/// caller advances past it), except for comments, where LexComments has
/// already stepped past the comment text, and for line breaks, where the
/// position is reset to -1 so the caller's increment lands on column 0 of
/// the next line.
/// </remarks>
/// <returns>The lexed token; a BadToken (with a logged diagnostic) for unknown characters.</returns>
private SyntaxToken LexNext()
{
    var current = Current;
    switch (current)
    {
        case '\0':
            return SingleCharToken(SyntaxKind.EndOfFile, "\0");
        case ' ':
        case '\t':
        case '\r':
            return SingleCharToken(SyntaxKind.WhiteSpace, current.ToString());
        case '\n':
        {
            // Build the span BEFORE moving to the next line so the line break
            // is reported on the line it terminates, not the following one.
            var span = SingleCharSpan();
            _linePosition++;
            _position = -1;
            return new SyntaxToken(SyntaxKind.WhiteSpace, span, "\n", null);
        }
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            return LexNumber();
        case '+':
            return SingleCharToken(SyntaxKind.Plus, "+");
        case '-':
            if (Next == '-')
            {
                // "--" starts a comment; step onto the second dash first.
                _position++;
                return LexComments();
            }

            return SingleCharToken(SyntaxKind.Minus, "-");
        case '*':
            return SingleCharToken(SyntaxKind.Star, "*");
        case '/':
            return SingleCharToken(SyntaxKind.Slash, "/");
        case '(':
            return SingleCharToken(SyntaxKind.OpenParenthesis, "(");
        case ')':
            return SingleCharToken(SyntaxKind.CloseParenthesis, ")");
        case '{':
            return SingleCharToken(SyntaxKind.OpenBrace, "{");
        case '}':
            return SingleCharToken(SyntaxKind.CloseBrace, "}");
        case '[':
            return SingleCharToken(SyntaxKind.OpenBracket, "[");
        case ']':
            return SingleCharToken(SyntaxKind.CloseBracket, "]");
        case '.':
            return SingleCharToken(SyntaxKind.FullStop, ".");
        case ',':
            return SingleCharToken(SyntaxKind.Comma, ",");
        case '#':
            return SingleCharToken(SyntaxKind.PoundSign, "#");
        case '%':
            return SingleCharToken(SyntaxKind.PercentSign, "%");
        case '^':
            return SingleCharToken(SyntaxKind.RoofSign, "^");
        case '"':
        case '\'':
            return LexString(current);
        case '=':
            return Next == '='
                ? TwoCharToken(SyntaxKind.EqualsEquals, "==")
                : SingleCharToken(SyntaxKind.Equals, "=");
        case '~':
            return Next == '='
                ? TwoCharToken(SyntaxKind.TildeEquals, "~=")
                : SingleCharToken(SyntaxKind.Tilde, "~");
        case '<':
            return Next == '='
                ? TwoCharToken(SyntaxKind.LessEquals, "<=")
                : SingleCharToken(SyntaxKind.Less, "<");
        case '>':
            return Next == '='
                ? TwoCharToken(SyntaxKind.GreaterEquals, ">=")
                : SingleCharToken(SyntaxKind.Greater, ">");
        default:
            if (char.IsLetter(current) || current == '_')
            {
                return LexIdentifierOrKeyword();
            }

            _diagnostics.LogBadCharacter(SingleCharSpan(), SyntaxKind.Identifier);
            return SingleCharToken(SyntaxKind.BadToken, "");
    }
}

/// <summary>Span covering only the character at the current position.</summary>
private TextSpan SingleCharSpan()
{
    return new TextSpan(_linePosition, _position, _linePosition, _position + 1);
}

/// <summary>Creates a value-less token for the single character at the current position.</summary>
private SyntaxToken SingleCharToken(SyntaxKind kind, string text)
{
    return new SyntaxToken(kind, SingleCharSpan(), text, null);
}

/// <summary>
/// Consumes the lookahead character and creates a value-less token spanning
/// both the previous and the (new) current character.
/// </summary>
private SyntaxToken TwoCharToken(SyntaxKind kind, string text)
{
    _position++;
    return new SyntaxToken(kind, new TextSpan(_linePosition, _position - 1, _linePosition, _position + 1), text, null);
}
|
|
|
|
/// <summary>
/// Lexes a numeric literal starting at the current digit. Underscores are
/// accepted as digit separators and dropped; a single '.' makes the literal a
/// <see cref="double"/>, otherwise it is a <see cref="long"/>.
/// </summary>
/// <remarks>
/// Parsing uses the invariant culture so that "1.5" means the same thing on
/// every machine. Literals that cannot be represented (for example an integer
/// too large for <see cref="long"/>) are reported through the diagnostics and
/// returned as a BadToken instead of throwing out of the lexer.
/// </remarks>
/// <returns>A Number token, or a BadToken when the literal is malformed.</returns>
private SyntaxToken LexNumber()
{
    var start = _position;
    var hasDecimalPoint = false;
    var numStr = new StringBuilder();
    numStr.Append(Current);

    while (true)
    {
        var next = Next;
        if (!char.IsDigit(next) && next != '.' && next != '_')
        {
            break;
        }

        if (next == '.')
        {
            if (hasDecimalPoint)
            {
                // A second decimal point cannot belong to this literal.
                _diagnostics.LogBadCharacter(new TextSpan(_linePosition, _position, _linePosition, _position + 1), SyntaxKind.Number);
                return new SyntaxToken(SyntaxKind.BadToken, new TextSpan(_linePosition, _position, _linePosition, _position + 1), "", null);
            }

            hasDecimalPoint = true;
        }

        if (next != '_')
        {
            numStr.Append(next);
        }

        _position++;
    }

    var text = numStr.ToString();
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    object value;
    bool parsed;
    if (hasDecimalPoint)
    {
        double asDouble;
        parsed = double.TryParse(text, System.Globalization.NumberStyles.Float, invariant, out asDouble);
        value = asDouble;
    }
    else
    {
        long asLong;
        parsed = long.TryParse(text, System.Globalization.NumberStyles.Integer, invariant, out asLong);
        value = asLong;
    }

    if (!parsed)
    {
        // Out-of-range or otherwise unparsable literal: report it over the
        // whole literal rather than letting a parse exception escape.
        _diagnostics.LogBadCharacter(new TextSpan(_linePosition, start, _linePosition, _position + 1), SyntaxKind.Number);
        return new SyntaxToken(SyntaxKind.BadToken, new TextSpan(_linePosition, start, _linePosition, _position + 1), "", null);
    }

    return new SyntaxToken(SyntaxKind.Number, new TextSpan(_linePosition, start, _linePosition, _position + 1), text, value);
}
|
|
|
|
/// <summary>
/// Lexes a string literal whose opening quote is at the current position.
/// </summary>
/// <remarks>
/// The only escape handled here is a backslash followed by the quote
/// character, which yields a literal quote; every other character is copied
/// verbatim. The string must close on the line it starts on: reaching the end
/// of the line (or of the input) stops the scan and logs a diagnostic, rather
/// than looping forever on the synthetic line-end character.
/// </remarks>
/// <param name="current">The quote character (' or ") that opened the string.</param>
/// <returns>A String token whose value is the unquoted content.</returns>
private SyntaxToken LexString(char current)
{
    var start = _position;
    var sb = new StringBuilder();

    // Step past the opening quote.
    _position++;
    while (true)
    {
        var c = Current;

        // '\0' is the end of the input and '\n' is the synthetic end-of-line
        // marker; neither can be part of the literal.
        if (c == '\0' || c == '\n' || c == current)
        {
            break;
        }

        if (c == '\\' && Next == current)
        {
            // Escaped quote: emit the quote and re-examine whatever follows,
            // so that consecutive escapes and an immediately following
            // closing quote are both handled.
            sb.Append(current);
            _position += 2;
            continue;
        }

        sb.Append(c);
        _position++;
    }

    if (Current != current)
    {
        // Unterminated string: report what was found where the closing quote
        // was expected.
        _diagnostics.LogBadCharacter(new TextSpan(_linePosition, _position, _linePosition, _position + 1),
            current, Current);
    }

    var res = sb.ToString();
    return new SyntaxToken(SyntaxKind.String, new TextSpan(_linePosition, start, _linePosition, _position + 1),
        $"\"{res}\"", res);
}
|
|
|
|
/// <summary>
/// Lexes an identifier or keyword starting at the current character: a run of
/// letters, digits and underscores. The text is classified through
/// <c>SyntaxKeyWords.GetSyntaxKind</c>.
/// </summary>
/// <returns>
/// An <c>IdentifierToken</c> for identifiers, a <c>ReturnSyntaxToken</c> for
/// the return keyword (flagged when a line break follows it directly), or a
/// plain keyword token otherwise.
/// </returns>
private SyntaxToken LexIdentifierOrKeyword()
{
    var startLine = _linePosition;
    var start = _position;
    var builder = new StringBuilder();
    builder.Append(Current);

    for (var next = Next; char.IsLetterOrDigit(next) || next == '_'; next = Next)
    {
        builder.Append(next);
        _position++;
    }

    var str = builder.ToString();
    var kind = SyntaxKeyWords.GetSyntaxKind(str);

    if (kind == SyntaxKind.Identifier)
    {
        return new IdentifierToken(str, new TextSpan(startLine, start, _linePosition, _position + 1));
    }

    if (kind == SyntaxKind.ReturnKeyword)
    {
        // Check for both line-break characters explicitly instead of
        // Environment.NewLine[0]: the source's line endings need not match
        // the platform the lexer happens to run on.
        var lookahead = Next;
        var followedByLineBreak = lookahead == '\n' || lookahead == '\r';
        return new ReturnSyntaxToken(new TextSpan(_linePosition, start, _linePosition, _position + 1), followedByLineBreak);
    }

    return new SyntaxToken(kind, new TextSpan(_linePosition, start, _linePosition, _position + 1), str, null);
}
|
|
|
|
/// <summary>
/// Lexes a line comment. Called with the position on the second '-' of the
/// "--" marker; consumes the rest of the line.
/// </summary>
/// <remarks>
/// On return the position is already one past the last comment character, so
/// the caller must not advance it again. The comment text is collected only
/// when <c>SaveComments</c> is set (a single leading space is dropped);
/// otherwise the token value is null.
/// The end of the comment is detected by checking for '\r', '\n' and '\0'
/// explicitly rather than Environment.NewLine[0]: the lookahead reports the
/// end of a line as '\n' on every platform, so a platform-dependent check can
/// fail to ever match.
/// </remarks>
/// <returns>A Comment token.</returns>
private SyntaxToken LexComments()
{
    // Step past the second '-'.
    _position++;
    var startLine = _linePosition;
    var start = _position;

    StringBuilder stringBuilder = null;
    if (SaveComments)
    {
        stringBuilder = new StringBuilder();
        var first = Current;

        // Skip a single leading space, and do not record the line-end marker
        // as comment text when the comment is empty.
        if (first != ' ' && first != '\n' && first != '\r' && first != '\0')
        {
            stringBuilder.Append(first);
        }
    }

    for (var next = Next; next != '\n' && next != '\r' && next != '\0'; next = Next)
    {
        stringBuilder?.Append(next);
        _position++;
    }

    var str = stringBuilder?.ToString();

    // Move onto the line terminator; the caller does not advance after a comment.
    _position++;
    return new SyntaxToken(SyntaxKind.Comment, new TextSpan(startLine, start, _linePosition, _position + 1), _position - start, str);
}
|
|
}
|
|
} |