2018-11-26 13:42:50 +01:00

203 lines
7.4 KiB
C#

using System.Collections.Immutable;
using System.Globalization;
using System.Text;
using Upsilon.Text;
namespace Upsilon.Parser
{
public class Lexer
{
/// <summary>The source text being tokenized.</summary>
private readonly string _text;

/// <summary>Sink that receives the bad-character reports raised while lexing.</summary>
private readonly Diagnostics _diagnostics;

/// <summary>Index into <see cref="_text"/> of the character currently being examined.</summary>
private int _position;

/// <summary>
/// Creates a lexer positioned at the first character of <paramref name="text"/>.
/// Private: callers go through the static <c>Lex</c> entry point.
/// </summary>
/// <param name="text">The source text to tokenize.</param>
/// <param name="diagnostics">The collection that lexing problems are logged to.</param>
private Lexer(string text, Diagnostics diagnostics)
{
    _position = 0;
    _diagnostics = diagnostics;
    _text = text;
}
/// <summary>
/// Tokenizes <paramref name="text"/> with a fresh lexer instance.
/// </summary>
/// <param name="text">The source text to tokenize.</param>
/// <param name="diagnostics">The collection that lexing problems are logged to.</param>
/// <returns>The tokens in source order, ending with an end-of-file token; whitespace tokens are omitted.</returns>
public static ImmutableArray<SyntaxToken> Lex(string text, Diagnostics diagnostics)
    => new Lexer(text, diagnostics).Lex();
/// <summary>The character at the current position, or '\0' once the position is past the end of the text.</summary>
private char Current => CharAt(_position);

/// <summary>The character one past the current position, or '\0' if that lies past the end of the text.</summary>
private char Next => CharAt(_position + 1);

/// <summary>Reads the character at <paramref name="index"/>, substituting '\0' for any index at or beyond the end of the text.</summary>
private char CharAt(int index) => index < _text.Length ? _text[index] : '\0';
/// <summary>
/// Runs the lexer over the whole text.
/// </summary>
/// <remarks>
/// Every <c>LexNext</c> call is expected to leave the position on the last character
/// of the token it produced; the loop then steps one character forward.
/// </remarks>
/// <returns>All non-whitespace tokens in source order, with the end-of-file token last.</returns>
private ImmutableArray<SyntaxToken> Lex()
{
    var tokens = ImmutableArray.CreateBuilder<SyntaxToken>();
    for (; ; _position++)
    {
        var token = LexNext();

        // Whitespace is recognised but never handed to the caller.
        if (token.Kind == SyntaxKind.WhiteSpace)
            continue;

        tokens.Add(token);

        // The end-of-file token is emitted exactly once and terminates the scan.
        if (token.Kind == SyntaxKind.EndOfFile)
            break;
    }
    return tokens.ToImmutable();
}
/// <summary>
/// Lexes the single token that starts at the current position.
/// </summary>
/// <remarks>
/// On return the position rests on the last character consumed by the token;
/// the caller is responsible for stepping past it.
/// </remarks>
/// <returns>
/// The recognised token. An unrecognised character is reported to the diagnostics
/// and yields a <c>BadToken</c> with empty text.
/// </returns>
private SyntaxToken LexNext()
{
    var current = Current;
    switch (current)
    {
        case '\0':
            return TokenAtPosition(SyntaxKind.EndOfFile, "\0");

        case ' ':
        case '\t':
        case '\r':
        case '\n':
            return TokenAtPosition(SyntaxKind.WhiteSpace, current.ToString());

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            return LexNumber();

        case '"':
            return LexString();

        case '+':
            return TokenAtPosition(SyntaxKind.Plus, "+");

        case '-':
            if (Next != '-')
                return TokenAtPosition(SyntaxKind.Minus, "-");
            // "--" opens a comment; move onto the second dash before handing over.
            _position++;
            return LexComments();

        case '*':
            return TokenAtPosition(SyntaxKind.Star, "*");
        case '/':
            return TokenAtPosition(SyntaxKind.Slash, "/");
        case '(':
            return TokenAtPosition(SyntaxKind.OpenParenthesis, "(");
        case ')':
            return TokenAtPosition(SyntaxKind.CloseParenthesis, ")");
        case '{':
            return TokenAtPosition(SyntaxKind.OpenBrace, "{");
        case '}':
            return TokenAtPosition(SyntaxKind.CloseBrace, "}");
        case '[':
            return TokenAtPosition(SyntaxKind.OpenBracket, "[");
        case ']':
            return TokenAtPosition(SyntaxKind.CloseBracket, "]");
        case '.':
            return TokenAtPosition(SyntaxKind.FullStop, ".");
        case ',':
            return TokenAtPosition(SyntaxKind.Comma, ",");
        case '#':
            return TokenAtPosition(SyntaxKind.PoundSign, "#");

        case '=':
            return Next == '='
                ? TwoCharacterToken(SyntaxKind.EqualsEquals, "==")
                : TokenAtPosition(SyntaxKind.Equals, "=");

        case '~':
            return Next == '='
                ? TwoCharacterToken(SyntaxKind.TildeEquals, "~=")
                : TokenAtPosition(SyntaxKind.Tilde, "~");
    }

    // Anything else is either the start of a name or a character the lexer does not know.
    if (current == '_' || char.IsLetter(current))
        return LexIdentifierOrKeyword();

    _diagnostics.LogBadCharacter(new TextSpan(_position, 1), SyntaxKind.Identifier);
    return TokenAtPosition(SyntaxKind.BadToken, "");
}

/// <summary>Builds a value-less token of the given kind that starts at the current position.</summary>
private SyntaxToken TokenAtPosition(SyntaxKind kind, string text)
{
    return new SyntaxToken(kind, _position, text, null);
}

/// <summary>
/// Builds a value-less two-character token that starts at the current position
/// and moves the position onto the token's second character.
/// </summary>
private SyntaxToken TwoCharacterToken(SyntaxKind kind, string text)
{
    var start = _position;
    _position++;
    return new SyntaxToken(kind, start, text, null);
}
/// <summary>
/// Lexes a numeric literal that starts at the current position.
/// </summary>
/// <remarks>
/// Accepts ASCII digits, at most one '.', and '_' separators (which are dropped from
/// the token text). Parsing always uses the invariant culture so that '.' is the decimal
/// separator regardless of the machine's regional settings. On return the position rests
/// on the last character consumed.
/// </remarks>
/// <returns>
/// A <c>Number</c> token whose value is a <see cref="long"/> for literals without a decimal
/// point that fit in 64 bits, and a <see cref="double"/> otherwise. A second decimal point,
/// or a literal that cannot be represented at all, is reported to the diagnostics and
/// yields a <c>BadToken</c>.
/// </returns>
private SyntaxToken LexNumber()
{
    var start = _position;
    var hasDecimalPoint = false;
    var numStr = new StringBuilder();
    numStr.Append(Current);
    // Only ASCII digits are accepted: char.IsDigit also matches other Unicode decimal
    // digits, which the numeric parsers below would reject.
    while (IsAsciiDigit(Next) || Next == '.' || Next == '_')
    {
        if (Next == '.')
        {
            if (hasDecimalPoint)
            {
                // The offending character is the lookahead, one past the current position.
                _diagnostics.LogBadCharacter(new TextSpan(_position + 1, 1), SyntaxKind.Number);
                return new SyntaxToken(SyntaxKind.BadToken, _position, "", null);
            }
            hasDecimalPoint = true;
        }
        if (Next != '_')
            numStr.Append(Next);
        _position++;
    }

    var text = numStr.ToString();
    object value;
    long integerValue;
    double floatValue;
    if (!hasDecimalPoint
        && long.TryParse(text, NumberStyles.Integer, CultureInfo.InvariantCulture, out integerValue))
    {
        value = integerValue;
    }
    else if (double.TryParse(text, NumberStyles.Float, CultureInfo.InvariantCulture, out floatValue))
    {
        // Also reached by integer literals too large for a long: they degrade to a
        // double instead of throwing OverflowException.
        value = floatValue;
    }
    else
    {
        // Not representable at all; report it at the literal's first character rather
        // than letting a parse exception escape the lexer.
        _diagnostics.LogBadCharacter(new TextSpan(start, 1), SyntaxKind.Number);
        return new SyntaxToken(SyntaxKind.BadToken, start, "", null);
    }
    return new SyntaxToken(SyntaxKind.Number, start, text, value);
}

/// <summary>Returns true when <paramref name="c"/> is one of the ASCII digits '0' through '9'.</summary>
private static bool IsAsciiDigit(char c)
{
    return c >= '0' && c <= '9';
}
/// <summary>
/// Lexes a double-quoted string literal whose opening quote is at the current position.
/// </summary>
/// <remarks>
/// The only escape sequence recognised is backslash-quote, which contributes a single
/// quote character to the value. On return the position rests on the closing quote, or
/// at the end of the text if the literal was never closed.
/// </remarks>
/// <returns>
/// A <c>String</c> token starting at the opening quote. Its value is the unescaped content
/// and its text is that content wrapped in quotes. An unterminated literal is reported to
/// the diagnostics and still returns the content read so far.
/// </returns>
private SyntaxToken LexString()
{
    var start = _position;
    var sb = new StringBuilder();

    // Step over the opening quote.
    _position++;
    while (_position < _text.Length && Current != '"')
    {
        if (Current == '\\' && Next == '"')
        {
            sb.Append('"');
            _position += 2;
            // Re-enter the loop so the character after the escape is itself checked
            // for being another escape or the closing quote.
            continue;
        }
        sb.Append(Current);
        _position++;
    }

    // Running off the end of the text means the closing quote is missing. Nothing is
    // appended for the end-of-text sentinel, so the value holds only real characters.
    if (Current != '"')
    {
        _diagnostics.LogBadCharacter(new TextSpan(_position, 1), '"', Current);
    }
    var res = sb.ToString();
    return new SyntaxToken(SyntaxKind.String, start, $"\"{res}\"", res);
}
/// <summary>
/// Lexes a name that starts at the current position and classifies it as a keyword or an identifier.
/// </summary>
/// <remarks>
/// The name extends over every following letter, digit or underscore. On return the
/// position rests on the last character of the name.
/// </remarks>
/// <returns>
/// An <c>IdentifierToken</c> when the keyword table maps the name to <c>Identifier</c>;
/// otherwise a value-less token of the kind the table returned.
/// </returns>
private SyntaxToken LexIdentifierOrKeyword()
{
    var start = _position;

    // Advance while the lookahead still belongs to the name.
    while (true)
    {
        var lookahead = Next;
        if (lookahead != '_' && !char.IsLetterOrDigit(lookahead))
            break;
        _position++;
    }

    var name = _text.Substring(start, _position - start + 1);
    var kind = SyntaxKeyWords.GetSyntaxKind(name);
    if (kind != SyntaxKind.Identifier)
        return new SyntaxToken(kind, start, name, null);

    return new IdentifierToken(name, start);
}
/// <summary>
/// Lexes a line comment introduced by "--".
/// </summary>
/// <remarks>
/// The caller has already advanced onto the second '-' of the marker, so the comment
/// content begins at the lookahead character. One space directly after the marker is
/// dropped, and the content runs up to (not including) the line break or end of text.
/// On return the position rests on the last character consumed.
/// </remarks>
/// <returns>
/// A <c>Comment</c> token, positioned at the second '-', whose text and value are both
/// the comment content without the marker.
/// </returns>
private SyntaxToken LexComments()
{
    var start = _position;
    var stringBuilder = new StringBuilder();

    // Current is the second '-' here, so the optional leading space has to be
    // looked for in the lookahead, not in Current.
    if (Next == ' ')
        _position++;

    // Stop before '\r' as well, so CRLF line endings do not leak into the content.
    while (Next != '\n' && Next != '\r' && Next != '\0')
    {
        stringBuilder.Append(Next);
        _position++;
    }
    var str = stringBuilder.ToString();
    return new SyntaxToken(SyntaxKind.Comment, start, str, str);
}
}
}