142 lines
4.7 KiB
C#
142 lines
4.7 KiB
C#
using System;
|
|
using System.Collections.Immutable;
|
|
using System.Text;
|
|
|
|
namespace Upsilon.Parser
|
|
{
|
|
/// <summary>
/// Splits source text into a flat sequence of <see cref="SyntaxToken"/>s.
/// Use the static <see cref="Lex(string)"/> entry point; instances are
/// single-use and hold the cursor state for one pass over the text.
/// </summary>
public class Lexer
{
    private readonly string _text;

    // Index of the character currently being examined. Every Lex* helper
    // leaves this on the LAST character of the token it produced; the main
    // loop in Lex() is what steps past it.
    private int _position;

    /// <summary>
    /// Creates a lexer over <paramref name="text"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    private Lexer(string text)
    {
        // Fail at the boundary instead of with a NullReferenceException
        // the first time Current is read.
        if (text == null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        _text = text;
    }

    /// <summary>
    /// Lexes <paramref name="text"/> into tokens.
    /// </summary>
    /// <param name="text">The source text to tokenize.</param>
    /// <returns>
    /// All non-whitespace tokens in source order, terminated by a single
    /// <see cref="SyntaxKind.EndOfFile"/> token.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    /// <exception cref="Exception">
    /// An unknown character is encountered, or a numeric literal contains
    /// more than one decimal point.
    /// </exception>
    public static ImmutableArray<SyntaxToken> Lex(string text)
    {
        var lexer = new Lexer(text);
        return lexer.Lex();
    }

    /// <summary>
    /// The character at the cursor, or <c>'\0'</c> once the cursor is past the end of the text.
    /// </summary>
    private char Current
    {
        get
        {
            if (_position >= _text.Length)
            {
                return '\0';
            }

            return _text[_position];
        }
    }

    /// <summary>
    /// The character one past the cursor, or <c>'\0'</c> if there is none.
    /// </summary>
    private char Next
    {
        get
        {
            if (_position + 1 >= _text.Length)
            {
                return '\0';
            }

            return _text[_position + 1];
        }
    }

    /// <summary>
    /// Runs the lexer to completion, discarding whitespace tokens.
    /// </summary>
    private ImmutableArray<SyntaxToken> Lex()
    {
        var tokens = ImmutableArray.CreateBuilder<SyntaxToken>();
        while (true)
        {
            var token = LexNext();
            if (token.Kind != SyntaxKind.WhiteSpace)
            {
                tokens.Add(token);
                if (token.Kind == SyntaxKind.EndOfFile)
                {
                    break;
                }
            }

            // LexNext stops on the last character of the token it returned,
            // so one step moves the cursor to the start of the next token.
            _position++;
        }

        return tokens.ToImmutable();
    }

    /// <summary>
    /// Produces the token that starts at the cursor. On return the cursor
    /// rests on the last character belonging to that token.
    /// </summary>
    /// <exception cref="Exception">The current character does not start any known token.</exception>
    private SyntaxToken LexNext()
    {
        switch (Current)
        {
            case '\0':
                // Also reached for a literal NUL inside the text, because
                // Current uses '\0' as its past-the-end sentinel.
                return new SyntaxToken(SyntaxKind.EndOfFile, _position, "\0", null);
            case ' ':
            case '\t':
            case '\r':
            case '\n':
                return new SyntaxToken(SyntaxKind.WhiteSpace, _position, Current.ToString(), null);
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                return LexNumber();
            case '+':
                return new SyntaxToken(SyntaxKind.Plus, _position, "+", null);
            case '-':
                return new SyntaxToken(SyntaxKind.Minus, _position, "-", null);
            case '*':
                return new SyntaxToken(SyntaxKind.Star, _position, "*", null);
            case '/':
                return new SyntaxToken(SyntaxKind.Slash, _position, "/", null);
            case '(':
                return new SyntaxToken(SyntaxKind.OpenParenthesis, _position, "(", null);
            case ')':
                return new SyntaxToken(SyntaxKind.CloseParenthesis, _position, ")", null);
            case '=':
                if (Next == '=')
                {
                    // Consume the second '=' but report the token at the first one.
                    var equalsStart = _position;
                    _position++;
                    return new SyntaxToken(SyntaxKind.EqualsEquals, equalsStart, "==", null);
                }

                return new SyntaxToken(SyntaxKind.Equals, _position, "=", null);
            case '~':
                if (Next == '=')
                {
                    // Consume the '=' but report the token at the '~'.
                    var tildeStart = _position;
                    _position++;
                    return new SyntaxToken(SyntaxKind.TildeEquals, tildeStart, "~=", null);
                }

                return new SyntaxToken(SyntaxKind.Tilde, _position, "~", null);
            default:
                if (char.IsLetter(Current))
                {
                    return LexIdentifierOrKeyword();
                }

                throw new Exception("Unknown token character: " + Current);
        }
    }

    /// <summary>
    /// Lexes a numeric literal starting at the cursor. The literal may
    /// contain digits, at most one <c>'.'</c>, and <c>'_'</c> characters.
    /// Underscores are kept in the token text but ignored when computing
    /// the numeric value.
    /// </summary>
    /// <returns>
    /// A <see cref="SyntaxKind.Number"/> token whose value is the parsed <see cref="double"/>.
    /// </returns>
    /// <exception cref="Exception">The literal contains a second decimal point.</exception>
    private SyntaxToken LexNumber()
    {
        var start = _position;
        var hasDecimalPoint = false;
        var raw = new StringBuilder();
        raw.Append(Current);

        while (char.IsDigit(Next) || Next == '.' || Next == '_')
        {
            if (Next == '.')
            {
                if (hasDecimalPoint)
                {
                    throw new Exception("No second decimal allowed there");
                }

                hasDecimalPoint = true;
            }

            raw.Append(Next);
            _position++;
        }

        var text = raw.ToString();

        // double.Parse rejects '_' outright, so strip the separators first.
        // Parse with the invariant culture: source code always uses '.' as
        // the decimal point, regardless of the machine's regional settings.
        var value = double.Parse(
            text.Replace("_", string.Empty),
            System.Globalization.CultureInfo.InvariantCulture);

        return new SyntaxToken(SyntaxKind.Number, start, text, value);
    }

    /// <summary>
    /// Lexes a run of letters, digits and underscores starting at the cursor.
    /// The resulting token's kind is whatever
    /// <c>SyntaxKeyWords.GetSyntaxKind</c> returns for that text.
    /// </summary>
    private SyntaxToken LexIdentifierOrKeyword()
    {
        var start = _position;
        var word = new StringBuilder();
        word.Append(Current);

        while (char.IsLetterOrDigit(Next) || Next == '_')
        {
            word.Append(Next);
            _position++;
        }

        var text = word.ToString();
        var kind = SyntaxKeyWords.GetSyntaxKind(text);
        return new SyntaxToken(kind, start, text, null);
    }
}
|
|
} |