MalachScript/src/Parser/Lexer/Lexer.cpp

559 lines
26 KiB
C++
Raw Normal View History

#include "Lexer.hpp"
#include <cmath>
#include <stdexcept>
#include "NumericalLexers.hpp"
namespace ElohimScript::Parser {
/// Lexes the whole script into a singly linked chain of tokens.
///
/// Tokens are produced by LexNext() until an EndOfFile token is seen; each token is
/// stored in the `_next` pointer of the token before it.
///
/// @return The first token of the chain (the EndOfFile token itself for an empty script).
const LexToken* Lexer::Lex() {
    auto* head = LexNext();
    auto* tail = head;
    // Keep appending until the most recently lexed token is the end-of-file marker.
    while (tail->GetKind() != LexTokenKind::EndOfFile) {
        auto* following = LexNext();
        tail->_next = std::unique_ptr<const LexToken>(following);
        tail = following;
    }
    return head;
}
/// Lexes exactly one token.
///
/// One character is consumed and dispatched on. Multi-character operators are resolved by
/// looking ahead with Peek() and advancing with Progress() for every extra character that
/// belongs to the token. Numbers, strings and identifiers/keywords are delegated to their
/// dedicated lexers.
///
/// Every span is built as TextSpan(start, start + length), matching the
/// TextSpan(start, _position) form used by the literal lexers in this file.
///
/// @return A newly allocated token; an Unknown token (plus an UnknownToken diagnostic) when
///         the character does not start any known token.
LexToken* Lexer::LexNext() {
    auto start = _position;
    auto c = Consume();
    switch (c) {
        case u8'\0': return new LexTokenImpl<LexTokenKind::EndOfFile>(TextSpan(start, start + 1));
        case u8'*': {
            auto n = Peek();
            if (n == u8'*') {
                Progress();
                n = Peek();
                if (n == u8'=') {
                    Progress();
                    // **=
                    return new LexTokenImpl<LexTokenKind::StarStarEqualsSymbol>(TextSpan(start, start + 3));
                }
                // **
                return new LexTokenImpl<LexTokenKind::StarStarSymbol>(TextSpan(start, start + 2));
            }
            if (n == u8'=') {
                Progress();
                // *=
                return new LexTokenImpl<LexTokenKind::StarEqualsSymbol>(TextSpan(start, start + 2));
            }
            // *
            return new LexTokenImpl<LexTokenKind::StarSymbol>(TextSpan(start, start + 1));
        }
        case u8'/':
            if (Peek() == u8'=') {
                Progress();
                // /=
                return new LexTokenImpl<LexTokenKind::SlashEqualsSymbol>(TextSpan(start, start + 2));
            }
            // /
            return new LexTokenImpl<LexTokenKind::SlashSymbol>(TextSpan(start, start + 1));
        case u8'%':
            if (Peek() == u8'=') {
                Progress();
                // %=
                return new LexTokenImpl<LexTokenKind::PercentEqualsSymbol>(TextSpan(start, start + 2));
            }
            // %
            return new LexTokenImpl<LexTokenKind::PercentSymbol>(TextSpan(start, start + 1));
        case u8'+': {
            auto n = Peek();
            if (n == u8'=') {
                Progress();
                // +=
                return new LexTokenImpl<LexTokenKind::PlusEqualsSymbol>(TextSpan(start, start + 2));
            }
            if (n == u8'+') {
                Progress();
                // ++
                return new LexTokenImpl<LexTokenKind::PlusPlusSymbol>(TextSpan(start, start + 2));
            }
            // +
            return new LexTokenImpl<LexTokenKind::PlusSymbol>(TextSpan(start, start + 1));
        }
        case u8'-': {
            auto n = Peek();
            if (n == u8'=') {
                Progress();
                // -=
                return new LexTokenImpl<LexTokenKind::MinusEqualsSymbol>(TextSpan(start, start + 2));
            }
            if (n == u8'-') {
                Progress();
                // --
                return new LexTokenImpl<LexTokenKind::MinusMinusSymbol>(TextSpan(start, start + 2));
            }
            // -
            return new LexTokenImpl<LexTokenKind::MinusSymbol>(TextSpan(start, start + 1));
        }
        case u8'<': {
            auto n = Peek();
            if (n == u8'=') {
                Progress();
                // <=
                return new LexTokenImpl<LexTokenKind::LessThanEqualsSymbol>(TextSpan(start, start + 2));
            }
            if (n == u8'<') {
                Progress();
                if (Peek() == u8'=') {
                    Progress();
                    // <<=
                    return new LexTokenImpl<LexTokenKind::LessThanLessThanEqualsSymbol>(TextSpan(start, start + 3));
                }
                // <<
                return new LexTokenImpl<LexTokenKind::LessThanLessThanSymbol>(TextSpan(start, start + 2));
            }
            // <
            return new LexTokenImpl<LexTokenKind::LessThanSymbol>(TextSpan(start, start + 1));
        }
        case u8'>': {
            auto n = Peek();
            if (n == u8'=') {
                Progress();
                // >=
                return new LexTokenImpl<LexTokenKind::GreaterThanEqualsSymbol>(TextSpan(start, start + 2));
            }
            if (n == u8'>') {
                Progress();
                n = Peek();
                if (n == u8'=') {
                    Progress();
                    // >>=
                    return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanEqualsSymbol>(
                        TextSpan(start, start + 3));
                }
                if (n == u8'>') {
                    Progress();
                    if (Peek() == u8'=') {
                        Progress();
                        // >>>=
                        return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanGreaterThanEqualsSymbol>(
                            TextSpan(start, start + 4));
                    }
                    // >>>
                    return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanGreaterThanSymbol>(
                        TextSpan(start, start + 3));
                }
                // >>
                return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanSymbol>(TextSpan(start, start + 2));
            }
            // >
            return new LexTokenImpl<LexTokenKind::GreaterThanSymbol>(TextSpan(start, start + 1));
        }
        case u8'(': return new LexTokenImpl<LexTokenKind::OpenParenthesisSymbol>(TextSpan(start, start + 1));
        case u8')': return new LexTokenImpl<LexTokenKind::CloseParenthesisSymbol>(TextSpan(start, start + 1));
        case u8'=': {
            if (Peek() == u8'=') {
                Progress();
                // ==
                return new LexTokenImpl<LexTokenKind::EqualsEqualsSymbol>(TextSpan(start, start + 2));
            }
            // =
            return new LexTokenImpl<LexTokenKind::EqualsSymbol>(TextSpan(start, start + 1));
        }
        case u8'!': {
            auto n = Peek();
            if (n == u8'=') {
                Progress();
                // !=
                return new LexTokenImpl<LexTokenKind::ExclamationMarkEqualsSymbol>(TextSpan(start, start + 2));
            }
            if (n == u8'i' && Peek(2) == u8's') {
                Progress(2);
                // !is
                return new LexTokenImpl<LexTokenKind::ExclamationMarkIsSymbol>(TextSpan(start, start + 3));
            }
            // !
            return new LexTokenImpl<LexTokenKind::ExclamationMarkSymbol>(TextSpan(start, start + 1));
        }
        case u8'?': return new LexTokenImpl<LexTokenKind::QuestionMarkSymbol>(TextSpan(start, start + 1));
        case u8':': {
            if (Peek() == u8':') {
                Progress();
                // ::
                return new LexTokenImpl<LexTokenKind::ColonColonSymbol>(TextSpan(start, start + 2));
            }
            // :
            return new LexTokenImpl<LexTokenKind::ColonSymbol>(TextSpan(start, start + 1));
        }
        case u8'&': {
            auto n = Peek();
            if (n == u8'=') {
                Progress();
                // &=
                return new LexTokenImpl<LexTokenKind::AmpersandEqualsSymbol>(TextSpan(start, start + 2));
            }
            if (n == u8'&') {
                Progress();
                // &&
                return new LexTokenImpl<LexTokenKind::AmpersandAmpersandSymbol>(TextSpan(start, start + 2));
            }
            // &
            return new LexTokenImpl<LexTokenKind::AmpersandSymbol>(TextSpan(start, start + 1));
        }
        case u8',': return new LexTokenImpl<LexTokenKind::CommaSymbol>(TextSpan(start, start + 1));
        case u8'{': return new LexTokenImpl<LexTokenKind::OpenCurlyParenthesisSymbol>(TextSpan(start, start + 1));
        case u8'}': return new LexTokenImpl<LexTokenKind::CloseCurlyParenthesisSymbol>(TextSpan(start, start + 1));
        case u8';': return new LexTokenImpl<LexTokenKind::SemicolonSymbol>(TextSpan(start, start + 1));
        case u8'|': {
            auto n = Peek();
            if (n == u8'=') {
                Progress();
                // |=
                return new LexTokenImpl<LexTokenKind::VerticalLineEqualsSymbol>(TextSpan(start, start + 2));
            }
            if (n == u8'|') {
                Progress();
                // ||
                return new LexTokenImpl<LexTokenKind::VerticalLineVerticalLineSymbol>(TextSpan(start, start + 2));
            }
            // |
            return new LexTokenImpl<LexTokenKind::VerticalLineSymbol>(TextSpan(start, start + 1));
        }
        case u8'^': {
            auto n = Peek();
            if (n == u8'=') {
                Progress();
                // ^=
                return new LexTokenImpl<LexTokenKind::CaretEqualsSymbol>(TextSpan(start, start + 2));
            }
            if (n == u8'^') {
                Progress();
                // ^^
                return new LexTokenImpl<LexTokenKind::CaretCaretSymbol>(TextSpan(start, start + 2));
            }
            // ^
            return new LexTokenImpl<LexTokenKind::CaretSymbol>(TextSpan(start, start + 1));
        }
        case u8'~': return new LexTokenImpl<LexTokenKind::TildeSymbol>(TextSpan(start, start + 1));
        case u8'.': return new LexTokenImpl<LexTokenKind::DotSymbol>(TextSpan(start, start + 1));
        case u8'[': return new LexTokenImpl<LexTokenKind::OpenBlockParenthesisSymbol>(TextSpan(start, start + 1));
        case u8']': return new LexTokenImpl<LexTokenKind::CloseBlockParenthesisSymbol>(TextSpan(start, start + 1));
        case u8'@': return new LexTokenImpl<LexTokenKind::AtSymbol>(TextSpan(start, start + 1));
        case u8' ':
        case u8'\r':
        case u8'\n':
        case u8'\t': return new LexTokenImpl<LexTokenKind::Whitespace>(TextSpan(start, start + 1));
        // Byte order mark
        case u8'\xEF': {
            if (Peek() == u8'\xBB' && Peek(2) == u8'\xBF') {
                Progress(2);
                return new LexTokenImpl<LexTokenKind::Whitespace>(TextSpan(start, start + 3));
            }
            // A lone 0xEF is not a byte order mark. Report it as an unknown token rather than
            // falling through into the digit cases below and lexing it as a number.
            _diagnostics->LogError(Diagnostics::DiagnosticType::UnknownToken, TextSpan(start, start + 1));
            return new LexTokenImpl<LexTokenKind::Unknown>(TextSpan(start, start + 1));
        }
        case u8'0':
        case u8'1':
        case u8'2':
        case u8'3':
        case u8'4':
        case u8'5':
        case u8'6':
        case u8'7':
        case u8'8':
        case u8'9': return LexNumerical(c);
        case u8'\'': return LexString(u8'\'', false);
        case u8'"': {
            // Three consecutive double quotes open a heredoc string.
            if (Peek() == '"' && Peek(2) == '\"') {
                return LexString(u8'"', true);
            }
            return LexString(u8'"', false);
        }
        default:
            if (IsAlphaNumericalOrUnderscore(c))
                return LexKeywordOrIdentifier();
            _diagnostics->LogError(Diagnostics::DiagnosticType::UnknownToken, TextSpan(start, start + 1));
            return new LexTokenImpl<LexTokenKind::Unknown>(TextSpan(start, start + 1));
    }
}
/// Lexes a numeric literal whose first digit `c` has already been consumed.
///
/// A leading zero followed by a letter is treated as a base specifier:
/// `x` = hexadecimal, `d` = decimal, `o` = octal, `b` = binary. Any other letter logs an
/// InvalidNumericalBase diagnostic and lexing continues as hexadecimal.
///
/// @param c The first digit of the literal.
/// @return The literal token produced by the lexer for the selected base.
/// @throws std::logic_error if no lexer exists for the selected base (not reachable with the
///         bases assigned in this function).
LexToken* Lexer::LexNumerical(char8_t c) {
    auto initialValue = LexDecimalValue(c);
    auto numericalSystem = 10; // Default to decimal system.
    if (initialValue == 0) {
        auto secondChar = Peek();
        // Only a letter (or underscore) directly after the zero can be a base specifier.
        // Anything else - '.', ';', ')', whitespace, end of input - belongs to what follows,
        // so it must not be consumed here and a plain `0` stays a decimal literal.
        if (LexDecimalValue(secondChar) == 255 && IsAlphaNumericalOrUnderscore(secondChar)) {
            Progress();
            switch (secondChar) {
                case 'x': numericalSystem = 16; break;
                case 'd': numericalSystem = 10; break;
                case 'o': numericalSystem = 8; break;
                case 'b': numericalSystem = 2; break;
                default:
                    _diagnostics->LogError(Diagnostics::DiagnosticType::InvalidNumericalBase,
                                           TextSpan(_position - 1, _position + 1));
                    // Set to the largest numerical system, so we can prevent errors down the line.
                    numericalSystem = 16;
                    break;
            }
        }
    }
    switch (numericalSystem) {
        case 10: return LexDecimal(initialValue);
        case 16: return LexHexadecimal();
        case 8: return LexOctal();
        case 2: return LexBinary();
        default: throw std::logic_error("Not implemented");
    }
}
/// Returns 10^n from a lookup table.
///
/// @param n Exponent; must be in [0, 18], since 10^19 does not fit in int64_t.
/// @return 10 raised to the power n.
/// @throws std::out_of_range if n is outside [0, 18].
constexpr int64_t quick_pow10(int n) {
    constexpr int maxExponent = 18;
    constexpr int64_t powersOfTen[maxExponent + 1] = {1,
                                                      10,
                                                      100,
                                                      1000,
                                                      10000,
                                                      100000,
                                                      1000000,
                                                      10000000,
                                                      100000000,
                                                      1000000000,
                                                      10000000000,
                                                      100000000000,
                                                      1000000000000,
                                                      10000000000000,
                                                      100000000000000,
                                                      1000000000000000,
                                                      10000000000000000,
                                                      100000000000000000,
                                                      1000000000000000000};
    if (n < 0 || n > maxExponent) {
        // Reading past the table would be undefined behaviour; fail loudly instead.
        throw std::out_of_range("quick_pow10: exponent must be in [0, 18]");
    }
    return powersOfTen[n];
}
/// Lexes the remainder of a decimal literal.
///
/// Digits are accumulated into one of three parts: the integer part, the fraction (after a
/// single '.'), or the exponent (after an 'e'/'E' that follows the fraction). Lexing stops at
/// the first character that belongs to none of them.
///
/// @param initial Value of the digit(s) already consumed by the caller.
/// @return A FloatLiteral when a fraction or exponent was seen, otherwise an IntegerLiteral.
LexToken* Lexer::LexDecimal(uint64_t initial) {
    // Powers of ten stop fitting in int64_t after 10^18, and 18 digits is already more than a
    // double can resolve. Further fraction digits are consumed but ignored, which keeps
    // decimalValue, decimalLength and the quick_pow10 argument in range.
    constexpr uint8_t maxFractionDigits = 18;

    auto start = _position;
    uint64_t value = initial;
    uint64_t decimalValue = 0;
    uint64_t exponentValue = 0;
    uint8_t decimalLength = 0;
    bool isDecimal = false;
    bool isExponent = false;
    while (true) {
        const auto next = Peek();
        const auto v = static_cast<uint64_t>(LexDecimalValue(next));
        if (v == 255) {
            // 255 marks "not a decimal digit"; the only other characters accepted are a single
            // decimal point and an exponent marker.
            // The decimal point is only valid before the exponent; otherwise `1.0e2.5` would
            // switch back to accumulating fraction digits.
            if (!isDecimal && !isExponent && next == u8'.') {
                isDecimal = true;
                Progress();
                continue;
            }
            if (isDecimal && (next == u8'e' || next == u8'E')) {
                isDecimal = false;
                isExponent = true;
                Progress();
                continue;
            }
            break;
        }
        Progress();
        if (isDecimal) {
            if (decimalLength < maxFractionDigits) {
                decimalValue = decimalValue * 10 + v;
                decimalLength++;
            }
        } else if (isExponent) {
            exponentValue = exponentValue * 10 + v;
        } else {
            value = value * 10 + v;
        }
    }
    if (isDecimal || isExponent) {
        auto val = static_cast<double>(value) +
                   (static_cast<double>(decimalValue) / static_cast<double>(quick_pow10(decimalLength)));
        if (isExponent) {
            val *= std::pow(10.0, static_cast<double>(exponentValue));
        }
        return new FloatLiteral(TextSpan(start, _position), val);
    }
    return new IntegerLiteral(TextSpan(start, _position), value);
}
2020-10-04 15:15:28 +00:00
/// Lexes the digits of a hexadecimal literal.
///
/// Digits are read for as long as LexHexadecimalValue() does not return 255; each one
/// shifts the running value left by four bits.
///
/// @return An IntegerLiteral spanning from the position on entry to the position after the
///         last digit read.
IntegerLiteral* Lexer::LexHexadecimal() {
    const auto begin = _position;
    uint64_t accumulated = 0;
    for (auto digit = LexHexadecimalValue(Peek()); digit != 255; digit = LexHexadecimalValue(Peek())) {
        Progress();
        accumulated = (accumulated << 4) + digit;
    }
    return new IntegerLiteral(TextSpan(begin, _position), accumulated);
}
2020-10-04 15:15:28 +00:00
/// Lexes the digits of an octal literal.
///
/// Digits are read for as long as LexOctalValue() does not return 255; each one shifts the
/// running value left by three bits.
///
/// @return An IntegerLiteral spanning from the position on entry to the position after the
///         last digit read.
IntegerLiteral* Lexer::LexOctal() {
    const auto begin = _position;
    uint64_t accumulated = 0;
    for (auto digit = LexOctalValue(Peek()); digit != 255; digit = LexOctalValue(Peek())) {
        Progress();
        accumulated = (accumulated << 3) + digit;
    }
    return new IntegerLiteral(TextSpan(begin, _position), accumulated);
}
2020-10-04 15:15:28 +00:00
/// Lexes the digits of a binary literal.
///
/// Digits are read for as long as LexBinaryValue() does not return 255; each one shifts the
/// running value left by one bit.
///
/// @return An IntegerLiteral spanning from the position on entry to the position after the
///         last digit read.
IntegerLiteral* Lexer::LexBinary() {
    const auto begin = _position;
    uint64_t accumulated = 0;
    for (auto digit = LexBinaryValue(Peek()); digit != 255; digit = LexBinaryValue(Peek())) {
        Progress();
        accumulated = (accumulated << 1) + digit;
    }
    return new IntegerLiteral(TextSpan(begin, _position), accumulated);
}
/// Lexes a string literal.
///
/// A regular string ends at the next `opening` character and may not contain a line break.
/// A heredoc string ends at a run of three double quotes that is not followed by a fourth,
/// and may span lines. Reaching the end of the script (or, for regular strings, a line
/// break) first logs an ExpectedEndOfString diagnostic and ends the literal there.
///
/// @param opening The quote character that opened the string.
/// @param heredoc Whether the string was opened with three double quotes.
/// @return A StringLiteral holding the text between the delimiters.
StringLiteral* Lexer::LexString(char8_t opening, bool heredoc) {
    Progress();
    if (heredoc) {
        // Skip the two remaining quotes of the opening delimiter.
        Progress(2);
    }
    auto start = _position;
    size_t offset = 0;
    while (true) {
        auto current = Peek(offset);
        if (heredoc) {
            if (current == '"' && Peek(offset + 1) == '"' && Peek(offset + 2) == '"' && Peek(offset + 3) != '"') {
                break;
            }
        } else if (current == opening) {
            break;
        }
        if (current == u8'\0') {
            _diagnostics->LogError(Diagnostics::DiagnosticType::ExpectedEndOfString,
                                   TextSpan(start, start + offset));
            break;
        }
        if (!heredoc && (current == u8'\n' || current == u8'\r')) {
            _diagnostics->LogError(Diagnostics::DiagnosticType::ExpectedEndOfString,
                                   TextSpan(start, start + offset));
            break;
        }
        offset++;
    }
    Progress(offset);
    if (heredoc) {
        // Skip the first two quotes of the closing delimiter.
        Progress(2);
    }
    // The span ends at the current position, like the other literal lexers in this file;
    // `start + _position` would count the start offset twice.
    return new StringLiteral(TextSpan(start, _position), std::u8string(_script.substr(start, offset)));
}
2020-10-04 16:30:53 +00:00
static uint32_t constexpr Hash(const char8_t* input) {
2020-10-04 17:38:13 +00:00
return *input != 0U ? static_cast<uint32_t>(*input) + 33 * Hash(input + 1) : 5381;
2020-10-04 16:30:53 +00:00
};
/// Lexes a run of letters, digits and underscores and classifies it as a keyword or an
/// identifier.
///
/// The text is hashed and matched against the compile-time hashes of the known keywords;
/// anything that matches none of them becomes an IdentifierToken carrying the text.
///
/// NOTE(review): classification is by hash value only, so an identifier whose hash collides
/// with a keyword's would be reported as that keyword.
///
/// @return A keyword token or an IdentifierToken.
LexToken* Lexer::LexKeywordOrIdentifier() {
    auto start = _position;
    auto offset = 0;
    while (IsAlphaNumericalOrUnderscore(Peek(offset))) {
        offset++;
    }
    auto str = _script.substr(_position, offset);
    Progress(offset);

    // Hash exactly the characters of the identifier. Going through Hash(str.data()) would
    // only stop at the next NUL, which a substring view of the script does not have at the
    // end of the identifier. This loop yields the same value as Hash() does for a
    // NUL-terminated copy of `str`: characters folded in back to front from the seed 5381.
    uint32_t hash = 5381;
    for (auto i = str.size(); i > 0; --i) {
        hash = static_cast<uint32_t>(str[i - 1]) + 33 * hash;
    }

    switch (hash) {
        case Hash(u8"and"): return new LexTokenImpl<LexTokenKind::AndKeyword>(TextSpan(start, _position));
        case Hash(u8"abstract"): return new LexTokenImpl<LexTokenKind::AbstractKeyword>(TextSpan(start, _position));
        case Hash(u8"auto"): return new LexTokenImpl<LexTokenKind::AutoKeyword>(TextSpan(start, _position));
        case Hash(u8"bool"): return new LexTokenImpl<LexTokenKind::BoolKeyword>(TextSpan(start, _position));
        case Hash(u8"break"): return new LexTokenImpl<LexTokenKind::BreakKeyword>(TextSpan(start, _position));
        case Hash(u8"case"): return new LexTokenImpl<LexTokenKind::CaseKeyword>(TextSpan(start, _position));
        case Hash(u8"cast"): return new LexTokenImpl<LexTokenKind::CastKeyword>(TextSpan(start, _position));
        case Hash(u8"catch"): return new LexTokenImpl<LexTokenKind::CatchKeyword>(TextSpan(start, _position));
        case Hash(u8"class"): return new LexTokenImpl<LexTokenKind::ClassKeyword>(TextSpan(start, _position));
        case Hash(u8"const"): return new LexTokenImpl<LexTokenKind::ConstKeyword>(TextSpan(start, _position));
        case Hash(u8"continue"): return new LexTokenImpl<LexTokenKind::ContinueKeyword>(TextSpan(start, _position));
        case Hash(u8"default"): return new LexTokenImpl<LexTokenKind::DefaultKeyword>(TextSpan(start, _position));
        case Hash(u8"do"): return new LexTokenImpl<LexTokenKind::DoKeyword>(TextSpan(start, _position));
        case Hash(u8"double"): return new LexTokenImpl<LexTokenKind::DoubleKeyword>(TextSpan(start, _position));
        case Hash(u8"else"): return new LexTokenImpl<LexTokenKind::ElseKeyword>(TextSpan(start, _position));
        case Hash(u8"enum"): return new LexTokenImpl<LexTokenKind::EnumKeyword>(TextSpan(start, _position));
        case Hash(u8"explicit"): return new LexTokenImpl<LexTokenKind::ExplicitKeyword>(TextSpan(start, _position));
        case Hash(u8"external"): return new LexTokenImpl<LexTokenKind::ExternalKeyword>(TextSpan(start, _position));
        case Hash(u8"false"): return new LexTokenImpl<LexTokenKind::FalseKeyword>(TextSpan(start, _position));
        case Hash(u8"final"): return new LexTokenImpl<LexTokenKind::FinalKeyword>(TextSpan(start, _position));
        case Hash(u8"float"): return new LexTokenImpl<LexTokenKind::FloatKeyword>(TextSpan(start, _position));
        case Hash(u8"for"): return new LexTokenImpl<LexTokenKind::ForKeyword>(TextSpan(start, _position));
        case Hash(u8"from"): return new LexTokenImpl<LexTokenKind::FromKeyword>(TextSpan(start, _position));
        case Hash(u8"funcdef"): return new LexTokenImpl<LexTokenKind::FuncdefKeyword>(TextSpan(start, _position));
        case Hash(u8"function"): return new LexTokenImpl<LexTokenKind::FunctionKeyword>(TextSpan(start, _position));
        case Hash(u8"get"): return new LexTokenImpl<LexTokenKind::GetKeyword>(TextSpan(start, _position));
        case Hash(u8"if"): return new LexTokenImpl<LexTokenKind::IfKeyword>(TextSpan(start, _position));
        case Hash(u8"import"): return new LexTokenImpl<LexTokenKind::ImportKeyword>(TextSpan(start, _position));
        case Hash(u8"in"): return new LexTokenImpl<LexTokenKind::InKeyword>(TextSpan(start, _position));
        case Hash(u8"inout"): return new LexTokenImpl<LexTokenKind::InoutKeyword>(TextSpan(start, _position));
        case Hash(u8"int"): return new LexTokenImpl<LexTokenKind::IntKeyword>(TextSpan(start, _position));
        case Hash(u8"interface"):
            return new LexTokenImpl<LexTokenKind::InterfaceKeyword>(TextSpan(start, _position));
        case Hash(u8"int8"): return new LexTokenImpl<LexTokenKind::Int8Keyword>(TextSpan(start, _position));
        case Hash(u8"int16"): return new LexTokenImpl<LexTokenKind::Int16Keyword>(TextSpan(start, _position));
        case Hash(u8"int32"): return new LexTokenImpl<LexTokenKind::Int32Keyword>(TextSpan(start, _position));
        case Hash(u8"int64"): return new LexTokenImpl<LexTokenKind::Int64Keyword>(TextSpan(start, _position));
        case Hash(u8"is"): return new LexTokenImpl<LexTokenKind::IsKeyword>(TextSpan(start, _position));
        case Hash(u8"mixin"): return new LexTokenImpl<LexTokenKind::MixinKeyword>(TextSpan(start, _position));
        case Hash(u8"namespace"):
            return new LexTokenImpl<LexTokenKind::NamespaceKeyword>(TextSpan(start, _position));
        case Hash(u8"not"): return new LexTokenImpl<LexTokenKind::NotKeyword>(TextSpan(start, _position));
        case Hash(u8"null"): return new LexTokenImpl<LexTokenKind::NullKeyword>(TextSpan(start, _position));
        case Hash(u8"or"): return new LexTokenImpl<LexTokenKind::OrKeyword>(TextSpan(start, _position));
        case Hash(u8"out"): return new LexTokenImpl<LexTokenKind::OutKeyword>(TextSpan(start, _position));
        case Hash(u8"override"): return new LexTokenImpl<LexTokenKind::OverrideKeyword>(TextSpan(start, _position));
        case Hash(u8"private"): return new LexTokenImpl<LexTokenKind::PrivateKeyword>(TextSpan(start, _position));
        case Hash(u8"property"): return new LexTokenImpl<LexTokenKind::PropertyKeyword>(TextSpan(start, _position));
        case Hash(u8"protected"):
            return new LexTokenImpl<LexTokenKind::ProtectedKeyword>(TextSpan(start, _position));
        case Hash(u8"return"): return new LexTokenImpl<LexTokenKind::ReturnKeyword>(TextSpan(start, _position));
        case Hash(u8"set"): return new LexTokenImpl<LexTokenKind::SetKeyword>(TextSpan(start, _position));
        case Hash(u8"shared"): return new LexTokenImpl<LexTokenKind::SharedKeyword>(TextSpan(start, _position));
        case Hash(u8"super"): return new LexTokenImpl<LexTokenKind::SuperKeyword>(TextSpan(start, _position));
        case Hash(u8"switch"): return new LexTokenImpl<LexTokenKind::SwitchKeyword>(TextSpan(start, _position));
        case Hash(u8"this"): return new LexTokenImpl<LexTokenKind::ThisKeyword>(TextSpan(start, _position));
        case Hash(u8"true"): return new LexTokenImpl<LexTokenKind::TrueKeyword>(TextSpan(start, _position));
        case Hash(u8"try"): return new LexTokenImpl<LexTokenKind::TryKeyword>(TextSpan(start, _position));
        case Hash(u8"typedef"): return new LexTokenImpl<LexTokenKind::TypedefKeyword>(TextSpan(start, _position));
        case Hash(u8"uint"): return new LexTokenImpl<LexTokenKind::UintKeyword>(TextSpan(start, _position));
        case Hash(u8"uint8"): return new LexTokenImpl<LexTokenKind::Uint8Keyword>(TextSpan(start, _position));
        case Hash(u8"uint16"): return new LexTokenImpl<LexTokenKind::Uint16Keyword>(TextSpan(start, _position));
        case Hash(u8"uint32"): return new LexTokenImpl<LexTokenKind::Uint32Keyword>(TextSpan(start, _position));
        case Hash(u8"uint64"): return new LexTokenImpl<LexTokenKind::Uint64Keyword>(TextSpan(start, _position));
        case Hash(u8"void"): return new LexTokenImpl<LexTokenKind::VoidKeyword>(TextSpan(start, _position));
        case Hash(u8"while"): return new LexTokenImpl<LexTokenKind::WhileKeyword>(TextSpan(start, _position));
        case Hash(u8"xor"): return new LexTokenImpl<LexTokenKind::XorKeyword>(TextSpan(start, _position));

        default: return new IdentifierToken(TextSpan(start, _position), std::u8string(str));
    }
}
/// Tells whether a character may appear in an identifier.
///
/// @param c The character to classify.
/// @return true for ASCII letters `a`-`z` / `A`-`Z`, ASCII digits `0`-`9` and `_`;
///         false for everything else.
bool Lexer::IsAlphaNumericalOrUnderscore(char8_t c) {
    const bool isLowercaseLetter = c >= 'a' && c <= 'z';
    const bool isUppercaseLetter = c >= 'A' && c <= 'Z';
    const bool isDigit = c >= '0' && c <= '9';
    return isLowercaseLetter || isUppercaseLetter || isDigit || c == '_';
}
}