2020-10-04 14:33:12 +00:00
|
|
|
#include "Lexer.hpp"
|
|
|
|
#include <cmath>
|
|
|
|
#include <stdexcept>
|
|
|
|
#include "NumericalLexers.hpp"
|
|
|
|
|
2020-10-05 15:45:00 +00:00
|
|
|
namespace MalachScript::Parser {
|
2020-10-04 14:33:12 +00:00
|
|
|
// Lexes the whole script and returns the first token of the resulting chain.
// Each token is linked to its successor through SetNext(); the chain always
// ends with (and may consist solely of) an EndOfFile token.
const LexToken* Lexer::Lex() {
    LexToken* const head = LexNext();
    LexToken* tail = head;
    // Keep appending tokens until the most recently lexed one is the end-of-file marker.
    while (tail->GetKind() != LexTokenKind::EndOfFile) {
        LexToken* const following = LexNext();
        tail->SetNext(following);
        tail = following;
    }
    return head;
}
|
|
|
|
|
|
|
|
// Lexes and returns exactly one token.
//
// One character is taken with Consume() and dispatched on. Multi-character
// operators are recognised by inspecting the following characters with Peek()
// and committing to them with Progress(). Digits, quotes and identifier
// characters are handed off to LexNumerical, LexString and
// LexKeywordOrIdentifier respectively. Anything unrecognised is reported via
// LogError(UnknownToken) and returned as an Unknown token.
LexToken* Lexer::LexNext() {
    // Recorded before Consume(); all spans created in this function are relative to this value.
    auto start = _position;
    auto c = Consume();
    switch (c) {
        case u8'\0': return Create<LexTokenImpl<LexTokenKind::EndOfFile>>(TextSpan(start + 1, start + 2));
        case u8'*': {
            auto n = Peek();
            if (n == u8'*') {
                Progress();
                n = Peek();
                if (n == u8'=') {
                    Progress();
                    // **=
                    return Create<LexTokenImpl<LexTokenKind::StarStarEqualsSymbol>>(TextSpan(start, start + 3));
                }
                // **
                return Create<LexTokenImpl<LexTokenKind::StarStarSymbol>>(TextSpan(start, start + 2));
            }
            if (n == u8'=') {
                Progress();
                // *=
                return Create<LexTokenImpl<LexTokenKind::StarEqualsSymbol>>(TextSpan(start, start + 2));
            }
            // *
            return Create<LexTokenImpl<LexTokenKind::StarSymbol>>(TextSpan(start, start + 1));
        }
        case u8'/':
            if (Peek() == u8'=') {
                Progress();
                // /=
                return Create<LexTokenImpl<LexTokenKind::SlashEqualsSymbol>>(TextSpan(start, start + 2));
            }
            // /
            return Create<LexTokenImpl<LexTokenKind::SlashSymbol>>(TextSpan(start, start + 1));
        case u8'%':
            if (Peek() == u8'=') {
                Progress();
                // %=
                return Create<LexTokenImpl<LexTokenKind::PercentEqualsSymbol>>(TextSpan(start, start + 2));
            }
            // %
            return Create<LexTokenImpl<LexTokenKind::PercentSymbol>>(TextSpan(start, start + 1));
        case u8'+': {
            auto n = Peek();
            if (n == u8'=') {
                Progress();
                // +=
                return Create<LexTokenImpl<LexTokenKind::PlusEqualsSymbol>>(TextSpan(start, start + 2));
            }
            if (n == u8'+') {
                Progress();
                // ++
                return Create<LexTokenImpl<LexTokenKind::PlusPlusSymbol>>(TextSpan(start, start + 2));
            }
            // +
            return Create<LexTokenImpl<LexTokenKind::PlusSymbol>>(TextSpan(start, start + 1));
        }
        case u8'-': {
            auto n = Peek();
            if (n == u8'=') {
                Progress();
                // -=
                return Create<LexTokenImpl<LexTokenKind::MinusEqualsSymbol>>(TextSpan(start, start + 2));
            }
            if (n == u8'-') {
                Progress();
                // --
                return Create<LexTokenImpl<LexTokenKind::MinusMinusSymbol>>(TextSpan(start, start + 2));
            }
            // -
            return Create<LexTokenImpl<LexTokenKind::MinusSymbol>>(TextSpan(start, start + 1));
        }
        case u8'<': {
            auto n = Peek();
            if (n == u8'=') {
                Progress();
                // <=
                return Create<LexTokenImpl<LexTokenKind::LessThanEqualsSymbol>>(TextSpan(start, start + 2));
            }
            if (n == u8'<') {
                Progress();
                if (Peek() == u8'=') {
                    Progress();
                    // <<=
                    return Create<LexTokenImpl<LexTokenKind::LessThanLessThanEqualsSymbol>>(
                        TextSpan(start, start + 3));
                }
                // <<
                return Create<LexTokenImpl<LexTokenKind::LessThanLessThanSymbol>>(TextSpan(start, start + 2));
            }
            // <
            return Create<LexTokenImpl<LexTokenKind::LessThanSymbol>>(TextSpan(start, start + 1));
        }
        case u8'>': {
            auto n = Peek();
            if (n == u8'=') {
                Progress();
                // >=
                return Create<LexTokenImpl<LexTokenKind::GreaterThanEqualsSymbol>>(TextSpan(start, start + 2));
            }
            if (n == u8'>') {
                Progress();
                n = Peek();
                if (n == u8'=') {
                    Progress();
                    // >>=
                    return Create<LexTokenImpl<LexTokenKind::GreaterThanGreaterThanEqualsSymbol>>(
                        TextSpan(start, start + 3));
                }
                if (n == u8'>') {
                    Progress();
                    if (Peek() == u8'=') {
                        Progress();
                        // >>>=
                        return Create<LexTokenImpl<LexTokenKind::GreaterThanGreaterThanGreaterThanEqualsSymbol>>(
                            TextSpan(start, start + 4));
                    }
                    // >>>
                    return Create<LexTokenImpl<LexTokenKind::GreaterThanGreaterThanGreaterThanSymbol>>(
                        TextSpan(start, start + 3));
                }
                // >>
                return Create<LexTokenImpl<LexTokenKind::GreaterThanGreaterThanSymbol>>(TextSpan(start, start + 2));
            }
            // >
            return Create<LexTokenImpl<LexTokenKind::GreaterThanSymbol>>(TextSpan(start, start + 1));
        }
        case u8'(': return Create<LexTokenImpl<LexTokenKind::OpenParenthesisSymbol>>(TextSpan(start, start + 1));
        case u8')': return Create<LexTokenImpl<LexTokenKind::CloseParenthesisSymbol>>(TextSpan(start, start + 1));
        case u8'=': {
            if (Peek() == u8'=') {
                Progress();
                // ==
                return Create<LexTokenImpl<LexTokenKind::EqualsEqualsSymbol>>(TextSpan(start, start + 2));
            }
            // =
            return Create<LexTokenImpl<LexTokenKind::EqualsSymbol>>(TextSpan(start, start + 1));
        }
        case u8'!': {
            auto n = Peek();
            if (n == u8'=') {
                Progress();
                // !=
                return Create<LexTokenImpl<LexTokenKind::ExclamationMarkEqualsSymbol>>(TextSpan(start, start + 2));
            }
            // Only treat "!is" as an operator when it is not merely the prefix of an identifier:
            // "!isValid" has to lex as '!' followed by the identifier "isValid".
            if (n == u8'i' && Peek(2) == u8's' && !IsAlphaNumericalOrUnderscore(Peek(3))) {
                Progress(2);
                // !is
                return Create<LexTokenImpl<LexTokenKind::ExclamationMarkIsSymbol>>(TextSpan(start, start + 3));
            }
            // !
            return Create<LexTokenImpl<LexTokenKind::ExclamationMarkSymbol>>(TextSpan(start, start + 1));
        }
        case u8'?': return Create<LexTokenImpl<LexTokenKind::QuestionMarkSymbol>>(TextSpan(start, start + 1));
        case u8':': {
            if (Peek() == u8':') {
                Progress();
                // ::
                return Create<LexTokenImpl<LexTokenKind::ColonColonSymbol>>(TextSpan(start, start + 2));
            }
            // :
            return Create<LexTokenImpl<LexTokenKind::ColonSymbol>>(TextSpan(start, start + 1));
        }
        case u8'&': {
            auto n = Peek();
            if (n == u8'=') {
                Progress();
                // &=
                return Create<LexTokenImpl<LexTokenKind::AmpersandEqualsSymbol>>(TextSpan(start, start + 2));
            }
            if (n == u8'&') {
                Progress();
                // &&
                return Create<LexTokenImpl<LexTokenKind::AmpersandAmpersandSymbol>>(TextSpan(start, start + 2));
            }
            // &
            return Create<LexTokenImpl<LexTokenKind::AmpersandSymbol>>(TextSpan(start, start + 1));
        }
        case u8',': return Create<LexTokenImpl<LexTokenKind::CommaSymbol>>(TextSpan(start, start + 1));
        case u8'{':
            return Create<LexTokenImpl<LexTokenKind::OpenCurlyParenthesisSymbol>>(TextSpan(start, start + 1));
        case u8'}':
            return Create<LexTokenImpl<LexTokenKind::CloseCurlyParenthesisSymbol>>(TextSpan(start, start + 1));
        case u8';': return Create<LexTokenImpl<LexTokenKind::SemicolonSymbol>>(TextSpan(start, start + 1));
        case u8'|': {
            auto n = Peek();
            if (n == u8'=') {
                Progress();
                // |=
                return Create<LexTokenImpl<LexTokenKind::VerticalLineEqualsSymbol>>(TextSpan(start, start + 2));
            }
            if (n == u8'|') {
                Progress();
                // ||
                return Create<LexTokenImpl<LexTokenKind::VerticalLineVerticalLineSymbol>>(
                    TextSpan(start, start + 2));
            }
            // |
            return Create<LexTokenImpl<LexTokenKind::VerticalLineSymbol>>(TextSpan(start, start + 1));
        }
        case u8'^': {
            auto n = Peek();
            if (n == u8'=') {
                Progress();
                // ^=
                return Create<LexTokenImpl<LexTokenKind::CaretEqualsSymbol>>(TextSpan(start, start + 2));
            }
            if (n == u8'^') {
                Progress();
                // ^^
                return Create<LexTokenImpl<LexTokenKind::CaretCaretSymbol>>(TextSpan(start, start + 2));
            }
            // ^
            return Create<LexTokenImpl<LexTokenKind::CaretSymbol>>(TextSpan(start, start + 1));
        }
        case u8'~': return Create<LexTokenImpl<LexTokenKind::TildeSymbol>>(TextSpan(start, start + 1));
        case u8'.': return Create<LexTokenImpl<LexTokenKind::DotSymbol>>(TextSpan(start, start + 1));
        case u8'[':
            return Create<LexTokenImpl<LexTokenKind::OpenBlockParenthesisSymbol>>(TextSpan(start, start + 1));
        case u8']':
            return Create<LexTokenImpl<LexTokenKind::CloseBlockParenthesisSymbol>>(TextSpan(start, start + 1));
        case u8'@': return Create<LexTokenImpl<LexTokenKind::AtSymbol>>(TextSpan(start, start + 1));

        // Every whitespace character becomes its own single-character Whitespace token.
        case u8' ':
        case u8'\r':
        case u8'\n':
        case u8'\t': return Create<LexTokenImpl<LexTokenKind::Whitespace>>(TextSpan(start, start + 1));
        // Byte order mark
        case u8'\xEF': {
            // The full three-byte sequence EF BB BF is swallowed as whitespace.
            if (Peek() == u8'\xBB' && Peek(2) == u8'\xBF') {
                Progress(2);
                return Create<LexTokenImpl<LexTokenKind::Whitespace>>(TextSpan(start, start + 3));
            }
            // A lone 0xEF byte is not something this lexer understands.
            LogError(Diagnostics::DiagnosticType::UnknownToken, TextSpan(start, start + 1));
            return Create<LexTokenImpl<LexTokenKind::Unknown>>(TextSpan(start, start + 1));
        }
        case u8'0':
        case u8'1':
        case u8'2':
        case u8'3':
        case u8'4':
        case u8'5':
        case u8'6':
        case u8'7':
        case u8'8':
        case u8'9': return LexNumerical(c);
        case u8'\'': return LexString(u8'\'', false);
        case u8'"': {
            // Two further double quotes directly after the opening one start a heredoc string (""").
            if (Peek() == '"' && Peek(2) == '\"') {
                return LexString(u8'"', true);
            }
            return LexString(u8'"', false);
        }

        default:
            if (IsAlphaNumericalOrUnderscore(c))
                return LexKeywordOrIdentifier();
            LogError(Diagnostics::DiagnosticType::UnknownToken, TextSpan(start, start + 1));
            return Create<LexTokenImpl<LexTokenKind::Unknown>>(TextSpan(start, start + 1));
    }
}
|
|
|
|
|
|
|
|
// Lexes a numeric literal whose first digit `c` has already been consumed.
//
// A leading `0` directly followed by a letter is treated as a base prefix:
// x/X = hexadecimal, d/D = decimal, o/O = octal, b/B = binary. Any other
// letter (or underscore) is reported as InvalidNumericalBase and lexing
// continues as hexadecimal. In every other situation the literal is decimal.
//
// Returns the literal token produced by the base-specific lexer.
LexToken* Lexer::LexNumerical(char8_t c) {
    auto initialValue = LexDecimalValue(c);
    auto numericalSystem = 10; // Default to decimal system.
    if (initialValue == 0) {
        auto secondChar = Peek();
        auto secondValue = LexDecimalValue(secondChar);
        // 255 is the "not a decimal digit" marker (LexDecimal relies on the same sentinel).
        // Only identifier-like characters can form a base prefix. A `0` followed by a
        // delimiter such as whitespace, ';', ')' or the end of the script is simply the
        // number zero: that character must neither be consumed nor reported as an error.
        if (secondValue == 255 && IsAlphaNumericalOrUnderscore(secondChar)) {
            Progress();
            switch (secondChar) {
                case 'x':
                case 'X': numericalSystem = 16; break;
                case 'd':
                case 'D': numericalSystem = 10; break;
                case 'o':
                case 'O': numericalSystem = 8; break;
                case 'b':
                case 'B': numericalSystem = 2; break;
                default:
                    LogError(Diagnostics::DiagnosticType::InvalidNumericalBase,
                             TextSpan(_position - 1, _position + 1));
                    // Set to the largest numerical system, so we can prevent errors down the line.
                    numericalSystem = 16;
                    break;
            }
        }
    }
    switch (numericalSystem) {
        case 10: return LexDecimal(initialValue);
        case 16: return LexHexadecimal();
        case 8: return LexOctal();
        case 2: return LexBinary();
        // Unreachable with the assignments above; kept as a guard for future bases.
        default: throw std::logic_error("Not implemented");
    }
}
|
|
|
|
|
|
|
|
// Returns 10^n from a lookup table.
//
// Exact for 0 <= n <= 18 (10^18 is the largest power of ten representable in
// int64_t). Out-of-range arguments are clamped to the nearest table entry
// instead of indexing past the array: n < 0 yields 1 and n > 18 yields 10^18.
constexpr int64_t quick_pow10(int n) {
    constexpr int maxExponent = 18;
    constexpr int64_t pow10[maxExponent + 1] = {1,
                                                10,
                                                100,
                                                1000,
                                                10000,
                                                100000,
                                                1000000,
                                                10000000,
                                                100000000,
                                                1000000000,
                                                10000000000,
                                                100000000000,
                                                1000000000000,
                                                10000000000000,
                                                100000000000000,
                                                1000000000000000,
                                                10000000000000000,
                                                100000000000000000,
                                                1000000000000000000};
    if (n < 0) {
        return pow10[0];
    }
    if (n > maxExponent) {
        return pow10[maxExponent];
    }
    return pow10[n];
}
|
|
|
|
|
2020-11-08 14:41:18 +00:00
|
|
|
// Lexes the remainder of a decimal literal.
//
// `initial` is the value of the leading digit, which the caller has already
// consumed. Further digits are accumulated into the integer part until a '.'
// switches to the fractional part; an 'e'/'E' is only recognised while in the
// fractional part and switches to the exponent. Exponents are unsigned.
//
// Returns a FloatLiteral when a '.' was seen, otherwise an IntegerLiteral.
LexToken* Lexer::LexDecimal(ParseInt initial) {
    // quick_pow10() only has entries up to 10^18, so at most this many fraction digits are
    // accumulated. Further digits are still consumed as part of the literal but ignored.
    // NOTE(review): assumes ParseInt is at least 64 bits wide so 18 digits fit - confirm.
    constexpr uint8_t maxFractionDigits = 18;

    auto start = _position;
    ParseInt value = initial;
    ParseInt decimalValue = 0;
    ParseInt exponentValue = 0;
    uint8_t decimalLength = 0;
    bool isDecimal = false;
    bool isExponent = false;
    while (true) {
        auto v = static_cast<ParseInt>(LexDecimalValue(Peek()));
        // 255 marks "not a decimal digit".
        if (v == 255) {
            // A '.' starts the fraction, but only once and never after the exponent:
            // "1.5e3.2" must end the literal at "1.5e3" instead of resuming the fraction.
            if (!isDecimal && !isExponent && Peek() == u8'.') {
                isDecimal = true;
                Progress();
                continue;
            }
            if (isDecimal && (Peek() == u8'e' || Peek() == u8'E')) {
                isDecimal = false;
                isExponent = true;
                Progress();
                continue;
            }
            break;
        }
        Progress();
        if (isDecimal) {
            if (decimalLength < maxFractionDigits) {
                decimalValue *= 10;
                decimalValue += v;
                decimalLength++;
            }
        } else if (isExponent) {
            exponentValue *= 10;
            exponentValue += v;
        } else {
            value *= 10;
            value += v;
        }
    }
    if (isDecimal || isExponent) {
        // Integer part plus the fraction scaled back below the decimal point.
        auto val = value + (static_cast<ParseFloat>(decimalValue) / quick_pow10(decimalLength));
        if (isExponent) {
            val *= pow(10, exponentValue);
        }
        return Create<FloatLiteral>(TextSpan(start, _position), val);
    }
    return Create<IntegerLiteral>(TextSpan(start, _position), value);
}
|
|
|
|
|
2020-10-04 15:15:28 +00:00
|
|
|
// Lexes the digits of a hexadecimal literal and returns them as an IntegerLiteral.
// Digits are consumed for as long as LexHexadecimalValue() does not answer with
// 255; each digit contributes four bits to the result.
IntegerLiteral* Lexer::LexHexadecimal() {
    const auto begin = _position;
    ParseInt result = 0;
    for (auto digit = LexHexadecimalValue(Peek()); digit != 255; digit = LexHexadecimalValue(Peek())) {
        Progress();
        // Make room for the new nibble, then add it.
        result <<= 4;
        result += digit;
    }
    return Create<IntegerLiteral>(TextSpan(begin, _position), result);
}
|
2020-10-04 15:15:28 +00:00
|
|
|
// Lexes the digits of an octal literal and returns them as an IntegerLiteral.
// Digits are consumed for as long as LexOctalValue() does not answer with 255;
// each digit contributes three bits to the result.
IntegerLiteral* Lexer::LexOctal() {
    const auto begin = _position;
    ParseInt result = 0;
    for (auto digit = LexOctalValue(Peek()); digit != 255; digit = LexOctalValue(Peek())) {
        Progress();
        // Make room for the new digit, then add it.
        result <<= 3;
        result += digit;
    }
    return Create<IntegerLiteral>(TextSpan(begin, _position), result);
}
|
2020-10-04 15:15:28 +00:00
|
|
|
// Lexes the digits of a binary literal and returns them as an IntegerLiteral.
// Digits are consumed for as long as LexBinaryValue() does not answer with 255;
// each digit contributes one bit to the result.
IntegerLiteral* Lexer::LexBinary() {
    const auto begin = _position;
    ParseInt result = 0;
    for (auto digit = LexBinaryValue(Peek()); digit != 255; digit = LexBinaryValue(Peek())) {
        Progress();
        // Make room for the new bit, then add it.
        result <<= 1;
        result += digit;
    }
    return Create<IntegerLiteral>(TextSpan(begin, _position), result);
}
|
|
|
|
// Lexes a string literal and returns it as a StringLiteral token.
//
// `opening` is the quote character that started the string; for regular
// strings the same character ends it. `heredoc` selects triple-double-quoted
// strings, which end at a run of exactly three '"' that is not followed by a
// fourth one and which may span multiple lines.
//
// Reaching the end of the script, or a line break inside a regular string,
// is reported as ExpectedEndOfString; the text collected so far is still
// returned. The raw text between the delimiters is passed through ParseString().
StringLiteral* Lexer::LexString(char8_t opening, bool heredoc) {
    auto openingPos = _position;
    Progress();
    if (heredoc) {
        // Skip the two remaining quotes of the opening """.
        Progress(2);
    }
    auto start = _position;
    size_t offset = 0;
    // Scan ahead without consuming, so `offset` ends up as the length of the string contents.
    // NOTE(review): a backslash-escaped quote is not skipped here, so it would end a regular
    // string early - confirm whether ParseString() is expected to receive escaped quotes.
    while (true) {
        auto current = Peek(offset);
        if (heredoc) {
            if (current == '"' && Peek(offset + 1) == '"' && Peek(offset + 2) == '"' && Peek(offset + 3) != '"') {
                break;
            }
        } else if (current == opening) {
            break;
        }
        if (current == u8'\0') {
            LogError(Diagnostics::DiagnosticType::ExpectedEndOfString, TextSpan(start, start + offset));
            break;
        }
        if (!heredoc && (current == u8'\n' || current == u8'\r')) {
            LogError(Diagnostics::DiagnosticType::ExpectedEndOfString, TextSpan(start, start + offset));
            break;
        }
        offset++;
    }
    Progress(offset);
    if (heredoc) {
        // Skip the two remaining quotes of the closing """.
        Progress(2);
    }
    // The span runs from the opening position to the current position. The end is an absolute
    // position like everywhere else in this file, not an offset to be added to the start.
    return Create<StringLiteral>(TextSpan(openingPos, _position), ParseString(_script.substr(start, offset)));
}
|
2020-10-04 16:30:53 +00:00
|
|
|
|
|
|
|
// Lexes a run of letters, digits and underscores and classifies it.
//
// The text is hashed with Identifier::Hash() and compared against the hashes
// of all keywords; a match yields the corresponding keyword token, anything
// else becomes an IdentifierToken carrying the text, its length and its hash.
LexToken* Lexer::LexKeywordOrIdentifier() {
    auto begin = _position;
    // Measure the word by peeking ahead; nothing is consumed while counting.
    int length = 0;
    for (; IsAlphaNumericalOrUnderscore(Peek(length)); ++length) {
    }
    auto text = _script.substr(begin, length);
    Progress(length - 1);
    auto key = Identifier::Hash(text);

    // Every token produced below covers the same range.
    const auto span = TextSpan(begin, _position);
    switch (key) {
        case Identifier::Hash(u8"and"): return Create<LexTokenImpl<LexTokenKind::AndKeyword>>(span);
        case Identifier::Hash(u8"abstract"): return Create<LexTokenImpl<LexTokenKind::AbstractKeyword>>(span);
        case Identifier::Hash(u8"auto"): return Create<LexTokenImpl<LexTokenKind::AutoKeyword>>(span);
        case Identifier::Hash(u8"bool"): return Create<LexTokenImpl<LexTokenKind::BoolKeyword>>(span);
        case Identifier::Hash(u8"break"): return Create<LexTokenImpl<LexTokenKind::BreakKeyword>>(span);
        case Identifier::Hash(u8"case"): return Create<LexTokenImpl<LexTokenKind::CaseKeyword>>(span);
        case Identifier::Hash(u8"cast"): return Create<LexTokenImpl<LexTokenKind::CastKeyword>>(span);
        case Identifier::Hash(u8"catch"): return Create<LexTokenImpl<LexTokenKind::CatchKeyword>>(span);
        case Identifier::Hash(u8"class"): return Create<LexTokenImpl<LexTokenKind::ClassKeyword>>(span);
        case Identifier::Hash(u8"const"): return Create<LexTokenImpl<LexTokenKind::ConstKeyword>>(span);
        case Identifier::Hash(u8"continue"): return Create<LexTokenImpl<LexTokenKind::ContinueKeyword>>(span);
        case Identifier::Hash(u8"default"): return Create<LexTokenImpl<LexTokenKind::DefaultKeyword>>(span);
        case Identifier::Hash(u8"do"): return Create<LexTokenImpl<LexTokenKind::DoKeyword>>(span);
        case Identifier::Hash(u8"double"): return Create<LexTokenImpl<LexTokenKind::DoubleKeyword>>(span);
        case Identifier::Hash(u8"else"): return Create<LexTokenImpl<LexTokenKind::ElseKeyword>>(span);
        case Identifier::Hash(u8"enum"): return Create<LexTokenImpl<LexTokenKind::EnumKeyword>>(span);
        case Identifier::Hash(u8"explicit"): return Create<LexTokenImpl<LexTokenKind::ExplicitKeyword>>(span);
        case Identifier::Hash(u8"external"): return Create<LexTokenImpl<LexTokenKind::ExternalKeyword>>(span);
        case Identifier::Hash(u8"false"): return Create<LexTokenImpl<LexTokenKind::FalseKeyword>>(span);
        case Identifier::Hash(u8"final"): return Create<LexTokenImpl<LexTokenKind::FinalKeyword>>(span);
        case Identifier::Hash(u8"float"): return Create<LexTokenImpl<LexTokenKind::FloatKeyword>>(span);
        case Identifier::Hash(u8"for"): return Create<LexTokenImpl<LexTokenKind::ForKeyword>>(span);
        case Identifier::Hash(u8"from"): return Create<LexTokenImpl<LexTokenKind::FromKeyword>>(span);
        case Identifier::Hash(u8"funcdef"): return Create<LexTokenImpl<LexTokenKind::FuncdefKeyword>>(span);
        case Identifier::Hash(u8"function"): return Create<LexTokenImpl<LexTokenKind::FunctionKeyword>>(span);
        case Identifier::Hash(u8"get"): return Create<LexTokenImpl<LexTokenKind::GetKeyword>>(span);
        case Identifier::Hash(u8"if"): return Create<LexTokenImpl<LexTokenKind::IfKeyword>>(span);
        case Identifier::Hash(u8"import"): return Create<LexTokenImpl<LexTokenKind::ImportKeyword>>(span);
        case Identifier::Hash(u8"in"): return Create<LexTokenImpl<LexTokenKind::InKeyword>>(span);
        case Identifier::Hash(u8"inout"): return Create<LexTokenImpl<LexTokenKind::InoutKeyword>>(span);
        case Identifier::Hash(u8"int"): return Create<LexTokenImpl<LexTokenKind::IntKeyword>>(span);
        case Identifier::Hash(u8"interface"): return Create<LexTokenImpl<LexTokenKind::InterfaceKeyword>>(span);
        case Identifier::Hash(u8"int8"): return Create<LexTokenImpl<LexTokenKind::Int8Keyword>>(span);
        case Identifier::Hash(u8"int16"): return Create<LexTokenImpl<LexTokenKind::Int16Keyword>>(span);
        case Identifier::Hash(u8"int32"): return Create<LexTokenImpl<LexTokenKind::Int32Keyword>>(span);
        case Identifier::Hash(u8"int64"): return Create<LexTokenImpl<LexTokenKind::Int64Keyword>>(span);
        case Identifier::Hash(u8"is"): return Create<LexTokenImpl<LexTokenKind::IsKeyword>>(span);
        case Identifier::Hash(u8"mixin"): return Create<LexTokenImpl<LexTokenKind::MixinKeyword>>(span);
        case Identifier::Hash(u8"namespace"): return Create<LexTokenImpl<LexTokenKind::NamespaceKeyword>>(span);
        case Identifier::Hash(u8"not"): return Create<LexTokenImpl<LexTokenKind::NotKeyword>>(span);
        case Identifier::Hash(u8"null"): return Create<LexTokenImpl<LexTokenKind::NullKeyword>>(span);
        case Identifier::Hash(u8"or"): return Create<LexTokenImpl<LexTokenKind::OrKeyword>>(span);
        case Identifier::Hash(u8"out"): return Create<LexTokenImpl<LexTokenKind::OutKeyword>>(span);
        case Identifier::Hash(u8"override"): return Create<LexTokenImpl<LexTokenKind::OverrideKeyword>>(span);
        case Identifier::Hash(u8"private"): return Create<LexTokenImpl<LexTokenKind::PrivateKeyword>>(span);
        case Identifier::Hash(u8"property"): return Create<LexTokenImpl<LexTokenKind::PropertyKeyword>>(span);
        case Identifier::Hash(u8"protected"): return Create<LexTokenImpl<LexTokenKind::ProtectedKeyword>>(span);
        case Identifier::Hash(u8"return"): return Create<LexTokenImpl<LexTokenKind::ReturnKeyword>>(span);
        case Identifier::Hash(u8"set"): return Create<LexTokenImpl<LexTokenKind::SetKeyword>>(span);
        case Identifier::Hash(u8"shared"): return Create<LexTokenImpl<LexTokenKind::SharedKeyword>>(span);
        case Identifier::Hash(u8"super"): return Create<LexTokenImpl<LexTokenKind::SuperKeyword>>(span);
        case Identifier::Hash(u8"switch"): return Create<LexTokenImpl<LexTokenKind::SwitchKeyword>>(span);
        case Identifier::Hash(u8"this"): return Create<LexTokenImpl<LexTokenKind::ThisKeyword>>(span);
        case Identifier::Hash(u8"true"): return Create<LexTokenImpl<LexTokenKind::TrueKeyword>>(span);
        case Identifier::Hash(u8"try"): return Create<LexTokenImpl<LexTokenKind::TryKeyword>>(span);
        case Identifier::Hash(u8"typedef"): return Create<LexTokenImpl<LexTokenKind::TypedefKeyword>>(span);
        case Identifier::Hash(u8"uint"): return Create<LexTokenImpl<LexTokenKind::UintKeyword>>(span);
        case Identifier::Hash(u8"uint8"): return Create<LexTokenImpl<LexTokenKind::Uint8Keyword>>(span);
        case Identifier::Hash(u8"uint16"): return Create<LexTokenImpl<LexTokenKind::Uint16Keyword>>(span);
        case Identifier::Hash(u8"uint32"): return Create<LexTokenImpl<LexTokenKind::Uint32Keyword>>(span);
        case Identifier::Hash(u8"uint64"): return Create<LexTokenImpl<LexTokenKind::Uint64Keyword>>(span);
        case Identifier::Hash(u8"void"): return Create<LexTokenImpl<LexTokenKind::VoidKeyword>>(span);
        case Identifier::Hash(u8"while"): return Create<LexTokenImpl<LexTokenKind::WhileKeyword>>(span);
        case Identifier::Hash(u8"xor"): return Create<LexTokenImpl<LexTokenKind::XorKeyword>>(span);

        // Not a keyword: hand the text, its length and the hash over to an identifier token.
        default: return Create<IdentifierToken>(span, Identifier(text.data(), length, key));
    }
}
|
|
|
|
// Returns true when `c` is an ASCII letter (a-z, A-Z), an ASCII digit (0-9)
// or an underscore; false for every other character.
bool Lexer::IsAlphaNumericalOrUnderscore(char8_t c) {
    const bool lowerCase = c >= 'a' && c <= 'z';
    const bool upperCase = c >= 'A' && c <= 'Z';
    const bool digit = c >= '0' && c <= '9';
    return lowerCase || upperCase || digit || c == '_';
}
|
2020-10-04 14:33:12 +00:00
|
|
|
}
|