Initial commit, support for lexing symbols and numericals.

2020-10-04 16:33:12 +02:00
commit e0c52f4ae7
13 changed files with 6895 additions and 0 deletions
--- a/.clang-format
+++ b/.clang-format
@@ -0,0 +1,136 @@
+# ClangFormatConfigureSource: 'clang-format-file:///home/nathan/Projects/PokemonLibraries/PkmnLib/.clang-format'
+---
+Language:        Cpp
+AccessModifierOffset: -4
+AlignAfterOpenBracket: Align
+AlignConsecutiveMacros: false
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlines: Right
+AlignOperands:   true
+AlignTrailingComments: true
+AllowAllArgumentsOnNextLine: true
+AllowAllConstructorInitializersOnNextLine: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortCaseLabelsOnASingleLine: true
+AllowShortFunctionsOnASingleLine: All
+AllowShortLambdasOnASingleLine: All
+AllowShortIfStatementsOnASingleLine: Never
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: MultiLine
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+  AfterCaseLabel:  false
+  AfterClass:      false
+  AfterControlStatement: Never
+  AfterEnum:       false
+  AfterFunction:   false
+  AfterNamespace:  false
+  AfterObjCDeclaration: false
+  AfterStruct:     false
+  AfterUnion:      false
+  AfterExternBlock: false
+  BeforeCatch:     false
+  BeforeElse:      false
+  IndentBraces:    false
+  SplitEmptyFunction: true
+  SplitEmptyRecord: true
+  SplitEmptyNamespace: true
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Attach
+BreakBeforeInheritanceComma: false
+BreakInheritanceList: BeforeColon
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+BreakConstructorInitializers: BeforeColon
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: true
+ColumnLimit:     120
+CommentPragmas:  '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DeriveLineEnding: true
+DerivePointerAlignment: false
+DisableFormat:   false
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: false
+ForEachMacros:
+  - foreach
+  - Q_FOREACH
+  - BOOST_FOREACH
+IncludeBlocks:   Merge
+IncludeCategories:
+  - Regex:           '^"(llvm|llvm-c|clang|clang-c)/'
+    Priority:        2
+    SortPriority:    0
+  - Regex:           '^(<|"(gtest|gmock|isl|json)/)'
+    Priority:        1
+    SortPriority:    0
+  - Regex:           '.*'
+    Priority:        3
+    SortPriority:    0
+IncludeIsMainRegex: '(Test)?$'
+IncludeIsMainSourceRegex: ''
+IndentCaseLabels: true
+IndentGotoLabels: true
+IndentPPDirectives: None
+IndentWidth:     4
+IndentWrappedFunctionNames: false
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: true
+MacroBlockBegin: ''
+MacroBlockEnd:   ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: All
+ObjCBinPackProtocolList: Auto
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakAssignment: 2
+PenaltyBreakBeforeFirstCallParameter: 19
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyBreakTemplateDeclaration: 10
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 60
+PointerAlignment: Left
+ReflowComments:  true
+SortIncludes:    true
+SortUsingDeclarations: true
+SpaceAfterCStyleCast: false
+SpaceAfterLogicalNot: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeParens: ControlStatements
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceInEmptyBlock: false
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles:  false
+SpacesInConditionalStatement: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+SpaceBeforeSquareBrackets: false
+Standard:        c++20
+StatementMacros:
+  - Q_UNUSED
+  - QT_REQUIRE_VERSION
+  - Try
+TabWidth:        8
+UseCRLF:         false
+UseTab:          Never
+...
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -0,0 +1,11 @@
+Checks: 'readability-*,clang-diagnostic-*,clang-analyzer-*,-clang-analyzer-alpha*,performance-*,cppcoreguidelines-*,
+bugprone-*,modernize-*,-modernize-use-trailing-return-type'
+HeaderFilterRegex: ''
+AnalyzeTemporaryDtors: false
+CheckOptions:
+  - key: readability-identifier-naming.ClassCase
+    value: CamelCase
+  - key: readability-identifier-naming.PrivateMemberCase
+    value: camelBack
+  - key: readability-identifier-naming.PrivateMemberPrefix
+    value: '_'
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,4 @@
+/cmake-build-debug/
+/cmake-build-release/
+/build-release-windows/
+/.idea/
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -0,0 +1,34 @@
+cmake_minimum_required(VERSION 3.17)
+project(ElohimScript LANGUAGES CXX)
+
+# Enable all warnings, and make them error when occurring.
+add_compile_options(-Wall -Wextra -Werror)
+# We like new stuff, so set the c++ standard to c++20. The sources use char8_t, so an older standard must be a
+# configure error rather than a silent fallback.
+set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+option(TESTS "Whether the test executable should be built as well." OFF)
+
+if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+    add_link_options(-fuse-ld=lld)
+endif ()
+
+# WINDOWS may be supplied on the command line or by a toolchain file; WIN32 is what CMake itself defines.
+if (NOT WINDOWS AND NOT WIN32)
+    # Include debug symbols in all non-Windows builds. -gfull is only passed to Clang because GCC rejects it as an
+    # unknown debug level.
+    add_compile_options(-g "$<$<CXX_COMPILER_ID:Clang,AppleClang>:-gfull>" -g3)
+endif ()
+
+# CONFIGURE_DEPENDS makes the build re-run the glob so newly added files are picked up without a manual re-configure.
+file(GLOB_RECURSE SRC_FILES CONFIGURE_DEPENDS "src/*.cpp" "src/*.hpp")
+add_library(ElohimScript SHARED ${SRC_FILES})
+
+if (TESTS)
+    # Create Test executable
+    file(GLOB_RECURSE TEST_FILES CONFIGURE_DEPENDS "tests/*.cpp" "tests/*.hpp")
+    add_executable(ElohimScriptTests ${TEST_FILES} extern/doctest.hpp)
+    # Nothing links against an executable, so the dependency is private.
+    target_link_libraries(ElohimScriptTests PRIVATE ElohimScript)
+
+    # Add a definition for the test library
+    target_compile_definitions(ElohimScriptTests PRIVATE TESTS_BUILD)
+endif ()
+
+
+
--- a/extern/doctest.hpp
+++ b/extern/doctest.hpp
--- a/src/Parser/Lexer/LexToken.hpp
+++ b/src/Parser/Lexer/LexToken.hpp
@@ -0,0 +1,42 @@
+#ifndef ELOHIMSCRIPT_LEXTOKEN_HPP
+#define ELOHIMSCRIPT_LEXTOKEN_HPP
+
+#include <memory>
+#include "LexTokenKind.hpp"
+
+namespace ElohimScript::Parser {
+    // Base class for every token produced by the Lexer. Tokens form a singly linked list in which each token owns
+    // the token that follows it, so deleting the first token releases the whole chain.
+    class LexToken {
+        friend class Lexer;
+
+        // Owning link to the following token; assigned by Lexer (hence the friend declaration), empty until then.
+        std::unique_ptr<const LexToken> _next;
+
+    public:
+        virtual ~LexToken() = default;
+        // The kind of token this object represents.
+        [[nodiscard]] virtual LexTokenKind GetKind() const noexcept = 0;
+        // The token following this one, or an empty pointer when no successor has been linked.
+        [[nodiscard]] const std::unique_ptr<const LexToken>& GetNext() const noexcept { return _next; }
+    };
+
+    // Token without payload; its kind is fixed at compile time through the template argument.
+    template <LexTokenKind kind> class LexTokenImpl : public LexToken {
+    public:
+        LexTokenImpl() = default;
+        [[nodiscard]] LexTokenKind GetKind() const noexcept override { return kind; }
+    };
+
+    // Integer literal token carrying the parsed unsigned value.
+    class IntegerToken : public LexTokenImpl<LexTokenKind::IntegerToken> {
+        uint64_t _value;
+
+    public:
+        // explicit: a bare number must never silently convert into a token.
+        explicit IntegerToken(uint64_t value) noexcept : _value(value) {}
+        [[nodiscard]] uint64_t GetValue() const noexcept { return _value; }
+    };
+
+    // Floating point literal token carrying the parsed value.
+    class FloatToken : public LexTokenImpl<LexTokenKind::FloatToken> {
+        double _value;
+
+    public:
+        // explicit: a bare number must never silently convert into a token.
+        explicit FloatToken(double value) noexcept : _value(value) {}
+        [[nodiscard]] double GetValue() const noexcept { return _value; }
+    };
+}
+
+#endif // ELOHIMSCRIPT_LEXTOKEN_HPP
--- a/src/Parser/Lexer/LexTokenKind.hpp
+++ b/src/Parser/Lexer/LexTokenKind.hpp
@@ -0,0 +1,71 @@
+#ifndef ELOHIMSCRIPT_LEXTOKENKIND_HPP
+#define ELOHIMSCRIPT_LEXTOKENKIND_HPP
+
+#include <cstdint>
+namespace ElohimScript::Parser {
+    // Identifies what a LexToken represents. Stored in a single byte.
+    // Symbol enumerators are named after the characters they spell, e.g. StarStarEqualsSymbol is "**=".
+    enum class LexTokenKind : uint8_t {
+        // Any character the lexer does not recognise.
+        Unknown,
+        // Marks the end of the script.
+        EndOfFile,
+        // A single whitespace character (the lexer also reports a UTF-8 byte order mark as whitespace).
+        Whitespace,
+
+        // Symbols
+        StarSymbol,
+        StarStarSymbol,
+        SlashSymbol,
+        PercentSymbol,
+        PlusSymbol,
+        MinusSymbol,
+        LessThanEqualsSymbol,
+        LessThanSymbol,
+        GreaterThanEqualsSymbol,
+        GreaterThanSymbol,
+        OpenParenthesisSymbol,
+        CloseParenthesisSymbol,
+        EqualsEqualsSymbol,
+        ExclamationMarkEqualsSymbol,
+        QuestionMarkSymbol,
+        ColonSymbol,
+        EqualsSymbol,
+        PlusEqualsSymbol,
+        MinusEqualsSymbol,
+        StarEqualsSymbol,
+        SlashEqualsSymbol,
+        PercentEqualsSymbol,
+        StarStarEqualsSymbol,
+        PlusPlusSymbol,
+        MinusMinusSymbol,
+        AmpersandSymbol,
+        CommaSymbol,
+        OpenCurlyParenthesisSymbol,
+        CloseCurlyParenthesisSymbol,
+        SemicolonSymbol,
+        VerticalLineSymbol,
+        CaretSymbol,
+        TildeSymbol,
+        LessThanLessThanSymbol,
+        GreaterThanGreaterThanSymbol,
+        GreaterThanGreaterThanGreaterThanSymbol,
+        AmpersandEqualsSymbol,
+        VerticalLineEqualsSymbol,
+        CaretEqualsSymbol,
+        LessThanLessThanEqualsSymbol,
+        GreaterThanGreaterThanEqualsSymbol,
+        GreaterThanGreaterThanGreaterThanEqualsSymbol,
+        DotSymbol,
+        AmpersandAmpersandSymbol,
+        VerticalLineVerticalLineSymbol,
+        ExclamationMarkSymbol,
+        OpenBlockParenthesisSymbol,
+        CloseBlockParenthesisSymbol,
+        CaretCaretSymbol,
+        AtSymbol,
+        // The "!is" operator.
+        ExclamationMarkIsSymbol,
+        ColonColonSymbol,
+
+        // Misc
+        // Numeric literals; tokens of these kinds carry a value (see FloatToken / IntegerToken).
+        FloatToken,
+        IntegerToken,
+    };
+}
+
+#endif // ELOHIMSCRIPT_LEXTOKENKIND_HPP
--- a/src/Parser/Lexer/Lexer.cpp
+++ b/src/Parser/Lexer/Lexer.cpp
@@ -0,0 +1,363 @@
+#include "Lexer.hpp"
+#include <cmath>
+#include <stdexcept>
+#include "NumericalLexers.hpp"
+
+namespace ElohimScript::Parser {
+    // Lexes the whole script and returns the first token. Every token owns its successor through _next, and the
+    // chain always ends with an EndOfFile token (which may be the returned token itself for an empty script).
+    const LexToken* Lexer::Lex() {
+        LexToken* head = LexNext();
+        // Keep appending until the token at the tail of the chain is the end-of-file marker.
+        for (LexToken* tail = head; tail->GetKind() != LexTokenKind::EndOfFile;) {
+            LexToken* following = LexNext();
+            tail->_next.reset(following);
+            tail = following;
+        }
+        return head;
+    }
+
+    // Consumes one character and returns a newly allocated token for the longest symbol, whitespace character or
+    // numeric literal that starts there. Multi-character symbols are recognised by peeking ahead and only
+    // progressing past the characters that actually belong to the symbol. Unrecognised characters produce an
+    // Unknown token.
+    LexToken* Lexer::LexNext() {
+        auto c = Consume();
+        switch (c) {
+            case u8'\0': return new LexTokenImpl<LexTokenKind::EndOfFile>();
+            case u8'*': {
+                auto n = Peek();
+                if (n == u8'*') {
+                    Progress();
+                    n = Peek();
+                    if (n == u8'=') {
+                        Progress();
+                        return new LexTokenImpl<LexTokenKind::StarStarEqualsSymbol>();
+                    }
+                    return new LexTokenImpl<LexTokenKind::StarStarSymbol>();
+                }
+                if (n == u8'=') {
+                    Progress();
+                    return new LexTokenImpl<LexTokenKind::StarEqualsSymbol>();
+                }
+                return new LexTokenImpl<LexTokenKind::StarSymbol>();
+            }
+            case u8'/':
+                if (Peek() == u8'=') {
+                    Progress();
+                    return new LexTokenImpl<LexTokenKind::SlashEqualsSymbol>();
+                }
+                return new LexTokenImpl<LexTokenKind::SlashSymbol>();
+            case u8'%':
+                if (Peek() == u8'=') {
+                    Progress();
+                    return new LexTokenImpl<LexTokenKind::PercentEqualsSymbol>();
+                }
+                return new LexTokenImpl<LexTokenKind::PercentSymbol>();
+            case u8'+': {
+                auto n = Peek();
+                if (n == u8'=') {
+                    Progress();
+                    return new LexTokenImpl<LexTokenKind::PlusEqualsSymbol>();
+                }
+                if (n == u8'+') {
+                    Progress();
+                    return new LexTokenImpl<LexTokenKind::PlusPlusSymbol>();
+                }
+                return new LexTokenImpl<LexTokenKind::PlusSymbol>();
+            }
+            case u8'-': {
+                auto n = Peek();
+                if (n == u8'=') {
+                    Progress();
+                    return new LexTokenImpl<LexTokenKind::MinusEqualsSymbol>();
+                }
+                if (n == u8'-') {
+                    Progress();
+                    return new LexTokenImpl<LexTokenKind::MinusMinusSymbol>();
+                }
+                return new LexTokenImpl<LexTokenKind::MinusSymbol>();
+            }
+            case u8'<': {
+                auto n = Peek();
+                if (n == u8'=') {
+                    Progress();
+                    return new LexTokenImpl<LexTokenKind::LessThanEqualsSymbol>();
+                }
+                if (n == u8'<') {
+                    Progress();
+                    if (Peek() == u8'=') {
+                        Progress();
+                        return new LexTokenImpl<LexTokenKind::LessThanLessThanEqualsSymbol>();
+                    }
+                    return new LexTokenImpl<LexTokenKind::LessThanLessThanSymbol>();
+                }
+                return new LexTokenImpl<LexTokenKind::LessThanSymbol>();
+            }
+            case u8'>': {
+                auto n = Peek();
+                if (n == u8'=') {
+                    Progress();
+                    return new LexTokenImpl<LexTokenKind::GreaterThanEqualsSymbol>();
+                }
+                if (n == u8'>') {
+                    Progress();
+                    n = Peek();
+                    if (n == u8'=') {
+                        Progress();
+                        return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanEqualsSymbol>();
+                    }
+                    if (n == u8'>') {
+                        Progress();
+                        if (Peek() == u8'=') {
+                            Progress();
+                            return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanGreaterThanEqualsSymbol>();
+                        }
+                        return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanGreaterThanSymbol>();
+                    }
+                    return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanSymbol>();
+                }
+                return new LexTokenImpl<LexTokenKind::GreaterThanSymbol>();
+            }
+            case u8'(': return new LexTokenImpl<LexTokenKind::OpenParenthesisSymbol>();
+            case u8')': return new LexTokenImpl<LexTokenKind::CloseParenthesisSymbol>();
+            case u8'=': {
+                if (Peek() == u8'=') {
+                    Progress();
+                    return new LexTokenImpl<LexTokenKind::EqualsEqualsSymbol>();
+                }
+                return new LexTokenImpl<LexTokenKind::EqualsSymbol>();
+            }
+            case u8'!': {
+                auto n = Peek();
+                if (n == u8'=') {
+                    Progress();
+                    return new LexTokenImpl<LexTokenKind::ExclamationMarkEqualsSymbol>();
+                }
+                // "!is" is a single three character operator.
+                if (n == u8'i' && Peek(2) == u8's') {
+                    Progress(2);
+                    return new LexTokenImpl<LexTokenKind::ExclamationMarkIsSymbol>();
+                }
+                return new LexTokenImpl<LexTokenKind::ExclamationMarkSymbol>();
+            }
+            case u8'?': return new LexTokenImpl<LexTokenKind::QuestionMarkSymbol>();
+            case u8':': {
+                if (Peek() == u8':') {
+                    Progress();
+                    return new LexTokenImpl<LexTokenKind::ColonColonSymbol>();
+                }
+                return new LexTokenImpl<LexTokenKind::ColonSymbol>();
+            }
+            case u8'&': {
+                auto n = Peek();
+                if (n == u8'=') {
+                    Progress();
+                    return new LexTokenImpl<LexTokenKind::AmpersandEqualsSymbol>();
+                }
+                if (n == u8'&') {
+                    Progress();
+                    return new LexTokenImpl<LexTokenKind::AmpersandAmpersandSymbol>();
+                }
+                return new LexTokenImpl<LexTokenKind::AmpersandSymbol>();
+            }
+            case u8',': return new LexTokenImpl<LexTokenKind::CommaSymbol>();
+            case u8'{': return new LexTokenImpl<LexTokenKind::OpenCurlyParenthesisSymbol>();
+            case u8'}': return new LexTokenImpl<LexTokenKind::CloseCurlyParenthesisSymbol>();
+            case u8';': return new LexTokenImpl<LexTokenKind::SemicolonSymbol>();
+            case u8'|': {
+                auto n = Peek();
+                if (n == u8'=') {
+                    Progress();
+                    return new LexTokenImpl<LexTokenKind::VerticalLineEqualsSymbol>();
+                }
+                if (n == u8'|') {
+                    Progress();
+                    return new LexTokenImpl<LexTokenKind::VerticalLineVerticalLineSymbol>();
+                }
+                return new LexTokenImpl<LexTokenKind::VerticalLineSymbol>();
+            }
+            case u8'^': {
+                auto n = Peek();
+                if (n == u8'=') {
+                    Progress();
+                    return new LexTokenImpl<LexTokenKind::CaretEqualsSymbol>();
+                }
+                if (n == u8'^') {
+                    Progress();
+                    return new LexTokenImpl<LexTokenKind::CaretCaretSymbol>();
+                }
+                return new LexTokenImpl<LexTokenKind::CaretSymbol>();
+            }
+            case u8'~': return new LexTokenImpl<LexTokenKind::TildeSymbol>();
+            case u8'.': return new LexTokenImpl<LexTokenKind::DotSymbol>();
+            case u8'[': return new LexTokenImpl<LexTokenKind::OpenBlockParenthesisSymbol>();
+            case u8']': return new LexTokenImpl<LexTokenKind::CloseBlockParenthesisSymbol>();
+            case u8'@': return new LexTokenImpl<LexTokenKind::AtSymbol>();
+
+            case u8' ':
+            case u8'\r':
+            case u8'\n':
+            case u8'\t': return new LexTokenImpl<LexTokenKind::Whitespace>();
+            // Byte order mark
+            case u8'\xEF': {
+                if (Peek() == u8'\xBB' && Peek(2) == u8'\xBF') {
+                    Progress(2);
+                    return new LexTokenImpl<LexTokenKind::Whitespace>();
+                }
+                // A 0xEF byte that does not start a byte order mark is not a digit. Return here so control does
+                // not fall through into the numeric cases below and lex the byte as a number.
+                return new LexTokenImpl<LexTokenKind::Unknown>();
+            }
+            case u8'0':
+            case u8'1':
+            case u8'2':
+            case u8'3':
+            case u8'4':
+            case u8'5':
+            case u8'6':
+            case u8'7':
+            case u8'8':
+            case u8'9': return LexNumerical(c);
+
+            default: return new LexTokenImpl<LexTokenKind::Unknown>();
+        }
+    }
+
+    // Lexes a numeric literal whose first digit `c` has already been consumed.
+    // A leading 0 directly followed by x, o, b or d selects hexadecimal, octal, binary or decimal notation; every
+    // other literal is decimal.
+    LexToken* Lexer::LexNumerical(char8_t c) {
+        auto initialValue = LexDecimalValue(c);
+        if (initialValue == 0) {
+            // Only a recognised radix prefix is consumed. Any other character after the 0 (an operator, a closing
+            // parenthesis, whitespace, ...) is not part of the literal and must stay available for the next token.
+            switch (Peek()) {
+                case u8'x': Progress(); return LexHexadecimal();
+                case u8'o': Progress(); return LexOctal();
+                case u8'b': Progress(); return LexBinary();
+                case u8'd': Progress(); break;
+                default:
+                    // TODO: Log Invalid numerical system
+                    break;
+            }
+        }
+        return LexDecimal(initialValue);
+    }
+
+    // Returns 10^n from a lookup table.
+    // Valid for 0 <= n <= 18; 10^19 does not fit in an int64_t. Throws std::out_of_range for any other n instead of
+    // reading outside the table.
+    constexpr int64_t quick_pow10(int n) {
+        constexpr int maxExponent = 18;
+        constexpr int64_t pow10[maxExponent + 1] = {1,
+                                                    10,
+                                                    100,
+                                                    1000,
+                                                    10000,
+                                                    100000,
+                                                    1000000,
+                                                    10000000,
+                                                    100000000,
+                                                    1000000000,
+                                                    10000000000,
+                                                    100000000000,
+                                                    1000000000000,
+                                                    10000000000000,
+                                                    100000000000000,
+                                                    1000000000000000,
+                                                    10000000000000000,
+                                                    100000000000000000,
+                                                    1000000000000000000};
+        if (n < 0 || n > maxExponent) {
+            throw std::out_of_range("quick_pow10: exponent must be within [0, 18]");
+        }
+        return pow10[n];
+    }
+
+    // Lexes the remainder of a decimal literal. `initial` is the value of the digit that was already consumed.
+    // Returns an IntegerToken for plain digit sequences, or a FloatToken once a fraction ('.') has been seen.
+    // An exponent ('e' / 'E', optionally followed by '+' or '-') is only recognised after a fraction.
+    LexToken* Lexer::LexDecimal(uint64_t initial) {
+        // Largest fraction length that fits both the uint64_t accumulator and the int64_t power-of-ten table.
+        // Digits beyond it are consumed but ignored; they are below the precision of a double anyway.
+        constexpr uint8_t maxFractionDigits = 18;
+        uint64_t value = initial;
+        uint64_t decimalValue = 0;
+        uint64_t exponentValue = 0;
+        uint8_t decimalLength = 0;
+        bool isDecimal = false;
+        bool isExponent = false;
+        bool isNegativeExponent = false;
+        while (true) {
+            auto v = (uint64_t)LexDecimalValue(Peek());
+            if (v == 255) {
+                // A '.' starts the fraction only once, and never after the exponent has begun.
+                if (!isDecimal && !isExponent && Peek() == u8'.') {
+                    isDecimal = true;
+                    Progress();
+                    continue;
+                }
+                if (isDecimal && (Peek() == u8'e' || Peek() == u8'E')) {
+                    isDecimal = false;
+                    isExponent = true;
+                    Progress();
+                    // Optional exponent sign. It is only consumed when a digit follows, so that a trailing
+                    // operator still becomes its own token.
+                    auto sign = Peek();
+                    if ((sign == u8'+' || sign == u8'-') && LexDecimalValue(Peek(2)) != 255) {
+                        isNegativeExponent = sign == u8'-';
+                        Progress();
+                    }
+                    continue;
+                }
+                break;
+            }
+            Progress();
+            if (isDecimal) {
+                if (decimalLength < maxFractionDigits) {
+                    decimalValue *= 10;
+                    decimalValue += v;
+                    decimalLength++;
+                }
+            } else if (isExponent) {
+                exponentValue *= 10;
+                exponentValue += v;
+            } else {
+                value *= 10;
+                value += v;
+            }
+        }
+        if (isDecimal || isExponent) {
+            auto val = value + ((double)decimalValue / quick_pow10(decimalLength));
+            if (isExponent) {
+                auto scale = pow(10, exponentValue);
+                val = isNegativeExponent ? val / scale : val * scale;
+            }
+            return new FloatToken(val);
+        }
+        return new IntegerToken(value);
+    }
+
+    // Reads hexadecimal digits until the next character is not one, accumulating four bits per digit.
+    IntegerToken* Lexer::LexHexadecimal() {
+        uint64_t accumulated = 0;
+        // 255 is the "not a digit" marker returned by LexHexadecimalValue.
+        for (auto digit = LexHexadecimalValue(Peek()); digit != 255; digit = LexHexadecimalValue(Peek())) {
+            Progress();
+            accumulated = (accumulated << 4) + digit;
+        }
+        return new IntegerToken(accumulated);
+    }
+    // Reads octal digits until the next character is not one, accumulating three bits per digit.
+    IntegerToken* Lexer::LexOctal() {
+        uint64_t accumulated = 0;
+        // 255 is the "not a digit" marker returned by LexOctalValue.
+        for (auto digit = LexOctalValue(Peek()); digit != 255; digit = LexOctalValue(Peek())) {
+            Progress();
+            accumulated = (accumulated << 3) + digit;
+        }
+        return new IntegerToken(accumulated);
+    }
+    // Reads binary digits until the next character is not one, accumulating one bit per digit.
+    IntegerToken* Lexer::LexBinary() {
+        uint64_t accumulated = 0;
+        // 255 is the "not a digit" marker returned by LexBinaryValue.
+        for (auto digit = LexBinaryValue(Peek()); digit != 255; digit = LexBinaryValue(Peek())) {
+            Progress();
+            accumulated = (accumulated << 1) + digit;
+        }
+        return new IntegerToken(accumulated);
+    }
+}
--- a/src/Parser/Lexer/Lexer.hpp
+++ b/src/Parser/Lexer/Lexer.hpp
@@ -0,0 +1,46 @@
+#ifndef ELOHIMSCRIPT_LEXER_HPP
+#define ELOHIMSCRIPT_LEXER_HPP
+
+#include <string_view>
+#include "LexToken.hpp"
+
+namespace ElohimScript::Parser {
+    // Converts a script into a chain of LexTokens. The script is held as a non-owning view, so the underlying
+    // characters have to stay alive for as long as the lexer is used.
+    class Lexer {
+    public:
+        // Takes a null-terminated string and reinterprets its bytes as char8_t code units.
+        Lexer(const char* script) : _script(reinterpret_cast<const char8_t*>(script)) {}
+        Lexer(std::u8string_view script) : _script(script) {}
+        // Lexes the complete script and returns its first token.
+        const LexToken* Lex();
+
+    private:
+        std::u8string_view _script;
+        // Index of the character consumed last. It starts at the largest size_t value so that the first
+        // increment wraps around to index 0.
+        size_t _position = static_cast<size_t>(-1);
+
+        // Steps to the next character and returns it, or '\0' once the end of the script has been passed.
+        inline char8_t Consume() {
+            ++_position;
+            return _position < _script.size() ? _script[_position] : u8'\0';
+        }
+
+        // Skips `steps` characters without looking at them.
+        inline void Progress(size_t steps = 1) { _position += steps; }
+
+        // Returns the character `offset` positions past the current one without moving, or '\0' when that
+        // position lies outside the script.
+        inline char8_t Peek(size_t offset = 1) {
+            const auto target = _position + offset;
+            return target < _script.size() ? _script[target] : u8'\0';
+        }
+
+        LexToken* LexNext();
+        LexToken* LexNumerical(char8_t);
+        LexToken* LexDecimal(uint64_t initial);
+        IntegerToken* LexHexadecimal();
+        IntegerToken* LexOctal();
+        IntegerToken* LexBinary();
+    };
+}
+
+#endif // ELOHIMSCRIPT_LEXER_HPP
--- a/src/Parser/Lexer/NumericalLexers.cpp
+++ b/src/Parser/Lexer/NumericalLexers.cpp
@@ -0,0 +1,67 @@
+#include <cstdint>
+#include "NumericalLexers.hpp"
+
+
+// Maps '0'..'9' to 0..9. Every other character yields 255.
+uint8_t LexDecimalValue(char8_t c) {
+    const bool isDigit = c >= u8'0' && c <= u8'9';
+    if (!isDigit) {
+        return 255;
+    }
+    return static_cast<uint8_t>(c - u8'0');
+}
+
+// Maps '0'..'9' to 0..9 and 'a'..'f' / 'A'..'F' to 10..15. Every other character yields 255.
+uint8_t LexHexadecimalValue(char8_t c) {
+    if (c >= u8'0' && c <= u8'9') {
+        return static_cast<uint8_t>(c - u8'0');
+    }
+    if (c >= u8'a' && c <= u8'f') {
+        return static_cast<uint8_t>(c - u8'a' + 10);
+    }
+    if (c >= u8'A' && c <= u8'F') {
+        return static_cast<uint8_t>(c - u8'A' + 10);
+    }
+    return 255;
+}
+// Maps '0'..'7' to 0..7. Every other character yields 255.
+uint8_t LexOctalValue(char8_t c) {
+    const bool isDigit = c >= u8'0' && c <= u8'7';
+    if (!isDigit) {
+        return 255;
+    }
+    return static_cast<uint8_t>(c - u8'0');
+}
+// Maps '0' to 0 and '1' to 1. Every other character yields 255.
+uint8_t LexBinaryValue(char8_t c) {
+    if (c == u8'0') {
+        return 0;
+    }
+    return c == u8'1' ? 1 : 255;
+}
--- a/src/Parser/Lexer/NumericalLexers.hpp
+++ b/src/Parser/Lexer/NumericalLexers.hpp
@@ -0,0 +1,11 @@
+#ifndef ELOHIMSCRIPT_NUMERICALLEXERS_HPP
+#define ELOHIMSCRIPT_NUMERICALLEXERS_HPP
+
+#include <cstdint>
+
+// Each function returns the numeric value of `c` as a digit of the given numeral system, or 255 when `c` is not
+// a digit of that system.
+
+// Decimal digits '0'..'9'.
+uint8_t LexDecimalValue(char8_t c);
+// Hexadecimal digits '0'..'9', 'a'..'f' and 'A'..'F'.
+uint8_t LexHexadecimalValue(char8_t c);
+// Octal digits '0'..'7'.
+uint8_t LexOctalValue(char8_t c);
+// Binary digits '0' and '1'.
+uint8_t LexBinaryValue(char8_t c);
+
+#endif // ELOHIMSCRIPT_NUMERICALLEXERS_HPP
--- a/tests/LexerTests/NumericalLexTests.cpp
+++ b/tests/LexerTests/NumericalLexTests.cpp
@@ -0,0 +1,64 @@
+#include "../../extern/doctest.hpp"
+#include "../../src/Parser/Lexer/Lexer.hpp"
+
+using namespace ElohimScript::Parser;
+
+// Defines a test case that lexes `script` and expects exactly one IntegerToken with value `expected`, followed by
+// EndOfFile. The token chain is held in a unique_ptr so it is released even when a REQUIRE aborts the test case.
+#define INTEGER_TEST(script, expected)                                                                                 \
+    TEST_CASE("Lex " script) {                                                                                         \
+        auto lexer = Lexer(script);                                                                                    \
+        const std::unique_ptr<const LexToken> token(lexer.Lex());                                                      \
+        REQUIRE(token->GetKind() == LexTokenKind::IntegerToken);                                                       \
+        auto value = static_cast<const IntegerToken*>(token.get())->GetValue();                                        \
+        CHECK(value == (expected));                                                                                    \
+        REQUIRE(token->GetNext().get() != nullptr);                                                                    \
+        CHECK(token->GetNext()->GetKind() == LexTokenKind::EndOfFile);                                                 \
+    }
+
+// Defines a test case that lexes `script` and expects exactly one FloatToken with value `expected`, followed by
+// EndOfFile. The token chain is held in a unique_ptr so it is released even when a REQUIRE aborts the test case.
+#define FLOAT_TEST(script, expected)                                                                                   \
+    TEST_CASE("Lex " script) {                                                                                         \
+        auto lexer = Lexer(script);                                                                                    \
+        const std::unique_ptr<const LexToken> token(lexer.Lex());                                                      \
+        REQUIRE(token->GetKind() == LexTokenKind::FloatToken);                                                         \
+        auto value = static_cast<const FloatToken*>(token.get())->GetValue();                                          \
+        CHECK(value == (expected));                                                                                    \
+        REQUIRE(token->GetNext().get() != nullptr);                                                                    \
+        CHECK(token->GetNext()->GetKind() == LexTokenKind::EndOfFile);                                                 \
+    }
+
+
+// Decimal lexing: plain digits, the explicit "0d" prefix, and a value that does not fit in 32 bits.
+INTEGER_TEST("123456", 123456);
+INTEGER_TEST("0d123456", 123456);
+INTEGER_TEST("50000000000", 50000000000);
+
+// Decimal float lexing: fractions, with and without an exponent (lower and upper case 'e').
+FLOAT_TEST("123.456", 123.456);
+FLOAT_TEST("0.456", 0.456);
+FLOAT_TEST("0.456e12", 0.456e12);
+FLOAT_TEST("0.456E12", 0.456E12);
+
+// Hexadecimal lexing ("0x" prefix); digits may be upper case, lower case or mixed.
+INTEGER_TEST("0x0", 0);
+INTEGER_TEST("0xF", 15);
+INTEGER_TEST("0xf", 15);
+INTEGER_TEST("0xFF", 255);
+INTEGER_TEST("0xfF", 255);
+INTEGER_TEST("0xFFF", 4095);
+INTEGER_TEST("0xFFFF", 65535);
+INTEGER_TEST("0xFFFFF", 1048575);
+INTEGER_TEST("0xFFFFFF", 16777215);
+
+// Octal lexing ("0o" prefix)
+INTEGER_TEST("0o0", 0);
+INTEGER_TEST("0o7", 7);
+INTEGER_TEST("0o77", 63);
+INTEGER_TEST("0o777", 511);
+INTEGER_TEST("0o7777", 4095);
+
+// Binary lexing ("0b" prefix)
+INTEGER_TEST("0b0", 0);
+INTEGER_TEST("0b1", 1);
+INTEGER_TEST("0b11", 3);
+INTEGER_TEST("0b111", 7);
+INTEGER_TEST("0b1111", 15);
+INTEGER_TEST("0b110011", 51);
--- a/tests/LexerTests/SymbolLexTests.cpp
+++ b/tests/LexerTests/SymbolLexTests.cpp
@@ -0,0 +1,81 @@
+#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
+#include "../../extern/doctest.hpp"
+#include "../../src/Parser/Lexer/Lexer.hpp"
+
+using namespace ElohimScript::Parser;
+
+// Defines a test case that lexes `script` and expects exactly one token of kind LexTokenKind::symbol, followed by
+// EndOfFile. The token chain is held in a unique_ptr so it is released even when the REQUIRE aborts the test case.
+#define SYMBOL_TEST(script, symbol)                                                                                    \
+    TEST_CASE("Lex " script) {                                                                                         \
+        auto lexer = Lexer(script);                                                                                    \
+        const std::unique_ptr<const LexToken> token(lexer.Lex());                                                      \
+        CHECK(token->GetKind() == LexTokenKind::symbol);                                                               \
+        REQUIRE(token->GetNext().get() != nullptr);                                                                    \
+        CHECK(token->GetNext()->GetKind() == LexTokenKind::EndOfFile);                                                 \
+    }
+
+// One test case per symbol: the script consists of only that symbol and has to lex to the matching token kind.
+// Arithmetic operators and their compound assignments.
+SYMBOL_TEST("*", StarSymbol)
+SYMBOL_TEST("**", StarStarSymbol)
+SYMBOL_TEST("*=", StarEqualsSymbol)
+SYMBOL_TEST("**=", StarStarEqualsSymbol)
+SYMBOL_TEST("/", SlashSymbol)
+SYMBOL_TEST("/=", SlashEqualsSymbol)
+SYMBOL_TEST("%", PercentSymbol)
+SYMBOL_TEST("%=", PercentEqualsSymbol)
+SYMBOL_TEST("+", PlusSymbol)
+SYMBOL_TEST("+=", PlusEqualsSymbol)
+SYMBOL_TEST("++", PlusPlusSymbol)
+SYMBOL_TEST("-", MinusSymbol)
+SYMBOL_TEST("-=", MinusEqualsSymbol)
+SYMBOL_TEST("--", MinusMinusSymbol)
+// Comparison and shift operators starting with '<' or '>'.
+SYMBOL_TEST("<", LessThanSymbol)
+SYMBOL_TEST("<=", LessThanEqualsSymbol)
+SYMBOL_TEST("<<", LessThanLessThanSymbol)
+SYMBOL_TEST("<<=", LessThanLessThanEqualsSymbol)
+SYMBOL_TEST(">", GreaterThanSymbol)
+SYMBOL_TEST(">=", GreaterThanEqualsSymbol)
+SYMBOL_TEST(">>", GreaterThanGreaterThanSymbol)
+SYMBOL_TEST(">>=", GreaterThanGreaterThanEqualsSymbol)
+SYMBOL_TEST(">>>", GreaterThanGreaterThanGreaterThanSymbol)
+SYMBOL_TEST(">>>=", GreaterThanGreaterThanGreaterThanEqualsSymbol)
+// Remaining symbols.
+SYMBOL_TEST("(", OpenParenthesisSymbol)
+SYMBOL_TEST(")", CloseParenthesisSymbol)
+SYMBOL_TEST("=", EqualsSymbol)
+SYMBOL_TEST("==", EqualsEqualsSymbol)
+SYMBOL_TEST("!", ExclamationMarkSymbol)
+SYMBOL_TEST("!=", ExclamationMarkEqualsSymbol)
+SYMBOL_TEST("!is", ExclamationMarkIsSymbol)
+SYMBOL_TEST("?", QuestionMarkSymbol)
+SYMBOL_TEST(":", ColonSymbol)
+SYMBOL_TEST("::", ColonColonSymbol)
+SYMBOL_TEST("&", AmpersandSymbol)
+SYMBOL_TEST("&=", AmpersandEqualsSymbol)
+SYMBOL_TEST("&&", AmpersandAmpersandSymbol)
+SYMBOL_TEST(",", CommaSymbol)
+SYMBOL_TEST("{", OpenCurlyParenthesisSymbol)
+SYMBOL_TEST("}", CloseCurlyParenthesisSymbol)
+SYMBOL_TEST(";", SemicolonSymbol)
+SYMBOL_TEST("|", VerticalLineSymbol)
+SYMBOL_TEST("|=", VerticalLineEqualsSymbol)
+SYMBOL_TEST("||", VerticalLineVerticalLineSymbol)
+SYMBOL_TEST("^", CaretSymbol)
+SYMBOL_TEST("^=", CaretEqualsSymbol)
+SYMBOL_TEST("^^", CaretCaretSymbol)
+SYMBOL_TEST("~", TildeSymbol)
+SYMBOL_TEST(".", DotSymbol)
+SYMBOL_TEST("[", OpenBlockParenthesisSymbol)
+SYMBOL_TEST("]", CloseBlockParenthesisSymbol)
+SYMBOL_TEST("@", AtSymbol)
+SYMBOL_TEST(" ", Whitespace)
+
+#undef SYMBOL_TEST
+
+// Every sample has to lex to a single Whitespace token followed by EndOfFile. The last sample is the UTF-8 byte
+// order mark.
+TEST_CASE("Lex whitespace") {
+    const char* const samples[] = {" ", "\t", "\n", "\r", "\xef\xbb\xbf"};
+    for (const char* sample : samples) {
+        Lexer lexer(sample);
+        const LexToken* head = lexer.Lex();
+        CHECK(head->GetKind() == LexTokenKind::Whitespace);
+        CHECK(head->GetNext()->GetKind() == LexTokenKind::EndOfFile);
+        delete head;
+    }
+}