From 22e450e7e740d79f035f66e0c54d622925ae4c1d Mon Sep 17 00:00:00 2001
From: Deukhoofd <Deukhoofd@gmail.com>
Date: Sat, 18 May 2019 20:35:51 +0200
Subject: [PATCH] Initial commit, adds very basic Lexing

---
 .gitignore                           |  72 +++++++++++
 .gitmodules                          |   3 +
 .idea/PorygonLang.iml                |   2 +
 .idea/codeStyles/codeStyleConfig.xml |   5 +
 .idea/misc.xml                       |  15 +++
 .idea/modules.xml                    |   8 ++
 .idea/vcs.xml                        |   7 ++
 CMakeLists.txt                       |  13 ++
 extern/Catch2                        |   1 +
 library.cpp                          |  18 +++
 library.h                            |   6 +
 src/Parser/Lexer.cpp                 | 173 +++++++++++++++++++++++++++
 src/Parser/Lexer.hpp                 |  26 ++++
 src/Parser/Token.hpp                 |  37 ++++++
 src/Parser/TokenKind.hpp             |  12 ++
 src/main.cpp                         |   3 +
 src/main.h++                         |  10 ++
 17 files changed, 411 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .gitmodules
 create mode 100755 .idea/PorygonLang.iml
 create mode 100644 .idea/codeStyles/codeStyleConfig.xml
 create mode 100755 .idea/misc.xml
 create mode 100755 .idea/modules.xml
 create mode 100644 .idea/vcs.xml
 create mode 100755 CMakeLists.txt
 create mode 160000 extern/Catch2
 create mode 100755 library.cpp
 create mode 100755 library.h
 create mode 100644 src/Parser/Lexer.cpp
 create mode 100644 src/Parser/Lexer.hpp
 create mode 100644 src/Parser/Token.hpp
 create mode 100644 src/Parser/TokenKind.hpp
 create mode 100644 src/main.cpp
 create mode 100644 src/main.h++
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e81e6c1
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,72 @@
+ 
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+.idea/**/workspace.xml
+.idea/**/tasks.xml
+.idea/**/usage.statistics.xml
+.idea/**/dictionaries
+.idea/**/shelf
+
+# Generated files
+.idea/**/contentModel.xml
+
+# Sensitive or high-churn files
+.idea/**/dataSources/
+.idea/**/dataSources.ids
+.idea/**/dataSources.local.xml
+.idea/**/sqlDataSources.xml
+.idea/**/dynamic.xml
+.idea/**/uiDesigner.xml
+.idea/**/dbnavigator.xml
+
+# Gradle
+.idea/**/gradle.xml
+.idea/**/libraries
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn.  Uncomment if using
+# auto-import.
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+cmake-build-*/
+
+# Mongo Explorer plugin
+.idea/**/mongoSettings.xml
+
+# File-based project format
+*.iws
+
+# IntelliJ
+out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Cursive Clojure plugin
+.idea/replstate.xml
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+
+# Editor-based Rest Client
+.idea/httpRequests
+
+# Android studio 3.1+ serialized cache file
+.idea/caches/build_file_checksums.ser
+
+
+.directory
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..1b392af
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "extern/Catch2"]
+	path = extern/Catch2
+	url = https://github.com/catchorg/Catch2.git
diff --git a/.idea/PorygonLang.iml b/.idea/PorygonLang.iml
new file mode 100755
index 0000000..f08604b
--- /dev/null
+++ b/.idea/PorygonLang.iml
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module classpath="CMake" type="CPP_MODULE" version="4" />
\ No newline at end of file
diff --git a/.idea/codeStyles/codeStyleConfig.xml b/.idea/codeStyles/codeStyleConfig.xml
new file mode 100644
index 0000000..a55e7a1
--- /dev/null
+++ b/.idea/codeStyles/codeStyleConfig.xml
@@ -0,0 +1,5 @@
+<component name="ProjectCodeStyleConfiguration">
+  <state>
+    <option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" />
+  </state>
+</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100755
index 0000000..d959e79
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="CMakeWorkspace" PROJECT_DIR="$PROJECT_DIR$" />
+  <component name="CidrRootsConfiguration">
+    <sourceRoots>
+      <file path="$PROJECT_DIR$/src" />
+    </sourceRoots>
+    <libraryRoots>
+      <file path="$PROJECT_DIR$/extern" />
+    </libraryRoots>
+  </component>
+  <component name="JavaScriptSettings">
+    <option name="languageLevel" value="ES6" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100755
index 0000000..77e9569
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/PorygonLang.iml" filepath="$PROJECT_DIR$/.idea/PorygonLang.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..b27de7c
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/extern/Catch2" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100755
index 0000000..69d95ff
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,13 @@
+cmake_minimum_required(VERSION 3.13)
+project(PorygonLang)
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)  # fail at configure time instead of silently using an older standard
+
+add_subdirectory(extern/Catch2)
+# The test runner rebuilds the library sources with TESTS_BUILD defined, which compiles in the Catch2 tests embedded in them.
+add_library(PorygonLang library.cpp library.h src/main.cpp src/main.h++ src/Parser/Lexer.cpp src/Parser/Lexer.hpp src/Parser/TokenKind.hpp src/Parser/Token.hpp)
+add_executable(PorygonLangTests library.cpp library.h src/main.cpp src/main.h++ src/Parser/Lexer.cpp src/Parser/Lexer.hpp src/Parser/TokenKind.hpp src/Parser/Token.hpp)
+target_link_libraries(PorygonLangTests Catch2::Catch2)
+target_compile_definitions(PorygonLangTests PRIVATE TESTS_BUILD)
+include(CTest)  # enables testing (BUILD_TESTING defaults to ON)
+add_test(NAME PorygonLangTests COMMAND PorygonLangTests)  # without this, ctest has nothing to run
diff --git a/extern/Catch2 b/extern/Catch2
new file mode 160000
index 0000000..e680c4b
--- /dev/null
+++ b/extern/Catch2
@@ -0,0 +1 @@
+Subproject commit e680c4b9fb1d699bfad239e42ce7643d7cf00371
diff --git a/library.cpp b/library.cpp
new file mode 100755
index 0000000..afd96d6
--- /dev/null
+++ b/library.cpp
@@ -0,0 +1,18 @@
+#include "library.h"
+#include <iostream>
+#define CATCH_CONFIG_MAIN
+
+// Returns number! (with 0! == 1); the result wraps around once it exceeds UINT_MAX.
+unsigned int Factorial( unsigned int number ) {
+    return number <= 1 ? 1 : Factorial(number-1)*number;
+}
+#ifdef TESTS_BUILD
+#include <catch2/catch.hpp>
+
+TEST_CASE( "Factorials are computed", "[factorial]" ) {
+    // Spot checks against known values; each row is {input, expected factorial}.
+    const unsigned int cases[][2] = {{1, 1}, {2, 2}, {3, 6}, {10, 3628800}};
+    for (const auto& row : cases)
+        REQUIRE( Factorial(row[0]) == row[1] );
+}
+#endif
\ No newline at end of file
diff --git a/library.h b/library.h
new file mode 100755
index 0000000..74856d7
--- /dev/null
+++ b/library.h
@@ -0,0 +1,6 @@
+#ifndef PORYGONLANG_LIBRARY_H
+#define PORYGONLANG_LIBRARY_H
+
+int theAnswer();  // NOTE(review): no definition is visible in library.cpp - confirm it exists or remove it.
+unsigned int Factorial( unsigned int number );  // Factorial of number; defined in library.cpp.
+#endif //PORYGONLANG_LIBRARY_H
\ No newline at end of file
diff --git a/src/Parser/Lexer.cpp b/src/Parser/Lexer.cpp
new file mode 100644
index 0000000..307426d
--- /dev/null
+++ b/src/Parser/Lexer.cpp
@@ -0,0 +1,173 @@
+#include <cmath>
+#include <stdexcept>
+#include <utility>
+#include "Lexer.hpp"
+
+// Takes ownership of `script` and starts reading at its first character.
+Lexer::Lexer(string script)
+    : Script(std::move(script)), Position(0) {
+}
+
+// Lexes the whole script; the tokens are allocated with `new` (not freed here) and end with EndOfFile.
+vector<IToken*> Lexer::Lex() {
+    vector<IToken*> result;
+    IToken* current = nullptr;
+    do {
+        current = LexNext(Next());
+        result.push_back(current);
+    } while (current->GetKind() != TokenKind::EndOfFile);
+    return result;
+}
+
+// Returns the character at Position without consuming it; '\0' once Position reaches the end.
+char Lexer::Peek(){
+    const bool at_end = Position >= Script.length();
+    return at_end ? '\0' : Script[Position];
+}
+
+// Returns the current character (exactly what Peek sees) and then advances Position by one.
+char Lexer::Next(){
+    const char current = Peek();
+    ++Position;
+    return current;
+}
+// Produces the token that starts with the already-consumed character `c`.
+// Throws std::runtime_error for characters the lexer does not recognise; the old bare
+// `throw;` had no active exception to rethrow and therefore called std::terminate.
+IToken* Lexer::LexNext(char c){
+    switch (c) {
+        case '\0':
+            return new SimpleToken(TokenKind::EndOfFile);
+        case ' ': case '\t': case '\n': case '\r':
+            return new SimpleToken(TokenKind::WhiteSpace);
+        case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
+            return LexNumber(c);
+        default:
+            throw std::runtime_error(std::string("Lexer: unexpected character '") + c + "'");
+    }
+}
+
+/**
+ * Converts an ASCII decimal digit to its numeric value.
+ *
+ * @param c Character to convert.
+ * @return 0-9 for '0'..'9', or -1 for any other character. LexNumber uses the
+ *         -1 sentinel to detect separators and the end of a number.
+ */
+int CharToInt(char c){
+    // The C++ standard guarantees that '0'..'9' are contiguous and ascending,
+    // so the digit value is simply the offset from '0'.
+    if (c >= '0' && c <= '9'){
+        return c - '0';
+    }
+    return -1;
+}
+
+// Lexes a number whose first digit `c` has already been consumed.
+// Digits are accumulated, '_' separators are skipped, and the first '.' switches to
+// fractional accumulation. Characters are peeked before being consumed so the
+// character that ends the number stays available for the next LexNext call
+// (previously it was swallowed, e.g. the space after the first 5 in "5 + 5").
+// Throws std::runtime_error for numbers with a decimal point: no token type is
+// created for them here. (The old bare `throw;` called std::terminate.)
+IToken* Lexer::LexNumber(char c){
+    long int_value = CharToInt(c);
+    double float_value = 0;
+    double fraction_scale = 1;
+    bool has_point = false;
+    while (true){
+        const char next = Lexer::Peek();
+        const int next_val = CharToInt(next);
+        if (next_val != -1){
+            if (has_point){
+                // float_value is accumulated but not read: decimals are rejected below.
+                fraction_scale *= 10;
+                float_value += next_val / fraction_scale;
+            }
+            else {
+                int_value = int_value * 10 + next_val;
+            }
+        }
+        else if (next == '.' && !has_point){
+            has_point = true;
+            float_value = int_value;
+        }
+        else if (next != '_'){
+            // Not part of the number (this includes a second '.'): stop without consuming it.
+            break;
+        }
+        Lexer::Next();
+    }
+    if (has_point){
+        throw std::runtime_error("Lexer: numbers with a decimal point are not supported yet");
+    }
+    return new IntegerToken(int_value);
+}
+
+
+#ifdef TESTS_BUILD
+#include <catch2/catch.hpp>
+
+TEST_CASE( "When at end of script return terminator", "[lexer]" ) {
+    // An empty script has nothing to read, so Peek reports the '\0' terminator.
+    REQUIRE(Lexer("").Peek() == '\0');
+}
+
+TEST_CASE( "Peek doesn't advance", "[lexer]" ) {
+    Lexer lexer("5 + 5");
+    // Repeated peeks must keep returning the first character of the script.
+    for (int attempt = 0; attempt < 3; ++attempt)
+        REQUIRE(lexer.Peek() == '5');
+}
+
+TEST_CASE( "Next does advance", "[lexer]" ) {
+    // Next must hand out the script one character at a time, then '\0' at the end.
+    const std::string script = "5 + 5";
+    Lexer lexer(script);
+    for (char expected : script) {
+        REQUIRE(lexer.Next() == expected);
+    }
+    REQUIRE(lexer.Next() == '\0');
+}
+
+TEST_CASE( "Lex Null Terminator as EOF", "[lexer]" ) {
+    // LexNext is driven directly with '\0', so the script content does not matter here.
+    REQUIRE(Lexer("").LexNext('\0') -> GetKind() == TokenKind::EndOfFile);
+}
+
+TEST_CASE( "Lex Whitespace", "[lexer]" ) {
+    Lexer lexer("");
+    // Space, tab, line feed and carriage return must each lex to a WhiteSpace token.
+    for (char whitespace : {' ', '\t', '\n', '\r'}) {
+        CHECK(lexer.LexNext(whitespace) -> GetKind() == TokenKind::WhiteSpace);
+    }
+}
+
+TEST_CASE( "Lex Basic Integers", "[lexer]" ) {
+    // The script is empty, so every digit passed to LexNext forms a complete
+    // one-digit number: LexNumber reads the '\0' terminator right away.
+    Lexer lexer("");
+    const std::string digits = "0123456789";
+
+    for (char digit : digits) {
+        // CHECK rather than REQUIRE, so a failing digit does not hide the
+        // results for the digits after it.
+        IToken* token = lexer.LexNext(digit);
+        CHECK(token -> GetKind() == TokenKind::Integer);
+    }
+}
+
+TEST_CASE( "Lex Longer Integers", "[lexer]" ) {
+    // Each value is rendered to text, lexed, and must come back as one Integer token
+    // carrying the same value, followed by the EndOfFile token.
+    const long integers[] {0,1,5,9,10,50,100,1000,99999,6484,62163,48862};
+    for (const long expected : integers){
+        auto tokens = Lexer(std::to_string(expected)).Lex();
+        REQUIRE(tokens.size() == 2);
+        REQUIRE(tokens[0] -> GetKind() == TokenKind::Integer);
+        // The kind check above is what justifies the downcast to IntegerToken.
+        CHECK(static_cast<IntegerToken*>(tokens[0]) -> Value == expected);
+    }
+}
+
+#endif
\ No newline at end of file
diff --git a/src/Parser/Lexer.hpp b/src/Parser/Lexer.hpp
new file mode 100644
index 0000000..75d993e
--- /dev/null
+++ b/src/Parser/Lexer.hpp
@@ -0,0 +1,26 @@
+#ifndef PORYGONLANG_LEXER_HPP
+#define PORYGONLANG_LEXER_HPP
+#include <string>
+#include <vector>
+#include "Token.hpp"
+
+using namespace std;
+
+// Turns a script string into a list of tokens via Lex().
+class Lexer {
+    string Script;  // Text being lexed.
+#ifdef TESTS_BUILD
+public:  // In test builds the members below are public so the unit tests can call them.
+#endif
+    unsigned int Position;  // Index in Script of the next character to read.
+    char Peek();  // Returns the current character without consuming it.
+    char Next();  // Returns the current character and advances Position.
+    IToken* LexNext(char c);  // Lexes the token that starts with the already-consumed character c.
+    IToken* LexNumber(char c);  // Lexes a number whose first digit c was already consumed.
+public:
+    vector<IToken*> Lex();  // Lexes the whole script; the last token is EndOfFile.
+    explicit Lexer(string script);
+};
+
+
+#endif //PORYGONLANG_LEXER_HPP
diff --git a/src/Parser/Token.hpp b/src/Parser/Token.hpp
new file mode 100644
index 0000000..445aec5
--- /dev/null
+++ b/src/Parser/Token.hpp
@@ -0,0 +1,37 @@
+#ifndef PORYGONLANG_TOKEN_HPP
+#define PORYGONLANG_TOKEN_HPP
+
+#include "TokenKind.hpp"
+
+class IToken{  // Interface implemented by every token the lexer produces.
+public:
+    virtual ~IToken() = default;  // Tokens are handed out as IToken*; this makes deleting them well-defined.
+    virtual TokenKind GetKind() = 0;
+};
+// Token that carries nothing but its kind; the lexer uses it for EndOfFile
+// and WhiteSpace.
+class SimpleToken : public IToken{
+public:
+    TokenKind Kind;
+
+    explicit SimpleToken(TokenKind type) : Kind(type) {}
+
+    TokenKind GetKind() override{
+        return Kind;
+    }
+};
+
+// Token for an integer literal; GetKind() always reports TokenKind::Integer.
+// Value holds the number as accumulated by Lexer::LexNumber.
+class IntegerToken : public IToken{
+public:
+    long Value;
+
+    explicit IntegerToken(long value) : Value(value) {}
+
+    TokenKind GetKind() override{
+        return TokenKind::Integer;
+    }
+};
+
+#endif //PORYGONLANG_TOKEN_HPP
diff --git a/src/Parser/TokenKind.hpp b/src/Parser/TokenKind.hpp
new file mode 100644
index 0000000..ebbf145
--- /dev/null
+++ b/src/Parser/TokenKind.hpp
@@ -0,0 +1,12 @@
+#ifndef PORYGONLANG_TOKENKIND_HPP
+#define PORYGONLANG_TOKENKIND_HPP
+
+enum TokenKind{
+    EndOfFile,  // Produced for the '\0' terminator; always the last token returned by Lexer::Lex().
+    WhiteSpace,  // A single space, tab, line feed or carriage return.
+
+    Integer,  // An integer literal; reported by IntegerToken.
+};
+
+
+#endif //PORYGONLANG_TOKENKIND_HPP
diff --git a/src/main.cpp b/src/main.cpp
new file mode 100644
index 0000000..489d364
--- /dev/null
+++ b/src/main.cpp
@@ -0,0 +1,3 @@
+#include "main.h++"
+
+
diff --git a/src/main.h++ b/src/main.h++
new file mode 100644
index 0000000..4fd6c40
--- /dev/null
+++ b/src/main.h++
@@ -0,0 +1,10 @@
+#ifndef PORYGONLANG_MAIN_H
+#define PORYGONLANG_MAIN_H
+
+
+class main {
+    // Currently declares no members.
+};
+
+
+#endif //PORYGONLANG_MAIN_H