From 22e450e7e740d79f035f66e0c54d622925ae4c1d Mon Sep 17 00:00:00 2001 From: Deukhoofd Date: Sat, 18 May 2019 20:35:51 +0200 Subject: [PATCH] Initial commit, adds very basic Lexing --- .gitignore | 72 +++++++++++ .gitmodules | 3 + .idea/PorygonLang.iml | 2 + .idea/codeStyles/codeStyleConfig.xml | 5 + .idea/misc.xml | 15 +++ .idea/modules.xml | 8 ++ .idea/vcs.xml | 7 ++ CMakeLists.txt | 13 ++ extern/Catch2 | 1 + library.cpp | 18 +++ library.h | 6 + src/Parser/Lexer.cpp | 173 +++++++++++++++++++++++++++ src/Parser/Lexer.hpp | 26 ++++ src/Parser/Token.hpp | 37 ++++++ src/Parser/TokenKind.hpp | 12 ++ src/main.cpp | 3 + src/main.h++ | 10 ++ 17 files changed, 411 insertions(+) create mode 100644 .gitignore create mode 100644 .gitmodules create mode 100755 .idea/PorygonLang.iml create mode 100644 .idea/codeStyles/codeStyleConfig.xml create mode 100755 .idea/misc.xml create mode 100755 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100755 CMakeLists.txt create mode 160000 extern/Catch2 create mode 100755 library.cpp create mode 100755 library.h create mode 100644 src/Parser/Lexer.cpp create mode 100644 src/Parser/Lexer.hpp create mode 100644 src/Parser/Token.hpp create mode 100644 src/Parser/TokenKind.hpp create mode 100644 src/main.cpp create mode 100644 src/main.h++ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e81e6c1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,72 @@ + +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml 
+.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + + +.directory diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..1b392af --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "extern/Catch2"] + path = extern/Catch2 + url = git@github.com:catchorg/Catch2.git diff --git a/.idea/PorygonLang.iml b/.idea/PorygonLang.iml new file mode 100755 index 0000000..f08604b --- /dev/null +++ b/.idea/PorygonLang.iml @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/.idea/codeStyles/codeStyleConfig.xml b/.idea/codeStyles/codeStyleConfig.xml new file mode 100644 index 0000000..a55e7a1 --- /dev/null +++ b/.idea/codeStyles/codeStyleConfig.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100755 index 0000000..d959e79 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100755 index 0000000..77e9569 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end 
of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..b27de7c --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100755 index 0000000..69d95ff --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,13 @@ +cmake_minimum_required(VERSION 3.13) +project(PorygonLang) + +set(CMAKE_CXX_STANDARD 17) + +add_subdirectory(extern/Catch2) + +add_library(PorygonLang library.cpp library.h src/main.cpp src/main.h++ src/Parser/Lexer.cpp src/Parser/Lexer.hpp src/Parser/TokenKind.hpp src/Parser/Token.hpp) +add_executable(PorygonLangTests library.cpp library.h src/main.cpp src/main.h++ src/Parser/Lexer.cpp src/Parser/Lexer.hpp src/Parser/TokenKind.hpp src/Parser/Token.hpp) +target_link_libraries(PorygonLangTests Catch2::Catch2) +target_compile_definitions(PorygonLangTests PRIVATE TESTS_BUILD) + +include(CTest) diff --git a/extern/Catch2 b/extern/Catch2 new file mode 160000 index 0000000..e680c4b --- /dev/null +++ b/extern/Catch2 @@ -0,0 +1 @@ +Subproject commit e680c4b9fb1d699bfad239e42ce7643d7cf00371 diff --git a/library.cpp b/library.cpp new file mode 100755 index 0000000..afd96d6 --- /dev/null +++ b/library.cpp @@ -0,0 +1,18 @@ +#include "library.h" +#include +#define CATCH_CONFIG_MAIN + +unsigned int Factorial( unsigned int number ) { + return number <= 1 ? 
#include <limits>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

/// Categories of token the lexer can emit.
enum TokenKind{
    EndOfFile,
    WhiteSpace,

    Integer,
};

/// Common interface of every token produced by the lexer.
class IToken{
public:
    /// Tokens are handed out as `IToken*`; a virtual destructor makes deleting
    /// them through that base pointer well-defined.
    virtual ~IToken() = default;

    /// @return the category this token belongs to.
    virtual TokenKind GetKind() = 0;
};

/// Token that carries nothing but its kind (end of file, whitespace, ...).
class SimpleToken : public IToken{
public:
    TokenKind Kind;

    explicit SimpleToken(TokenKind type) : Kind(type) {}

    TokenKind GetKind() override{
        return Kind;
    }
};

/// Token for an integer literal; `Value` holds the parsed number.
class IntegerToken : public IToken{
public:
    long Value;

    explicit IntegerToken(long value) : Value(value) {}

    TokenKind GetKind() override{
        return TokenKind::Integer;
    }
};

/// Splits a script into tokens, one character of lookahead at a time.
class Lexer {
    std::string Script;
#ifdef TESTS_BUILD
public:   // test builds expose the character-level helpers to the unit tests
#endif
    unsigned int Position;
    char Peek();
    char Next();
    IToken* LexNext(char c);
    IToken* LexNumber(char c);
public:
    std::vector<IToken*> Lex();
    explicit Lexer(std::string script);
};

/// Takes ownership of the script text and starts reading at its first character.
Lexer::Lexer(std::string script) : Script(std::move(script)), Position(0) {}

/// Lexes the whole script.
///
/// @return every token in order; the last element is always the EndOfFile token.
/// @throws std::runtime_error  on a character no token starts with, or on a
///                             floating point literal (not supported yet).
/// @throws std::overflow_error on an integer literal that does not fit in `long`.
///
/// The tokens are allocated with `new` and the lexer never deletes them.
/// NOTE(review): callers presumably own them and must `delete` each one -- confirm.
std::vector<IToken*> Lexer::Lex() {
    std::vector<IToken*> tokens;
    while (true){
        IToken* next = LexNext(Next());
        tokens.push_back(next);
        if (next->GetKind() == TokenKind::EndOfFile)
            break;
    }
    return tokens;
}

/// @return the character at the current position without consuming it, or
///         '\0' once the position is at or beyond the end of the script.
char Lexer::Peek(){
    if (Position >= Script.length())
        return '\0';
    return Script[Position];
}

/// @return the character at the current position, then advances by one.
///         The position keeps advancing even past the end of the script.
char Lexer::Next(){
    const char next = Peek();
    Position++;
    return next;
}

/// Produces the token that starts with the already consumed character `c`.
///
/// @throws std::runtime_error if no token can start with `c`.
IToken* Lexer::LexNext(char c){
    switch (c) {
        case '\0':
            // Peek() reports end of input as '\0'.
            return new SimpleToken(TokenKind::EndOfFile);
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            return new SimpleToken(TokenKind::WhiteSpace);
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            return LexNumber(c);
        default:
            // A bare `throw;` here would call std::terminate because no
            // exception is being handled; raise a catchable error instead.
            throw std::runtime_error(
                std::string("Unexpected character in script: '") + c + "'");
    }
}

/// @return the numeric value of the decimal digit `c`, or -1 if `c` is not a digit.
int CharToInt(char c){
    // The digits '0'..'9' are guaranteed to be contiguous in the execution character set.
    return (c >= '0' && c <= '9') ? c - '0' : -1;
}

/// Lexes a numeric literal whose first digit `c` has already been consumed.
///
/// Underscores inside the literal are skipped, so `1_000` reads as 1000.
/// Only characters that belong to the literal are consumed: the character that
/// ends it stays in the input so the next LexNext() call still sees it.
///
/// @throws std::overflow_error if the integer part does not fit in `long`.
/// @throws std::runtime_error  for a literal containing a decimal point; no
///                             floating point token kind exists yet.
IToken* Lexer::LexNumber(char c){
    constexpr long max_value = std::numeric_limits<long>::max();

    long int_value = CharToInt(c);
    double float_value = 0;
    double fraction_scale = 1;   // weight of the most recently read fractional digit
    bool has_point = false;

    for (;;){
        const char next = Peek();
        if (next == '_'){
            Position++;
            continue;
        }
        if (next == '.' && !has_point){
            has_point = true;
            float_value = static_cast<double>(int_value);
            Position++;
            continue;
        }

        const int digit = CharToInt(next);
        if (digit == -1)
            break;   // not part of the number: leave it for the caller
        Position++;

        if (has_point){
            fraction_scale /= 10;
            float_value += digit * fraction_scale;
        }
        else {
            // Signed overflow is undefined behaviour, so check before multiplying.
            if (int_value > (max_value - digit) / 10)
                throw std::overflow_error("Integer literal is too large");
            int_value = int_value * 10 + digit;
        }
    }

    if (has_point){
        throw std::runtime_error(
            "Floating point literal " + std::to_string(float_value) +
            " is not supported yet");
    }
    return new IntegerToken(int_value);
}