Lexer: read the script through a pointer instead of a private copy.

Adds Lexer(string*, Script*), which lexes the caller's string in place, renames
Position to _position, caches the script length in _scriptSize and replaces the
local const_hash helper with HashedString::ConstHash. The by-value constructor
is kept; it stores its argument in _ownedScript so that nothing points at the
expired constructor parameter once construction has finished.

NOTE(review): several context lines (for example "vector Lexer::Lex()") look
as if their template arguments were lost before this patch was captured -
confirm them against the repository before applying.

diff --git a/src/Parser/Lexer.cpp b/src/Parser/Lexer.cpp
index 882e0c3..ef9823b 100644
--- a/src/Parser/Lexer.cpp
+++ b/src/Parser/Lexer.cpp
@@ -5,12 +5,29 @@
 
 #include "Lexer.hpp"
 
-Lexer::Lexer(string scriptString, class Script* script) {
-    this -> _scriptString = std::move(scriptString);
+// Borrowing constructor: lexes the caller's string in place without copying.
+// The pointed-to string must stay alive and unmodified while this Lexer is in
+// use. A null pointer is treated as an empty script.
+Lexer::Lexer(string* scriptString, class Script* script) {
+    this -> _scriptString = scriptString;
+    this -> _scriptSize = scriptString != nullptr ? scriptString -> size() : 0;
     this -> ScriptData = script;
-    this -> Position = 0;
+    this -> _position = 0;
 }
 
+// Owning constructor: takes the script text by value and keeps it inside the
+// Lexer. Storing the address of the by-value parameter instead would leave
+// _scriptString dangling as soon as this constructor returns.
+Lexer::Lexer(string scriptString, class Script *script) {
+    this -> _ownedScript = std::move(scriptString);
+    // Null marks "use _ownedScript"; see Lexer::Source().
+    this -> _scriptString = nullptr;
+    this -> _scriptSize = this -> _ownedScript.size();
+    this -> ScriptData = script;
+    this -> _position = 0;
+}
+
+
 vector Lexer::Lex() {
     vector tokens;
     while (true){
@@ -27,50 +44,50 @@ vector Lexer::Lex() {
 }
 
 char Lexer::Peek(){
-    if (Lexer::Position > this -> _scriptString.length())
+    if (Lexer::_position >= this -> _scriptSize)
         return '\0';
-    return this -> _scriptString[Lexer::Position];
+    return this -> Source().at(Lexer::_position);
 }
 
 char Lexer::Next(){
     char next = Peek();
-    Lexer::Position++;
+    Lexer::_position++;
     return next;
 }
 
 IToken* Lexer::LexNext(char c){
     switch (c) {
         case '\0':
-            return new SimpleToken(TokenKind::EndOfFile, this -> Position - 1, 1);
+            return new SimpleToken(TokenKind::EndOfFile, this -> _position - 1, 1);
         case ' ': case '\t': case '\n': case '\r': case '\v': case '\f':
-            return new SimpleToken(TokenKind::WhiteSpace, this -> Position - 1, 1);
+            return new SimpleToken(TokenKind::WhiteSpace, this -> _position - 1, 1);
         case '+':
-            return new SimpleToken(TokenKind::PlusToken, this -> Position - 1, 1);
+            return new SimpleToken(TokenKind::PlusToken, this -> _position - 1, 1);
         case '-':
-            return new SimpleToken(TokenKind::MinusToken, this -> Position - 1, 1);
+            return new SimpleToken(TokenKind::MinusToken, this -> _position - 1, 1);
         case '/':
-            return new SimpleToken(TokenKind::SlashToken, this -> Position - 1, 1);
+            return new SimpleToken(TokenKind::SlashToken, this -> _position - 1, 1);
         case '*':
-            return new SimpleToken(TokenKind::StarToken, this -> Position - 1, 1);
+            return new SimpleToken(TokenKind::StarToken, this -> _position - 1, 1);
         case '(':
-            return new SimpleToken(TokenKind::OpenParenthesis, this -> Position - 1, 1);
+            return new SimpleToken(TokenKind::OpenParenthesis, this -> _position - 1, 1);
         case ')':
-            return new SimpleToken(TokenKind::CloseParenthesis, this -> Position - 1, 1);
+            return new SimpleToken(TokenKind::CloseParenthesis, this -> _position - 1, 1);
         case ',':
-            return new SimpleToken(TokenKind::CommaToken, this -> Position - 1, 1);
+            return new SimpleToken(TokenKind::CommaToken, this -> _position - 1, 1);
         case '=':
             if (Lexer::Peek() == '='){
                 Lexer::Next();
-                return new SimpleToken(TokenKind::EqualityToken, this -> Position - 2, 2);
+                return new SimpleToken(TokenKind::EqualityToken, this -> _position - 2, 2);
             }
-            return new SimpleToken(TokenKind::AssignmentToken, this -> Position - 1, 1);
+            return new SimpleToken(TokenKind::AssignmentToken, this -> _position - 1, 1);
         case '~':
             if (Lexer::Peek() == '='){
                 Lexer::Next();
-                return new SimpleToken(TokenKind::InequalityToken, this -> Position - 2, 2);
+                return new SimpleToken(TokenKind::InequalityToken, this -> _position - 2, 2);
             }
-            this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedCharacter, this -> Position - 1, 1);
-            return new SimpleToken(TokenKind::BadToken, this -> Position - 1, 1);
+            this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedCharacter, this -> _position - 1, 1);
+            return new SimpleToken(TokenKind::BadToken, this -> _position - 1, 1);
         case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
             return LexNumber(c);
         case '"':
@@ -83,8 +100,8 @@ IToken* Lexer::LexNext(char c){
             if (isalpha(c)){
                 return LexIdentifierOrKeyword();
             }
-            this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedCharacter, this -> Position - 1, 1);
-            return new SimpleToken(TokenKind::BadToken, this -> Position - 1, 1);
+            this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedCharacter, this -> _position - 1, 1);
+            return new SimpleToken(TokenKind::BadToken, this -> _position - 1, 1);
     }
 }
 
@@ -110,7 +127,7 @@ IToken* Lexer::LexNumber(char c){
     short decimal_index = 0;
     bool has_point = false;
     bool is_searching = true;
-    unsigned int start = this -> Position - 1;
+    unsigned int start = this -> _position - 1;
     unsigned int length = 1;
     while (is_searching){
         char next = this -> Peek();
@@ -154,14 +171,8 @@ IToken* Lexer::LexNumber(char c){
     }
 }
 
-unsigned constexpr const_hash(char const *input) {
-    return *input ?
-        static_cast(*input) + 33 * const_hash(input + 1) :
-        5381;
-}
-
 IToken * Lexer::LexIdentifierOrKeyword() {
-    auto start = this -> Position - 1;
+    auto start = this -> _position - 1;
     auto end = start;
     while (true){
         char next = this -> Peek();
@@ -175,27 +186,27 @@ IToken * Lexer::LexIdentifierOrKeyword() {
         }
     }
 
-    string s = this -> _scriptString.substr(start, end - start + 1);
-    switch (const_hash(s.c_str())){
-        case const_hash("and"): return new SimpleToken(TokenKind::AndKeyword, start, 3);
-        case const_hash("break"): return new SimpleToken(TokenKind::BreakKeyword, start, 5);
-        case const_hash("do"): return new SimpleToken(TokenKind::DoKeyword, start, 2);
-        case const_hash("else"): return new SimpleToken(TokenKind::ElseKeyword, start, 4);
-        case const_hash("elseif"): return new SimpleToken(TokenKind::ElseIfKeyword, start, 6);
-        case const_hash("end"): return new SimpleToken(TokenKind::EndKeyword, start, 3);
-        case const_hash("false"): return new SimpleToken(TokenKind::FalseKeyword, start, 5);
-        case const_hash("for"): return new SimpleToken(TokenKind::ForKeyword, start, 3);
-        case const_hash("function"): return new SimpleToken(TokenKind::FunctionKeyword, start, 8);
-        case const_hash("if"): return new SimpleToken(TokenKind::IfKeyword, start, 2);
-        case const_hash("in"): return new SimpleToken(TokenKind::InKeyword, start, 2);
-        case const_hash("local"): return new SimpleToken(TokenKind::LocalKeyword, start, 5);
-        case const_hash("nil"): return new SimpleToken(TokenKind::NilKeyword, start, 3);
-        case const_hash("not"): return new SimpleToken(TokenKind::NotKeyword, start, 3);
-        case const_hash("or"): return new SimpleToken(TokenKind::OrKeyword, start, 2);
-        case const_hash("return"): return new SimpleToken(TokenKind::ReturnKeyword, start, 6);
-        case const_hash("then"): return new SimpleToken(TokenKind::ThenKeyword, start, 4);
-        case const_hash("true"): return new SimpleToken(TokenKind::TrueKeyword, start, 4);
-        case const_hash("while"): return new SimpleToken(TokenKind::WhileKeyword, start, 5);
+    string s = this -> Source().substr(start, end - start + 1);
+    switch (HashedString::ConstHash(s.c_str())){
+        case HashedString::ConstHash("and"): return new SimpleToken(TokenKind::AndKeyword, start, 3);
+        case HashedString::ConstHash("break"): return new SimpleToken(TokenKind::BreakKeyword, start, 5);
+        case HashedString::ConstHash("do"): return new SimpleToken(TokenKind::DoKeyword, start, 2);
+        case HashedString::ConstHash("else"): return new SimpleToken(TokenKind::ElseKeyword, start, 4);
+        case HashedString::ConstHash("elseif"): return new SimpleToken(TokenKind::ElseIfKeyword, start, 6);
+        case HashedString::ConstHash("end"): return new SimpleToken(TokenKind::EndKeyword, start, 3);
+        case HashedString::ConstHash("false"): return new SimpleToken(TokenKind::FalseKeyword, start, 5);
+        case HashedString::ConstHash("for"): return new SimpleToken(TokenKind::ForKeyword, start, 3);
+        case HashedString::ConstHash("function"): return new SimpleToken(TokenKind::FunctionKeyword, start, 8);
+        case HashedString::ConstHash("if"): return new SimpleToken(TokenKind::IfKeyword, start, 2);
+        case HashedString::ConstHash("in"): return new SimpleToken(TokenKind::InKeyword, start, 2);
+        case HashedString::ConstHash("local"): return new SimpleToken(TokenKind::LocalKeyword, start, 5);
+        case HashedString::ConstHash("nil"): return new SimpleToken(TokenKind::NilKeyword, start, 3);
+        case HashedString::ConstHash("not"): return new SimpleToken(TokenKind::NotKeyword, start, 3);
+        case HashedString::ConstHash("or"): return new SimpleToken(TokenKind::OrKeyword, start, 2);
+        case HashedString::ConstHash("return"): return new SimpleToken(TokenKind::ReturnKeyword, start, 6);
+        case HashedString::ConstHash("then"): return new SimpleToken(TokenKind::ThenKeyword, start, 4);
+        case HashedString::ConstHash("true"): return new SimpleToken(TokenKind::TrueKeyword, start, 4);
+        case HashedString::ConstHash("while"): return new SimpleToken(TokenKind::WhileKeyword, start, 5);
         default: return new IdentifierToken(s, start, s.length());
     }
 }
@@ -216,7 +227,7 @@ const unordered_map ControlCharacters{
 };
 
 IToken* Lexer::LexString(char c){
-    auto start = this -> Position - 1;
+    auto start = this -> _position - 1;
     auto end = start;
     char last = c;
     while (true){
@@ -229,11 +240,11 @@ IToken* Lexer::LexString(char c){
     }
     auto closeToken = this -> Next();
     if (closeToken != c){
-        this -> ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedCharacter, this->Position - 1, 1);
+        this -> ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedCharacter, this->_position - 1, 1);
         return new SimpleToken(TokenKind::BadToken, start, end -start + 1);
     }
 
-    string s = this -> _scriptString.substr(start + 1, end - start);
+    string s = this -> Source().substr(start + 1, end - start);
     stringstream stream;
     for (int i = 0; i < s.size(); i++){
         c = s[i];
@@ -251,4 +262,5 @@ IToken* Lexer::LexString(char c){
         }
     }
     return new StringToken(stream.str(), start, end - start );
-}
\ No newline at end of file
+}
+
diff --git a/src/Parser/Lexer.hpp b/src/Parser/Lexer.hpp
index dc1045f..caa9334 100644
--- a/src/Parser/Lexer.hpp
+++ b/src/Parser/Lexer.hpp
@@ -8,11 +8,22 @@
 using namespace std;
 
 class Lexer {
-    string _scriptString;
+    // Script text borrowed from the caller; null when the Lexer reads from
+    // _ownedScript instead.
+    string* _scriptString;
+    // Script text owned by the Lexer; filled only by the by-value constructor.
+    string _ownedScript;
+    // Returns the text being lexed, whichever constructor supplied it.
+    const string& Source() const {
+        return _scriptString != nullptr ? *_scriptString : _ownedScript;
+    }
 #ifdef TESTS_BUILD
 public:
 #endif
-    unsigned int Position;
+    // Index of the next character Peek() will return.
+    unsigned int _position;
+    // Length of the script text, cached at construction.
+    unsigned int _scriptSize;
     char Peek();
     char Next();
     IToken* LexNext(char c);
@@ -23,6 +34,7 @@ public:
     Script* ScriptData;
 
     vector Lex();
+    explicit Lexer(string* scriptString, class Script* script);
     explicit Lexer(string scriptString, class Script* script);
 
 };
diff --git a/src/Script.cpp b/src/Script.cpp
index 7d3fdcc..c17ea7a 100644
--- a/src/Script.cpp
+++ b/src/Script.cpp
@@ -33,7 +33,7 @@ Script::~Script() {
 }
 
 void Script::Parse(string script) {
-    auto lexer = Lexer(std::move(script), this);
+    auto lexer = Lexer(&script, this);
     auto lexResult = lexer.Lex();
     auto parser = Parser(lexResult, this);
     auto parseResult = parser.Parse();