Minor optimizations in lexer

2019-06-05 19:11:56 +02:00
parent bda26b0ddf
commit d49692a17e
3 changed files with 64 additions and 58 deletions
--- a/src/Parser/Lexer.cpp
+++ b/src/Parser/Lexer.cpp
@@ -5,12 +5,21 @@

 #include "Lexer.hpp"

-Lexer::Lexer(string scriptString, class Script* script) {
-    this -> _scriptString = std::move(scriptString);
+Lexer::Lexer(string* scriptString, class Script* script) { // Non-owning overload: reads the caller's string through a borrowed pointer instead of copying it.
+    this -> _scriptString = scriptString; // Only the pointer is stored; Peek() dereferences it later, so *scriptString has to stay alive while this Lexer is in use.
+    this->_scriptSize = scriptString->size(); // Length is cached once here (scriptString is dereferenced without a null check); Peek() bounds-checks against this cached value.
    this -> ScriptData = script;
-    this -> Position = 0;
+    this -> _position = 0; // Reading starts at the first character.
 }

+Lexer::Lexer(string scriptString, class Script *script) { // By-value overload: the lexer keeps its own copy of the script text.
+    this -> _scriptString = new string(std::move(scriptString)); // Heap copy: the address of the by-value parameter would dangle as soon as this constructor returns. Nothing in this block frees the copy -- TODO(review): give Lexer an owning member or a destructor.
+    this->_scriptSize = this -> _scriptString->size(); // Read from the copy; the moved-from parameter's size is unspecified.
+    this -> ScriptData = script;
+    this -> _position = 0; // Reading starts at the first character.
+}
+
+
 vector<IToken*> Lexer::Lex() {
    vector<IToken*> tokens;
    while (true){
@@ -27,50 +36,50 @@ vector<IToken*> Lexer::Lex() {
 }

 char Lexer::Peek(){
-    if (Lexer::Position > this -> _scriptString.length())
+    if (Lexer::_position >= this -> _scriptSize) // Peek: yields the character at the current position without consuming it, or '\0' once the position reaches the cached script length.
        return '\0';
-    return this -> _scriptString[Lexer::Position];
+    return this -> _scriptString->at(Lexer::_position); // at() is range-checked and throws std::out_of_range for an index equal to size(), which is why the guard uses >= rather than >.
 }

 char Lexer::Next(){
    char next = Peek();
-    Lexer::Position++;
+    Lexer::_position++; // Consume the peeked character. The increment is unconditional, so the position still advances when Peek() returned '\0'; LexNext addresses the consumed character as _position - 1.
    return next;
 }

 IToken* Lexer::LexNext(char c){
    switch (c) {
        case '\0':
-            return new SimpleToken(TokenKind::EndOfFile, this -> Position - 1, 1);
+            return new SimpleToken(TokenKind::EndOfFile, this -> _position - 1, 1);
        case ' ': case '\t': case '\n': case '\r': case '\v': case '\f':
-            return new SimpleToken(TokenKind::WhiteSpace, this -> Position - 1, 1);
+            return new SimpleToken(TokenKind::WhiteSpace, this -> _position - 1, 1);
        case '+':
-            return new SimpleToken(TokenKind::PlusToken, this -> Position - 1, 1);
+            return new SimpleToken(TokenKind::PlusToken, this -> _position - 1, 1);
        case '-':
-            return new SimpleToken(TokenKind::MinusToken, this -> Position - 1, 1);
+            return new SimpleToken(TokenKind::MinusToken, this -> _position - 1, 1);
        case '/':
-            return new SimpleToken(TokenKind::SlashToken, this -> Position - 1, 1);
+            return new SimpleToken(TokenKind::SlashToken, this -> _position - 1, 1);
        case '*':
-            return new SimpleToken(TokenKind::StarToken, this -> Position - 1, 1);
+            return new SimpleToken(TokenKind::StarToken, this -> _position - 1, 1);
        case '(':
-            return new SimpleToken(TokenKind::OpenParenthesis, this -> Position - 1, 1);
+            return new SimpleToken(TokenKind::OpenParenthesis, this -> _position - 1, 1);
        case ')':
-            return new SimpleToken(TokenKind::CloseParenthesis, this -> Position - 1, 1);
+            return new SimpleToken(TokenKind::CloseParenthesis, this -> _position - 1, 1);
        case ',':
-            return new SimpleToken(TokenKind::CommaToken, this -> Position - 1, 1);
+            return new SimpleToken(TokenKind::CommaToken, this -> _position - 1, 1);
        case '=':
            if (Lexer::Peek() == '='){
                Lexer::Next();
-                return new SimpleToken(TokenKind::EqualityToken, this -> Position - 2, 2);
+                return new SimpleToken(TokenKind::EqualityToken, this -> _position - 2, 2);
            }
-            return new SimpleToken(TokenKind::AssignmentToken, this -> Position - 1, 1);
+            return new SimpleToken(TokenKind::AssignmentToken, this -> _position - 1, 1);
        case '~':
            if (Lexer::Peek() == '='){
                Lexer::Next();
-                return new SimpleToken(TokenKind::InequalityToken, this -> Position - 2, 2);
+                return new SimpleToken(TokenKind::InequalityToken, this -> _position - 2, 2);
            }
-            this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedCharacter, this -> Position - 1, 1);
-            return new SimpleToken(TokenKind::BadToken, this -> Position - 1, 1);
+            this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedCharacter, this -> _position - 1, 1);
+            return new SimpleToken(TokenKind::BadToken, this -> _position - 1, 1);
        case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
            return LexNumber(c);
        case '"':
@@ -83,8 +92,8 @@ IToken* Lexer::LexNext(char c){
            if (isalpha(c)){
                return LexIdentifierOrKeyword();
            }
-            this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedCharacter, this -> Position - 1, 1);
-            return new SimpleToken(TokenKind::BadToken, this -> Position - 1, 1);
+            this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedCharacter, this -> _position - 1, 1);
+            return new SimpleToken(TokenKind::BadToken, this -> _position - 1, 1);
    }
 }

@@ -110,7 +119,7 @@ IToken* Lexer::LexNumber(char c){
    short decimal_index = 0;
    bool has_point = false;
    bool is_searching = true;
-    unsigned int start = this -> Position - 1;
+    unsigned int start = this -> _position - 1;
    unsigned int length = 1;
    while (is_searching){
        char next = this -> Peek();
@@ -154,14 +163,8 @@ IToken* Lexer::LexNumber(char c){
    }
 }

-unsigned constexpr const_hash(char const *input) {
-    return *input ?
-           static_cast<unsigned int>(*input) + 33 * const_hash(input + 1) :
-           5381;
-}
-
 IToken * Lexer::LexIdentifierOrKeyword() {
-    auto start = this -> Position - 1;
+    auto start = this -> _position - 1;
    auto end = start;
    while (true){
        char next = this -> Peek();
@@ -175,27 +178,27 @@ IToken * Lexer::LexIdentifierOrKeyword() {
        }
    }

-    string s = this -> _scriptString.substr(start, end - start + 1);
-    switch (const_hash(s.c_str())){
-        case const_hash("and"): return new SimpleToken(TokenKind::AndKeyword, start, 3);
-        case const_hash("break"): return new SimpleToken(TokenKind::BreakKeyword, start, 5);
-        case const_hash("do"): return new SimpleToken(TokenKind::DoKeyword, start, 2);
-        case const_hash("else"): return new SimpleToken(TokenKind::ElseKeyword, start, 4);
-        case const_hash("elseif"): return new SimpleToken(TokenKind::ElseIfKeyword, start, 6);
-        case const_hash("end"): return new SimpleToken(TokenKind::EndKeyword, start, 3);
-        case const_hash("false"): return new SimpleToken(TokenKind::FalseKeyword, start, 5);
-        case const_hash("for"): return new SimpleToken(TokenKind::ForKeyword, start, 3);
-        case const_hash("function"): return new SimpleToken(TokenKind::FunctionKeyword, start, 8);
-        case const_hash("if"): return new SimpleToken(TokenKind::IfKeyword, start, 2);
-        case const_hash("in"): return new SimpleToken(TokenKind::InKeyword, start, 2);
-        case const_hash("local"): return new SimpleToken(TokenKind::LocalKeyword, start, 5);
-        case const_hash("nil"): return new SimpleToken(TokenKind::NilKeyword, start, 3);
-        case const_hash("not"): return new SimpleToken(TokenKind::NotKeyword, start, 3);
-        case const_hash("or"): return new SimpleToken(TokenKind::OrKeyword, start, 2);
-        case const_hash("return"): return new SimpleToken(TokenKind::ReturnKeyword, start, 6);
-        case const_hash("then"): return new SimpleToken(TokenKind::ThenKeyword, start, 4);
-        case const_hash("true"): return new SimpleToken(TokenKind::TrueKeyword, start, 4);
-        case const_hash("while"): return new SimpleToken(TokenKind::WhileKeyword, start, 5);
+    string s = this -> _scriptString->substr(start, end - start + 1);
+    switch (HashedString::ConstHash(s.c_str())){
+        case HashedString::ConstHash("and"): return new SimpleToken(TokenKind::AndKeyword, start, 3);
+        case HashedString::ConstHash("break"): return new SimpleToken(TokenKind::BreakKeyword, start, 5);
+        case HashedString::ConstHash("do"): return new SimpleToken(TokenKind::DoKeyword, start, 2);
+        case HashedString::ConstHash("else"): return new SimpleToken(TokenKind::ElseKeyword, start, 4);
+        case HashedString::ConstHash("elseif"): return new SimpleToken(TokenKind::ElseIfKeyword, start, 6);
+        case HashedString::ConstHash("end"): return new SimpleToken(TokenKind::EndKeyword, start, 3);
+        case HashedString::ConstHash("false"): return new SimpleToken(TokenKind::FalseKeyword, start, 5);
+        case HashedString::ConstHash("for"): return new SimpleToken(TokenKind::ForKeyword, start, 3);
+        case HashedString::ConstHash("function"): return new SimpleToken(TokenKind::FunctionKeyword, start, 8);
+        case HashedString::ConstHash("if"): return new SimpleToken(TokenKind::IfKeyword, start, 2);
+        case HashedString::ConstHash("in"): return new SimpleToken(TokenKind::InKeyword, start, 2);
+        case HashedString::ConstHash("local"): return new SimpleToken(TokenKind::LocalKeyword, start, 5);
+        case HashedString::ConstHash("nil"): return new SimpleToken(TokenKind::NilKeyword, start, 3);
+        case HashedString::ConstHash("not"): return new SimpleToken(TokenKind::NotKeyword, start, 3);
+        case HashedString::ConstHash("or"): return new SimpleToken(TokenKind::OrKeyword, start, 2);
+        case HashedString::ConstHash("return"): return new SimpleToken(TokenKind::ReturnKeyword, start, 6);
+        case HashedString::ConstHash("then"): return new SimpleToken(TokenKind::ThenKeyword, start, 4);
+        case HashedString::ConstHash("true"): return new SimpleToken(TokenKind::TrueKeyword, start, 4);
+        case HashedString::ConstHash("while"): return new SimpleToken(TokenKind::WhileKeyword, start, 5);
        default: return new IdentifierToken(s, start, s.length());
    }
 }
@@ -216,7 +219,7 @@ const unordered_map<char, char> ControlCharacters{
 };

 IToken* Lexer::LexString(char c){
-    auto start = this -> Position - 1;
+    auto start = this -> _position - 1;
    auto end = start;
    char last = c;
    while (true){
@@ -229,11 +232,11 @@ IToken* Lexer::LexString(char c){
    }
    auto closeToken = this -> Next();
    if (closeToken != c){
-        this -> ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedCharacter, this->Position - 1, 1);
+        this -> ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedCharacter, this->_position - 1, 1);
        return new SimpleToken(TokenKind::BadToken, start, end -start + 1);
    }

-    string s = this -> _scriptString.substr(start + 1, end - start);
+    string s = this -> _scriptString->substr(start + 1, end - start);
    stringstream stream;
    for (int i = 0; i < s.size(); i++){
        c = s[i];
@@ -251,4 +254,5 @@ IToken* Lexer::LexString(char c){
        }
    }
    return new StringToken(stream.str(), start, end - start );
-}
+}
+
--- a/src/Parser/Lexer.hpp
+++ b/src/Parser/Lexer.hpp
@@ -8,11 +8,12 @@
 using namespace std;

 class Lexer {
-    string _scriptString;
+    string* _scriptString;
 #ifdef TESTS_BUILD
 public:
 #endif
-    unsigned int Position;
+    unsigned int _position;
+    unsigned int _scriptSize;
    char Peek();
    char Next();
    IToken* LexNext(char c);
@@ -23,6 +24,7 @@ public:
    Script* ScriptData;

    vector<IToken*> Lex();
+    explicit Lexer(string* scriptString, class Script* script);
    explicit Lexer(string scriptString, class Script* script);

 };
--- a/src/Script.cpp
+++ b/src/Script.cpp
@@ -33,7 +33,7 @@ Script::~Script() {
 }

 void Script::Parse(string script) {
-    auto lexer = Lexer(std::move(script), this);
+    auto lexer = Lexer(&script, this);
    auto lexResult = lexer.Lex();
    auto parser = Parser(lexResult, this);
    auto parseResult = parser.Parse();