Minor optimizations in lexer

This commit is contained in:
Deukhoofd 2019-06-05 19:11:56 +02:00
parent bda26b0ddf
commit d49692a17e
No known key found for this signature in database
GPG Key ID: B4C087AC81641654
3 changed files with 64 additions and 58 deletions

View File

@ -5,12 +5,21 @@
#include "Lexer.hpp" #include "Lexer.hpp"
// Lexer constructor. In this side-by-side hunk each line shows the pre-change
// text first and the post-change text second.
//   pre-change : takes the script text by value and moves it into _scriptString.
//   post-change: takes a pointer to the script text, stores that pointer as-is
//                (no copy is made) and caches the text's size in _scriptSize.
// Both versions store `script` in ScriptData and start lexing at position 0.
// The post-change version dereferences scriptString without a null check.
// NOTE(review): because only the pointer is kept, the pointed-to string
// presumably has to stay alive for as long as the Lexer is used - confirm at
// the call sites.
Lexer::Lexer(string scriptString, class Script* script) { Lexer::Lexer(string* scriptString, class Script* script) {
this -> _scriptString = std::move(scriptString); this -> _scriptString = scriptString;
this->_scriptSize = scriptString->size();
this -> ScriptData = script; this -> ScriptData = script;
this -> Position = 0; this -> _position = 0;
} }
// Convenience constructor that takes the script text by value.
//
// scriptString: the script text; its contents are taken over by the lexer.
// script:       stored in ScriptData.
//
// `scriptString` is a by-value parameter and is destroyed as soon as this
// constructor returns, so its address must not be kept: doing so would leave
// _scriptString dangling and every later read through it would be undefined
// behaviour. The text is therefore transferred into a heap-allocated string
// that outlives the constructor.
// NOTE(review): no owning member or destructor for this copy is visible here,
// so the allocation is not released; giving Lexer an owning std::string member
// (and pointing _scriptString at it) would be the cleaner long-term fix.
Lexer::Lexer(string scriptString, class Script *script) {
    string* ownedText = new string();
    // swap() steals the parameter's buffer instead of copying the characters.
    ownedText->swap(scriptString);

    this -> _scriptString = ownedText;
    this -> _scriptSize = ownedText->size();
    this -> ScriptData = script;
    this -> _position = 0;
}
vector<IToken*> Lexer::Lex() { vector<IToken*> Lexer::Lex() {
vector<IToken*> tokens; vector<IToken*> tokens;
while (true){ while (true){
@ -27,50 +36,50 @@ vector<IToken*> Lexer::Lex() {
} }
// Returns the character at the current position without advancing, or '\0'
// when the position is beyond the script text. Each line shows the pre-change
// text first and the post-change text second.
//   pre-change : compares Position with `>` against _scriptString.length() and
//                reads the character with operator[].
//   post-change: compares _position with `>=` against the cached _scriptSize and
//                reads through the stored pointer with at(), which is
//                bounds-checked; the `>=` guard keeps at() from ever being
//                called with an index equal to the size.
char Lexer::Peek(){ char Lexer::Peek(){
if (Lexer::Position > this -> _scriptString.length()) if (Lexer::_position >= this -> _scriptSize)
return '\0'; return '\0';
return this -> _scriptString[Lexer::Position]; return this -> _scriptString->at(Lexer::_position);
} }
// Returns the character Peek() reports for the current position and then
// advances the position by one. The position is incremented unconditionally,
// including when Peek() returned '\0'. Each line shows the pre-change text
// first and the post-change text second; the only difference is the rename of
// Position to _position.
char Lexer::Next(){ char Lexer::Next(){
char next = Peek(); char next = Peek();
Lexer::Position++; Lexer::_position++;
return next; return next;
} }
IToken* Lexer::LexNext(char c){ IToken* Lexer::LexNext(char c){
switch (c) { switch (c) {
case '\0': case '\0':
return new SimpleToken(TokenKind::EndOfFile, this -> Position - 1, 1); return new SimpleToken(TokenKind::EndOfFile, this -> _position - 1, 1);
case ' ': case '\t': case '\n': case '\r': case '\v': case '\f': case ' ': case '\t': case '\n': case '\r': case '\v': case '\f':
return new SimpleToken(TokenKind::WhiteSpace, this -> Position - 1, 1); return new SimpleToken(TokenKind::WhiteSpace, this -> _position - 1, 1);
case '+': case '+':
return new SimpleToken(TokenKind::PlusToken, this -> Position - 1, 1); return new SimpleToken(TokenKind::PlusToken, this -> _position - 1, 1);
case '-': case '-':
return new SimpleToken(TokenKind::MinusToken, this -> Position - 1, 1); return new SimpleToken(TokenKind::MinusToken, this -> _position - 1, 1);
case '/': case '/':
return new SimpleToken(TokenKind::SlashToken, this -> Position - 1, 1); return new SimpleToken(TokenKind::SlashToken, this -> _position - 1, 1);
case '*': case '*':
return new SimpleToken(TokenKind::StarToken, this -> Position - 1, 1); return new SimpleToken(TokenKind::StarToken, this -> _position - 1, 1);
case '(': case '(':
return new SimpleToken(TokenKind::OpenParenthesis, this -> Position - 1, 1); return new SimpleToken(TokenKind::OpenParenthesis, this -> _position - 1, 1);
case ')': case ')':
return new SimpleToken(TokenKind::CloseParenthesis, this -> Position - 1, 1); return new SimpleToken(TokenKind::CloseParenthesis, this -> _position - 1, 1);
case ',': case ',':
return new SimpleToken(TokenKind::CommaToken, this -> Position - 1, 1); return new SimpleToken(TokenKind::CommaToken, this -> _position - 1, 1);
case '=': case '=':
if (Lexer::Peek() == '='){ if (Lexer::Peek() == '='){
Lexer::Next(); Lexer::Next();
return new SimpleToken(TokenKind::EqualityToken, this -> Position - 2, 2); return new SimpleToken(TokenKind::EqualityToken, this -> _position - 2, 2);
} }
return new SimpleToken(TokenKind::AssignmentToken, this -> Position - 1, 1); return new SimpleToken(TokenKind::AssignmentToken, this -> _position - 1, 1);
case '~': case '~':
if (Lexer::Peek() == '='){ if (Lexer::Peek() == '='){
Lexer::Next(); Lexer::Next();
return new SimpleToken(TokenKind::InequalityToken, this -> Position - 2, 2); return new SimpleToken(TokenKind::InequalityToken, this -> _position - 2, 2);
} }
this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedCharacter, this -> Position - 1, 1); this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedCharacter, this -> _position - 1, 1);
return new SimpleToken(TokenKind::BadToken, this -> Position - 1, 1); return new SimpleToken(TokenKind::BadToken, this -> _position - 1, 1);
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
return LexNumber(c); return LexNumber(c);
case '"': case '"':
@ -83,8 +92,8 @@ IToken* Lexer::LexNext(char c){
if (isalpha(c)){ if (isalpha(c)){
return LexIdentifierOrKeyword(); return LexIdentifierOrKeyword();
} }
this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedCharacter, this -> Position - 1, 1); this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedCharacter, this -> _position - 1, 1);
return new SimpleToken(TokenKind::BadToken, this -> Position - 1, 1); return new SimpleToken(TokenKind::BadToken, this -> _position - 1, 1);
} }
} }
@ -110,7 +119,7 @@ IToken* Lexer::LexNumber(char c){
short decimal_index = 0; short decimal_index = 0;
bool has_point = false; bool has_point = false;
bool is_searching = true; bool is_searching = true;
unsigned int start = this -> Position - 1; unsigned int start = this -> _position - 1;
unsigned int length = 1; unsigned int length = 1;
while (is_searching){ while (is_searching){
char next = this -> Peek(); char next = this -> Peek();
@ -154,14 +163,8 @@ IToken* Lexer::LexNumber(char c){
} }
} }
// Compile-time capable hash of a NUL-terminated character sequence.
// The empty sequence hashes to 5381; otherwise the hash of the remaining
// characters is multiplied by 33 and the current character's value is added.
// Kept as a single return expression so it remains usable as a `case` label.
unsigned constexpr const_hash(char const *input) {
    return (*input == '\0')
        ? 5381
        : 33 * const_hash(input + 1) + static_cast<unsigned int>(*input);
}
IToken * Lexer::LexIdentifierOrKeyword() { IToken * Lexer::LexIdentifierOrKeyword() {
auto start = this -> Position - 1; auto start = this -> _position - 1;
auto end = start; auto end = start;
while (true){ while (true){
char next = this -> Peek(); char next = this -> Peek();
@ -175,27 +178,27 @@ IToken * Lexer::LexIdentifierOrKeyword() {
} }
} }
string s = this -> _scriptString.substr(start, end - start + 1); string s = this -> _scriptString->substr(start, end - start + 1);
switch (const_hash(s.c_str())){ switch (HashedString::ConstHash(s.c_str())){
case const_hash("and"): return new SimpleToken(TokenKind::AndKeyword, start, 3); case HashedString::ConstHash("and"): return new SimpleToken(TokenKind::AndKeyword, start, 3);
case const_hash("break"): return new SimpleToken(TokenKind::BreakKeyword, start, 5); case HashedString::ConstHash("break"): return new SimpleToken(TokenKind::BreakKeyword, start, 5);
case const_hash("do"): return new SimpleToken(TokenKind::DoKeyword, start, 2); case HashedString::ConstHash("do"): return new SimpleToken(TokenKind::DoKeyword, start, 2);
case const_hash("else"): return new SimpleToken(TokenKind::ElseKeyword, start, 4); case HashedString::ConstHash("else"): return new SimpleToken(TokenKind::ElseKeyword, start, 4);
case const_hash("elseif"): return new SimpleToken(TokenKind::ElseIfKeyword, start, 6); case HashedString::ConstHash("elseif"): return new SimpleToken(TokenKind::ElseIfKeyword, start, 6);
case const_hash("end"): return new SimpleToken(TokenKind::EndKeyword, start, 3); case HashedString::ConstHash("end"): return new SimpleToken(TokenKind::EndKeyword, start, 3);
case const_hash("false"): return new SimpleToken(TokenKind::FalseKeyword, start, 5); case HashedString::ConstHash("false"): return new SimpleToken(TokenKind::FalseKeyword, start, 5);
case const_hash("for"): return new SimpleToken(TokenKind::ForKeyword, start, 3); case HashedString::ConstHash("for"): return new SimpleToken(TokenKind::ForKeyword, start, 3);
case const_hash("function"): return new SimpleToken(TokenKind::FunctionKeyword, start, 8); case HashedString::ConstHash("function"): return new SimpleToken(TokenKind::FunctionKeyword, start, 8);
case const_hash("if"): return new SimpleToken(TokenKind::IfKeyword, start, 2); case HashedString::ConstHash("if"): return new SimpleToken(TokenKind::IfKeyword, start, 2);
case const_hash("in"): return new SimpleToken(TokenKind::InKeyword, start, 2); case HashedString::ConstHash("in"): return new SimpleToken(TokenKind::InKeyword, start, 2);
case const_hash("local"): return new SimpleToken(TokenKind::LocalKeyword, start, 5); case HashedString::ConstHash("local"): return new SimpleToken(TokenKind::LocalKeyword, start, 5);
case const_hash("nil"): return new SimpleToken(TokenKind::NilKeyword, start, 3); case HashedString::ConstHash("nil"): return new SimpleToken(TokenKind::NilKeyword, start, 3);
case const_hash("not"): return new SimpleToken(TokenKind::NotKeyword, start, 3); case HashedString::ConstHash("not"): return new SimpleToken(TokenKind::NotKeyword, start, 3);
case const_hash("or"): return new SimpleToken(TokenKind::OrKeyword, start, 2); case HashedString::ConstHash("or"): return new SimpleToken(TokenKind::OrKeyword, start, 2);
case const_hash("return"): return new SimpleToken(TokenKind::ReturnKeyword, start, 6); case HashedString::ConstHash("return"): return new SimpleToken(TokenKind::ReturnKeyword, start, 6);
case const_hash("then"): return new SimpleToken(TokenKind::ThenKeyword, start, 4); case HashedString::ConstHash("then"): return new SimpleToken(TokenKind::ThenKeyword, start, 4);
case const_hash("true"): return new SimpleToken(TokenKind::TrueKeyword, start, 4); case HashedString::ConstHash("true"): return new SimpleToken(TokenKind::TrueKeyword, start, 4);
case const_hash("while"): return new SimpleToken(TokenKind::WhileKeyword, start, 5); case HashedString::ConstHash("while"): return new SimpleToken(TokenKind::WhileKeyword, start, 5);
default: return new IdentifierToken(s, start, s.length()); default: return new IdentifierToken(s, start, s.length());
} }
} }
@ -216,7 +219,7 @@ const unordered_map<char, char> ControlCharacters{
}; };
IToken* Lexer::LexString(char c){ IToken* Lexer::LexString(char c){
auto start = this -> Position - 1; auto start = this -> _position - 1;
auto end = start; auto end = start;
char last = c; char last = c;
while (true){ while (true){
@ -229,11 +232,11 @@ IToken* Lexer::LexString(char c){
} }
auto closeToken = this -> Next(); auto closeToken = this -> Next();
if (closeToken != c){ if (closeToken != c){
this -> ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedCharacter, this->Position - 1, 1); this -> ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedCharacter, this->_position - 1, 1);
return new SimpleToken(TokenKind::BadToken, start, end -start + 1); return new SimpleToken(TokenKind::BadToken, start, end -start + 1);
} }
string s = this -> _scriptString.substr(start + 1, end - start); string s = this -> _scriptString->substr(start + 1, end - start);
stringstream stream; stringstream stream;
for (int i = 0; i < s.size(); i++){ for (int i = 0; i < s.size(); i++){
c = s[i]; c = s[i];
@ -251,4 +254,5 @@ IToken* Lexer::LexString(char c){
} }
} }
return new StringToken(stream.str(), start, end - start ); return new StringToken(stream.str(), start, end - start );
} }

View File

@ -8,11 +8,12 @@
using namespace std; using namespace std;
class Lexer { class Lexer {
string _scriptString; string* _scriptString;
#ifdef TESTS_BUILD #ifdef TESTS_BUILD
public: public:
#endif #endif
unsigned int Position; unsigned int _position;
unsigned int _scriptSize;
char Peek(); char Peek();
char Next(); char Next();
IToken* LexNext(char c); IToken* LexNext(char c);
@ -23,6 +24,7 @@ public:
Script* ScriptData; Script* ScriptData;
vector<IToken*> Lex(); vector<IToken*> Lex();
explicit Lexer(string* scriptString, class Script* script);
explicit Lexer(string scriptString, class Script* script); explicit Lexer(string scriptString, class Script* script);
}; };

View File

@ -33,7 +33,7 @@ Script::~Script() {
} }
void Script::Parse(string script) { void Script::Parse(string script) {
auto lexer = Lexer(std::move(script), this); auto lexer = Lexer(&script, this);
auto lexResult = lexer.Lex(); auto lexResult = lexer.Lex();
auto parser = Parser(lexResult, this); auto parser = Parser(lexResult, this);
auto parseResult = parser.Parse(); auto parseResult = parser.Parse();