#include #include #include #include #include "Lexer.hpp" Lexer::Lexer(string* scriptString, class Script* script) { this -> _scriptString = scriptString; this->_scriptSize = scriptString->size(); this -> ScriptData = script; this -> _position = 0; } Lexer::Lexer(const string& scriptString, class Script *script) { this -> _scriptString = new string(scriptString); this->_scriptSize = scriptString.size(); this -> ScriptData = script; this -> _position = 0; } vector Lexer::Lex() { vector tokens; while (true){ IToken* next = this -> LexNext(this -> Next()); auto nextKind = next -> GetKind(); if (nextKind != TokenKind::WhiteSpace) tokens.push_back(next); else delete next; if (nextKind == TokenKind::EndOfFile) break; } return tokens; } char Lexer::Peek(){ if (Lexer::_position >= this -> _scriptSize) return '\0'; return this -> _scriptString->at(Lexer::_position); } char Lexer::Next(){ char next = Peek(); Lexer::_position++; return next; } IToken* Lexer::LexNext(char c){ switch (c) { case '\0': return new SimpleToken(TokenKind::EndOfFile, this -> _position - 1, 1); case ' ': case '\t': case '\n': case '\r': case '\v': case '\f': return new SimpleToken(TokenKind::WhiteSpace, this -> _position - 1, 1); case '+': return new SimpleToken(TokenKind::PlusToken, this -> _position - 1, 1); case '-': return new SimpleToken(TokenKind::MinusToken, this -> _position - 1, 1); case '/': return new SimpleToken(TokenKind::SlashToken, this -> _position - 1, 1); case '*': return new SimpleToken(TokenKind::StarToken, this -> _position - 1, 1); case '(': return new SimpleToken(TokenKind::OpenParenthesis, this -> _position - 1, 1); case ')': return new SimpleToken(TokenKind::CloseParenthesis, this -> _position - 1, 1); case '[': return new SimpleToken(TokenKind::OpenSquareBracket, this -> _position - 1, 1); case ']': return new SimpleToken(TokenKind::CloseSquareBracket, this -> _position - 1, 1); case '{': return new SimpleToken(TokenKind::OpenCurlyBracket, this -> _position - 1, 1); case '}': return new SimpleToken(TokenKind::CloseCurlyBracket, this -> _position - 1, 1); case ',': return new SimpleToken(TokenKind::CommaToken, this -> _position - 1, 1); case '.': return new SimpleToken(TokenKind::PeriodToken, this -> _position - 1, 1); case '=': if (Lexer::Peek() == '='){ Lexer::Next(); return new SimpleToken(TokenKind::EqualityToken, this -> _position - 2, 2); } return new SimpleToken(TokenKind::AssignmentToken, this -> _position - 1, 1); case '<': if (Lexer::Peek() == '='){ Lexer::Next(); return new SimpleToken(TokenKind::LessEquals, this -> _position - 2, 2); } return new SimpleToken(TokenKind::Less, this -> _position - 1, 1); case '>': if (Lexer::Peek() == '='){ Lexer::Next(); return new SimpleToken(TokenKind::GreaterEquals, this -> _position - 2, 2); } return new SimpleToken(TokenKind::Greater, this -> _position - 1, 1); case '~': if (Lexer::Peek() == '='){ Lexer::Next(); return new SimpleToken(TokenKind::InequalityToken, this -> _position - 2, 2); } this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedCharacter, this -> _position - 1, 1); return new SimpleToken(TokenKind::BadToken, this -> _position - 1, 1); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return LexNumber(c); case '"': return LexString(c); case '\'': return LexString(c); case '_': return LexIdentifierOrKeyword(); default: if (isalpha(c)){ return LexIdentifierOrKeyword(); } this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedCharacter, this -> _position - 1, 1); return new SimpleToken(TokenKind::BadToken, this -> _position - 1, 1); } } int CharToInt(char c){ switch (c){ case '0': return 0; case '1': return 1; case '2': return 2; case '3': return 3; case '4': return 4; case '5': return 5; case '6': return 6; case '7': return 7; case '8': return 8; case '9': return 9; default: return -1; } } IToken* Lexer::LexNumber(char c){ long int_value = CharToInt(c); double float_value = 0; short decimal_index = 0; bool has_point = false; bool is_searching = true; unsigned int start = this -> _position - 1; unsigned int length = 1; while (is_searching){ char next = this -> Peek(); int next_val = CharToInt(next); if (next_val == -1){ switch (next){ case '_': this -> Next(); length++; continue; case '.': this -> Next(); has_point = true; decimal_index = 0; float_value = int_value; length++; continue; default: is_searching = false; continue; } } else{ this -> Next(); length++; if (has_point){ decimal_index++; float_value += next_val / pow(10, decimal_index); } else { int_value *= 10; int_value += next_val; } } } if (has_point){ return new FloatToken(float_value, start, length); } else{ return new IntegerToken(int_value, start, length); } } IToken * Lexer::LexIdentifierOrKeyword() { auto start = this -> _position - 1; auto end = start; while (true){ char next = this -> Peek(); if (next == '\0') break; if (isalpha(next) || next == '_'){ this -> Next(); end++; } else{ break; } } string s = this -> _scriptString->substr(start, end - start + 1); switch (HashedString::ConstHash(s.c_str())){ case HashedString::ConstHash("and"): return new SimpleToken(TokenKind::AndKeyword, start, 3); case HashedString::ConstHash("break"): return new SimpleToken(TokenKind::BreakKeyword, start, 5); case HashedString::ConstHash("do"): return new SimpleToken(TokenKind::DoKeyword, start, 2); case HashedString::ConstHash("else"): return new SimpleToken(TokenKind::ElseKeyword, start, 4); case HashedString::ConstHash("elseif"): return new SimpleToken(TokenKind::ElseIfKeyword, start, 6); case HashedString::ConstHash("end"): return new SimpleToken(TokenKind::EndKeyword, start, 3); case HashedString::ConstHash("false"): return new SimpleToken(TokenKind::FalseKeyword, start, 5); case HashedString::ConstHash("for"): return new SimpleToken(TokenKind::ForKeyword, start, 3); case HashedString::ConstHash("function"): return new SimpleToken(TokenKind::FunctionKeyword, start, 8); case HashedString::ConstHash("if"): return new SimpleToken(TokenKind::IfKeyword, start, 2); case HashedString::ConstHash("in"): return new SimpleToken(TokenKind::InKeyword, start, 2); case HashedString::ConstHash("local"): return new SimpleToken(TokenKind::LocalKeyword, start, 5); case HashedString::ConstHash("nil"): return new SimpleToken(TokenKind::NilKeyword, start, 3); case HashedString::ConstHash("not"): return new SimpleToken(TokenKind::NotKeyword, start, 3); case HashedString::ConstHash("or"): return new SimpleToken(TokenKind::OrKeyword, start, 2); case HashedString::ConstHash("return"): return new SimpleToken(TokenKind::ReturnKeyword, start, 6); case HashedString::ConstHash("then"): return new SimpleToken(TokenKind::ThenKeyword, start, 4); case HashedString::ConstHash("true"): return new SimpleToken(TokenKind::TrueKeyword, start, 4); case HashedString::ConstHash("while"): return new SimpleToken(TokenKind::WhileKeyword, start, 5); default: return new IdentifierToken(s, start, s.length()); } } const unordered_map ControlCharacters{ {'0', '\0'}, {'a', '\a'}, {'b', '\b'}, {'t', '\t'}, {'n', '\n'}, {'v', '\v'}, {'f', '\f'}, {'r', '\r'}, {'"', '\"'}, {'\'', '\''}, {'\?', '\?'}, {'\\', '\\'}, }; IToken* Lexer::LexString(char c){ auto start = this -> _position - 1; auto end = start; char last = c; while (true){ char next = this -> Peek(); if (next == '\0') break; if (next == c && last != '\\') break; this -> Next(); end++; last = next; } auto closeToken = this -> Next(); if (closeToken != c){ this -> ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedCharacter, this->_position - 1, 1); return new SimpleToken(TokenKind::BadToken, start, end -start + 1); } string s = this -> _scriptString->substr(start + 1, end - start); stringstream stream; for (int i = 0; i < s.size(); i++){ c = s[i]; if (c == '\\'){ i++; c = s[i]; if (ControlCharacters.find(c) != ControlCharacters.end()) { stream << ControlCharacters.at(c); } else{ this -> ScriptData->Diagnostics->LogError(DiagnosticCode::InvalidStringControlCharacter, start + 1 + i, 1); stream << c; } } else{ stream << c; } } return new StringToken(stream.str(), start, end - start ); }