#include #include #include "Lexer.hpp" Lexer::Lexer(string scriptString, class Script* script) { this -> ScriptString = std::move(scriptString); this -> ScriptData = script; this -> Position = 0; } vector Lexer::Lex() { vector tokens; while (true){ IToken* next = this -> LexNext(this -> Next()); tokens.push_back(next); if (next->GetKind() == TokenKind::EndOfFile) break; } return tokens; } char Lexer::Peek(){ if (Lexer::Position > this -> ScriptString.length()) return '\0'; return this -> ScriptString[Lexer::Position]; } char Lexer::Next(){ char next = Peek(); Lexer::Position++; return next; } IToken* Lexer::LexNext(char c){ switch (c) { case '\0': return new SimpleToken(TokenKind::EndOfFile, this -> Position - 1, 1); case ' ': case '\t': case '\n': case '\r': case '\v': case '\f': return new SimpleToken(TokenKind::WhiteSpace, this -> Position - 1, 1); case '+': return new SimpleToken(TokenKind::PlusToken, this -> Position - 1, 1); case '-': return new SimpleToken(TokenKind::MinusToken, this -> Position - 1, 1); case '/': return new SimpleToken(TokenKind::SlashToken, this -> Position - 1, 1); case '*': return new SimpleToken(TokenKind::StarToken, this -> Position - 1, 1); case '=': if (Lexer::Peek() == '='){ Lexer::Next(); return new SimpleToken(TokenKind::EqualityToken, this -> Position - 2, 2); } return new SimpleToken(TokenKind::AssignmentToken, this -> Position - 1, 1); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return LexNumber(c); case '_': return LexIdentifierOrKeyword(c); default: if (isalpha(c)){ return LexIdentifierOrKeyword(c); } this -> ScriptData->Diagnostics.LogError(DiagnosticCode::UnexpectedCharacter, this -> Position - 1, 1); return new SimpleToken(TokenKind::BadToken, this -> Position - 1, 1); } } int CharToInt(char c){ switch (c){ case '0': return 0; case '1': return 1; case '2': return 2; case '3': return 3; case '4': return 4; case '5': return 5; case '6': return 6; case '7': return 7; case '8': return 8; case '9': return 9; default: return -1; } } IToken* Lexer::LexNumber(char c){ long int_value = CharToInt(c); double float_value = 0; short decimal_index = 0; bool has_point = false; bool is_searching = true; unsigned int start = this -> Position - 1; unsigned int length = 1; while (is_searching){ char next = this -> Peek(); int next_val = CharToInt(next); if (next_val == -1){ switch (next){ case '_': this -> Next(); length++; continue; case '.': this -> Next(); has_point = true; decimal_index = 0; float_value = int_value; length++; continue; default: is_searching = false; continue; } } else{ this -> Next(); length++; if (has_point){ decimal_index++; float_value += next_val / pow(10, decimal_index); } else { int_value *= 10; int_value += next_val; } } } if (has_point){ return new FloatToken(float_value, start, length); } else{ return new IntegerToken(int_value, start, length); } } unsigned constexpr const_hash(char const *input) { return *input ? static_cast(*input) + 33 * const_hash(input + 1) : 5381; } IToken* Lexer::LexIdentifierOrKeyword(char c){ vector charVec(1, c); auto start = this -> Position - 1; while (true){ char next = this -> Peek(); if (next == '\0') break; if (isalpha(next) || next == '_'){ this -> Next(); charVec.push_back(next); } else{ break; } } string s = string(charVec.begin(), charVec.end()); switch (const_hash(s.c_str())){ case const_hash("and"): return new SimpleToken(TokenKind::AndKeyword, start, 3); case const_hash("break"): return new SimpleToken(TokenKind::BreakKeyword, start, 5); case const_hash("do"): return new SimpleToken(TokenKind::DoKeyword, start, 2); case const_hash("else"): return new SimpleToken(TokenKind::ElseKeyword, start, 4); case const_hash("elseif"): return new SimpleToken(TokenKind::ElseIfKeyword, start, 6); case const_hash("end"): return new SimpleToken(TokenKind::EndKeyword, start, 3); case const_hash("false"): return new SimpleToken(TokenKind::FalseKeyword, start, 5); case const_hash("for"): return new SimpleToken(TokenKind::ForKeyword, start, 3); case const_hash("function"): return new SimpleToken(TokenKind::FunctionKeyword, start, 8); case const_hash("if"): return new SimpleToken(TokenKind::IfKeyword, start, 2); case const_hash("in"): return new SimpleToken(TokenKind::InKeyword, start, 2); case const_hash("local"): return new SimpleToken(TokenKind::LocalKeyword, start, 5); case const_hash("nil"): return new SimpleToken(TokenKind::NilKeyword, start, 3); case const_hash("not"): return new SimpleToken(TokenKind::NotKeyword, start, 3); case const_hash("or"): return new SimpleToken(TokenKind::OrKeyword, start, 2); case const_hash("return"): return new SimpleToken(TokenKind::ReturnKeyword, start, 6); case const_hash("then"): return new SimpleToken(TokenKind::ThenKeyword, start, 4); case const_hash("true"): return new SimpleToken(TokenKind::TrueKeyword, start, 4); case const_hash("while"): return new SimpleToken(TokenKind::WhileKeyword, start, 5); default: return new IdentifierToken(s, start, s.length()); } }