Move Lexer to u16string handling for Unicode support
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
@@ -5,7 +5,7 @@
|
||||
|
||||
#include "Lexer.hpp"
|
||||
|
||||
Lexer::Lexer(const string& scriptString, class Script* script)
|
||||
Lexer::Lexer(const u16string& scriptString, class Script* script)
|
||||
: _scriptString(scriptString)
|
||||
{
|
||||
this->_scriptSize = scriptString.size();
|
||||
@@ -29,19 +29,19 @@ vector<const IToken*> Lexer::Lex() {
|
||||
return tokens;
|
||||
}
|
||||
|
||||
char Lexer::Peek(){
|
||||
char16_t Lexer::Peek(){
|
||||
if (Lexer::_position >= this -> _scriptSize)
|
||||
return '\0';
|
||||
return this -> _scriptString.at(Lexer::_position);
|
||||
}
|
||||
|
||||
char Lexer::Next(){
|
||||
char next = Peek();
|
||||
char16_t Lexer::Next(){
|
||||
char16_t next = Peek();
|
||||
Lexer::_position++;
|
||||
return next;
|
||||
}
|
||||
|
||||
IToken* Lexer::LexNext(char c){
|
||||
IToken* Lexer::LexNext(char16_t c){
|
||||
switch (c) {
|
||||
case '\0':
|
||||
return new SimpleToken(TokenKind::EndOfFile, this -> _position - 1, 1);
|
||||
@@ -113,7 +113,7 @@ IToken* Lexer::LexNext(char c){
|
||||
}
|
||||
}
|
||||
|
||||
int CharToInt(char c){
|
||||
int CharToInt(char16_t c){
|
||||
switch (c){
|
||||
case '0': return 0;
|
||||
case '1': return 1;
|
||||
@@ -129,7 +129,7 @@ int CharToInt(char c){
|
||||
}
|
||||
}
|
||||
|
||||
IToken* Lexer::LexNumber(char c){
|
||||
IToken* Lexer::LexNumber(char16_t c){
|
||||
long int_value = CharToInt(c);
|
||||
double float_value = 0;
|
||||
short decimal_index = 0;
|
||||
@@ -138,7 +138,7 @@ IToken* Lexer::LexNumber(char c){
|
||||
unsigned int start = this -> _position - 1;
|
||||
unsigned int length = 1;
|
||||
while (is_searching){
|
||||
char next = this -> Peek();
|
||||
char16_t next = this -> Peek();
|
||||
int next_val = CharToInt(next);
|
||||
if (next_val == -1){
|
||||
switch (next){
|
||||
@@ -183,7 +183,7 @@ IToken * Lexer::LexIdentifierOrKeyword() {
|
||||
auto start = this -> _position - 1;
|
||||
auto end = start;
|
||||
while (true){
|
||||
char next = this -> Peek();
|
||||
char16_t next = this -> Peek();
|
||||
if (next == '\0') break;
|
||||
if (isalpha(next) || next == '_'){
|
||||
this -> Next();
|
||||
@@ -194,7 +194,7 @@ IToken * Lexer::LexIdentifierOrKeyword() {
|
||||
}
|
||||
}
|
||||
|
||||
string s = this -> _scriptString.substr(start, end - start + 1);
|
||||
u16string s = this -> _scriptString.substr(start, end - start + 1);
|
||||
switch (HashedString::ConstHash(s.c_str())){
|
||||
case HashedString::ConstHash("and"): return new SimpleToken(TokenKind::AndKeyword, start, 3);
|
||||
case HashedString::ConstHash("break"): return new SimpleToken(TokenKind::BreakKeyword, start, 5);
|
||||
@@ -219,7 +219,7 @@ IToken * Lexer::LexIdentifierOrKeyword() {
|
||||
}
|
||||
}
|
||||
|
||||
const unordered_map<char, char> ControlCharacters{ // NOLINT(cert-err58-cpp)
|
||||
const unordered_map<char16_t, char16_t> ControlCharacters{ // NOLINT(cert-err58-cpp)
|
||||
{'0', '\0'},
|
||||
{'a', '\a'},
|
||||
{'b', '\b'},
|
||||
@@ -234,12 +234,12 @@ const unordered_map<char, char> ControlCharacters{ // NOLINT(cert-err58-cpp)
|
||||
{'\\', '\\'},
|
||||
};
|
||||
|
||||
IToken* Lexer::LexString(char c){
|
||||
IToken* Lexer::LexString(char16_t c){
|
||||
auto start = this -> _position - 1;
|
||||
auto end = start;
|
||||
char last = c;
|
||||
char16_t last = c;
|
||||
while (true){
|
||||
char next = this -> Peek();
|
||||
char16_t next = this -> Peek();
|
||||
if (next == '\0') break;
|
||||
if (next == c && last != '\\') break;
|
||||
this -> Next();
|
||||
@@ -252,8 +252,8 @@ IToken* Lexer::LexString(char c){
|
||||
return new SimpleToken(TokenKind::BadToken, start, end -start + 1);
|
||||
}
|
||||
|
||||
string s = this -> _scriptString.substr(start + 1, end - start);
|
||||
stringstream stream;
|
||||
u16string s = this -> _scriptString.substr(start + 1, end - start);
|
||||
std::basic_ostringstream<char16_t > stream;
|
||||
for (int i = 0; i < s.size(); i++){
|
||||
c = s[i];
|
||||
if (c == '\\'){
|
||||
|
||||
@@ -8,23 +8,23 @@
|
||||
using namespace std;
|
||||
|
||||
class Lexer {
|
||||
const string& _scriptString;
|
||||
const u16string& _scriptString;
|
||||
#ifdef TESTS_BUILD
|
||||
public:
|
||||
#endif
|
||||
unsigned int _position;
|
||||
unsigned int _scriptSize;
|
||||
char Peek();
|
||||
char Next();
|
||||
IToken* LexNext(char c);
|
||||
IToken* LexNumber(char c);
|
||||
char16_t Peek();
|
||||
char16_t Next();
|
||||
IToken* LexNext(char16_t c);
|
||||
IToken* LexNumber(char16_t c);
|
||||
IToken* LexIdentifierOrKeyword();
|
||||
IToken* LexString(char c);
|
||||
IToken* LexString(char16_t c);
|
||||
public:
|
||||
Script* ScriptData;
|
||||
|
||||
vector<const IToken*> Lex();
|
||||
explicit Lexer(const string& scriptString, class Script* script);
|
||||
explicit Lexer(const u16string& scriptString, class Script* script);
|
||||
|
||||
};
|
||||
|
||||
|
||||
@@ -100,7 +100,7 @@ public:
|
||||
};
|
||||
|
||||
class LiteralStringExpression : public ParsedExpression{
|
||||
const string _value;
|
||||
const u16string _value;
|
||||
public:
|
||||
const ParsedExpressionKind GetKind() const final{
|
||||
return ParsedExpressionKind::LiteralString;
|
||||
@@ -111,7 +111,7 @@ public:
|
||||
{
|
||||
}
|
||||
|
||||
const string& GetValue() const{
|
||||
const u16string& GetValue() const{
|
||||
return _value;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -91,10 +91,10 @@ public:
|
||||
};
|
||||
|
||||
class StringToken : public IToken{
|
||||
const string _value;
|
||||
const u16string _value;
|
||||
public:
|
||||
|
||||
explicit StringToken(string value, unsigned int position, unsigned int length)
|
||||
explicit StringToken(u16string value, unsigned int position, unsigned int length)
|
||||
: IToken(position, length),
|
||||
_value(std::move(value))
|
||||
{
|
||||
@@ -104,7 +104,7 @@ public:
|
||||
return TokenKind::String;
|
||||
}
|
||||
|
||||
const string& GetValue() const{
|
||||
const u16string& GetValue() const{
|
||||
return _value;
|
||||
}
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user