Move Lexer to u16string handling for Unicode support
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
@@ -104,9 +104,9 @@ public:
 };
 
 class BoundLiteralStringExpression : public BoundExpression{
-    const string _value;
+    const u16string _value;
 public:
-    BoundLiteralStringExpression(string value, unsigned int start, unsigned int length)
+    BoundLiteralStringExpression(u16string value, unsigned int start, unsigned int length)
             : BoundExpression(start, length, make_shared<StringScriptType>(true, HashedString::ConstHash(value.c_str()))),
               _value(value)
     {
@@ -116,7 +116,7 @@ public:
         return BoundExpressionKind ::LiteralString;
     }
 
-    const string GetValue() const{
+    const u16string GetValue() const{
         return _value;
     }
 };
@@ -96,7 +96,7 @@ shared_ptr<BooleanEvalValue> Evaluator::EvaluateBooleanBinary(const BoundBinaryE
 shared_ptr<StringEvalValue> Evaluator::EvaluateStringBinary(const BoundBinaryExpression* expression){
     if (expression->GetOperation() != BoundBinaryOperation::Concatenation)
         throw;
-    std::ostringstream strs;
+    std::basic_ostringstream<char16_t > strs;
     auto left = this -> EvaluateStringExpression(expression->GetLeft());
     strs << *left->EvaluateString();
     auto right = this -> EvaluateExpression(expression->GetRight());
@@ -20,7 +20,7 @@ extern "C" {
         return v->EvaluateBool();
     }
 
-    const char* EvaluateEvalValueString(EvalValue* v){
+    const char16_t * EvaluateEvalValueString(EvalValue* v){
         return v->EvaluateString() -> c_str();
     }
 
@@ -36,7 +36,7 @@ extern "C" {
         return new BooleanEvalValue(b);
     }
 
-    EvalValue* CreateStringEvalValue(const char* s){
+    EvalValue* CreateStringEvalValue(const char16_t * s){
         return new StringEvalValue(s);
     }
 }
@@ -47,11 +47,12 @@ extern "C" {
 
 
 TEST_CASE( "Evaluate String", "[integration]" ) {
-    auto script = Script::Create("\"foo bar\"");
+    auto script = Script::Create(u"\"foo bar\"");
     REQUIRE(!script->Diagnostics -> HasErrors());
     script->Evaluate();
     auto lastValue = script->GetLastValue();
-    REQUIRE(std::strcmp(EvaluateEvalValueString(lastValue), "foo bar") == 0);
+    auto s = u16string(EvaluateEvalValueString(lastValue));
+    REQUIRE(s == u"foo bar");
     delete script;
 }
 
@@ -31,7 +31,7 @@ public:
     virtual bool EvaluateBool() const{
         throw EvaluationException("Can't evaluate this EvalValue as bool.");
     }
-    virtual const std::string* EvaluateString() const {
+    virtual const std::u16string* EvaluateString() const {
         throw EvaluationException("Can't evaluate this EvalValue as string.");
     }
 
@@ -9,10 +9,10 @@
 using namespace std;
 
 class StringEvalValue : public EvalValue{
-    string _value;
+    u16string _value;
     size_t _hash;
 public:
-    explicit StringEvalValue(string s){
+    explicit StringEvalValue(u16string s){
         _value = move(s);
         _hash = HashedString::ConstHash (_value.c_str());
     }
@@ -27,7 +27,7 @@ public:
         return this->_hash == b->GetHashCode();
     };
 
-    const string* EvaluateString() const final{
+    const u16string* EvaluateString() const final{
         return &_value;
     }
 
@@ -38,7 +38,7 @@ public:
     shared_ptr<EvalValue> IndexValue(EvalValue* val) final{
         // Porygon is 1-indexed, so we convert to that.
         auto l = val->EvaluateInteger() - 1;
-        return make_shared<StringEvalValue>(string(1, _value[l]));
+        return make_shared<StringEvalValue>(u16string(1, _value[l]));
     }
 
     std::size_t GetHashCode() final{
@@ -5,7 +5,7 @@
 
 #include "Lexer.hpp"
 
-Lexer::Lexer(const string& scriptString, class Script* script)
+Lexer::Lexer(const u16string& scriptString, class Script* script)
         : _scriptString(scriptString)
 {
     this->_scriptSize = scriptString.size();
@@ -29,19 +29,19 @@ vector<const IToken*> Lexer::Lex() {
     return tokens;
 }
 
-char Lexer::Peek(){
+char16_t Lexer::Peek(){
     if (Lexer::_position >= this -> _scriptSize)
         return '\0';
     return this -> _scriptString.at(Lexer::_position);
 }
 
-char Lexer::Next(){
-    char next = Peek();
+char16_t Lexer::Next(){
+    char16_t next = Peek();
     Lexer::_position++;
     return next;
 }
 
-IToken* Lexer::LexNext(char c){
+IToken* Lexer::LexNext(char16_t c){
     switch (c) {
         case '\0':
             return new SimpleToken(TokenKind::EndOfFile, this -> _position - 1, 1);
@@ -113,7 +113,7 @@ IToken* Lexer::LexNext(char c){
     }
 }
 
-int CharToInt(char c){
+int CharToInt(char16_t c){
     switch (c){
         case '0': return 0;
         case '1': return 1;
@@ -129,7 +129,7 @@ int CharToInt(char c){
     }
 }
 
-IToken* Lexer::LexNumber(char c){
+IToken* Lexer::LexNumber(char16_t c){
     long int_value = CharToInt(c);
     double float_value = 0;
     short decimal_index = 0;
@@ -138,7 +138,7 @@ IToken* Lexer::LexNumber(char c){
     unsigned int start = this -> _position - 1;
     unsigned int length = 1;
     while (is_searching){
-        char next = this -> Peek();
+        char16_t next = this -> Peek();
         int next_val = CharToInt(next);
         if (next_val == -1){
             switch (next){
@@ -183,7 +183,7 @@ IToken * Lexer::LexIdentifierOrKeyword() {
     auto start = this -> _position - 1;
     auto end = start;
     while (true){
-        char next = this -> Peek();
+        char16_t next = this -> Peek();
         if (next == '\0') break;
         if (isalpha(next) || next == '_'){
             this -> Next();
@@ -194,7 +194,7 @@ IToken * Lexer::LexIdentifierOrKeyword() {
         }
     }
 
-    string s = this -> _scriptString.substr(start, end - start + 1);
+    u16string s = this -> _scriptString.substr(start, end - start + 1);
     switch (HashedString::ConstHash(s.c_str())){
         case HashedString::ConstHash("and"): return new SimpleToken(TokenKind::AndKeyword, start, 3);
         case HashedString::ConstHash("break"): return new SimpleToken(TokenKind::BreakKeyword, start, 5);
@@ -219,7 +219,7 @@ IToken * Lexer::LexIdentifierOrKeyword() {
     }
 }
 
-const unordered_map<char, char> ControlCharacters{ // NOLINT(cert-err58-cpp)
+const unordered_map<char16_t, char16_t> ControlCharacters{ // NOLINT(cert-err58-cpp)
         {'0', '\0'},
         {'a', '\a'},
         {'b', '\b'},
@@ -234,12 +234,12 @@ const unordered_map<char, char> ControlCharacters{ // NOLINT(cert-err58-cpp)
         {'\\', '\\'},
 };
 
-IToken* Lexer::LexString(char c){
+IToken* Lexer::LexString(char16_t c){
     auto start = this -> _position - 1;
     auto end = start;
-    char last = c;
+    char16_t last = c;
     while (true){
-        char next = this -> Peek();
+        char16_t next = this -> Peek();
         if (next == '\0') break;
         if (next == c && last != '\\') break;
         this -> Next();
@@ -252,8 +252,8 @@ IToken* Lexer::LexString(char c){
         return new SimpleToken(TokenKind::BadToken, start, end -start + 1);
     }
 
-    string s = this -> _scriptString.substr(start + 1, end - start);
-    stringstream stream;
+    u16string s = this -> _scriptString.substr(start + 1, end - start);
+    std::basic_ostringstream<char16_t > stream;
     for (int i = 0; i < s.size(); i++){
         c = s[i];
         if (c == '\\'){
@@ -8,23 +8,23 @@
 using namespace std;
 
 class Lexer {
-    const string& _scriptString;
+    const u16string& _scriptString;
 #ifdef TESTS_BUILD
 public:
 #endif
     unsigned int _position;
     unsigned int _scriptSize;
-    char Peek();
-    char Next();
-    IToken* LexNext(char c);
-    IToken* LexNumber(char c);
+    char16_t Peek();
+    char16_t Next();
+    IToken* LexNext(char16_t c);
+    IToken* LexNumber(char16_t c);
     IToken* LexIdentifierOrKeyword();
-    IToken* LexString(char c);
+    IToken* LexString(char16_t c);
 public:
     Script* ScriptData;
 
     vector<const IToken*> Lex();
-    explicit Lexer(const string& scriptString, class Script* script);
+    explicit Lexer(const u16string& scriptString, class Script* script);
 
 };
 
@@ -100,7 +100,7 @@ public:
 };
 
 class LiteralStringExpression : public ParsedExpression{
-    const string _value;
+    const u16string _value;
 public:
     const ParsedExpressionKind GetKind() const final{
         return ParsedExpressionKind::LiteralString;
@@ -111,7 +111,7 @@ public:
     {
     }
 
-    const string& GetValue() const{
+    const u16string& GetValue() const{
         return _value;
     }
 };
@@ -91,10 +91,10 @@ public:
 };
 
 class StringToken : public IToken{
-    const string _value;
+    const u16string _value;
 public:
 
-    explicit StringToken(string value, unsigned int position, unsigned int length)
+    explicit StringToken(u16string value, unsigned int position, unsigned int length)
             : IToken(position, length),
               _value(std::move(value))
     {
@@ -104,7 +104,7 @@ public:
         return TokenKind::String;
     }
 
-    const string& GetValue() const{
+    const u16string& GetValue() const{
         return _value;
     }
 };
@@ -1,18 +1,29 @@
 #include <utility>
 #include <vector>
 #include <iterator>
+#include <locale>
 #include <unordered_map>
+#include <codecvt>
 #include "Script.hpp"
 #include "Parser/Lexer.hpp"
 #include "Parser/Parser.hpp"
 #include "Binder/Binder.hpp"
 
-Script* Script::Create(const string& script) {
+Script* Script::Create(const u16string& script) {
     auto s = new Script();
     s -> Parse(script);
     return s;
 }
 
+std::u16string To_UTF16(const string &s)
+{
+    std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> conv;
+    return conv.from_bytes(s);
+}
+Script *Script::Create(const string &script) {
+    return Script::Create(To_UTF16(script));
+}
+
 Script::Script() {
     Diagnostics = new DiagnosticsHolder();
     _evaluator = new Evaluator(this);
@@ -32,7 +43,7 @@ Script::~Script() {
     delete this->_scriptVariables;
 }
 
-void Script::Parse(const string& script) {
+void Script::Parse(const u16string& script) {
     auto lexer = Lexer(script, this);
     auto lexResult = lexer.Lex();
     auto parser = Parser(lexResult, this);
@@ -54,11 +65,11 @@ void Script::Parse(const string& script) {
     delete parseResult;
 }
 
-EvalValue *Script::GetVariable(const string &key) {
+EvalValue *Script::GetVariable(const u16string &key) {
     return _scriptVariables -> at(HashedString(key).GetHash()).get();
 }
 
-bool Script::HasVariable(const string &key) {
+bool Script::HasVariable(const u16string &key) {
     auto f = _scriptVariables->find(HashedString(key).GetHash());
     return f != _scriptVariables->end();
 }
@@ -67,18 +78,19 @@ EvalValue *Script::GetLastValue() {
     return _evaluator->GetLastValue();
 }
 
-bool Script::HasFunction(const string &key) {
+bool Script::HasFunction(const u16string &key) {
     auto f = _scriptVariables->find(HashedString(key).GetHash());
     return f != _scriptVariables->end() && f.operator->()->second->GetTypeClass() == TypeClass ::Function;
 }
 
-shared_ptr<EvalValue> Script::CallFunction(const string &key, const vector<EvalValue *>& variables) {
+shared_ptr<EvalValue> Script::CallFunction(const u16string &key, const vector<EvalValue *>& variables) {
     auto var = (ScriptFunctionEvalValue*)GetVariable(key);
     return this->_evaluator->EvaluateFunction(var, variables);
 }
 
+
 extern "C" {
-    Script* CreateScript(char * s){
+    Script* CreateScript(char16_t * s){
         return Script::Create(s);
     }
 
@@ -90,19 +102,19 @@ extern "C" {
         return script->GetLastValue();
     }
 
-    bool HasVariable(Script* script, const char* key){
+    bool HasVariable(Script* script, const char16_t* key){
         return script->HasVariable(key);
     }
 
-    EvalValue* GetVariable(Script* script, const char* key){
+    EvalValue* GetVariable(Script* script, const char16_t* key){
         return script->GetVariable(key);
     }
 
-    bool HasFunction(Script* script, const char* key){
+    bool HasFunction(Script* script, const char16_t* key){
         return script->HasFunction(key);
     }
 
-    EvalValue* CallFunction(Script* script, const char* key, EvalValue* parameters[], int parameterCount){
+    EvalValue* CallFunction(Script* script, const char16_t* key, EvalValue* parameters[], int parameterCount){
         std::vector<EvalValue*> v(parameters, parameters + parameterCount);
         return script->CallFunction(key, v).get();
     }
@@ -23,8 +23,9 @@ class Script {
     shared_ptr<ScriptType> _returnType;
 
     explicit Script();
-    void Parse(const string& script);
+    void Parse(const u16string& script);
 public:
+    static Script* Create(const u16string& script);
     static Script* Create(const string& script);
     DiagnosticsHolder* Diagnostics;
 
@@ -42,11 +43,11 @@ public:
 
     EvalValue* GetLastValue();
 
-    EvalValue* GetVariable(const string& key);
-    bool HasVariable(const string& key);
+    EvalValue* GetVariable(const u16string& key);
+    bool HasVariable(const u16string& key);
 
-    shared_ptr<EvalValue> CallFunction(const string& key, const vector<EvalValue*>& variables);
-    bool HasFunction(const string& key);
+    shared_ptr<EvalValue> CallFunction(const u16string& key, const vector<EvalValue*>& variables);
+    bool HasFunction(const u16string& key);
 };
 
 
@@ -7,12 +7,22 @@
 class HashedString{
     const uint32_t _hash;
 public:
-    explicit HashedString(const std::string& s) : _hash(ConstHash(s.c_str())){
+    explicit HashedString(const std::u16string& s) : _hash(ConstHash(s.c_str())){
     }
+    explicit HashedString(char16_t const *input) : _hash(ConstHash(input)){
+    }
+
     explicit HashedString(char const *input) : _hash(ConstHash(input)){
     }
+
     HashedString(const HashedString& b) = default;
 
+    static uint32_t constexpr ConstHash(char16_t const *input) {
+        return *input ?
+               static_cast<uint32_t>(*input) + 33 * ConstHash(input + 1) :
+               5381;
+    }
+
     static uint32_t constexpr ConstHash(char const *input) {
         return *input ?
                static_cast<uint32_t>(*input) + 33 * ConstHash(input + 1) :
Reference in New Issue
Block a user