Initial Commit

2020-09-17 21:59:10 +02:00
commit 22435ca3fb
130 changed files with 91018 additions and 0 deletions
--- a/server/src/Native/angelscript/source/as_tokenizer.cpp
+++ b/server/src/Native/angelscript/source/as_tokenizer.cpp
@@ -0,0 +1,475 @@
+/*
+   AngelCode Scripting Library
+   Copyright (c) 2003-2015 Andreas Jonsson
+
+   This software is provided 'as-is', without any express or implied 
+   warranty. In no event will the authors be held liable for any 
+   damages arising from the use of this software.
+
+   Permission is granted to anyone to use this software for any 
+   purpose, including commercial applications, and to alter it and 
+   redistribute it freely, subject to the following restrictions:
+
+   1. The origin of this software must not be misrepresented; you 
+      must not claim that you wrote the original software. If you use
+      this software in a product, an acknowledgment in the product 
+      documentation would be appreciated but is not required.
+
+   2. Altered source versions must be plainly marked as such, and 
+      must not be misrepresented as being the original software.
+
+   3. This notice may not be removed or altered from any source 
+      distribution.
+
+   The original version of this library can be located at:
+   http://www.angelcode.com/angelscript/
+
+   Andreas Jonsson
+   andreas@angelcode.com
+*/
+
+
+//
+// as_tokenizer.cpp
+//
+// This class identifies tokens from the script code
+//
+
+#include "as_config.h"
+#include "as_scriptengine.h"
+#include "as_tokenizer.h"
+#include "as_tokendef.h"
+
+#if !defined(AS_NO_MEMORY_H)
+#include <memory.h>
+#endif
+#include <string.h> // strcmp()
+
+BEGIN_AS_NAMESPACE
+
+asCTokenizer::asCTokenizer()
+{
+	engine = 0;
+	memset(keywordTable, 0, sizeof(keywordTable));
+
+	// Initialize the jump table
+	for( asUINT n = 0; n < numTokenWords; n++ )
+	{
+		const sTokenWord& current = tokenWords[n];
+		unsigned char start = current.word[0];
+
+		// Create new jump table entry if none exists
+		if( !keywordTable[start] )
+		{
+			// Surely there won't ever be more than 32 keywords starting with
+			// the same character. Right?
+			keywordTable[start] = asNEWARRAY(const sTokenWord*, 32);
+			memset(keywordTable[start], 0, sizeof(sTokenWord*)*32);
+		}
+
+		// Add the token sorted from longest to shortest so
+		// we check keywords greedily.
+		const sTokenWord** tok = keywordTable[start];
+		unsigned insert = 0, index = 0;
+		while( tok[index] )
+		{
+			if(tok[index]->wordLength >= current.wordLength)
+				++insert;
+			++index;
+		}
+
+		while( index > insert )
+		{
+			tok[index] = tok[index - 1];
+			--index;
+		}
+
+		tok[insert] = &current;
+	}
+}
+
+asCTokenizer::~asCTokenizer()
+{
+	// Deallocate the jump table
+	for( asUINT n = 0; n < 256; n++ )
+	{
+		if( keywordTable[n] )
+			asDELETEARRAY(keywordTable[n]);
+	}
+}
+
+// static
+const char *asCTokenizer::GetDefinition(int tokenType)
+{
+	if( tokenType == ttUnrecognizedToken			) return "<unrecognized token>";
+	if( tokenType == ttEnd							) return "<end of file>";
+	if( tokenType == ttWhiteSpace					) return "<white space>";
+	if( tokenType == ttOnelineComment				) return "<one line comment>";
+	if( tokenType == ttMultilineComment				) return "<multiple lines comment>";
+	if( tokenType == ttIdentifier					) return "<identifier>";
+	if( tokenType == ttIntConstant					) return "<integer constant>";
+	if( tokenType == ttFloatConstant				) return "<float constant>";
+	if( tokenType == ttDoubleConstant				) return "<double constant>";
+	if( tokenType == ttStringConstant				) return "<string constant>";
+	if( tokenType == ttMultilineStringConstant      ) return "<multiline string constant>";
+	if( tokenType == ttNonTerminatedStringConstant	) return "<nonterminated string constant>";
+	if( tokenType == ttBitsConstant					) return "<bits constant>";
+	if( tokenType == ttHeredocStringConstant		) return "<heredoc string constant>";
+
+	for( asUINT n = 0; n < numTokenWords; n++ )
+		if( tokenWords[n].tokenType == tokenType )
+			return tokenWords[n].word;
+
+	return 0;
+}
+
+bool asCTokenizer::IsDigitInRadix(char ch, int radix) const
+{
+	if( ch >= '0' && ch <= '9' ) return (ch -= '0') < radix;
+	if( ch >= 'A' && ch <= 'Z' ) return (ch -= 'A'-10) < radix;
+	if( ch >= 'a' && ch <= 'z' ) return (ch -= 'a'-10) < radix;
+	return false;
+}
+
+eTokenType asCTokenizer::GetToken(const char *source, size_t sourceLength, size_t *tokenLength, asETokenClass *tc) const
+{
+	asASSERT(source != 0);
+	asASSERT(tokenLength != 0);
+
+	eTokenType tokenType;
+	size_t     tlen;
+	asETokenClass t = ParseToken(source, sourceLength, tlen, tokenType);
+	if( tc          ) *tc          = t;
+	if( tokenLength ) *tokenLength = tlen;
+
+	return tokenType;
+}
+
+asETokenClass asCTokenizer::ParseToken(const char *source, size_t sourceLength, size_t &tokenLength, eTokenType &tokenType) const
+{
+	if( IsWhiteSpace(source, sourceLength, tokenLength, tokenType) ) return asTC_WHITESPACE;
+	if( IsComment(source, sourceLength, tokenLength, tokenType)    ) return asTC_COMMENT;
+	if( IsConstant(source, sourceLength, tokenLength, tokenType)   ) return asTC_VALUE;
+	if( IsIdentifier(source, sourceLength, tokenLength, tokenType) ) return asTC_IDENTIFIER;
+	if( IsKeyWord(source, sourceLength, tokenLength, tokenType)    ) return asTC_KEYWORD;
+
+	// If none of the above this is an unrecognized token
+	// We can find the length of the token by advancing
+	// one step and trying to identify a token there
+	tokenType   = ttUnrecognizedToken;
+	tokenLength = 1;
+
+	return asTC_UNKNOWN;
+}
+
+bool asCTokenizer::IsWhiteSpace(const char *source, size_t sourceLength, size_t &tokenLength, eTokenType &tokenType) const
+{
+	// Treat UTF8 byte-order-mark (EF BB BF) as whitespace
+	if( sourceLength >= 3 && 
+		asBYTE(source[0]) == 0xEFu &&
+		asBYTE(source[1]) == 0xBBu &&
+		asBYTE(source[2]) == 0xBFu )
+	{
+		tokenType   = ttWhiteSpace;
+		tokenLength = 3;
+		return true;
+	}
+
+	// Group all other white space characters into one
+	size_t n;
+	int numWsChars = (int)strlen(whiteSpace);
+	for( n = 0; n < sourceLength; n++ )
+	{
+		bool isWhiteSpace = false;
+		for( int w = 0; w < numWsChars; w++ )
+		{
+			if( source[n] == whiteSpace[w] )
+			{
+				isWhiteSpace = true;
+				break;
+			}
+		}
+		if( !isWhiteSpace )	break;
+	}
+
+	if( n > 0 )
+	{
+		tokenType   = ttWhiteSpace;
+		tokenLength = n;
+		return true;
+	}
+
+	return false;
+}
+
+bool asCTokenizer::IsComment(const char *source, size_t sourceLength, size_t &tokenLength, eTokenType &tokenType) const
+{
+	if( sourceLength < 2 )
+		return false;
+
+	if( source[0] != '/' )
+		return false;
+
+	if( source[1] == '/' )
+	{
+		// One-line comment
+
+		// Find the length
+		size_t n;
+		for( n = 2; n < sourceLength; n++ )
+		{
+			if( source[n] == '\n' )
+				break;
+		}
+
+		tokenType   = ttOnelineComment;
+		tokenLength = n < sourceLength ? n+1 : n;
+
+		return true;
+	}
+
+	if( source[1] == '*' )
+	{
+		// Multi-line comment
+
+		// Find the length
+		size_t n;
+		for( n = 2; n < sourceLength-1; )
+		{
+			if( source[n++] == '*' && source[n] == '/' )
+				break;
+		}
+
+		tokenType   = ttMultilineComment;
+		tokenLength = n+1;
+
+		return true;
+	}
+
+	return false;
+}
+
+bool asCTokenizer::IsConstant(const char *source, size_t sourceLength, size_t &tokenLength, eTokenType &tokenType) const
+{
+	// Starting with number
+	if( (source[0] >= '0' && source[0] <= '9') || (source[0] == '.' && sourceLength > 1 && source[1] >= '0' && source[1] <= '9') )
+	{
+		// Is it a based number?
+		if( source[0] == '0' && sourceLength > 1 )
+		{
+			// Determine the radix for the constant
+			int radix = 0;
+			switch( source[1] )
+ 			{
+			case 'b': case 'B': radix =  2; break;
+			case 'o': case 'O': radix =  8; break;
+			case 'd': case 'D': radix = 10; break;
+			case 'x': case 'X': radix = 16; break;
+ 			}
+
+			if( radix )
+			{
+				size_t n;
+				for( n = 2; n < sourceLength; n++ )
+					if( !IsDigitInRadix(source[n], radix) )
+						break;
+
+				tokenType   = ttBitsConstant;
+				tokenLength = n;
+				return true;
+			}
+		}
+
+		size_t n;
+		for( n = 0; n < sourceLength; n++ )
+		{
+			if( source[n] < '0' || source[n] > '9' )
+				break;
+		}
+
+		if( n < sourceLength && (source[n] == '.' || source[n] == 'e' || source[n] == 'E') )
+		{
+			if( source[n] == '.' )
+			{
+				n++;
+				for( ; n < sourceLength; n++ )
+				{
+					if( source[n] < '0' || source[n] > '9' )
+						break;
+				}
+			}
+
+			if( n < sourceLength && (source[n] == 'e' || source[n] == 'E') )
+			{
+				n++;
+				if( n < sourceLength && (source[n] == '-' || source[n] == '+') )
+					n++;
+
+				for( ; n < sourceLength; n++ )
+				{
+					if( source[n] < '0' || source[n] > '9' )
+						break;
+				}
+			}
+
+			if( n < sourceLength && (source[n] == 'f' || source[n] == 'F') )
+			{
+				tokenType   = ttFloatConstant;
+				tokenLength = n + 1;
+			}
+			else
+			{
+#ifdef AS_USE_DOUBLE_AS_FLOAT
+				tokenType   = ttFloatConstant;
+#else
+				tokenType   = ttDoubleConstant;
+#endif
+				tokenLength = n;
+			}
+			return true;
+		}
+
+		tokenType   = ttIntConstant;
+		tokenLength = n;
+		return true;
+	}
+
+	// String constant between double or single quotes
+	if( source[0] == '"' || source[0] == '\'' )
+	{
+		// Is it a normal string constant or a heredoc string constant?
+		if( sourceLength >= 6 && source[0] == '"' && source[1] == '"' && source[2] == '"' )
+		{
+			// Heredoc string constant (spans multiple lines, no escape sequences)
+
+			// Find the length
+			size_t n;
+			for( n = 3; n < sourceLength-2; n++ )
+			{
+				if( source[n] == '"' && source[n+1] == '"' && source[n+2] == '"' )
+					break;
+			}
+
+			tokenType   = ttHeredocStringConstant;
+			tokenLength = n+3;
+		}
+		else
+		{
+			// Normal string constant
+			tokenType = ttStringConstant;
+			char quote = source[0];
+			bool evenSlashes = true;
+			size_t n;
+			for( n = 1; n < sourceLength; n++ )
+			{
+#ifdef AS_DOUBLEBYTE_CHARSET
+				// Double-byte characters are only allowed for ASCII
+				if( (source[n] & 0x80) && engine->ep.scanner == 0 )
+				{
+					// This is a leading character in a double byte character, 
+					// include both in the string and continue processing.
+					n++;
+					continue;
+				}
+#endif
+
+				if( source[n] == '\n' ) 
+					tokenType = ttMultilineStringConstant;
+				if( source[n] == quote && evenSlashes )
+				{
+					tokenLength = n+1;
+					return true;
+				}
+				if( source[n] == '\\' ) evenSlashes = !evenSlashes; else evenSlashes = true;
+			}
+
+			tokenType   = ttNonTerminatedStringConstant;
+			tokenLength = n;
+		}
+
+		return true;
+	}
+
+	return false;
+}
+
+bool asCTokenizer::IsIdentifier(const char *source, size_t sourceLength, size_t &tokenLength, eTokenType &tokenType) const
+{
+	// char is unsigned by default on some architectures, e.g. ppc and arm
+	// Make sure the value is always treated as signed in the below comparisons
+	signed char c = source[0];
+
+	// Starting with letter or underscore
+	if( (c >= 'a' && c <= 'z') ||
+		(c >= 'A' && c <= 'Z') ||
+		c == '_' ||
+		(c < 0 && engine->ep.allowUnicodeIdentifiers) )
+	{
+		tokenType   = ttIdentifier;
+		tokenLength = 1;
+
+		for( size_t n = 1; n < sourceLength; n++ )
+		{
+			c = source[n];
+			if( (c >= 'a' && c <= 'z') ||
+				(c >= 'A' && c <= 'Z') ||
+				(c >= '0' && c <= '9') ||
+				c == '_' ||
+				(c < 0 && engine->ep.allowUnicodeIdentifiers) )
+				tokenLength++;
+			else
+				break;
+		}
+
+		// Make sure the identifier isn't a reserved keyword
+		if( IsKeyWord(source, tokenLength, tokenLength, tokenType) )
+			return false;
+
+		return true;
+	}
+
+	return false;
+}
+
+bool asCTokenizer::IsKeyWord(const char *source, size_t sourceLength, size_t &tokenLength, eTokenType &tokenType) const
+{
+	unsigned char start = source[0];
+	const sTokenWord **ptr = keywordTable[start];
+
+	if( !ptr )
+		return false;
+
+	for( ; *ptr; ++ptr )
+	{
+		size_t wlen = (*ptr)->wordLength;
+		if( sourceLength >= wlen && strncmp(source, (*ptr)->word, wlen) == 0 )
+		{
+			// Tokens that end with a character that can be part of an 
+			// identifier require an extra verification to guarantee that 
+			// we don't split an identifier token, e.g. the "!is" token 
+			// and the tokens "!" and "isTrue" in the "!isTrue" expression.
+			if( wlen < sourceLength &&
+				((source[wlen-1] >= 'a' && source[wlen-1] <= 'z') ||
+				 (source[wlen-1] >= 'A' && source[wlen-1] <= 'Z') ||
+				 (source[wlen-1] >= '0' && source[wlen-1] <= '9')) &&
+				((source[wlen] >= 'a' && source[wlen] <= 'z') ||
+				 (source[wlen] >= 'A' && source[wlen] <= 'Z') ||
+				 (source[wlen] >= '0' && source[wlen] <= '9') ||
+				 (source[wlen] == '_')) )
+			{
+				// The token doesn't really match, even though 
+				// the start of the source matches the token
+				continue;
+			}
+
+			tokenType = (*ptr)->tokenType;
+			tokenLength = wlen;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+END_AS_NAMESPACE
+