Initial commit, implements parser.

2021-05-15 16:53:53 +02:00
commit f1af568cb8
10 changed files with 1096 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,3 @@
+/target
+Cargo.lock
+.idea/
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "seraph_script"
+version = "0.1.0"
+authors = ["Deukhoofd <Deukhoofd@gmail.com>"]
+edition = "2018"
+
+[dependencies]
+itertools = "0.10.0"
--- a/grammar.ebnf
+++ b/grammar.ebnf
@@ -0,0 +1,99 @@
+letter              ::= 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G'
+                    | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N'
+                    | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U'
+                    | 'V' | 'W' | 'X' | 'Y' | 'Z' | 'a' | 'b'
+                    | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i'
+                    | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p'
+                    | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w'
+                    | 'x' | 'y' | 'z' ;
+
+all_characters      ::= ;
+digit               ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' ;
+hexadecimal_digit   ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' |
+                        '8' | '9' | 'A' | 'a' | 'B' | 'b' | 'C' | 'c' |
+                        'D' | 'd' | 'E' | 'e' | 'F' | 'f';
+octal_digit         ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7';
+binary_digit        ::= '0' | '1';
+
+character           ::= letter | digit | '_' ;
+
+identifier          ::= (letter | '_') { character };
+
+float               ::= digit  { digit } '.' digit {digit};
+dec_integer         ::= ['0D' | '0d'] digit {digit};
+hex_integer         ::= ('0X'|'0x') hexadecimal_digit {hexadecimal_digit};
+octal_integer       ::= ('0O'|'0o') octal_digit {octal_digit};
+binary_integer      ::= ('0B'|'0b') binary_digit {binary_digit};
+integer             ::= dec_integer | hex_integer | octal_integer | binary_integer;
+number              ::= integer | float;
+
+string              ::= ('\'' {all_characters} '\'' | '\"' {all_characters} '\"' | '\"\"\"' {all_characters} '\"\"\"');
+
+assignop            ::= '=' | '+=' | '-=' | '*=' | '/=' | '|=' | '&=' | '^=' | '%=' | '**=' | '<<=' | '>>=' | '>>>=';
+logicop             ::= '&&' | '||' | '^^' | 'and' | 'or' | 'xor';
+compop              ::= '==' | '!=' | '<' | '<=' | '>' | '>=' | 'is' | '!is';
+mathop              ::= '+' | '-' | '*' | '/' | '%' | '**';
+bitop               ::= '&' | '|' | '^' | '<<' | '>>' | '>>>';
+
+
+primtype            ::= 'void' | 'int' | 'int8' | 'int16' | 'int32' | 'int64' | 'uint' | 'uint8' | 'uint16' |
+                        'uint32' | 'uint64' | 'float' | 'double' | 'bool';
+datatype            ::= (identifier | primtype | 'auto');
+scope               ::= ['::'] {identifier '::'} [identifier ['<' type {',' type} '>'] '::'];
+type                ::= ['const'] scope datatype ['<' type {',' type} '>'] { ('[' ']') | ('@' ['const']) };
+# ternary is defined further below due to a circular dependency: ternary->expr->exprterm->initlist->assign
+assign              ::= ternary [ assignop assign ];
+initlist            ::= '{' [assign | initlist] {',' [assign | initlist]} '}';
+exprpreop           ::= '-' | '+' | '!' | '++' | '--' | '~' | '@';
+arglist             ::= '(' [identifier ':'] assign {',' [identifier ':'] assign} ')';
+funccall            ::= scope identifier arglist;
+constructcall       ::= type arglist;
+varaccess           ::= scope | identifier;
+cast                ::= 'cast' '<' type '>' '(' assign ')';
+literal             ::= number | string | 'true' | 'false' | 'null';
+typemod             ::= ['&' ['in' | 'out' | 'inout']];
+# statblock is defined further below, as statements are higher level than expressions.
+lambda              ::= 'function' '(' [[type typemod] identifier {',' [type typemod] identifier}] ')' statblock;
+
+exprvalue           ::= 'void' | constructcall | funccall | varaccess | cast | literal | '(' assign ')' | lambda;
+exprpostop          ::= ('.' (funccall | identifier)) | ('[' [identifier ':'] assign {',' [identifier ':'] assign} ']') | arglist | '++' | '--';
+exprterm            ::= ([type '='] initlist) | ({exprpreop} exprvalue {exprpostop});
+expr                ::= exprterm {(mathop | compop | logicop | bitop) exprterm};
+ternary             ::= expr ['?' assign ':' assign];
+
+return              ::= 'return' [assign] ';';
+exprstat            ::= assign ';';
+continue            ::= 'continue' ';';
+break               ::= 'break' ';';
+
+# As these are all statements using other statements, they use the statement and statblock types defined further below.
+if                  ::= 'if' '(' assign ')' statement ['else' statement];
+for                 ::= 'for' '(' (var | exprstat) exprstat [assign {',' assign}] ')' statement;
+while               ::= 'while' '(' assign ')' statement;
+dowhile             ::= 'do' statement 'while' '(' assign ')' ';';
+try                 ::= 'try' statblock 'catch' statblock;
+case                ::= (('case' expr) | 'default') ':' {statement};
+switch              ::= 'switch' '(' assign ')' '{' {case} '}';
+
+statement           ::= (if | for | while | return | statblock | break | continue | dowhile | switch | exprstat | try );
+var                 ::= ['private'|'protected'] type identifier [( '=' (initlist | expr)) | arglist] {',' identifier [( '=' (initlist | expr)) | arglist]} ';';
+statblock           ::= '{' {var | statement} '}';
+
+funcattr            ::= {'override' | 'final' | 'explicit' | 'property'};
+paramlist           ::= '(' ['void' | (type typemod [identifier] ['=' expr] {',' type typemod [identifier] ['=' expr]})] ')';
+
+virtprop            ::= ['private' | 'protected'] type ['&'] identifier '{' {('get' | 'set') ['const'] funcattr (statblock | ';')} '}';
+func                ::= {'shared' | 'external'} ['private' | 'protected'] [((type ['&']) | '~')] identifier paramlist ['const'] funcattr (';' | statblock);
+funcdef             ::= {'external' | 'shared'} 'funcdef' type ['&'] identifier paramlist ';';
+class               ::= {'shared' | 'abstract' | 'final' | 'external'} 'class' identifier
+                        (';' | ([':' identifier {',' identifier}] '{' {virtprop | func | var | funcdef | class} '}'));
+mixin               ::= 'mixin' class;
+enum                ::= {'shared' | 'external'} 'enum' identifier [ ':' primtype ] (';' | ('{' identifier ['=' expr] {',' identifier ['=' expr]} '}'));
+import              ::= 'import' type ['&'] identifier paramlist funcattr 'from' string ';';
+typedef             ::= 'typedef' (primtype | identifier) identifier ';';
+
+interfacemethod     ::= type ['&'] identifier paramlist ['const'] ';';
+interface           ::= {'external' | 'shared'} 'interface' identifier (';' | ([':' identifier {',' identifier}] '{' {virtprop | interfacemethod} '}'));
+
+namespace           ::= 'namespace' identifier '{' script '}';
+script              ::= {import | enum | typedef | class | mixin | interface | funcdef | virtprop | var | func | namespace | ';'};
--- a/src/defines.rs
+++ b/src/defines.rs
@@ -0,0 +1,4 @@
+/// The integer type used internally to store integer literals and to perform compile-time
+/// calculations on them.
+pub type LiteralInt = i64;
+/// The floating point type used internally to store floating point literals and to perform
+/// compile-time calculations on them.
+pub type LiteralFloat = f64;
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -0,0 +1,5 @@
+#![feature(concat_idents)]
+#![feature(exclusive_range_pattern)]
+
+pub(crate) mod defines;
+pub mod parsing;
--- a/src/parsing/lex_numerical.rs
+++ b/src/parsing/lex_numerical.rs
@@ -0,0 +1,242 @@
+use crate::defines::{LiteralFloat, LiteralInt};
+use crate::parsing::lex_tokens::LexToken;
+use itertools::MultiPeek;
+use std::str::Chars;
+
+/// Converts a decimal digit character (`'0'` through `'9'`) to its numeric value.
+///
+/// Returns `None` for every other character.
+#[inline(always)]
+fn get_decimal_value(c: char) -> Option<LiteralInt> {
+    // With radix 10, `to_digit` accepts exactly the ASCII digits `0`-`9`.
+    c.to_digit(10).map(LiteralInt::from)
+}
+
+/// Lexes a decimal integer or floating point literal.
+///
+/// Underscores are skipped wherever they appear. The first `.` switches to reading the fractional
+/// part and the first `e`/`E` switches to reading the exponent; either one turns the result into a
+/// [`LexToken::FloatLiteral`], otherwise a [`LexToken::IntegerLiteral`] is returned. Signed
+/// exponents are not handled here. Reading stops at the first character that cannot continue the
+/// literal; that character is left unconsumed and the peek cursor is reset before returning.
+#[inline(always)]
+fn lex_numeric_default(chars: &mut MultiPeek<Chars>) -> LexToken {
+    let mut int_value: LiteralInt = 0;
+    let mut decimal_value: LiteralInt = 0;
+    let mut exponent_value: LiteralInt = 0;
+    let mut decimal_length: LiteralInt = 0;
+    let mut is_decimal = false;
+    let mut is_exponent = false;
+
+    chars.reset_peek();
+    while let Some(&c) = chars.peek() {
+        match get_decimal_value(c) {
+            Some(i) => {
+                if is_decimal {
+                    decimal_value *= 10;
+                    decimal_value += i;
+                    decimal_length += 1;
+                } else if is_exponent {
+                    exponent_value *= 10;
+                    exponent_value += i;
+                } else {
+                    int_value *= 10;
+                    int_value += i;
+                }
+            }
+            None => match c {
+                // Digit separator, carries no value.
+                '_' => {}
+                // Only one fractional part is allowed, and never after the exponent.
+                '.' if !is_decimal && !is_exponent => is_decimal = true,
+                // Both spellings of the exponent marker are treated identically.
+                'e' | 'E' if !is_exponent => {
+                    is_decimal = false;
+                    is_exponent = true;
+                }
+                _ => break,
+            },
+        }
+        // Consume the character we just inspected; `next` also resets the peek cursor.
+        chars.next();
+    }
+    // Leave the peek cursor at the first unconsumed character for the caller.
+    chars.reset_peek();
+
+    if is_decimal || is_exponent {
+        let mut val = int_value as LiteralFloat
+            + (decimal_value as LiteralFloat / 10_i64.pow(decimal_length as u32) as LiteralFloat);
+        if is_exponent {
+            // Scale by 10^exponent. The exponent is clamped so the cast to `i32` cannot wrap;
+            // anything that large overflows to infinity anyway.
+            let exponent = exponent_value.min(LiteralInt::from(i32::MAX)) as i32;
+            let base: LiteralFloat = 10.0;
+            val *= base.powi(exponent);
+        }
+        LexToken::FloatLiteral(val)
+    } else {
+        LexToken::IntegerLiteral(int_value)
+    }
+}
+
+/// Converts a hexadecimal digit character (`0`-`9`, `a`-`f`, `A`-`F`) to its numeric value.
+///
+/// Returns `None` for every other character.
+#[inline(always)]
+fn get_hexadecimal_value(c: char) -> Option<LiteralInt> {
+    // With radix 16, `to_digit` accepts the ASCII digits plus both cases of `a`-`f`.
+    c.to_digit(16).map(LiteralInt::from)
+}
+
+/// Lexes the digits of a hexadecimal integer literal.
+///
+/// Underscores between digits are skipped. Reading stops at the first character that is neither a
+/// hexadecimal digit nor an underscore; that character is left unconsumed and the peek cursor is
+/// reset before returning.
+#[inline(always)]
+fn lex_numeric_hexadecimal(chars: &mut MultiPeek<Chars>) -> LexToken {
+    let mut int_value: LiteralInt = 0;
+    chars.reset_peek();
+    while let Some(&c) = chars.peek() {
+        match get_hexadecimal_value(c) {
+            Some(i) => {
+                int_value <<= 4;
+                int_value += i;
+            }
+            // Digit separator, carries no value.
+            None if c == '_' => {}
+            None => break,
+        }
+        // Consume the character we just inspected; `next` also resets the peek cursor.
+        chars.next();
+    }
+    // Leave the peek cursor at the first unconsumed character for the caller.
+    chars.reset_peek();
+    LexToken::IntegerLiteral(int_value)
+}
+
+/// Converts an octal digit character (`'0'` through `'7'`) to its numeric value.
+///
+/// Returns `None` for every other character.
+#[inline(always)]
+fn get_octal_value(c: char) -> Option<LiteralInt> {
+    // With radix 8, `to_digit` accepts exactly the ASCII digits `0`-`7`.
+    c.to_digit(8).map(LiteralInt::from)
+}
+
+/// Lexes the digits of an octal integer literal.
+///
+/// Underscores between digits are skipped. Reading stops at the first character that is neither
+/// an octal digit nor an underscore; that character is left unconsumed and the peek cursor is
+/// reset before returning.
+#[inline(always)]
+fn lex_numeric_octal(chars: &mut MultiPeek<Chars>) -> LexToken {
+    let mut int_value: LiteralInt = 0;
+    chars.reset_peek();
+    while let Some(&c) = chars.peek() {
+        match get_octal_value(c) {
+            Some(i) => {
+                int_value <<= 3;
+                int_value += i;
+            }
+            // Digit separator, carries no value.
+            None if c == '_' => {}
+            None => break,
+        }
+        // Consume the character we just inspected; `next` also resets the peek cursor.
+        chars.next();
+    }
+    // Leave the peek cursor at the first unconsumed character for the caller.
+    chars.reset_peek();
+    LexToken::IntegerLiteral(int_value)
+}
+
+/// Converts a binary digit character (`'0'` or `'1'`) to its numeric value.
+///
+/// Returns `None` for every other character.
+#[inline(always)]
+fn get_binary_value(c: char) -> Option<LiteralInt> {
+    // With radix 2, `to_digit` accepts exactly `0` and `1`.
+    c.to_digit(2).map(LiteralInt::from)
+}
+
+/// Lexes the digits of a binary integer literal.
+///
+/// Underscores between digits are skipped. Reading stops at the first character that is neither
+/// a binary digit nor an underscore; that character is left unconsumed and the peek cursor is
+/// reset before returning.
+#[inline(always)]
+fn lex_numeric_binary(chars: &mut MultiPeek<Chars>) -> LexToken {
+    let mut int_value: LiteralInt = 0;
+    chars.reset_peek();
+    while let Some(&c) = chars.peek() {
+        match get_binary_value(c) {
+            Some(i) => {
+                int_value <<= 1;
+                int_value += i;
+            }
+            // Digit separator, carries no value.
+            None if c == '_' => {}
+            None => break,
+        }
+        // Consume the character we just inspected; `next` also resets the peek cursor.
+        chars.next();
+    }
+    // Leave the peek cursor at the first unconsumed character for the caller.
+    chars.reset_peek();
+    LexToken::IntegerLiteral(int_value)
+}
+
+/// Lexes a numeric literal, dispatching on an optional radix prefix.
+///
+/// A leading `0` followed by `d`, `x`, `o` or `b` (either case) selects the decimal, hexadecimal,
+/// octal or binary digit lexer respectively, after the two prefix characters are consumed.
+/// Anything else is handed, unconsumed, to the decimal/float lexer.
+#[inline(always)]
+pub fn lex_numeric(chars: &mut MultiPeek<Chars>) -> LexToken {
+    chars.reset_peek();
+    // The second character is only inspected when the literal starts with `0`.
+    let radix_marker = if chars.peek() == Some(&'0') {
+        chars.peek().cloned()
+    } else {
+        None
+    };
+
+    let digit_lexer: fn(&mut MultiPeek<Chars>) -> LexToken = match radix_marker {
+        Some('D') | Some('d') => lex_numeric_default,
+        Some('X') | Some('x') => lex_numeric_hexadecimal,
+        Some('O') | Some('o') => lex_numeric_octal,
+        Some('B') | Some('b') => lex_numeric_binary,
+        _ => {
+            // No radix prefix: rewind and lex the whole literal as decimal.
+            chars.reset_peek();
+            return lex_numeric_default(chars);
+        }
+    };
+
+    // Drop the `0` and the radix marker before reading the digits.
+    chars.next();
+    chars.next();
+    digit_lexer(chars)
+}
--- a/src/parsing/lex_tokens.rs
+++ b/src/parsing/lex_tokens.rs
@@ -0,0 +1,138 @@
+use crate::defines::{LiteralFloat, LiteralInt};
+
+/// A single token produced by the lexer.
+#[derive(PartialEq, Debug)]
+pub enum LexToken {
+    /// Marks the end of the input; the lexer pushes it as the final token.
+    EndOfFile,
+    /// A single whitespace character (space, tab, carriage return or newline).
+    WhiteSpace,
+    /// A name that is not one of the reserved keywords.
+    Identifier(String),
+    /// An integer literal in decimal, hexadecimal, octal or binary notation.
+    IntegerLiteral(LiteralInt),
+    /// A literal with a fractional part and/or an exponent.
+    FloatLiteral(LiteralFloat),
+    /// The contents of a string literal, with escape sequences already resolved.
+    StringLiteral(String),
+
+    // Punctuation
+    Semicolon,
+    Colon,
+    /// `(`
+    OpenBracket,
+    /// `)`
+    CloseBracket,
+    /// `{`
+    OpenCurlyBracket,
+    /// `}`
+    CloseCurlyBracket,
+    /// `[`
+    OpenBlockBracket,
+    /// `]`
+    CloseBlockBracket,
+
+    // Keywords
+    AndKeyword,
+    AbstractKeyword,
+    AutoKeyword,
+    BoolKeyword,
+    BreakKeyword,
+    CaseKeyword,
+    CastKeyword,
+    CatchKeyword,
+    ClassKeyword,
+    ConstKeyword,
+    ContinueKeyword,
+    DefaultKeyword,
+    DoKeyword,
+    DoubleKeyword,
+    ElseKeyword,
+    EnumKeyword,
+    ExplicitKeyword,
+    ExternalKeyword,
+    FalseKeyword,
+    FinalKeyword,
+    FloatKeyword,
+    ForKeyword,
+    FromKeyword,
+    FuncDefKeyword,
+    FunctionKeyword,
+    GetKeyword,
+    IfKeyword,
+    ImportKeyword,
+    InKeyword,
+    InOutKeyword,
+    IntKeyword,
+    InterfaceKeyword,
+    Int8Keyword,
+    Int16Keyword,
+    Int32Keyword,
+    Int64Keyword,
+    IsKeyword,
+    MixinKeyword,
+    NamespaceKeyword,
+    NotKeyword,
+    NullKeyword,
+    OrKeyword,
+    OutKeyword,
+    OverrideKeyword,
+    PrivateKeyword,
+    PropertyKeyword,
+    ProtectedKeyword,
+    ReturnKeyword,
+    SetKeyword,
+    SharedKeyword,
+    SuperKeyword,
+    SwitchKeyword,
+    ThisKeyword,
+    TrueKeyword,
+    TryKeyword,
+    TypeDefKeyword,
+    UintKeyword,
+    Uint8Keyword,
+    Uint16Keyword,
+    Uint32Keyword,
+    Uint64Keyword,
+    VoidKeyword,
+    WhileKeyword,
+    XorKeyword,
+
+    // AssignOp
+    /// `=`
+    Equals,
+    /// `+=`
+    PlusEquals,
+    /// `-=`
+    MinusEquals,
+    /// `*=`
+    StarEquals,
+    /// `/=`
+    SlashEquals,
+    /// `|=`
+    LineEquals,
+    /// `&=`
+    AmpersandEquals,
+    /// `^=`
+    RoofEquals,
+    /// `%=`
+    PercentEquals,
+    /// `**=`
+    StarStarEquals,
+    /// `<<=`
+    LeftLeftEquals,
+    /// `>>=`
+    RightRightEquals,
+    /// `>>>=`
+    RightRightRightEquals,
+
+    // LogicOp
+    /// `&&`
+    AmpersandAmpersand,
+    /// `||`
+    LineLine,
+    /// `^^`
+    RoofRoof,
+
+    // CompOp
+    /// `==`
+    EqualsEquals,
+    /// `!=`
+    NotEquals,
+    /// `!is`
+    NotIsKeyword,
+    /// `>`
+    GreaterThan,
+    /// `>=`
+    GreaterThanEquals,
+    /// `<`
+    LessThan,
+    /// `<=`
+    LessThanEquals,
+
+    // MathOp
+    Plus,
+    Minus,
+    Star,
+    Slash,
+    Percent,
+    /// `**`
+    StarStar,
+
+    // BitOp
+    /// `&`
+    Ampersand,
+    /// `|`
+    VerticalLine,
+    /// `^`
+    Roof,
+    /// `<<`
+    LeftLeft,
+    /// `>>`
+    RightRight,
+    /// `>>>`
+    RightRightRight,
+
+    // ExprPreOp
+    /// `!`
+    ExclamationMark,
+    /// `++`
+    PlusPlus,
+    /// `--`
+    MinusMinus,
+    /// `~`
+    Tilde,
+    /// `@`
+    AtSymbol,
+}
--- a/src/parsing/lexer.rs
+++ b/src/parsing/lexer.rs
@@ -0,0 +1,350 @@
+use super::lex_numerical::lex_numeric;
+use crate::parsing::lex_tokens::LexToken;
+use itertools::{Itertools, MultiPeek};
+use std::str::Chars;
+
+/// Consumes the character at the front of `chars` and yields `eq` as its token.
+#[inline(always)]
+fn lex_and_consume(chars: &mut MultiPeek<Chars>, eq: LexToken) -> LexToken {
+    // The consumed character itself is irrelevant; the caller already chose the token for it.
+    let _ = chars.next();
+    eq
+}
+
+/// Consumes one operator character and checks whether it is followed by `=`.
+///
+/// Returns `eq` (consuming the `=` as well) when it is, and `or` otherwise. The peek cursor is
+/// reset before returning, so the caller's next `peek` sees the first unconsumed character.
+#[inline(always)]
+fn lex_eq_or(chars: &mut MultiPeek<Chars>, eq: LexToken, or: LexToken) -> LexToken {
+    chars.next();
+    let token = if chars.peek() == Some(&'=') {
+        chars.next();
+        eq
+    } else {
+        or
+    };
+    // `peek` advanced the cursor past a character we did not consume; rewind it.
+    chars.reset_peek();
+    token
+}
+
+/// Consumes one operator character and classifies it by the character that follows.
+///
+/// Returns `rep` when the next character equals `v` (the operator repeated, e.g. `++`), `eq` when
+/// it is `=` (e.g. `+=`), and `or` otherwise. The follow-up character is consumed only for `rep`
+/// and `eq`. The peek cursor is reset before returning, so the caller's next `peek` sees the
+/// first unconsumed character.
+#[inline(always)]
+fn lex_eq_rep_or(
+    chars: &mut MultiPeek<Chars>,
+    v: char,
+    eq: LexToken,
+    rep: LexToken,
+    or: LexToken,
+) -> LexToken {
+    chars.next();
+    let follow_up = chars.peek().cloned();
+    let token = match follow_up {
+        // The repetition check comes first so it wins if `v` itself is `=`.
+        Some(c) if c == v => {
+            chars.next();
+            rep
+        }
+        Some('=') => {
+            chars.next();
+            eq
+        }
+        _ => or,
+    };
+    // `peek` advanced the cursor past a character we may not have consumed; rewind it.
+    chars.reset_peek();
+    token
+}
+
+type LT = LexToken;
+
+/// Lexes a keyword or an identifier starting at the first unconsumed character.
+///
+/// Consumes the longest run of ASCII letters, ASCII digits and underscores, then returns the
+/// matching keyword token, or [`LexToken::Identifier`] when the word is not a keyword. The caller
+/// is expected to have checked that the first character is a letter or an underscore. The peek
+/// cursor is reset before returning.
+fn lex_keyword_or_identifier(chars: &mut MultiPeek<Chars>) -> LexToken {
+    let mut word = String::new();
+    // Start from the first unconsumed character regardless of how far the caller has peeked.
+    chars.reset_peek();
+    while let Some(&c) = chars.peek() {
+        if !(c.is_ascii_alphanumeric() || c == '_') {
+            break;
+        }
+        word.push(c);
+        // Consume the character we just accepted; `next` also resets the peek cursor.
+        chars.next();
+    }
+    // Leave the peek cursor at the first unconsumed character for the caller.
+    chars.reset_peek();
+
+    match word.as_str() {
+        "and" => LT::AndKeyword,
+        "abstract" => LT::AbstractKeyword,
+        "auto" => LT::AutoKeyword,
+        "bool" => LT::BoolKeyword,
+        "break" => LT::BreakKeyword,
+        "case" => LT::CaseKeyword,
+        "cast" => LT::CastKeyword,
+        "catch" => LT::CatchKeyword,
+        "class" => LT::ClassKeyword,
+        "const" => LT::ConstKeyword,
+        "continue" => LT::ContinueKeyword,
+        "default" => LT::DefaultKeyword,
+        "do" => LT::DoKeyword,
+        "double" => LT::DoubleKeyword,
+        "else" => LT::ElseKeyword,
+        "enum" => LT::EnumKeyword,
+        "explicit" => LT::ExplicitKeyword,
+        "external" => LT::ExternalKeyword,
+        "false" => LT::FalseKeyword,
+        "final" => LT::FinalKeyword,
+        "float" => LT::FloatKeyword,
+        "for" => LT::ForKeyword,
+        "from" => LT::FromKeyword,
+        "funcdef" => LT::FuncDefKeyword,
+        "function" => LT::FunctionKeyword,
+        "get" => LT::GetKeyword,
+        "if" => LT::IfKeyword,
+        "import" => LT::ImportKeyword,
+        "in" => LT::InKeyword,
+        "inout" => LT::InOutKeyword,
+        "int" => LT::IntKeyword,
+        "interface" => LT::InterfaceKeyword,
+        "int8" => LT::Int8Keyword,
+        "int16" => LT::Int16Keyword,
+        "int32" => LT::Int32Keyword,
+        "int64" => LT::Int64Keyword,
+        "is" => LT::IsKeyword,
+        "mixin" => LT::MixinKeyword,
+        "namespace" => LT::NamespaceKeyword,
+        "not" => LT::NotKeyword,
+        "null" => LT::NullKeyword,
+        "or" => LT::OrKeyword,
+        "out" => LT::OutKeyword,
+        "override" => LT::OverrideKeyword,
+        "private" => LT::PrivateKeyword,
+        "property" => LT::PropertyKeyword,
+        "protected" => LT::ProtectedKeyword,
+        "return" => LT::ReturnKeyword,
+        "set" => LT::SetKeyword,
+        "shared" => LT::SharedKeyword,
+        "super" => LT::SuperKeyword,
+        "switch" => LT::SwitchKeyword,
+        "this" => LT::ThisKeyword,
+        "true" => LT::TrueKeyword,
+        "try" => LT::TryKeyword,
+        "typedef" => LT::TypeDefKeyword,
+        "uint" => LT::UintKeyword,
+        "uint8" => LT::Uint8Keyword,
+        "uint16" => LT::Uint16Keyword,
+        "uint32" => LT::Uint32Keyword,
+        "uint64" => LT::Uint64Keyword,
+        "void" => LT::VoidKeyword,
+        "while" => LT::WhileKeyword,
+        "xor" => LT::XorKeyword,
+        _ => LT::Identifier(word),
+    }
+}
+
+/// Lexes a string literal whose opening delimiter is the first unconsumed character.
+///
+/// * `opening_char` - the quote character that delimits the string.
+/// * `heredoc` - when `true` the string is opened and closed by three consecutive delimiters
+///   instead of one, so single or double delimiters inside it are part of the contents.
+///
+/// A backslash escapes the following character: `\0`, `\n`, `\r` and `\t` produce the
+/// corresponding control character, any other escaped character (including the delimiter and the
+/// backslash itself) is taken literally. The closing delimiter is consumed and the peek cursor
+/// is reset before returning.
+///
+/// # Panics
+///
+/// Panics when the input ends before the string is closed.
+fn lex_string(chars: &mut MultiPeek<Chars>, opening_char: &char, heredoc: bool) -> LexToken {
+    // Consume the opening delimiter(s).
+    chars.next();
+    if heredoc {
+        chars.next();
+        chars.next();
+    }
+
+    let mut s = String::new();
+    let mut last_was_control = false;
+    loop {
+        let c = match chars.next() {
+            Some(c) => c,
+            // TODO: log error. Strings need to be closed, EOF should error.
+            None => unimplemented!("unterminated string literal"),
+        };
+
+        if last_was_control {
+            // Resolve the escape sequence introduced by the preceding backslash.
+            s.push(match c {
+                '0' => '\0',
+                'n' => '\n',
+                'r' => '\r',
+                't' => '\t',
+                other => other,
+            });
+            last_was_control = false;
+            continue;
+        }
+        if c == '\\' {
+            last_was_control = true;
+            continue;
+        }
+        if c == *opening_char {
+            if !heredoc {
+                break;
+            }
+            // A heredoc only closes on three delimiters in a row; the first is already consumed.
+            if chars.peek() == Some(opening_char) && chars.peek() == Some(opening_char) {
+                chars.next();
+                chars.next();
+                break;
+            }
+            // Not a closing sequence: rewind the look-ahead and keep the delimiter as content.
+            chars.reset_peek();
+        }
+        s.push(c);
+    }
+    // Leave the peek cursor at the first unconsumed character for the caller.
+    chars.reset_peek();
+
+    LT::StringLiteral(s)
+}
+
+/// Splits `s` into a flat list of tokens.
+///
+/// Every whitespace character yields its own [`LexToken::WhiteSpace`] token, and the returned list
+/// always ends with [`LexToken::EndOfFile`].
+///
+/// # Panics
+///
+/// Panics on a character that does not start any known token, and on a string literal that is
+/// not closed before the end of the input.
+pub fn lex(s: &str) -> Vec<LT> {
+    let mut tokens: Vec<LT> = Vec::new();
+    let mut chars = s.chars().multipeek();
+    loop {
+        // The helpers below look ahead with `peek`; always restart from the first unconsumed
+        // character so a leftover look-ahead can never make us skip input.
+        chars.reset_peek();
+        let c = match chars.peek().cloned() {
+            Some(c) => c,
+            None => {
+                tokens.push(LT::EndOfFile);
+                break;
+            }
+        };
+
+        let token = match c {
+            ' ' | '\t' | '\r' | '\n' => {
+                chars.next();
+                LT::WhiteSpace
+            }
+            '=' => lex_eq_or(&mut chars, LT::EqualsEquals, LT::Equals),
+            '+' => lex_eq_rep_or(&mut chars, '+', LT::PlusEquals, LT::PlusPlus, LT::Plus),
+            '-' => lex_eq_rep_or(&mut chars, '-', LT::MinusEquals, LT::MinusMinus, LT::Minus),
+            '*' => {
+                if chars.peek() == Some(&'*') {
+                    // Drop the first `*`; the helper consumes the second one.
+                    chars.next();
+                    lex_eq_or(&mut chars, LT::StarStarEquals, LT::StarStar)
+                } else {
+                    lex_eq_or(&mut chars, LT::StarEquals, LT::Star)
+                }
+            }
+            '/' => lex_eq_or(&mut chars, LT::SlashEquals, LT::Slash),
+            '%' => lex_eq_or(&mut chars, LT::PercentEquals, LT::Percent),
+            '|' => lex_eq_rep_or(
+                &mut chars,
+                '|',
+                LT::LineEquals,
+                LT::LineLine,
+                LT::VerticalLine,
+            ),
+            '&' => lex_eq_rep_or(
+                &mut chars,
+                '&',
+                LT::AmpersandEquals,
+                LT::AmpersandAmpersand,
+                LT::Ampersand,
+            ),
+            '^' => lex_eq_rep_or(&mut chars, '^', LT::RoofEquals, LT::RoofRoof, LT::Roof),
+            '<' => {
+                if chars.peek() == Some(&'<') {
+                    chars.next();
+                    lex_eq_or(&mut chars, LT::LeftLeftEquals, LT::LeftLeft)
+                } else {
+                    lex_eq_or(&mut chars, LT::LessThanEquals, LT::LessThan)
+                }
+            }
+            '>' => {
+                if chars.peek() == Some(&'>') {
+                    if chars.peek() == Some(&'>') {
+                        // `>>>`: drop the first two, the helper consumes the third.
+                        chars.next();
+                        chars.next();
+                        lex_eq_or(&mut chars, LT::RightRightRightEquals, LT::RightRightRight)
+                    } else {
+                        chars.next();
+                        lex_eq_or(&mut chars, LT::RightRightEquals, LT::RightRight)
+                    }
+                } else {
+                    lex_eq_or(&mut chars, LT::GreaterThanEquals, LT::GreaterThan)
+                }
+            }
+            '!' => {
+                let second = chars.peek().cloned();
+                if second == Some('=') {
+                    chars.next();
+                    chars.next();
+                    LT::NotEquals
+                } else if second == Some('i')
+                    && chars.peek() == Some(&'s')
+                    // `!is` must end at a word boundary, otherwise `!isFoo` would be split
+                    // into `!is` and `Foo` instead of `!` and the identifier `isFoo`.
+                    && !chars
+                        .peek()
+                        .map_or(false, |n| n.is_ascii_alphanumeric() || *n == '_')
+                {
+                    chars.next();
+                    chars.next();
+                    chars.next();
+                    LT::NotIsKeyword
+                } else {
+                    chars.next();
+                    LT::ExclamationMark
+                }
+            }
+
+            '~' => lex_and_consume(&mut chars, LT::Tilde),
+            '@' => lex_and_consume(&mut chars, LT::AtSymbol),
+            ';' => lex_and_consume(&mut chars, LT::Semicolon),
+            ':' => lex_and_consume(&mut chars, LT::Colon),
+
+            '(' => lex_and_consume(&mut chars, LT::OpenBracket),
+            ')' => lex_and_consume(&mut chars, LT::CloseBracket),
+            '{' => lex_and_consume(&mut chars, LT::OpenCurlyBracket),
+            '}' => lex_and_consume(&mut chars, LT::CloseCurlyBracket),
+            '[' => lex_and_consume(&mut chars, LT::OpenBlockBracket),
+            ']' => lex_and_consume(&mut chars, LT::CloseBlockBracket),
+
+            // Inclusive ranges: an exclusive `'0'..'9'` would leave out `9` (and likewise `z`
+            // and `Z`), sending those characters to the catch-all arm below.
+            '0'..='9' => lex_numeric(&mut chars),
+            'a'..='z' | 'A'..='Z' | '_' => lex_keyword_or_identifier(&mut chars),
+            '\'' => lex_string(&mut chars, &'\'', false),
+            // Three double quotes in a row open a heredoc string.
+            '"' if chars.peek() == Some(&'\"') && chars.peek() == Some(&'\"') => {
+                lex_string(&mut chars, &'"', true)
+            }
+            '"' => lex_string(&mut chars, &'"', false),
+
+            // TODO: Definitely not unreachable. Log a proper error here.
+            _ => unreachable!(),
+        };
+        tokens.push(token);
+    }
+    tokens
+}
--- a/src/parsing/lexer_tests.rs
+++ b/src/parsing/lexer_tests.rs
@@ -0,0 +1,241 @@
+use super::lex_tokens::LexToken;
+use super::lexer::lex;
+
+/// Declares test `$name`: `$input` must lex to exactly `$token` followed by `EndOfFile`.
+macro_rules! lex_token_test {
+    ($name:ident, $input:expr, $token:expr) => {
+        #[test]
+        fn $name() {
+            let (lexed, expected) = (lex($input), $token);
+            assert_eq!(lexed.len(), 2);
+            assert_eq!((&lexed[0], &lexed[1]), (&expected, &LexToken::EndOfFile));
+        }
+    };
+}
+
+/// Declares test `$name`: `$source` must lex to one `Identifier` holding the same text.
+macro_rules! lex_identifier_test {
+    ($name:ident, $source:expr) => {
+        #[test]
+        fn $name() {
+            let (lexed, expected) = (lex($source), LexToken::Identifier($source.to_string()));
+            assert_eq!(lexed.len(), 2);
+            assert_eq!((&lexed[0], &lexed[1]), (&expected, &LexToken::EndOfFile));
+        }
+    };
+}
+
+/// Declares test `$name`: `$input` must lex to a single `IntegerLiteral($value)` token.
+macro_rules! lex_integer_test {
+    ($name:ident, $input:expr, $value:expr) => {
+        #[test]
+        fn $name() {
+            let (lexed, expected) = (lex($input), LexToken::IntegerLiteral($value));
+            assert_eq!(lexed.len(), 2);
+            assert_eq!((&lexed[0], &lexed[1]), (&expected, &LexToken::EndOfFile));
+        }
+    };
+}
+
+/// Declares test `$name`: `$input` must lex to a single `FloatLiteral($value)` token.
+macro_rules! lex_float_test {
+    ($name:ident, $input:expr, $value:expr) => {
+        #[test]
+        fn $name() {
+            let (lexed, expected) = (lex($input), LexToken::FloatLiteral($value));
+            assert_eq!(lexed.len(), 2);
+            assert_eq!((&lexed[0], &lexed[1]), (&expected, &LexToken::EndOfFile));
+        }
+    };
+}
+
+/// Declares test `$name`: `$input` must lex to one `StringLiteral` whose text is `$text`.
+macro_rules! lex_string_test {
+    ($name:ident, $input:expr, $text:expr) => {
+        #[test]
+        fn $name() {
+            let (lexed, expected) = (lex($input), LexToken::StringLiteral($text.to_string()));
+            assert_eq!(lexed.len(), 2);
+            assert_eq!((&lexed[0], &lexed[1]), (&expected, &LexToken::EndOfFile));
+        }
+    };
+}
+// Whitespace: space, tab, carriage return and newline each lex to one WhiteSpace token.
+lex_token_test!(lex_space, " ", LexToken::WhiteSpace);
+lex_token_test!(lex_tab, "\t", LexToken::WhiteSpace);
+lex_token_test!(lex_return_line, "\r", LexToken::WhiteSpace);
+lex_token_test!(lex_newline, "\n", LexToken::WhiteSpace);
+// Assignment and arithmetic operators, with their doubled and `=`-suffixed forms.
+lex_token_test!(lex_equals, "=", LexToken::Equals);
+lex_token_test!(lex_equals_equals, "==", LexToken::EqualsEquals);
+lex_token_test!(lex_plus, "+", LexToken::Plus);
+lex_token_test!(lex_plus_plus, "++", LexToken::PlusPlus);
+lex_token_test!(lex_plus_equals, "+=", LexToken::PlusEquals);
+lex_token_test!(lex_minus, "-", LexToken::Minus);
+lex_token_test!(lex_minus_minus, "--", LexToken::MinusMinus);
+lex_token_test!(lex_minus_equals, "-=", LexToken::MinusEquals);
+lex_token_test!(lex_star, "*", LexToken::Star);
+lex_token_test!(lex_star_equals, "*=", LexToken::StarEquals);
+lex_token_test!(lex_star_star, "**", LexToken::StarStar);
+lex_token_test!(lex_star_star_equals, "**=", LexToken::StarStarEquals);
+lex_token_test!(lex_slash, "/", LexToken::Slash);
+lex_token_test!(lex_slash_equals, "/=", LexToken::SlashEquals);
+lex_token_test!(lex_percent, "%", LexToken::Percent);
+lex_token_test!(lex_percent_equals, "%=", LexToken::PercentEquals);
+// Tokens starting with `!`, including the `!is` keyword form.
+lex_token_test!(lex_exclamation_mark, "!", LexToken::ExclamationMark);
+lex_token_test!(lex_not_equals, "!=", LexToken::NotEquals);
+lex_token_test!(lex_not_is_keyword, "!is", LexToken::NotIsKeyword);
+// Tokens starting with `|`.
+lex_token_test!(lex_vert_line, "|", LexToken::VerticalLine);
+lex_token_test!(lex_vert_line_equals, "|=", LexToken::LineEquals);
+lex_token_test!(lex_line_line, "||", LexToken::LineLine);
+// Tokens starting with `&`, then tokens starting with `<`.
+lex_token_test!(lex_ampersand, "&", LexToken::Ampersand);
+lex_token_test!(lex_ampersand_equals, "&=", LexToken::AmpersandEquals);
+lex_token_test!(lex_ampersand_ampersand, "&&", LexToken::AmpersandAmpersand);
+lex_token_test!(lex_less_than, "<", LexToken::LessThan);
+lex_token_test!(lex_less_than_equals, "<=", LexToken::LessThanEquals);
+lex_token_test!(lex_left_left, "<<", LexToken::LeftLeft);
+lex_token_test!(lex_left_left_equals, "<<=", LexToken::LeftLeftEquals);
+// Tokens starting with `>`; each input, up to `>>>=`, must come back as a single token.
+lex_token_test!(lex_greater_than, ">", LexToken::GreaterThan);
+lex_token_test!(lex_greater_than_equals, ">=", LexToken::GreaterThanEquals);
+lex_token_test!(lex_right_right, ">>", LexToken::RightRight);
+lex_token_test!(lex_right_right_equals, ">>=", LexToken::RightRightEquals);
+lex_token_test!(lex_right_right_right, ">>>", LexToken::RightRightRight);
+lex_token_test!(
+    lex_right_right_right_equals,
+    ">>>=",
+    LexToken::RightRightRightEquals
+);
+// Single-character symbols.
+lex_token_test!(lex_tilde, "~", LexToken::Tilde);
+lex_token_test!(lex_at_symbol, "@", LexToken::AtSymbol);
+// Reserved words: each one must lex to its own dedicated keyword token.
+lex_token_test!(lex_and_keyword, "and", LexToken::AndKeyword);
+lex_token_test!(lex_abstract_keyword, "abstract", LexToken::AbstractKeyword);
+lex_token_test!(lex_auto_keyword, "auto", LexToken::AutoKeyword);
+lex_token_test!(lex_bool_keyword, "bool", LexToken::BoolKeyword);
+lex_token_test!(lex_break_keyword, "break", LexToken::BreakKeyword);
+lex_token_test!(lex_case_keyword, "case", LexToken::CaseKeyword);
+lex_token_test!(lex_cast_keyword, "cast", LexToken::CastKeyword);
+lex_token_test!(lex_catch_keyword, "catch", LexToken::CatchKeyword);
+lex_token_test!(lex_class_keyword, "class", LexToken::ClassKeyword);
+lex_token_test!(lex_const_keyword, "const", LexToken::ConstKeyword);
+lex_token_test!(lex_continue_keyword, "continue", LexToken::ContinueKeyword);
+lex_token_test!(lex_default_keyword, "default", LexToken::DefaultKeyword);
+lex_token_test!(lex_do_keyword, "do", LexToken::DoKeyword);
+lex_token_test!(lex_double_keyword, "double", LexToken::DoubleKeyword);
+lex_token_test!(lex_else_keyword, "else", LexToken::ElseKeyword);
+lex_token_test!(lex_enum_keyword, "enum", LexToken::EnumKeyword);
+lex_token_test!(lex_explicit_keyword, "explicit", LexToken::ExplicitKeyword);
+lex_token_test!(lex_external_keyword, "external", LexToken::ExternalKeyword);
+lex_token_test!(lex_false_keyword, "false", LexToken::FalseKeyword);
+lex_token_test!(lex_final_keyword, "final", LexToken::FinalKeyword);
+lex_token_test!(lex_float_keyword, "float", LexToken::FloatKeyword);
+lex_token_test!(lex_for_keyword, "for", LexToken::ForKeyword);
+lex_token_test!(lex_from_keyword, "from", LexToken::FromKeyword);
+lex_token_test!(lex_funcdef_keyword, "funcdef", LexToken::FuncDefKeyword);
+lex_token_test!(lex_function_keyword, "function", LexToken::FunctionKeyword);
+lex_token_test!(lex_get_keyword, "get", LexToken::GetKeyword);
+lex_token_test!(lex_if_keyword, "if", LexToken::IfKeyword);
+lex_token_test!(lex_import_keyword, "import", LexToken::ImportKeyword);
+lex_token_test!(lex_in_keyword, "in", LexToken::InKeyword);
+lex_token_test!(lex_inout_keyword, "inout", LexToken::InOutKeyword);
+lex_token_test!(lex_int_keyword, "int", LexToken::IntKeyword);
+lex_token_test!(
+    lex_interface_keyword,
+    "interface",
+    LexToken::InterfaceKeyword
+);
+lex_token_test!(lex_int8_keyword, "int8", LexToken::Int8Keyword);
+lex_token_test!(lex_int16_keyword, "int16", LexToken::Int16Keyword);
+lex_token_test!(lex_int32_keyword, "int32", LexToken::Int32Keyword);
+lex_token_test!(lex_int64_keyword, "int64", LexToken::Int64Keyword);
+lex_token_test!(lex_is_keyword, "is", LexToken::IsKeyword);
+lex_token_test!(lex_mixin_keyword, "mixin", LexToken::MixinKeyword);
+lex_token_test!(
+    lex_namespace_keyword,
+    "namespace",
+    LexToken::NamespaceKeyword
+);
+lex_token_test!(lex_not_keyword, "not", LexToken::NotKeyword);
+lex_token_test!(lex_null_keyword, "null", LexToken::NullKeyword);
+lex_token_test!(lex_or_keyword, "or", LexToken::OrKeyword);
+lex_token_test!(lex_out_keyword, "out", LexToken::OutKeyword);
+lex_token_test!(lex_override_keyword, "override", LexToken::OverrideKeyword);
+lex_token_test!(lex_private_keyword, "private", LexToken::PrivateKeyword);
+lex_token_test!(lex_property_keyword, "property", LexToken::PropertyKeyword);
+lex_token_test!(
+    lex_protected_keyword,
+    "protected",
+    LexToken::ProtectedKeyword
+);
+
+lex_token_test!(lex_return_keyword, "return", LexToken::ReturnKeyword);
+lex_token_test!(lex_set_keyword, "set", LexToken::SetKeyword);
+lex_token_test!(lex_shared_keyword, "shared", LexToken::SharedKeyword);
+lex_token_test!(lex_super_keyword, "super", LexToken::SuperKeyword);
+lex_token_test!(lex_switch_keyword, "switch", LexToken::SwitchKeyword);
+lex_token_test!(lex_this_keyword, "this", LexToken::ThisKeyword);
+lex_token_test!(lex_true_keyword, "true", LexToken::TrueKeyword);
+lex_token_test!(lex_try_keyword, "try", LexToken::TryKeyword);
+lex_token_test!(lex_typedef_keyword, "typedef", LexToken::TypeDefKeyword);
+lex_token_test!(lex_uint_keyword, "uint", LexToken::UintKeyword);
+lex_token_test!(lex_uint8_keyword, "uint8", LexToken::Uint8Keyword);
+lex_token_test!(lex_uint16_keyword, "uint16", LexToken::Uint16Keyword);
+lex_token_test!(lex_uint32_keyword, "uint32", LexToken::Uint32Keyword);
+
+lex_token_test!(lex_void_keyword, "void", LexToken::VoidKeyword);
+lex_token_test!(lex_while_keyword, "while", LexToken::WhileKeyword);
+lex_token_test!(lex_xor_keyword, "xor", LexToken::XorKeyword);
+// Words that are not keywords lex to Identifier tokens carrying the source text.
+lex_identifier_test!(lex_basic_identifier_foo, "foo");
+lex_identifier_test!(lex_basic_identifier_foobar, "foobar");
+// Integer literals: plain decimal, 0d/0x/0o/0b prefixes, and `_` digit separators.
+lex_integer_test!(lex_zero, "0", 0);
+lex_integer_test!(lex_one_two_three_four, "1234", 1234);
+lex_integer_test!(lex_specific_one_two_three_four, "0d1234", 1234);
+lex_integer_test!(lex_decimal_with_underline, "123_456", 123456);
+lex_integer_test!(lex_specific_decimal_with_underline, "0D123_456", 123456);
+lex_integer_test!(lex_hexadecimal_0f, "0X0F", 15);
+lex_integer_test!(lex_hexadecimal_ff, "0xff", 255);
+lex_integer_test!(lex_hexadecimal_ff_ff, "0xff_ff", 65535);
+lex_integer_test!(lex_octal_112, "0o112", 74);
+lex_integer_test!(lex_binary_1110, "0b1110", 14);
+lex_integer_test!(lex_binary_01110, "0b01110", 14);
+// Float literals, including one with an exponent suffix.
+lex_float_test!(lex_zero_float, "0.0", 0.0);
+lex_float_test!(lex_half, "0.5", 0.5);
+lex_float_test!(lex_point_0_5, "0.05", 0.05);
+lex_float_test!(lex_half_with_exponent, "0.5e10", 0.5e10);
+// String literals: double- and single-quoted, backslash escapes, and triple-quoted heredocs.
+lex_string_test!(lex_simple_string, "\"foo\"", "foo");
+lex_string_test!(lex_simple_string_single_quote, "\'foo\'", "foo");
+lex_string_test!(lex_string_with_escape, "\"fo\\\"o\"", "fo\"o");
+lex_string_test!(lex_string_with_new_line, "\"fo\\no\"", "fo\no");
+lex_string_test!(lex_heredoc_string, "\"\"\"foo\"\"\"", "foo");
+lex_string_test!(lex_heredoc_string_with_quote, "\"\"\"fo\"o\"\"\"", "fo\"o");
+
+/// Two identifiers separated by a space keep the `WhiteSpace` token between them.
+#[test]
+fn lex_two_identifier() {
+    let lexed = lex("foo bar");
+    let ident = |name: &str| LexToken::Identifier(name.to_string());
+    let expected = [ident("foo"), LexToken::WhiteSpace, ident("bar"), LexToken::EndOfFile];
+    assert_eq!(lexed.len(), expected.len());
+    expected.iter().enumerate().for_each(|(i, want)| assert_eq!(&lexed[i], want));
+}
+
+/// `!is` between two operands lexes as a single `NotIsKeyword` token.
+#[test]
+fn lex_multiple_tokens_with_not_is() {
+    use LexToken::{EndOfFile, NotIsKeyword, WhiteSpace};
+    let lexed = lex("a !is b");
+    let ident = |name: &str| LexToken::Identifier(name.to_string());
+    // The whitespace on either side of `!is` is expected as its own token.
+    let expected = [ident("a"), WhiteSpace, NotIsKeyword, WhiteSpace, ident("b"), EndOfFile];
+    assert_eq!(lexed.len(), expected.len());
+    expected.iter().enumerate().for_each(|(i, want)| assert_eq!(&lexed[i], want));
+}
--- a/src/parsing/mod.rs
+++ b/src/parsing/mod.rs
@@ -0,0 +1,6 @@
+mod lex_numerical;
+pub mod lex_tokens;
+pub mod lexer;
+
+#[cfg(test)]
+mod lexer_tests;