From 2db53ce0db23c90c0eb4edc1617495c1d7d77fd3 Mon Sep 17 00:00:00 2001
From: 0x4261756D <38735823+0x4261756D@users.noreply.github.com>
Date: Wed, 7 Jun 2023 02:53:15 +0200
Subject: [PATCH] Split tokenizer into its own file

---
 src/main.rs      | 1190 +---------------------------------------------
 src/tokenizer.rs | 1186 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1190 insertions(+), 1186 deletions(-)
 create mode 100644 src/tokenizer.rs

diff --git a/src/main.rs b/src/main.rs
index c6623d5..b23daca 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,5 +1,9 @@
+pub mod tokenizer;
+
 use std::{env, fs};
 
+use crate::tokenizer::{Token, tokenize};
+
 fn main()
 {
 	let args: Vec<String> = env::args().collect();
@@ -21,1189 +25,3 @@ fn compile(file_content: &String) -> Result<(), &'static str>
 	println!("{:?}", tokens);
 	return Ok(());
 }
-#[derive(Debug, Clone)]
-enum Token
-{
-	Name(String),
-	And, Break, Do, Else, Elseif, End,
-	False, For, Function, Goto, If, In,
-	Local, Nil, Not, Or, Repeat, Return,
-	Then, True, Until, While,
-	Plus, Minus, Star, Slash, Percent, Caret, Hash,
-	Ampersand, Tilde, Pipe, LtLt, GtGt, SlashSlash,
-	EqualsEquals, TildeEquals, LtEquals, GtEquals, Lt, Gt, Equals,
-	RoundOpen, RoundClosed, CurlyOpen, CurlyClosed, SquareOpen, SquareClosed, ColonColon,
-	Semicolon, Colon, Comma, Dot, DotDot, DotDotDot,
-	IntLiteral(String),
-	HexLiteral(String),
-	StringLiteral(String),
-}
-
-#[derive(Debug, Clone, Copy, PartialEq)]
-enum TokenizerState
-{
-	Start,
-	Quote, SingleQuote, Name, Number, Zero,
-	A, B, D, E, F, G, I, L, N, O, R, T, U, W,
-	Plus, Minus, Star, Slash, Percent, Caret, Hash,
-	Ampersand, Tilde, Pipe, Lt, Gt, Equals, RoundOpen, RoundClosed, CurlyOpen, CurlyClosed, SquareOpen, SquareClosed,
-	Colon, Semicolon, Comma, Dot,
-
-	An, Br, Do, El, En, Fa, Fo, Fu, Go, If, In, Lo, Ni, No, Or, Re, Th, Tr, Un, Wh,
-	LtLt, GtGt, SlashSlash, EqualsEquals, TildeEquals, LtEquals, GtEquals, ColonColon, DotDot,
-	SmallCommentStart, QuoteBackslash, SingleQuoteBackslash, String, HexNumberX, ExpNumber,
-
-	And, Bre, Els, End, Fal, For, Fun, Got, Loc, Nil, Not, Rep, Ret, The, Tru, Unt, Whi,
-	DotDotDot, HexNumber, QuoteBackslashZ, SingleQuoteBackslashZ,
-	BigCommentLongBracketStart, SmallComment,
-
-	Brea, Else, Fals, Func, Goto, Loca, Repe, Retu, Then, True, Unti, Whil, HexExpNumber,
-	BigComment, BigCommentLongBracketEnd,
-
-	Break, Elsei, False, Funct, Local, Repea, Retur, Until, While,
-
-	Elseif, Functi, Repeat, Return,
-
-	Functio,
-
-	Function,
-}
-
-fn tokenize_update_index_and_state(last_index: &mut i32, index: usize, state: &mut TokenizerState, new_state: TokenizerState)
-{
-	*last_index = index as i32;
-	*state = new_state;
-}
-fn tokenize_terminal_no_str(last_index: &mut i32, index: usize, token: &mut Option<Token>, state: &mut TokenizerState, new_token: Option<Token>, new_state: TokenizerState)
-{
-	tokenize_update_index_and_state(last_index, index, state, new_state);
-	*token = new_token;
-}
-fn tokenize_terminal_no_token(last_index: &mut i32, index: usize, state: &mut TokenizerState, new_state: TokenizerState, token_str: &mut String, ch: char)
-{
-	tokenize_update_index_and_state(last_index, index, state, new_state);
-	token_str.push(ch);
-}
-fn tokenize_terminal(last_index: &mut i32, index: usize, token: &mut Option<Token>, state: &mut TokenizerState, new_token: Option<Token>, new_state: TokenizerState, token_str: &mut String, ch: char)
-{
-	tokenize_terminal_no_str(last_index, index, token, state, new_token, new_state);
-	token_str.push(ch);
-}
-fn tokenize_backtrack(last_index: &mut i32, index: &mut usize, tokens: &mut Vec<Token>, token: &mut Option<Token>, token_str: &mut String, state: &mut TokenizerState) -> Result<(), &'static str>
-{
-	return tokenize_backtrack_custom_token(last_index, index, tokens, token, token_str, state, token.clone().unwrap());
-}
-fn tokenize_backtrack_name(last_index: &mut i32, index: &mut usize, tokens: &mut Vec<Token>, token: &mut Option<Token>, token_str: &mut String, state: &mut TokenizerState) -> Result<(), &'static str>
-{
-	if *last_index == -1 || token.is_none()
-	{
-		println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
-		return Err("Lexerr");
-	}
-	*index = *last_index as usize;
-	*last_index = -1;
-	tokens.push(Token::Name(token_str.clone()));
-	*token = None;
-	token_str.clear();
-	*state = TokenizerState::Start;
-	return Ok(());
-}
-fn tokenize_backtrack_custom_token(last_index: &mut i32, index: &mut usize, tokens: &mut Vec<Token>, token: &mut Option<Token>, token_str: &mut String, state: &mut TokenizerState, new_token: Token) -> Result<(), &'static str>
-{
-	if *last_index == -1 || token.is_none()
-	{
-		println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
-		return Err("Lexerr");
-	}
-	*index = *last_index as usize;
-	*last_index = -1;
-	tokens.push(new_token);
-	*token = None;
-	token_str.clear();
-	*state = TokenizerState::Start;
-	return Ok(());
-}
-fn tokenize_alphanumeric_nonstart(last_index: &mut i32, index: &mut usize, tokens: &mut Vec<Token>, token: &mut Option<Token>, token_str: &mut String, state: &mut TokenizerState, ch: char) -> Result<(), &'static str>
-{
-	if ch.is_ascii_alphanumeric() || ch == '_'
-	{
-		tokenize_update_index_and_state(last_index, *index, state, TokenizerState::Name);
-		token_str.push(ch);
-	}
-	else
-	{
-		tokenize_backtrack_name(last_index, index, tokens, token, token_str, state)?;
-	}
-	return Ok(());
-}
-fn tokenize_alphanumeric_nonstart_custom(last_index: &mut i32, index: &mut usize, tokens: &mut Vec<Token>, token: &mut Option<Token>, token_str: &mut String, state: &mut TokenizerState, ch: char, new_token: Token) -> Result<(), &'static str>
-{
-	if ch.is_ascii_alphanumeric() || ch == '_'
-	{
-		tokenize_update_index_and_state(last_index, *index, state, TokenizerState::Name);
-		token_str.push(ch);
-	}
-	else
-	{
-		tokenize_backtrack_custom_token(last_index, index, tokens, token, token_str, state, new_token)?;
-	}
-	return Ok(());
-}
-fn tokenize_char(state: &mut TokenizerState, ch: char, last_index: &mut i32, index: &mut usize, token: &mut Option<Token>, token_str: &mut String, tokens: &mut Vec<Token>, long_bracket_level: &mut u32) -> Result<(), &'static str>
-{
-	match state
-	{
-		TokenizerState::Start =>
-		{
-			match ch
-			{
-				'-' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Minus), TokenizerState::Minus),
-				'a' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("a".to_string())), TokenizerState::A, token_str, ch),
-				'b' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("b".to_string())), TokenizerState::B, token_str, ch),
-				'd' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("d".to_string())), TokenizerState::D, token_str, ch),
-				'e' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("e".to_string())), TokenizerState::E, token_str, ch),
-				'f' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("f".to_string())), TokenizerState::F, token_str, ch),
-				'i' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("i".to_string())), TokenizerState::I, token_str, ch),
-				'g' => tokenize_terminal(last_index, *index, token, 
state, Some(Token::Name("g".to_string())), TokenizerState::G, token_str, ch), - 'l' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("l".to_string())), TokenizerState::L, token_str, ch), - 'n' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("n".to_string())), TokenizerState::N, token_str, ch), - 'o' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("o".to_string())), TokenizerState::O, token_str, ch), - 'r' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("r".to_string())), TokenizerState::R, token_str, ch), - 't' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("t".to_string())), TokenizerState::T, token_str, ch), - 'u' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("u".to_string())), TokenizerState::U, token_str, ch), - 'w' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("w".to_string())), TokenizerState::W, token_str, ch), - ',' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Comma), TokenizerState::Comma), - '=' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Equals), TokenizerState::Equals), - '(' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::RoundOpen), TokenizerState::RoundOpen), - ')' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::RoundClosed), TokenizerState::RoundClosed), - '.' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Dot), TokenizerState::Dot), - ':' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Colon), TokenizerState::Colon), - '{' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::CurlyOpen), TokenizerState::CurlyOpen), - '}' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::CurlyClosed), TokenizerState::CurlyClosed), - '[' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::SquareOpen), TokenizerState::SquareOpen), - ']' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::SquareClosed), TokenizerState::SquareClosed), - '+' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Plus), TokenizerState::Plus), - '~' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Tilde), TokenizerState::Tilde), - '>' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Gt), TokenizerState::Gt), - '<' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Lt), TokenizerState::Lt), - '#' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Hash), TokenizerState::Hash), - '|' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Pipe), TokenizerState::Pipe), - '&' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Ampersand), TokenizerState::Ampersand), - '%' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Percent), TokenizerState::Percent), - '*' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Star), TokenizerState::Star), - '/' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Slash), TokenizerState::Slash), - ';' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Semicolon), TokenizerState::Semicolon), - '^' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Caret), TokenizerState::Caret), - '0' => 
tokenize_terminal(last_index, *index, token, state, Some(Token::IntLiteral("0".to_string())), TokenizerState::Zero, token_str, ch), - '"' => - { - *token = None; - *state = TokenizerState::Quote; - } - '\'' => - { - *token = None; - *state = TokenizerState::SingleQuote; - } - _ => - { - if ch.is_whitespace() { } - else if ch.is_ascii_alphabetic() || ch == '_' - { - tokenize_terminal(last_index, *index, token, state, Some(Token::Name(token_str.clone())), TokenizerState::Name, token_str, ch); - } - else if ch.is_numeric() && ch.is_ascii() - { - tokenize_terminal(last_index, *index, token, state, Some(Token::IntLiteral(token_str.clone())), TokenizerState::Number, token_str, ch); - } - else - { - todo!("State {:?}, Char {}", state, ch); - } - } - } - } - TokenizerState::Quote => - { - match ch - { - '\\' => - { - *state = TokenizerState::QuoteBackslash; - } - '"' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::StringLiteral(token_str.clone())), TokenizerState::String), - _ => - { - token_str.push(ch); - } - } - } - TokenizerState::QuoteBackslash => - { - match ch - { - 'a' => - { - token_str.push('\u{0007}'); - *state = TokenizerState::Quote; - } - 'b' => - { - token_str.push('\u{0008}'); - *state = TokenizerState::Quote; - } - 't' => - { - token_str.push('\t'); - *state = TokenizerState::Quote; - } - 'n' | '\n' => - { - token_str.push('\n'); - *state = TokenizerState::Quote; - } - 'v' => - { - token_str.push('\u{000b}'); - *state = TokenizerState::Quote; - } - 'f' => - { - token_str.push('\u{000c}'); - *state = TokenizerState::Quote; - } - 'r' => - { - token_str.push('\r'); - *state = TokenizerState::Quote; - } - '\\' => - { - token_str.push('\\'); - *state = TokenizerState::Quote; - } - '"' => - { - token_str.push('\"'); - *state = TokenizerState::Quote; - } - '\'' => - { - token_str.push('\''); - *state = TokenizerState::Quote; - } - 'z' => - { - *state = TokenizerState::QuoteBackslashZ; - } - _ => return Err("Unknown escape sequence"), - } - } - TokenizerState::QuoteBackslashZ => - { - match ch - { - '\\' => - { - *state = TokenizerState::QuoteBackslash; - } - '"' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::StringLiteral(token_str.clone())), TokenizerState::String), - _ => - { - if !ch.is_whitespace() - { - token_str.push(ch); - *state = TokenizerState::Quote; - } - } - } - } - TokenizerState::SingleQuote => - { - match ch - { - '\\' => - { - *state = TokenizerState::SingleQuoteBackslash; - } - '\'' => - { - *last_index = *index as i32; - *token = Some(Token::StringLiteral(token_str.clone())); - *state = TokenizerState::String; - } - _ => - { - token_str.push(ch); - } - } - } - TokenizerState::SingleQuoteBackslash => - { - match ch - { - 'a' => - { - token_str.push('\u{0007}'); - *state = TokenizerState::SingleQuote; - } - 'b' => - { - token_str.push('\u{0008}'); - *state = TokenizerState::SingleQuote; - } - 't' => - { - token_str.push('\t'); - *state = TokenizerState::SingleQuote; - } - 'n' | '\n' => - { - token_str.push('\n'); - *state = TokenizerState::SingleQuote; - } - 'v' => - { - token_str.push('\u{000b}'); - *state = TokenizerState::SingleQuote; - } - 'f' => - { - token_str.push('\u{000c}'); - *state = TokenizerState::SingleQuote; - } - 'r' => - { - token_str.push('\r'); - *state = TokenizerState::SingleQuote; - } - '\\' => - { - token_str.push('\\'); - *state = TokenizerState::SingleQuote; - } - '"' => - { - token_str.push('\"'); - *state = TokenizerState::SingleQuote; - } - '\'' => - { - token_str.push('\''); - *state = 
TokenizerState::SingleQuote; - } - 'z' => - { - *state = TokenizerState::SingleQuoteBackslashZ; - } - _ => return Err("Unknown escape sequence"), - } - } - TokenizerState::SingleQuoteBackslashZ => - { - match ch - { - '\\' => - { - *state = TokenizerState::SingleQuoteBackslash; - } - '\'' => - { - *last_index = *index as i32; - *token = Some(Token::StringLiteral(token_str.clone())); - *state = TokenizerState::String; - } - _ => - { - if !ch.is_whitespace() - { - token_str.push(ch); - *state = TokenizerState::SingleQuote; - } - } - } - } - TokenizerState::String => - { - let content = token_str.clone(); - tokenize_backtrack_custom_token(last_index, index, tokens, token, token_str, state, Token::StringLiteral(content))?; - } - TokenizerState::Name => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - TokenizerState::Zero => - { - match ch - { - 'x' => - { - token_str.push(ch); - *token = None; - *state = TokenizerState::HexNumberX; - } - _ => - { - if ch.is_numeric() && ch.is_ascii() - { - *last_index = *index as i32; - token_str.push(ch); - *token = Some(Token::IntLiteral(token_str.clone())); - } - else - { - tokenize_backtrack(last_index, index, tokens, token, token_str, state)?; - } - } - } - } - TokenizerState::HexNumberX => - { - if ch.is_ascii() && ch.is_numeric() || match ch - { - 'A'..='F' | 'a'..='f' => true, - _ => false, - } - { - *last_index = *index as i32; - token_str.push(ch); - *token = Some(Token::HexLiteral(token_str.clone())); - *state = TokenizerState::HexNumber; - } - else - { - tokenize_backtrack(last_index, index, tokens, token, token_str, state)?; - } - } - TokenizerState::HexNumber => - { - match ch - { - 'p' => - { - token_str.push(ch); - *token = None; - *state = TokenizerState::HexExpNumber; - } - _ => - { - if ch.is_ascii() && ch.is_numeric() || match ch - { - 'A'..='F' | 'a'..='f' => true, - _ => false, - } - { - *last_index = *index as i32; - token_str.push(ch); - *token = Some(Token::HexLiteral(token_str.clone())); - } - else - { - tokenize_backtrack(last_index, index, tokens, token, token_str, state)?; - } - } - } - } - TokenizerState::Number => - { - match ch - { - 'e' => - { - token_str.push(ch); - *token = None; - *state = TokenizerState::ExpNumber; - } - _ => - { - if ch.is_numeric() && ch.is_ascii() - { - *last_index = *index as i32; - token_str.push(ch); - *token = Some(Token::IntLiteral(token_str.clone())); - } - else - { - tokenize_backtrack(last_index, index, tokens, token, token_str, state)?; - } - } - } - } - TokenizerState::Comma | TokenizerState::RoundOpen | TokenizerState::RoundClosed | - TokenizerState::CurlyOpen | TokenizerState::CurlyClosed | TokenizerState::Plus | - TokenizerState::TildeEquals | TokenizerState::EqualsEquals | TokenizerState::Hash | - TokenizerState::GtEquals | TokenizerState::LtEquals | TokenizerState::SquareOpen | - TokenizerState::SquareClosed | TokenizerState::Pipe | TokenizerState::Ampersand | - TokenizerState::Percent | TokenizerState::Star | TokenizerState::Semicolon | - TokenizerState::Caret | TokenizerState::DotDotDot | TokenizerState::GtGt | - TokenizerState::LtLt | TokenizerState::SlashSlash => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?, - TokenizerState::Tilde => - { - match ch - { - '=' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::TildeEquals), TokenizerState::TildeEquals), - _ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?, - } - } - TokenizerState::Gt => - { - match ch - { - '>' => 
tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::GtGt), TokenizerState::GtGt), - '=' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::GtEquals), TokenizerState::GtEquals), - _ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?, - } - } - TokenizerState::Lt => - { - match ch - { - '>' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::LtLt), TokenizerState::LtLt), - '=' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::LtEquals), TokenizerState::LtEquals), - _ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?, - } - } - TokenizerState::Slash => - { - match ch - { - '/' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::SlashSlash), TokenizerState::SlashSlash), - _ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?, - } - } - TokenizerState::Dot => - { - match ch - { - '.' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::DotDot), TokenizerState::DotDot), - _ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?, - } - } - TokenizerState::DotDot => - { - match ch - { - '.' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::DotDotDot), TokenizerState::DotDotDot), - _ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?, - } - } - TokenizerState::Colon => - { - match ch - { - ':' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::ColonColon), TokenizerState::ColonColon), - _ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?, - } - } - TokenizerState::Equals => - { - match ch - { - '=' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::EqualsEquals), TokenizerState::EqualsEquals), - _ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?, - } - } - TokenizerState::Minus => - { - match ch - { - '-' => tokenize_terminal_no_str(last_index, *index, token, state, None, TokenizerState::SmallCommentStart), - _ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?, - } - } - TokenizerState::SmallCommentStart => - { - match ch - { - '[' => - { - *token = None; - *state = TokenizerState::BigCommentLongBracketStart; - } - '\n' => - { - *state = TokenizerState::Start; - *last_index = -1; - } - _ => - { - *state = TokenizerState::SmallComment; - } - } - } - TokenizerState::SmallComment => - { - match ch - { - '\n' => - { - *state = TokenizerState::Start; - *last_index = -1; - } - _ => { } - } - } - TokenizerState::BigCommentLongBracketStart => - { - match ch - { - '=' => - { - *long_bracket_level += 1; - } - '[' => - { - *state = TokenizerState::BigComment; - } - _ => return Err("Malformed long bracket at the beginning of a big comment"), - } - } - TokenizerState::BigComment => - { - match ch - { - ']' => - { - *state = TokenizerState::BigCommentLongBracketEnd; - } - _ => { } - } - } - TokenizerState::BigCommentLongBracketEnd => - { - match ch - { - '=' => - { - if *long_bracket_level == 0 - { - return Err("Long bracket level too big when ending big comment"); - } - *long_bracket_level -= 1; - } - ']' => - { - if *long_bracket_level != 0 - { - return Err("Long bracket level too small when ending big comment"); - } - *state = TokenizerState::Start; - } - _ => return Err("Malformed long bracket when ending big comment"), - } - } - TokenizerState::A => - { - match ch - { - 'n' => tokenize_terminal_no_token(last_index, 
*index, state, TokenizerState::An, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::An => - { - match ch - { - 'd' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::And, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::And => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::And)?, - TokenizerState::W => - { - match ch - { - 'h' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Wh, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Wh => - { - match ch - { - 'i' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Whi, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Whi => - { - match ch - { - 'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Whil, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Whil => - { - match ch - { - 'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::While, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::While => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::While)?, - TokenizerState::B => - { - match ch - { - 'r' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Br, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Br => - { - match ch - { - 'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Bre, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Bre => - { - match ch - { - 'a' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Brea, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Brea => - { - match ch - { - 'k' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Break, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Break => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Break)?, - TokenizerState::G => - { - match ch - { - 'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Go, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Go => - { - match ch - { - 't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Got, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Got => - { - match ch - { - 'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Goto, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - 
TokenizerState::Goto => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Goto)?, - TokenizerState::R => - { - match ch - { - 'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Re, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Re => - { - match ch - { - 't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Ret, token_str, ch), - 'p' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Rep, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Ret => - { - match ch - { - 'u' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Retu, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Retu => - { - match ch - { - 'r' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Retur, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Retur => - { - match ch - { - 'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Return, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Return => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Return)?, - TokenizerState::Rep => - { - match ch - { - 'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Repe, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Repe => - { - match ch - { - 'a' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Repea, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Repea => - { - match ch - { - 't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Repeat, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Repeat => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Repeat)?, - TokenizerState::N => - { - match ch - { - 'i' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Ni, token_str, ch), - 'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::No, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::No => - { - match ch - { - 't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Not, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Not => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Not)?, - TokenizerState::Ni => - { - match ch - { - 'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Nil, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Nil => tokenize_alphanumeric_nonstart_custom(last_index, 
index, tokens, token, token_str, state, ch, Token::Nil)?, - TokenizerState::T => - { - match ch - { - 'h' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Th, token_str, ch), - 'r' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Tr, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Th => - { - match ch - { - 'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::The, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::The => - { - match ch - { - 'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Then, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Then => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Then)?, - TokenizerState::Tr => - { - match ch - { - 'u' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Tru, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Tru => - { - match ch - { - 'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::True, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::True => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::True)?, - TokenizerState::E => - { - match ch - { - 'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::El, token_str, ch), - 'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::En, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::En => - { - match ch - { - 'd' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::End, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::End => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::End)?, - TokenizerState::El => - { - match ch - { - 's' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Els, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Els => - { - match ch - { - 'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Else, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Else => - { - match ch - { - 'i' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Elsei, token_str, ch), - _ => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Else)?, - } - } - TokenizerState::Elsei => - { - match ch - { - 'f' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Elseif, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Elseif => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Elseif)?, - 
TokenizerState::O => - { - match ch - { - 'r' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Or, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Or => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Or)?, - TokenizerState::D => - { - match ch - { - 'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Do, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Do => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Do)?, - TokenizerState::I => - { - match ch - { - 'f' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::If, token_str, ch), - 'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::In, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::In => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::In)?, - TokenizerState::If => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::If)?, - TokenizerState::F => - { - match ch - { - 'a' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fa, token_str, ch), - 'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fo, token_str, ch), - 'u' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fu, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Fu => - { - match ch - { - 'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fun, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Fun => - { - match ch - { - 'c' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Func, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Func => - { - match ch - { - 't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Funct, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Funct => - { - match ch - { - 'i' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Functi, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Functi => - { - match ch - { - 'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Functio, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Functio => - { - match ch - { - 'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Function, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Function => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Function)?, - TokenizerState::Fa => - { - match ch - { - 'l' => tokenize_terminal_no_token(last_index, *index, 
state, TokenizerState::Fal, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Fal => - { - match ch - { - 's' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fals, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Fals => - { - match ch - { - 'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::False, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::False => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::False)?, - TokenizerState::Fo => - { - match ch - { - 'r' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::For, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::For => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::For)?, - TokenizerState::L => - { - match ch - { - 'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Lo, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Lo => - { - match ch - { - 'c' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Loc, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Loc => - { - match ch - { - 'a' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Loca, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Loca => - { - match ch - { - 'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Local, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Local => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Local)?, - TokenizerState::U => - { - match ch - { - 'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Un, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Un => - { - match ch - { - 't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Unt, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Unt => - { - match ch - { - 'i' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Unti, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Unti => - { - match ch - { - 'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Until, token_str, ch), - _ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?, - } - } - TokenizerState::Until => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Until)?, - _ => todo!("State: {:?}", state), - } - return Ok(()); -} - -fn tokenize(file_content: &String) -> Result, &'static str> -{ - let 
mut tokens: Vec = Vec::new(); - let mut state = TokenizerState::Start; - let char_vec: Vec = file_content.chars().collect(); - - let mut last_index: i32 = -1; - let mut index = 0; - let mut token: Option = None; - let mut token_str: String = String::new(); - let mut long_bracket_level = 0; - - while index < char_vec.len() - { - let ch = char_vec[index]; - tokenize_char(&mut state, ch, &mut last_index, &mut index, &mut token, &mut token_str, &mut tokens, &mut long_bracket_level)?; - index += 1; - } - match state - { - TokenizerState::Name => tokenize_backtrack_name(&mut last_index, &mut index, &mut tokens, &mut token, &mut token_str, &mut state)?, - TokenizerState::End => tokenize_backtrack_custom_token(&mut last_index, &mut index, &mut tokens, &mut token, &mut token_str, &mut state, Token::End)?, - TokenizerState::And => tokenize_backtrack_custom_token(&mut last_index, &mut index, &mut tokens, &mut token, &mut token_str, &mut state, Token::And)?, - TokenizerState::Semicolon => tokenize_backtrack_custom_token(&mut last_index, &mut index, &mut tokens, &mut token, &mut token_str, &mut state, Token::Semicolon)?, - _ => todo!("state: {:?}", state), - } - - return Ok(tokens); -} \ No newline at end of file diff --git a/src/tokenizer.rs b/src/tokenizer.rs new file mode 100644 index 0000000..92bd163 --- /dev/null +++ b/src/tokenizer.rs @@ -0,0 +1,1186 @@ +#[derive(Debug, Clone)] +pub enum Token +{ + Name(String), + And, Break, Do, Else, Elseif, End, + False, For, Function, Goto, If, In, + Local, Nil, Not, Or, Repeat, Return, + Then, True, Until, While, + Plus, Minus, Star, Slash, Percent, Caret, Hash, + Ampersand, Tilde, Pipe, LtLt, GtGt, SlashSlash, + EqualsEquals, TildeEquals, LtEquals, GtEquals, Lt, Gt, Equals, + RoundOpen, RoundClosed, CurlyOpen, CurlyClosed, SquareOpen, SquareClosed, ColonColon, + Semicolon, Colon, Comma, Dot, DotDot, DotDotDot, + IntLiteral(String), + HexLiteral(String), + StringLiteral(String), +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum TokenizerState +{ + Start, + Quote, SingleQuote, Name, Number, Zero, + A, B, D, E, F, G, I, L, N, O, R, T, U, W, + Plus, Minus, Star, Slash, Percent, Caret, Hash, + Ampersand, Tilde, Pipe, Lt, Gt, Equals, RoundOpen, RoundClosed, CurlyOpen, CurlyClosed, SquareOpen, SquareClosed, + Colon, Semicolon, Comma, Dot, + + An, Br, Do, El, En, Fa, Fo, Fu, Go, If, In, Lo, Ni, No, Or, Re, Th, Tr, Un, Wh, + LtLt, GtGt, SlashSlash, EqualsEquals, TildeEquals, LtEquals, GtEquals, ColonColon, DotDot, + SmallCommentStart, QuoteBackslash, SingleQuoteBackslash, String, HexNumberX, ExpNumber, + + And, Bre, Els, End, Fal, For, Fun, Got, Loc, Nil, Not, Rep, Ret, The, Tru, Unt, Whi, + DotDotDot, HexNumber, QuoteBackslashZ, SingleQuoteBackslashZ, + BigCommentLongBracketStart, SmallComment, + + Brea, Else, Fals, Func, Goto, Loca, Repe, Retu, Then, True, Unti, Whil, HexExpNumber, + BigComment, BigCommentLongBracketEnd, + + Break, Elsei, False, Funct, Local, Repea, Retur, Until, While, + + Elseif, Functi, Repeat, Return, + + Functio, + + Function, +} + +fn tokenize_update_index_and_state(last_index: &mut i32, index: usize, state: &mut TokenizerState, new_state: TokenizerState) +{ + *last_index = index as i32; + *state = new_state; +} +fn tokenize_terminal_no_str(last_index: &mut i32, index: usize, token: &mut Option, state: &mut TokenizerState, new_token: Option, new_state: TokenizerState) +{ + tokenize_update_index_and_state(last_index, index, state, new_state); + *token = new_token; +} +fn tokenize_terminal_no_token(last_index: &mut i32, index: usize, 
state: &mut TokenizerState, new_state: TokenizerState, token_str: &mut String, ch: char) +{ + tokenize_update_index_and_state(last_index, index, state, new_state); + token_str.push(ch); +} +fn tokenize_terminal(last_index: &mut i32, index: usize, token: &mut Option, state: &mut TokenizerState, new_token: Option, new_state: TokenizerState, token_str: &mut String, ch: char) +{ + tokenize_terminal_no_str(last_index, index, token, state, new_token, new_state); + token_str.push(ch); +} +fn tokenize_backtrack(last_index: &mut i32, index: &mut usize, tokens: &mut Vec, token: &mut Option, token_str: &mut String, state: &mut TokenizerState) -> Result<(), &'static str> +{ + return tokenize_backtrack_custom_token(last_index, index, tokens, token, token_str, state, token.clone().unwrap()); +} +fn tokenize_backtrack_name(last_index: &mut i32, index: &mut usize, tokens: &mut Vec, token: &mut Option, token_str: &mut String, state: &mut TokenizerState) -> Result<(), &'static str> +{ + if *last_index == -1 || token.is_none() + { + println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens); + return Err("Lexerr"); + } + *index = *last_index as usize; + *last_index = -1; + tokens.push(Token::Name(token_str.clone())); + *token = None; + token_str.clear(); + *state = TokenizerState::Start; + return Ok(()); +} +fn tokenize_backtrack_custom_token(last_index: &mut i32, index: &mut usize, tokens: &mut Vec, token: &mut Option, token_str: &mut String, state: &mut TokenizerState, new_token: Token) -> Result<(), &'static str> +{ + if *last_index == -1 || token.is_none() + { + println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens); + return Err("Lexerr"); + } + *index = *last_index as usize; + *last_index = -1; + tokens.push(new_token); + *token = None; + token_str.clear(); + *state = TokenizerState::Start; + return Ok(()); +} +fn tokenize_alphanumeric_nonstart(last_index: &mut i32, index: &mut usize, tokens: &mut Vec, token: &mut Option, token_str: &mut String, state: &mut TokenizerState, ch: char) -> Result<(), &'static str> +{ + if ch.is_ascii_alphanumeric() || ch == '_' + { + tokenize_update_index_and_state(last_index, *index, state, TokenizerState::Name); + token_str.push(ch); + } + else + { + tokenize_backtrack_name(last_index, index, tokens, token, token_str, state)?; + } + return Ok(()); +} +fn tokenize_alphanumeric_nonstart_custom(last_index: &mut i32, index: &mut usize, tokens: &mut Vec, token: &mut Option, token_str: &mut String, state: &mut TokenizerState, ch: char, new_token: Token) -> Result<(), &'static str> +{ + if ch.is_ascii_alphanumeric() || ch == '_' + { + tokenize_update_index_and_state(last_index, *index, state, TokenizerState::Name); + token_str.push(ch); + } + else + { + tokenize_backtrack_custom_token(last_index, index, tokens, token, token_str, state, new_token)?; + } + return Ok(()); +} +fn tokenize_char(state: &mut TokenizerState, ch: char, last_index: &mut i32, index: &mut usize, token: &mut Option, token_str: &mut String, tokens: &mut Vec, long_bracket_level: &mut u32) -> Result<(), &'static str> +{ + match state + { + TokenizerState::Start => + { + match ch + { + '-' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Minus), TokenizerState::Minus), + 'a' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("a".to_string())), TokenizerState::A, token_str, ch), + 'b' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("b".to_string())), TokenizerState::B, token_str, ch), + 'd' => tokenize_terminal(last_index, 
*index, token, state, Some(Token::Name("d".to_string())), TokenizerState::D, token_str, ch), + 'e' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("e".to_string())), TokenizerState::E, token_str, ch), + 'f' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("f".to_string())), TokenizerState::F, token_str, ch), + 'i' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("i".to_string())), TokenizerState::I, token_str, ch), + 'g' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("g".to_string())), TokenizerState::G, token_str, ch), + 'l' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("l".to_string())), TokenizerState::L, token_str, ch), + 'n' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("n".to_string())), TokenizerState::N, token_str, ch), + 'o' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("o".to_string())), TokenizerState::O, token_str, ch), + 'r' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("r".to_string())), TokenizerState::R, token_str, ch), + 't' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("t".to_string())), TokenizerState::T, token_str, ch), + 'u' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("u".to_string())), TokenizerState::U, token_str, ch), + 'w' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("w".to_string())), TokenizerState::W, token_str, ch), + ',' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Comma), TokenizerState::Comma), + '=' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Equals), TokenizerState::Equals), + '(' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::RoundOpen), TokenizerState::RoundOpen), + ')' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::RoundClosed), TokenizerState::RoundClosed), + '.' 
=> tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Dot), TokenizerState::Dot), + ':' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Colon), TokenizerState::Colon), + '{' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::CurlyOpen), TokenizerState::CurlyOpen), + '}' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::CurlyClosed), TokenizerState::CurlyClosed), + '[' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::SquareOpen), TokenizerState::SquareOpen), + ']' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::SquareClosed), TokenizerState::SquareClosed), + '+' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Plus), TokenizerState::Plus), + '~' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Tilde), TokenizerState::Tilde), + '>' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Gt), TokenizerState::Gt), + '<' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Lt), TokenizerState::Lt), + '#' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Hash), TokenizerState::Hash), + '|' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Pipe), TokenizerState::Pipe), + '&' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Ampersand), TokenizerState::Ampersand), + '%' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Percent), TokenizerState::Percent), + '*' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Star), TokenizerState::Star), + '/' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Slash), TokenizerState::Slash), + ';' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Semicolon), TokenizerState::Semicolon), + '^' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Caret), TokenizerState::Caret), + '0' => tokenize_terminal(last_index, *index, token, state, Some(Token::IntLiteral("0".to_string())), TokenizerState::Zero, token_str, ch), + '"' => + { + *token = None; + *state = TokenizerState::Quote; + } + '\'' => + { + *token = None; + *state = TokenizerState::SingleQuote; + } + _ => + { + if ch.is_whitespace() { } + else if ch.is_ascii_alphabetic() || ch == '_' + { + tokenize_terminal(last_index, *index, token, state, Some(Token::Name(token_str.clone())), TokenizerState::Name, token_str, ch); + } + else if ch.is_numeric() && ch.is_ascii() + { + tokenize_terminal(last_index, *index, token, state, Some(Token::IntLiteral(token_str.clone())), TokenizerState::Number, token_str, ch); + } + else + { + todo!("State {:?}, Char {}", state, ch); + } + } + } + } + TokenizerState::Quote => + { + match ch + { + '\\' => + { + *state = TokenizerState::QuoteBackslash; + } + '"' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::StringLiteral(token_str.clone())), TokenizerState::String), + _ => + { + token_str.push(ch); + } + } + } + TokenizerState::QuoteBackslash => + { + match ch + { + 'a' => + { + token_str.push('\u{0007}'); + *state = TokenizerState::Quote; + } + 'b' => + { + token_str.push('\u{0008}'); + *state = TokenizerState::Quote; + } + 't' => + { + token_str.push('\t'); + *state = TokenizerState::Quote; + } + 'n' | '\n' => + { + token_str.push('\n'); + *state = TokenizerState::Quote; + } + 'v' => + { + token_str.push('\u{000b}'); + *state = 
TokenizerState::Quote;
+				}
+				'f' =>
+				{
+					token_str.push('\u{000c}');
+					*state = TokenizerState::Quote;
+				}
+				'r' =>
+				{
+					token_str.push('\r');
+					*state = TokenizerState::Quote;
+				}
+				'\\' =>
+				{
+					token_str.push('\\');
+					*state = TokenizerState::Quote;
+				}
+				'"' =>
+				{
+					token_str.push('\"');
+					*state = TokenizerState::Quote;
+				}
+				'\'' =>
+				{
+					token_str.push('\'');
+					*state = TokenizerState::Quote;
+				}
+				'z' =>
+				{
+					*state = TokenizerState::QuoteBackslashZ;
+				}
+				_ => return Err("Unknown escape sequence"),
+			}
+		}
+		TokenizerState::QuoteBackslashZ =>
+		{
+			match ch
+			{
+				'\\' =>
+				{
+					*state = TokenizerState::QuoteBackslash;
+				}
+				'"' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::StringLiteral(token_str.clone())), TokenizerState::String),
+				_ =>
+				{
+					if !ch.is_whitespace()
+					{
+						token_str.push(ch);
+						*state = TokenizerState::Quote;
+					}
+				}
+			}
+		}
+		TokenizerState::SingleQuote =>
+		{
+			match ch
+			{
+				'\\' =>
+				{
+					*state = TokenizerState::SingleQuoteBackslash;
+				}
+				'\'' =>
+				{
+					*last_index = *index as i32;
+					*token = Some(Token::StringLiteral(token_str.clone()));
+					*state = TokenizerState::String;
+				}
+				_ =>
+				{
+					token_str.push(ch);
+				}
+			}
+		}
+		TokenizerState::SingleQuoteBackslash =>
+		{
+			match ch
+			{
+				'a' =>
+				{
+					token_str.push('\u{0007}');
+					*state = TokenizerState::SingleQuote;
+				}
+				'b' =>
+				{
+					token_str.push('\u{0008}');
+					*state = TokenizerState::SingleQuote;
+				}
+				't' =>
+				{
+					token_str.push('\t');
+					*state = TokenizerState::SingleQuote;
+				}
+				'n' | '\n' =>
+				{
+					token_str.push('\n');
+					*state = TokenizerState::SingleQuote;
+				}
+				'v' =>
+				{
+					token_str.push('\u{000b}');
+					*state = TokenizerState::SingleQuote;
+				}
+				'f' =>
+				{
+					token_str.push('\u{000c}');
+					*state = TokenizerState::SingleQuote;
+				}
+				'r' =>
+				{
+					token_str.push('\r');
+					*state = TokenizerState::SingleQuote;
+				}
+				'\\' =>
+				{
+					token_str.push('\\');
+					*state = TokenizerState::SingleQuote;
+				}
+				'"' =>
+				{
+					token_str.push('\"');
+					*state = TokenizerState::SingleQuote;
+				}
+				'\'' =>
+				{
+					token_str.push('\'');
+					*state = TokenizerState::SingleQuote;
+				}
+				'z' =>
+				{
+					*state = TokenizerState::SingleQuoteBackslashZ;
+				}
+				_ => return Err("Unknown escape sequence"),
+			}
+		}
+		TokenizerState::SingleQuoteBackslashZ =>
+		{
+			match ch
+			{
+				'\\' =>
+				{
+					*state = TokenizerState::SingleQuoteBackslash;
+				}
+				'\'' =>
+				{
+					*last_index = *index as i32;
+					*token = Some(Token::StringLiteral(token_str.clone()));
+					*state = TokenizerState::String;
+				}
+				_ =>
+				{
+					if !ch.is_whitespace()
+					{
+						token_str.push(ch);
+						*state = TokenizerState::SingleQuote;
+					}
+				}
+			}
+		}
+		TokenizerState::String =>
+		{
+			let content = token_str.clone();
+			tokenize_backtrack_custom_token(last_index, index, tokens, token, token_str, state, Token::StringLiteral(content))?;
+		}
+		TokenizerState::Name => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+		TokenizerState::Zero =>
+		{
+			match ch
+			{
+				'x' =>
+				{
+					token_str.push(ch);
+					*token = None;
+					*state = TokenizerState::HexNumberX;
+				}
+				_ =>
+				{
+					if ch.is_numeric() && ch.is_ascii()
+					{
+						*last_index = *index as i32;
+						token_str.push(ch);
+						*token = Some(Token::IntLiteral(token_str.clone()));
+					}
+					else
+					{
+						tokenize_backtrack(last_index, index, tokens, token, token_str, state)?;
+					}
+				}
+			}
+		}
+		TokenizerState::HexNumberX =>
+		{
+			if ch.is_ascii() && ch.is_numeric() || match ch
+			{
+				'A'..='F' | 'a'..='f' => true,
+				_ => false,
+			}
+			{
+				*last_index = *index as i32;
+				token_str.push(ch);
+				*token = Some(Token::HexLiteral(token_str.clone()));
+				*state = TokenizerState::HexNumber;
+			}
+			else
+			{
+				tokenize_backtrack(last_index, index, tokens, token, token_str, state)?;
+			}
+		}
+		TokenizerState::HexNumber =>
+		{
+			match ch
+			{
+				'p' =>
+				{
+					token_str.push(ch);
+					*token = None;
+					*state = TokenizerState::HexExpNumber;
+				}
+				_ =>
+				{
+					if ch.is_ascii() && ch.is_numeric() || match ch
+					{
+						'A'..='F' | 'a'..='f' => true,
+						_ => false,
+					}
+					{
+						*last_index = *index as i32;
+						token_str.push(ch);
+						*token = Some(Token::HexLiteral(token_str.clone()));
+					}
+					else
+					{
+						tokenize_backtrack(last_index, index, tokens, token, token_str, state)?;
+					}
+				}
+			}
+		}
+		TokenizerState::Number =>
+		{
+			match ch
+			{
+				'e' =>
+				{
+					token_str.push(ch);
+					*token = None;
+					*state = TokenizerState::ExpNumber;
+				}
+				_ =>
+				{
+					if ch.is_numeric() && ch.is_ascii()
+					{
+						*last_index = *index as i32;
+						token_str.push(ch);
+						*token = Some(Token::IntLiteral(token_str.clone()));
+					}
+					else
+					{
+						tokenize_backtrack(last_index, index, tokens, token, token_str, state)?;
+					}
+				}
+			}
+		}
+		TokenizerState::Comma | TokenizerState::RoundOpen | TokenizerState::RoundClosed |
+		TokenizerState::CurlyOpen | TokenizerState::CurlyClosed | TokenizerState::Plus |
+		TokenizerState::TildeEquals | TokenizerState::EqualsEquals | TokenizerState::Hash |
+		TokenizerState::GtEquals | TokenizerState::LtEquals | TokenizerState::SquareOpen |
+		TokenizerState::SquareClosed | TokenizerState::Pipe | TokenizerState::Ampersand |
+		TokenizerState::Percent | TokenizerState::Star | TokenizerState::Semicolon |
+		TokenizerState::Caret | TokenizerState::DotDotDot | TokenizerState::GtGt |
+		TokenizerState::LtLt | TokenizerState::SlashSlash => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
+		TokenizerState::Tilde =>
+		{
+			match ch
+			{
+				'=' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::TildeEquals), TokenizerState::TildeEquals),
+				_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
+			}
+		}
+		TokenizerState::Gt =>
+		{
+			match ch
+			{
+				'>' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::GtGt), TokenizerState::GtGt),
+				'=' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::GtEquals), TokenizerState::GtEquals),
+				_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
+			}
+		}
+		TokenizerState::Lt =>
+		{
+			match ch
+			{
+				'>' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::LtLt), TokenizerState::LtLt),
+				'=' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::LtEquals), TokenizerState::LtEquals),
+				_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
+			}
+		}
+		TokenizerState::Slash =>
+		{
+			match ch
+			{
+				'/' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::SlashSlash), TokenizerState::SlashSlash),
+				_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
+			}
+		}
+		TokenizerState::Dot =>
+		{
+			match ch
+			{
+				'.' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::DotDot), TokenizerState::DotDot),
+				_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
+			}
+		}
+		TokenizerState::DotDot =>
+		{
+			match ch
+			{
+				'.' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::DotDotDot), TokenizerState::DotDotDot),
+				_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
+			}
+		}
+		TokenizerState::Colon =>
+		{
+			match ch
+			{
+				':' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::ColonColon), TokenizerState::ColonColon),
+				_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
+			}
+		}
+		TokenizerState::Equals =>
+		{
+			match ch
+			{
+				'=' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::EqualsEquals), TokenizerState::EqualsEquals),
+				_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
+			}
+		}
+		TokenizerState::Minus =>
+		{
+			match ch
+			{
+				'-' => tokenize_terminal_no_str(last_index, *index, token, state, None, TokenizerState::SmallCommentStart),
+				_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
+			}
+		}
+		TokenizerState::SmallCommentStart =>
+		{
+			match ch
+			{
+				'[' =>
+				{
+					*token = None;
+					*state = TokenizerState::BigCommentLongBracketStart;
+				}
+				'\n' =>
+				{
+					*state = TokenizerState::Start;
+					*last_index = -1;
+				}
+				_ =>
+				{
+					*state = TokenizerState::SmallComment;
+				}
+			}
+		}
+		TokenizerState::SmallComment =>
+		{
+			match ch
+			{
+				'\n' =>
+				{
+					*state = TokenizerState::Start;
+					*last_index = -1;
+				}
+				_ => { }
+			}
+		}
+		TokenizerState::BigCommentLongBracketStart =>
+		{
+			match ch
+			{
+				'=' =>
+				{
+					*long_bracket_level += 1;
+				}
+				'[' =>
+				{
+					*state = TokenizerState::BigComment;
+				}
+				_ => return Err("Malformed long bracket at the beginning of a big comment"),
+			}
+		}
+		TokenizerState::BigComment =>
+		{
+			match ch
+			{
+				']' =>
+				{
+					*state = TokenizerState::BigCommentLongBracketEnd;
+				}
+				_ => { }
+			}
+		}
+		TokenizerState::BigCommentLongBracketEnd =>
+		{
+			match ch
+			{
+				'=' =>
+				{
+					if *long_bracket_level == 0
+					{
+						return Err("Long bracket level too big when ending big comment");
+					}
+					*long_bracket_level -= 1;
+				}
+				']' =>
+				{
+					if *long_bracket_level != 0
+					{
+						return Err("Long bracket level too small when ending big comment");
+					}
+					*state = TokenizerState::Start;
+				}
+				_ => return Err("Malformed long bracket when ending big comment"),
+			}
+		}
+		TokenizerState::A =>
+		{
+			match ch
+			{
+				'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::An, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::An =>
+		{
+			match ch
+			{
+				'd' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::And, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::And => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::And)?,
+		TokenizerState::W =>
+		{
+			match ch
+			{
+				'h' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Wh, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Wh =>
+		{
+			match ch
+			{
+				'i' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Whi, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Whi =>
+		{
+			match ch
+			{
+				'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Whil, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Whil =>
+		{
+			match ch
+			{
+				'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::While, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::While => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::While)?,
+		TokenizerState::B =>
+		{
+			match ch
+			{
+				'r' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Br, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Br =>
+		{
+			match ch
+			{
+				'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Bre, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Bre =>
+		{
+			match ch
+			{
+				'a' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Brea, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Brea =>
+		{
+			match ch
+			{
+				'k' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Break, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Break => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Break)?,
+		TokenizerState::G =>
+		{
+			match ch
+			{
+				'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Go, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Go =>
+		{
+			match ch
+			{
+				't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Got, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Got =>
+		{
+			match ch
+			{
+				'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Goto, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Goto => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Goto)?,
+		TokenizerState::R =>
+		{
+			match ch
+			{
+				'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Re, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Re =>
+		{
+			match ch
+			{
+				't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Ret, token_str, ch),
+				'p' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Rep, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Ret =>
+		{
+			match ch
+			{
+				'u' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Retu, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Retu =>
+		{
+			match ch
+			{
+				'r' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Retur, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Retur =>
+		{
+			match ch
+			{
+				'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Return, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Return => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Return)?,
+		TokenizerState::Rep =>
+		{
+			match ch
+			{
+				'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Repe, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Repe =>
+		{
+			match ch
+			{
+				'a' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Repea, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Repea =>
+		{
+			match ch
+			{
+				't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Repeat, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Repeat => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Repeat)?,
+		TokenizerState::N =>
+		{
+			match ch
+			{
+				'i' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Ni, token_str, ch),
+				'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::No, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::No =>
+		{
+			match ch
+			{
+				't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Not, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Not => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Not)?,
+		TokenizerState::Ni =>
+		{
+			match ch
+			{
+				'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Nil, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Nil => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Nil)?,
+		TokenizerState::T =>
+		{
+			match ch
+			{
+				'h' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Th, token_str, ch),
+				'r' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Tr, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Th =>
+		{
+			match ch
+			{
+				'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::The, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::The =>
+		{
+			match ch
+			{
+				'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Then, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Then => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Then)?,
+		TokenizerState::Tr =>
+		{
+			match ch
+			{
+				'u' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Tru, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Tru =>
+		{
+			match ch
+			{
+				'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::True, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::True => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::True)?,
+		TokenizerState::E =>
+		{
+			match ch
+			{
+				'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::El, token_str, ch),
+				'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::En, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::En =>
+		{
+			match ch
+			{
+				'd' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::End, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::End => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::End)?,
+		TokenizerState::El =>
+		{
+			match ch
+			{
+				's' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Els, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Els =>
+		{
+			match ch
+			{
+				'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Else, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Else =>
+		{
+			match ch
+			{
+				'i' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Elsei, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Else)?,
+			}
+		}
+		TokenizerState::Elsei =>
+		{
+			match ch
+			{
+				'f' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Elseif, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Elseif => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Elseif)?,
+		TokenizerState::O =>
+		{
+			match ch
+			{
+				'r' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Or, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Or => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Or)?,
+		TokenizerState::D =>
+		{
+			match ch
+			{
+				'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Do, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Do => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Do)?,
+		TokenizerState::I =>
+		{
+			match ch
+			{
+				'f' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::If, token_str, ch),
+				'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::In, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::In => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::In)?,
+		TokenizerState::If => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::If)?,
+		TokenizerState::F =>
+		{
+			match ch
+			{
+				'a' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fa, token_str, ch),
+				'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fo, token_str, ch),
+				'u' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fu, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Fu =>
+		{
+			match ch
+			{
+				'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fun, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Fun =>
+		{
+			match ch
+			{
+				'c' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Func, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Func =>
+		{
+			match ch
+			{
+				't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Funct, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Funct =>
+		{
+			match ch
+			{
+				'i' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Functi, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Functi =>
+		{
+			match ch
+			{
+				'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Functio, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Functio =>
+		{
+			match ch
+			{
+				'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Function, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Function => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Function)?,
+		TokenizerState::Fa =>
+		{
+			match ch
+			{
+				'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fal, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Fal =>
+		{
+			match ch
+			{
+				's' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fals, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Fals =>
+		{
+			match ch
+			{
+				'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::False, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::False => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::False)?,
+		TokenizerState::Fo =>
+		{
+			match ch
+			{
+				'r' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::For, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::For => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::For)?,
+		TokenizerState::L =>
+		{
+			match ch
+			{
+				'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Lo, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Lo =>
+		{
+			match ch
+			{
+				'c' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Loc, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Loc =>
+		{
+			match ch
+			{
+				'a' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Loca, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Loca =>
+		{
+			match ch
+			{
+				'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Local, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Local => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Local)?,
+		TokenizerState::U =>
+		{
+			match ch
+			{
+				'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Un, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Un =>
+		{
+			match ch
+			{
+				't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Unt, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Unt =>
+		{
+			match ch
+			{
+				'i' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Unti, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Unti =>
+		{
+			match ch
+			{
+				'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Until, token_str, ch),
+				_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
+			}
+		}
+		TokenizerState::Until => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Until)?,
+		_ => todo!("State: {:?}", state),
+	}
+	return Ok(());
+}
+
+pub fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
+{
+	let mut tokens: Vec<Token> = Vec::new();
+	let mut state = TokenizerState::Start;
+	let char_vec: Vec<char> = file_content.chars().collect();
+
+	let mut last_index: i32 = -1;
+	let mut index = 0;
+	let mut token: Option<Token> = None;
+	let mut token_str: String = String::new();
+	let mut long_bracket_level = 0;
+
+	while index < char_vec.len()
+	{
+		let ch = char_vec[index];
+		tokenize_char(&mut state, ch, &mut last_index, &mut index, &mut token, &mut token_str, &mut tokens, &mut long_bracket_level)?;
+		index += 1;
+	}
+	match state
+	{
+		TokenizerState::Name => tokenize_backtrack_name(&mut last_index, &mut index, &mut tokens, &mut token, &mut token_str, &mut state)?,
+		TokenizerState::End => tokenize_backtrack_custom_token(&mut last_index, &mut index, &mut tokens, &mut token, &mut token_str, &mut state, Token::End)?,
+		TokenizerState::And => tokenize_backtrack_custom_token(&mut last_index, &mut index, &mut tokens, &mut token, &mut token_str, &mut state, Token::And)?,
+		TokenizerState::Semicolon => tokenize_backtrack_custom_token(&mut last_index, &mut index, &mut tokens, &mut token, &mut token_str, &mut state, Token::Semicolon)?,
+		_ => todo!("state: {:?}", state),
+	}
+
+	return Ok(tokens);
+}
\ No newline at end of file
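
Not part of the diff above: a minimal usage sketch of the relocated `tokenize` entry point, assuming the module layout this patch introduces. The `demo` function, the sample Lua snippet, and the expected token list are illustrative assumptions, not code from the patch.

	// Hypothetical sketch only; `demo` does not exist in the repository.
	use crate::tokenizer::{Token, tokenize};

	fn demo() -> Result<(), &'static str>
	{
		// tokenize consumes the whole source text and yields Result<Vec<Token>, &'static str>.
		let source = String::from("local x = 42;");
		let tokens: Vec<Token> = tokenize(&source)?;
		// With the state machine as written, this should print something like:
		// [Local, Name("x"), Equals, IntLiteral("42"), Semicolon]
		println!("{:?}", tokens);
		return Ok(());
	}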