diff --git a/src/main.rs b/src/main.rs
index b741640..69154fd 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -43,7 +43,7 @@ enum Token
 enum TokenizerState
 {
     Start,
-    Quote, Name, Number, Zero,
+    Quote, SingleQuote, Name, Number, Zero,
     A, B, D, E, F, G, I, L, N, O, R, T, U, W,
     Plus, Minus, Star, Slash, Percent, Caret, Hash, Ampersand, Tilde, Pipe, Lt, Gt, Equals,
     RoundOpen, RoundClosed, CurlyOpen, CurlyClosed, SquareOpen, SquareClosed,
@@ -51,10 +51,10 @@ enum TokenizerState
     An, Br, Do, El, En, Fa, Fo, Fu, Go, If, In, Lo, Ni, No, Or, Re, Th, Tr, Un, Wh,
     LtLt, GtGt, SlashSlash, EqualsEquals, TildeEquals, LtEquals, GtEquals, ColonColon, DotDot,
-    SmallComment, QuoteBackslash, String, HexNumberX, ExpNumber,
+    SmallComment, QuoteBackslash, SingleQuoteBackslash, String, HexNumberX, ExpNumber,
     And, Bre, Els, End, Fal, For, Fun, Got, Loc, Nil, Not, Rep, Ret, The, Tru, Unt, Whi,
-    DotDotDot, HexNumber,
+    DotDotDot, HexNumber, QuoteBackslashZ, SingleQuoteBackslashZ,
     BigComment,
     Brea, Else, Fals, Func, Goto, Loca, Repe, Retu, Then, True, Unti, Whil,
     HexExpNumber,
@@ -240,6 +240,18 @@ fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
                         token = Some(Token::CurlyClosed);
                         state = TokenizerState::CurlyClosed;
                     }
+                    '[' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::SquareOpen);
+                        state = TokenizerState::SquareOpen;
+                    }
+                    ']' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::SquareClosed);
+                        state = TokenizerState::SquareClosed;
+                    }
                     '+' =>
                     {
                         last_index = index as i32;
@@ -277,10 +289,20 @@ fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
                         token_str.push(ch);
                         state = TokenizerState::Zero;
                     }
+                    '"' =>
+                    {
+                        token = None;
+                        state = TokenizerState::Quote;
+                    }
+                    '\'' =>
+                    {
+                        token = None;
+                        state = TokenizerState::SingleQuote;
+                    }
                     _ =>
                     {
                         if ch.is_whitespace() { }
-                        else if ch.is_ascii_alphabetic()
+                        else if ch.is_ascii_alphabetic() || ch == '_'
                         {
                             last_index = index as i32;
                             token_str.push(ch);
@@ -301,6 +323,231 @@ fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
                     }
                 }
             }
+            TokenizerState::Quote =>
+            {
+                match ch
+                {
+                    '\\' =>
+                    {
+                        state = TokenizerState::QuoteBackslash;
+                    }
+                    '"' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::StringLiteral(token_str.clone()));
+                        state = TokenizerState::String;
+                    }
+                    _ =>
+                    {
+                        token_str.push(ch);
+                    }
+                }
+            }
+            TokenizerState::QuoteBackslash =>
+            {
+                match ch
+                {
+                    'a' =>
+                    {
+                        token_str.push('\u{0007}');
+                        state = TokenizerState::Quote;
+                    }
+                    'b' =>
+                    {
+                        token_str.push('\u{0008}');
+                        state = TokenizerState::Quote;
+                    }
+                    't' =>
+                    {
+                        token_str.push('\t');
+                        state = TokenizerState::Quote;
+                    }
+                    'n' | '\n' =>
+                    {
+                        token_str.push('\n');
+                        state = TokenizerState::Quote;
+                    }
+                    'v' =>
+                    {
+                        token_str.push('\u{000b}');
+                        state = TokenizerState::Quote;
+                    }
+                    'f' =>
+                    {
+                        token_str.push('\u{000c}');
+                        state = TokenizerState::Quote;
+                    }
+                    'r' =>
+                    {
+                        token_str.push('\r');
+                        state = TokenizerState::Quote;
+                    }
+                    '\\' =>
+                    {
+                        token_str.push('\\');
+                        state = TokenizerState::Quote;
+                    }
+                    '"' =>
+                    {
+                        token_str.push('\"');
+                        state = TokenizerState::Quote;
+                    }
+                    '\'' =>
+                    {
+                        token_str.push('\'');
+                        state = TokenizerState::Quote;
+                    }
+                    'z' =>
+                    {
+                        state = TokenizerState::QuoteBackslashZ;
+                    }
+                    _ => return Err("Unknown escape sequence"),
+                }
+            }
+            TokenizerState::QuoteBackslashZ =>
+            {
+                match ch
+                {
+                    '\\' =>
+                    {
+                        state = TokenizerState::QuoteBackslash;
+                    }
+                    '"' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::StringLiteral(token_str.clone()));
+                        state = TokenizerState::String;
+                    }
+                    _ =>
+                    {
+                        if !ch.is_whitespace()
+                        {
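+                            // \z has skipped the run of whitespace; this character is ordinary string content again.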
+                            token_str.push(ch);
+                            state = TokenizerState::Quote;
+                        }
+                    }
+                }
+            }
+            TokenizerState::SingleQuote =>
+            {
+                match ch
+                {
+                    '\\' =>
+                    {
+                        state = TokenizerState::SingleQuoteBackslash;
+                    }
+                    '\'' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::StringLiteral(token_str.clone()));
+                        state = TokenizerState::String;
+                    }
+                    _ =>
+                    {
+                        token_str.push(ch);
+                    }
+                }
+            }
+            TokenizerState::SingleQuoteBackslash =>
+            {
+                match ch
+                {
+                    'a' =>
+                    {
+                        token_str.push('\u{0007}');
+                        state = TokenizerState::SingleQuote;
+                    }
+                    'b' =>
+                    {
+                        token_str.push('\u{0008}');
+                        state = TokenizerState::SingleQuote;
+                    }
+                    't' =>
+                    {
+                        token_str.push('\t');
+                        state = TokenizerState::SingleQuote;
+                    }
+                    'n' | '\n' =>
+                    {
+                        token_str.push('\n');
+                        state = TokenizerState::SingleQuote;
+                    }
+                    'v' =>
+                    {
+                        token_str.push('\u{000b}');
+                        state = TokenizerState::SingleQuote;
+                    }
+                    'f' =>
+                    {
+                        token_str.push('\u{000c}');
+                        state = TokenizerState::SingleQuote;
+                    }
+                    'r' =>
+                    {
+                        token_str.push('\r');
+                        state = TokenizerState::SingleQuote;
+                    }
+                    '\\' =>
+                    {
+                        token_str.push('\\');
+                        state = TokenizerState::SingleQuote;
+                    }
+                    '"' =>
+                    {
+                        token_str.push('\"');
+                        state = TokenizerState::SingleQuote;
+                    }
+                    '\'' =>
+                    {
+                        token_str.push('\'');
+                        state = TokenizerState::SingleQuote;
+                    }
+                    'z' =>
+                    {
+                        state = TokenizerState::SingleQuoteBackslashZ;
+                    }
+                    _ => return Err("Unknown escape sequence"),
+                }
+            }
+            TokenizerState::SingleQuoteBackslashZ =>
+            {
+                match ch
+                {
+                    '\\' =>
+                    {
+                        state = TokenizerState::SingleQuoteBackslash;
+                    }
+                    '\'' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::StringLiteral(token_str.clone()));
+                        state = TokenizerState::String;
+                    }
+                    _ =>
+                    {
+                        if !ch.is_whitespace()
+                        {
+                            token_str.push(ch);
+                            state = TokenizerState::SingleQuote;
+                        }
+                    }
+                }
+            }
+            TokenizerState::String =>
+            {
+                if last_index == -1 || token.is_none()
+                {
+                    println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
+                    return Err("Lexerr");
+                }
+                index = last_index as usize;
+                last_index = -1;
+                token = None;
+                tokens.push(Token::StringLiteral(token_str.clone()));
+                token_str.clear();
+                state = TokenizerState::Start;
+            }
             TokenizerState::Name =>
             {
                 if ch.is_ascii_alphanumeric() || ch == '_'
@@ -330,7 +576,6 @@ fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
                 {
                     'x' =>
                     {
-                        last_index = index as i32;
                         token_str.push(ch);
                         token = None;
                         state = TokenizerState::HexNumberX;
@@ -394,7 +639,6 @@ fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
                 {
                     'p' =>
                     {
-                        last_index = index as i32;
                         token_str.push(ch);
                         token = None;
                         state = TokenizerState::HexExpNumber;
@@ -434,7 +678,6 @@ fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
                 {
                     'e' =>
                     {
-                        last_index = index as i32;
                         token_str.push(ch);
                         token = None;
                         state = TokenizerState::ExpNumber;
@@ -467,7 +710,7 @@ fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
             TokenizerState::Comma | TokenizerState::RoundOpen | TokenizerState::RoundClosed |
             TokenizerState::CurlyOpen | TokenizerState::CurlyClosed | TokenizerState::Plus |
             TokenizerState::TildeEquals | TokenizerState::EqualsEquals | TokenizerState::Hash |
-            TokenizerState::GtEquals | TokenizerState::LtEquals =>
+            TokenizerState::GtEquals | TokenizerState::LtEquals | TokenizerState::SquareOpen | TokenizerState::SquareClosed =>
             {
                 if last_index == -1 || token.is_none()
                 {
@@ -597,6 +840,47 @@ fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
                     }
                 }
             }
+            TokenizerState::DotDot =>
+            {
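+                // '..' was already recognized; a third '.' upgrades it to the vararg token '...'.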
+                match ch
+                {
+                    '.' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::DotDotDot);
+                        state = TokenizerState::DotDotDot;
+                    }
+                    _ =>
+                    {
+                        if last_index == -1 || token.is_none()
+                        {
+                            println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
+                            return Err("Lexerr");
+                        }
+                        index = last_index as usize;
+                        last_index = -1;
+                        tokens.push(token.clone().unwrap());
+                        token = None;
+                        token_str.clear();
+                        state = TokenizerState::Start;
+                    }
+                }
+            }
+            TokenizerState::DotDotDot =>
+            {
+                if last_index == -1 || token.is_none()
+                {
+                    println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
+                    return Err("Lexerr");
+                }
+                index = last_index as usize;
+                last_index = -1;
+                token = None;
+                token_str.clear();
+                state = TokenizerState::Start;
+                tokens.push(Token::DotDotDot);
+            }
             TokenizerState::Colon =>
             {
                 match ch
@@ -2818,6 +3101,67 @@ fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
                     tokens.push(Token::False);
                 }
             }
+            TokenizerState::Fo =>
+            {
+                match ch
+                {
+                    'r' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::Name("for".to_string()));
+                        token_str.push(ch);
+                        state = TokenizerState::For;
+                    }
+                    _ =>
+                    {
+                        if ch.is_ascii_alphanumeric() || ch == '_'
+                        {
+                            last_index = index as i32;
+                            token_str.push(ch);
+                            token = Some(Token::Name(token_str.clone()));
+                            state = TokenizerState::Name;
+                        }
+                        else
+                        {
+                            if last_index == -1 || token.is_none()
+                            {
+                                println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
+                                return Err("Lexerr");
+                            }
+                            index = last_index as usize;
+                            last_index = -1;
+                            tokens.push(token.unwrap().clone());
+                            token = None;
+                            token_str.clear();
+                            state = TokenizerState::Start;
+                        }
+                    }
+                }
+            }
+            TokenizerState::For =>
+            {
+                if ch.is_ascii_alphanumeric() || ch == '_'
+                {
+                    last_index = index as i32;
+                    token_str.push(ch);
+                    token = Some(Token::Name(token_str.clone()));
+                    state = TokenizerState::Name;
+                }
+                else
+                {
+                    if last_index == -1 || token.is_none()
+                    {
+                        println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
+                        return Err("Lexerr");
+                    }
+                    index = last_index as usize;
+                    last_index = -1;
+                    token = None;
+                    token_str.clear();
+                    state = TokenizerState::Start;
+                    tokens.push(Token::For);
+                }
+            }
             TokenizerState::L =>
             {
                 match ch
@@ -2866,7 +3210,30 @@ fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
                         token_str.push(ch);
                         state = TokenizerState::Loc;
                     }
-                    _ => todo!("State {:?}, Char {}", state, ch)
+                    _ =>
+                    {
+                        if ch.is_ascii_alphanumeric() || ch == '_'
+                        {
+                            last_index = index as i32;
+                            token_str.push(ch);
+                            token = Some(Token::Name(token_str.clone()));
+                            state = TokenizerState::Name;
+                        }
+                        else
+                        {
+                            if last_index == -1 || token.is_none()
+                            {
+                                println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
+                                return Err("Lexerr");
+                            }
+                            index = last_index as usize;
+                            last_index = -1;
+                            tokens.push(token.unwrap().clone());
+                            token = None;
+                            token_str.clear();
+                            state = TokenizerState::Start;
+                        }
+                    }
                 }
             }
             TokenizerState::Loc =>
@@ -2917,7 +3284,31 @@ fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
                         token_str.push(ch);
                         state = TokenizerState::Local;
                     }
-                    _ => todo!("State {:?}, Char {}", state, ch)
+                    _ =>
+                    {
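+                        // Same Name-or-rescan fallback as in the Fo/For states above.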
+                        if ch.is_ascii_alphanumeric() || ch == '_'
+                        {
+                            last_index = index as i32;
+                            token_str.push(ch);
+                            token = Some(Token::Name(token_str.clone()));
+                            state = TokenizerState::Name;
+                        }
+                        else
+                        {
+                            if last_index == -1 || token.is_none()
+                            {
+                                println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
+                                return Err("Lexerr");
+                            }
+                            index = last_index as usize;
+                            last_index = -1;
+                            tokens.push(token.unwrap().clone());
+                            token = None;
+                            token_str.clear();
+                            state = TokenizerState::Start;
+                        }
+                    }
                 }
             }
             TokenizerState::Local =>
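
Review note (not part of the patch): a quick way to exercise the new string states is a test along these lines. This is a sketch under two assumptions: `tokenize` and `Token` are reachable from a test module in src/main.rs as in this diff, and the source ends with a trailing newline so the String state flushes before end of input, since end-of-input handling is outside these hunks. Everything else mirrors the patch.

    #[cfg(test)]
    mod string_literal_tests
    {
        use super::{tokenize, Token};

        #[test]
        fn quotes_escapes_and_z_skip()
        {
            // One double-quoted, one single-quoted, and one \z-continued literal.
            // The trailing newline returns the state machine to Start before EOF.
            let src = "x = \"a\\tb\" .. 'c\\n' .. \"d\\z\n   e\"\n".to_string();
            let tokens = tokenize(&src).expect("valid string literals should tokenize");
            let strings: Vec<String> = tokens
                .iter()
                .filter_map(|t| match t
                {
                    Token::StringLiteral(s) => Some(s.clone()),
                    _ => None,
                })
                .collect();
            // \t and \n decode to real control characters; \z swallows the
            // newline and the indentation that follows it.
            assert_eq!(strings, vec!["a\tb", "c\n", "de"]);
        }
    }

Comparing only the extracted strings keeps the test independent of whether Token derives PartialEq.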