use std::env;
use std::fs;

/// A lexical token produced by [`tokenize`].
///
/// Every variant carries the token text followed by a line and a column
/// number (both 1-based). The position is the one the tokenizer was at when
/// the token was *completed*: the closing quote for a string literal, the
/// whitespace character that ended a word, or one past the last character
/// when a word is ended by the end of input.
// NOTE(review): positions mark the end of the token, not its start — confirm
// this is what later stages expect.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Token {
    /// Text found between a pair of double quotes (quotes not included).
    StringLit(String, i32, i32),
    /// A recognised built-in word; currently only `print`.
    Intrinsic(String, i32, i32),
}

/// Command-line entry point.
///
/// With `-c <file>` or `--compile <file>` the file is read, tokenized and the
/// resulting tokens are printed in `Debug` form.
///
/// # Panics
///
/// Panics when the option is missing or unknown, when the file path is
/// missing, or when the file cannot be read.
fn main() {
    let args: Vec<String> = env::args().collect();

    // Checked access instead of `args[1]`, so a missing argument yields a
    // readable message rather than an index-out-of-bounds panic.
    match args.get(1).map(String::as_str) {
        Some("-c") | Some("--compile") => {
            let path = args.get(2).expect("Missing path to the source file");
            let file_content = fs::read_to_string(path).expect("Could not read the source file");
            let tokens: Vec<Token> = tokenize(&file_content);
            println!("{:?}", tokens);
        }
        Some(_) => panic!("Unknown option"),
        None => panic!("Missing option: expected -c or --compile followed by a source file"),
    }
}

/// Splits `text` into a list of [`Token`]s.
///
/// Whitespace separates tokens. Text enclosed in double quotes becomes a
/// [`Token::StringLit`] (it may contain whitespace and newlines). Any other
/// run of non-whitespace characters is a word and is classified by
/// `word_to_token`.
///
/// # Panics
///
/// * if a `"` appears in the middle of a word,
/// * if the input ends inside a string literal,
/// * if a word is not a known intrinsic (via `todo!`).
fn tokenize(text: &str) -> Vec<Token> {
    let mut tokens: Vec<Token> = Vec::new();
    let mut line = 1;
    let mut col = 1;
    let mut state = TokenizerState::Whitespace;
    // Accumulates the characters of the token currently being read. It must
    // be empty whenever the tokenizer is back in the `Whitespace` state.
    let mut word = String::new();

    for ch in text.chars() {
        match state {
            TokenizerState::Whitespace => match ch {
                // Whitespace between tokens is skipped.
                c if c.is_whitespace() => {}
                '"' => state = TokenizerState::Quote,
                _ => {
                    state = TokenizerState::Rest;
                    word.push(ch);
                }
            },
            TokenizerState::Quote => {
                if ch == '"' {
                    state = TokenizerState::Whitespace;
                    // `take` moves the text out and leaves `word` empty for
                    // the next token.
                    tokens.push(Token::StringLit(std::mem::take(&mut word), line, col));
                } else {
                    word.push(ch);
                }
            }
            TokenizerState::Rest => {
                if ch.is_whitespace() {
                    state = TokenizerState::Whitespace;
                    tokens.push(word_to_token(&word, line, col));
                    // Reset the buffer; otherwise the next token would be
                    // appended to this word's text.
                    word.clear();
                } else if ch == '"' {
                    panic!("Having '\"' in the middle of a word is not allowed");
                } else {
                    word.push(ch);
                }
            }
        }

        // Advance the position after the character has been handled, so the
        // values used above refer to `ch` itself.
        col += 1;
        if ch == '\n' {
            col = 1;
            line += 1;
        }
    }

    // Handle whatever was in progress when the input ended.
    match state {
        TokenizerState::Quote => {
            panic!("Encountered EOF before closing string");
        }
        TokenizerState::Whitespace => {}
        TokenizerState::Rest => tokens.push(word_to_token(&word, line, col)),
    }

    tokens
}

/// Classifies a completed word, attaching the given position to the token.
///
/// # Panics
///
/// Panics via `todo!` for any word other than `print`.
fn word_to_token(word: &str, line: i32, col: i32) -> Token {
    match word {
        "print" => Token::Intrinsic(word.to_owned(), line, col),
        _ => todo!("Unknown word {}", word),
    }
}

/// What the tokenizer is currently reading.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TokenizerState {
    /// Between tokens.
    Whitespace,
    /// Inside a double-quoted string literal.
    Quote,
    /// Inside an unquoted word.
    Rest,
}