// kurz/src/main.rs

use std::env;
use std::fs;

/// A lexical token produced by `tokenize`.
///
/// Every variant carries its text followed by the line and column that the
/// tokenizer recorded for it (both counted from 1).
#[derive(Debug, Clone, PartialEq, Eq)]
enum Token
{
	/// The contents of a double-quoted string literal, without the quotes.
	StringLit(String, i32, i32),
	/// A built-in word of the language (currently only `print`).
	Intrinsic(String, i32, i32),
}

/// Entry point: parses the command line and runs the requested action.
///
/// Supported invocation: `<program> (-c | --compile) <source-file>`, which
/// reads the source file, tokenizes it and prints the tokens in debug form.
///
/// Missing arguments, an unknown option or an unreadable source file are
/// reported on stderr and the process exits with status 1 instead of
/// panicking.
fn main()
{
	let args: Vec<String> = env::args().collect();
	let program = args.first().map(String::as_str).unwrap_or("kurz");
	let usage = format!("Usage: {} (-c | --compile) <source-file>", program);

	// `.get()` instead of indexing: the user may have passed too few arguments.
	match args.get(1).map(String::as_str)
	{
		Some("-c") | Some("--compile") =>
		{
			let path = match args.get(2)
			{
				Some(path) => path,
				None =>
				{
					eprintln!("Missing source file");
					eprintln!("{}", usage);
					std::process::exit(1);
				}
			};
			let file_content = match fs::read_to_string(path)
			{
				Ok(content) => content,
				Err(err) =>
				{
					eprintln!("Could not read the source file '{}': {}", path, err);
					std::process::exit(1);
				}
			};
			let tokens: Vec<Token> = tokenize(&file_content);
			println!("{:?}", tokens);
		}
		Some(other) =>
		{
			eprintln!("Unknown option '{}'", other);
			eprintln!("{}", usage);
			std::process::exit(1);
		}
		None =>
		{
			eprintln!("{}", usage);
			std::process::exit(1);
		}
	}
}

/// Splits `text` into a flat list of [`Token`]s.
///
/// The tokenizer is a small state machine with three states:
///
/// * `Whitespace` - between tokens; whitespace is skipped, a `"` opens a
///   string literal and any other character starts a word.
/// * `Quote` - inside a string literal; every character up to the next `"`
///   is taken verbatim (there are no escape sequences).
/// * `Rest` - inside a word; the word ends at the next whitespace character
///   or at the end of the input.
///
/// Lines and columns are counted per `char`, starting at 1; the column is
/// reset after every `'\n'`. The position stored in a token is the position
/// at which the token *ended*: the closing quote for a string literal, the
/// terminating whitespace character for a word, or one column past the last
/// character when a word is ended by the end of the input.
///
/// # Panics
///
/// * if a `"` appears in the middle of a word,
/// * if the input ends inside a string literal,
/// * if a word is not a known intrinsic (not implemented yet, via `todo!`).
fn tokenize(text: &str) -> Vec<Token>
{
	let mut tokens: Vec<Token> = Vec::new();
	let mut line = 1;
	let mut col = 1;
	let mut state = TokenizerState::Whitespace;
	let mut word = String::new();
	for ch in text.chars()
	{
		match state
		{
			TokenizerState::Whitespace =>
			{
				if ch == '"'
				{
					state = TokenizerState::Quote;
				}
				else if !ch.is_whitespace()
				{
					state = TokenizerState::Rest;
					word.push(ch);
				}
			}
			TokenizerState::Quote =>
			{
				if ch == '"'
				{
					state = TokenizerState::Whitespace;
					// `take` hands the buffer to the token and leaves `word` empty.
					tokens.push(Token::StringLit(std::mem::take(&mut word), line, col));
				}
				else
				{
					word.push(ch);
				}
			}
			TokenizerState::Rest =>
			{
				if ch.is_whitespace()
				{
					state = TokenizerState::Whitespace;
					// `word` must be emptied here, otherwise the next word would be
					// appended to this one.
					tokens.push(word_to_token(std::mem::take(&mut word), line, col));
				}
				else if ch == '"'
				{
					panic!("Having '\"' in the middle of a word is not allowed");
				}
				else
				{
					word.push(ch);
				}
			}
		}
		// Advance the position only after the character was handled, so tokens
		// ending on a newline are still attributed to the line they are on.
		col += 1;
		if ch == '\n'
		{
			col = 1;
			line += 1;
		}
	}
	// Flush whatever the end of the input interrupted.
	match state
	{
		TokenizerState::Quote => panic!("Encountered EOF before closing string"),
		TokenizerState::Whitespace => {}
		TokenizerState::Rest => tokens.push(word_to_token(word, line, col)),
	}
	tokens
}

/// Classifies a finished word and wraps it in the matching [`Token`].
///
/// Panics (via `todo!`) for any word that is not a known intrinsic.
fn word_to_token(word: String, line: i32, col: i32) -> Token
{
	match word.as_str()
	{
		"print" => Token::Intrinsic(word, line, col),
		_ => todo!("Unknown word {}", word)
	}
}

/// The state of the character-level state machine in `tokenize`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TokenizerState
{
	/// Between tokens: whitespace is skipped until a token starts.
	Whitespace,
	/// Inside a double-quoted string literal.
	Quote,
	/// Inside any other word (anything that did not start with `"`).
	Rest,
}