kurz/src/main.rs

127 lines
2.2 KiB
Rust

use std::env;
use std::fs;
/// A lexical token produced by `tokenize`.
///
/// Each variant carries the token's text plus the `(line, column)`
/// position reached when the token was *completed* (the closing quote
/// or the first trailing whitespace), not where it started.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Token {
    /// Contents of a double-quoted string literal, quotes stripped.
    StringLit(String, i32, i32),
    /// A recognized built-in word such as `print`.
    Intrinsic(String, i32, i32),
}
/// CLI entry point.
///
/// Usage: `kurz -c <file>` (or `--compile`) — reads the source file,
/// tokenizes it, and prints the token list for inspection.
///
/// # Panics
/// Panics with a usage message when the option or file argument is
/// missing, on an unknown option, or when the file cannot be read.
fn main() {
    let args: Vec<String> = env::args().collect();
    // args[0] is the program name; use checked access so a missing
    // argument produces a usage message instead of an index panic.
    match args.get(1).map(String::as_str) {
        Some("-c") | Some("--compile") => {
            let path = args
                .get(2)
                .expect("Missing source file. Usage: kurz -c <file>");
            let file_content =
                fs::read_to_string(path).expect("Could not read the source file");
            let tokens: Vec<Token> = tokenize(&file_content);
            println!("{:?}", tokens);
        }
        Some(opt) => panic!("Unknown option {}", opt),
        None => panic!("No option given. Usage: kurz -c <file>"),
    }
}
/// Convert a completed (non-string) word into a token.
///
/// Extracted so the in-loop and end-of-input paths share one
/// implementation (resolves the duplicate-work TODO).
/// `line`/`col` are the position just past the word's last character.
fn word_to_token(word: &str, line: i32, col: i32) -> Token {
    match word {
        "print" => Token::Intrinsic(word.to_string(), line, col),
        _ => todo!("Unknown word {}", word),
    }
}

/// Split `text` into a list of tokens.
///
/// Runs a three-state machine: `Whitespace` between tokens, `Quote`
/// inside a double-quoted string literal, and `Rest` inside any other
/// word. Recorded positions are where the token *ends*, not where it
/// begins.
///
/// # Panics
/// Panics on a `"` in the middle of a word and on an unterminated
/// string literal at end of input; unknown words hit a `todo!`.
fn tokenize(text: &str) -> Vec<Token> {
    let mut tokens: Vec<Token> = Vec::new();
    let mut line = 1;
    let mut col = 1;
    let mut state = TokenizerState::Whitespace;
    let mut word = String::new();
    for ch in text.chars() {
        match state {
            TokenizerState::Whitespace => {
                // Skip whitespace; anything else begins a new token.
                if !ch.is_whitespace() {
                    if ch == '"' {
                        state = TokenizerState::Quote;
                    } else {
                        state = TokenizerState::Rest;
                        word.push(ch);
                    }
                }
            }
            TokenizerState::Quote => {
                if ch == '"' {
                    // Closing quote: emit the literal (quotes excluded).
                    state = TokenizerState::Whitespace;
                    tokens.push(Token::StringLit(word.clone(), line, col));
                    word.clear();
                } else {
                    word.push(ch);
                }
            }
            TokenizerState::Rest => {
                if ch.is_whitespace() {
                    state = TokenizerState::Whitespace;
                    tokens.push(word_to_token(&word, line, col));
                    // BUG FIX: the word buffer was never cleared here, so
                    // every later token accumulated on top of this one
                    // (e.g. "print print" lexed the second word as
                    // "printprint").
                    word.clear();
                } else if ch == '"' {
                    panic!("Having '\"' in the middle of a word is not allowed");
                } else {
                    word.push(ch);
                }
            }
        }
        // Advance the position; a newline resets the column.
        col += 1;
        if ch == '\n' {
            col = 1;
            line += 1;
        }
    }
    // Flush whatever state we reached at end of input.
    match state {
        TokenizerState::Quote => panic!("Encountered EOF before closing string"),
        TokenizerState::Whitespace => {}
        TokenizerState::Rest => tokens.push(word_to_token(&word, line, col)),
    }
    tokens
}
/// State of the tokenizer's scanning loop.
///
/// Fieldless, so it derives `Copy` and the cheap comparison/debug
/// traits for free.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TokenizerState {
    /// Between tokens, consuming whitespace.
    Whitespace,
    /// Inside a double-quoted string literal.
    Quote,
    /// Inside any other (non-string) word.
    Rest,
}