Initial commit, barebones tokenizer working

This commit is contained in:
0x4261756D 2022-11-29 02:04:01 +01:00
commit 9976ef9fe9
5 changed files with 144 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

7
Cargo.lock generated Normal file
View File

@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "kurz"
version = "0.1.0"

8
Cargo.toml Normal file
View File

@ -0,0 +1,8 @@
[package]
name = "kurz"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

127
src/main.rs Normal file
View File

@ -0,0 +1,127 @@
use std::env;
use std::fs;
/// A single lexical unit produced by `tokenize`.
///
/// Every variant carries `(text, line, col)`: the token's text followed by the
/// 1-based line and column the tokenizer recorded for it.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Token
{
    /// A double-quoted string literal; the text excludes the surrounding quotes.
    StringLit(String, i32, i32),
    /// A built-in word of the language (currently only `print`).
    Intrinsic(String, i32, i32),
}
/// Entry point. Usage: `kurz (-c | --compile) <file>`.
///
/// Reads the given source file, tokenizes it and prints the debug
/// representation of the resulting token list to stdout.
///
/// # Panics
///
/// Panics with a descriptive message when the option or the file argument is
/// missing, when the option is unknown, or when the file cannot be read.
fn main()
{
    let args: Vec<String> = env::args().collect();
    // `get` instead of indexing: a missing argument should produce a usage
    // hint rather than an "index out of bounds" panic.
    match args.get(1).map(String::as_str)
    {
        Some("-c" | "--compile") =>
        {
            let path = args
                .get(2)
                .expect("Missing source file, usage: kurz (-c | --compile) <file>");
            let file_content = fs::read_to_string(path).expect("Could not read the source file");
            let tokens: Vec<Token> = tokenize(&file_content);
            println!("{:?}", tokens);
        }
        Some(_) => panic!("Unknown option"),
        None => panic!("Missing option, usage: kurz (-c | --compile) <file>"),
    }
}
/// Splits `text` into a flat list of tokens.
///
/// The tokenizer is a three-state machine:
/// * between tokens, whitespace is skipped; a `"` opens a string literal and
///   any other character starts a bare word,
/// * inside a string literal every character up to the closing `"` is taken
///   verbatim (no escape processing is performed),
/// * inside a bare word characters are collected until the next whitespace or
///   the end of the input.
///
/// The line/column stored in each token is the 1-based position of the
/// character that *terminated* it: the closing quote for string literals, the
/// trailing whitespace for words, or one past the last character when a word
/// is ended by the end of the input.
/// NOTE(review): recording the end rather than the start of a token looks
/// unintentional, but is kept as-is here — confirm before changing.
///
/// # Panics
///
/// * if the input ends inside a string literal,
/// * if a `"` appears in the middle of a bare word,
/// * if a bare word is not a known intrinsic (currently a `todo!`).
fn tokenize(text: &str) -> Vec<Token>
{
    let mut tokens: Vec<Token> = Vec::new();
    let mut line = 1;
    let mut col = 1;
    let mut state = TokenizerState::Whitespace;
    let mut word = String::new();
    for ch in text.chars()
    {
        match state
        {
            TokenizerState::Whitespace =>
            {
                if ch == '"'
                {
                    state = TokenizerState::Quote;
                }
                else if !ch.is_whitespace()
                {
                    state = TokenizerState::Rest;
                    word.push(ch);
                }
            }
            TokenizerState::Quote =>
            {
                if ch == '"'
                {
                    state = TokenizerState::Whitespace;
                    // Moves the collected text into the token and leaves `word` empty.
                    tokens.push(Token::StringLit(std::mem::take(&mut word), line, col));
                }
                else
                {
                    word.push(ch);
                }
            }
            TokenizerState::Rest =>
            {
                if ch.is_whitespace()
                {
                    state = TokenizerState::Whitespace;
                    tokens.push(word_to_token(&word, line, col));
                    // The buffer must be reset here, otherwise the next word
                    // would be appended to this one ("print print" -> "printprint").
                    word.clear();
                }
                else if ch == '"'
                {
                    panic!("Having '\"' in the middle of a word is not allowed");
                }
                else
                {
                    word.push(ch);
                }
            }
        }
        col += 1;
        if ch == '\n'
        {
            col = 1;
            line += 1;
        }
    }
    // Handle whatever was still being collected when the input ended.
    match state
    {
        TokenizerState::Quote => panic!("Encountered EOF before closing string"),
        TokenizerState::Whitespace => {}
        TokenizerState::Rest => tokens.push(word_to_token(&word, line, col)),
    }
    tokens
}

/// Classifies a completed bare word and builds its token at `(line, col)`.
fn word_to_token(word: &str, line: i32, col: i32) -> Token
{
    match word
    {
        "print" => Token::Intrinsic(word.to_string(), line, col),
        _ => todo!("Unknown word {}", word)
    }
}
/// The state of the character-driven state machine inside `tokenize`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TokenizerState
{
    /// Between tokens: whitespace is skipped until a token starts.
    Whitespace,
    /// Inside a double-quoted string literal, waiting for the closing `"`.
    Quote,
    /// Inside a bare (unquoted) word, waiting for whitespace or end of input.
    Rest,
}

1
test.qbl Normal file
View File

@ -0,0 +1 @@
"Hello, World!\n" print