// Entry point and tokenizer of the `kurz` crate (`src/main.rs`).
//
// Recovered from the patch "Initial commit, barebones tokenizer working"
// (commit 9976ef9fe92a805da8735821205d4ac33588132d, author 0x4261756D,
// Tue, 29 Nov 2022 02:04:01 +0100). The same patch also created:
//   - `.gitignore`  containing `/target`
//   - `Cargo.lock`  (version 3, package `kurz` 0.1.0)
//   - `Cargo.toml`  (package `kurz`, version 0.1.0, edition 2021, no dependencies)
//   - `test.qbl`    containing `"Hello, World!\n" print` (no trailing newline)

use std::env;
use std::fs;

/// A lexical token together with the source position at which it *ended*.
///
/// Every variant carries `(text, line, col)`, where `line` and `col` are 1-based.
/// For a string literal the position is that of the closing `"`. For a word it is
/// the position right after its last character (the terminating whitespace
/// character, or one past the end of the input).
#[derive(Debug, Clone, PartialEq, Eq)]
enum Token
{
    /// Contents of a `"`-delimited string, without the quotes and without any
    /// escape processing (a backslash stays a backslash).
    StringLit(String, i32, i32),
    /// A built-in word; currently only `print` is recognised.
    Intrinsic(String, i32, i32),
}

/// Parses the command line and prints the tokens of the requested source file.
///
/// Usage: `kurz -c <file>` or `kurz --compile <file>`.
///
/// # Panics
///
/// Panics when the option or the file path is missing, when the option is
/// unknown, when the file cannot be read, or when `tokenize` panics.
fn main()
{
    let args: Vec<String> = env::args().collect();
    // `get` instead of indexing so that a missing argument yields a meaningful
    // message rather than an index-out-of-bounds panic.
    match args.get(1).map(String::as_str)
    {
        Some("-c" | "--compile") =>
        {
            let path = args.get(2).expect("Missing path to the source file");
            let file_content = fs::read_to_string(path).expect("Could not read the source file");
            let tokens: Vec<Token> = tokenize(&file_content);
            println!("{:?}", tokens);
        }
        Some(_) => panic!("Unknown option"),
        None => panic!("Missing option, expected -c or --compile"),
    }
}

/// Turns a finished word into its token.
///
/// # Panics
///
/// Panics via `todo!` for every word that is not a known intrinsic.
fn word_to_token(word: String, line: i32, col: i32) -> Token
{
    match word.as_str()
    {
        "print" => Token::Intrinsic(word, line, col),
        _ => todo!("Unknown word {}", word),
    }
}

/// Splits `text` into tokens using a three-state machine (see `TokenizerState`).
///
/// Whitespace separates tokens. A `"` that follows whitespace (or starts the
/// input) opens a string literal that runs up to the next `"`; any other
/// non-whitespace character starts a word that runs up to the next whitespace
/// character or the end of the input.
///
/// # Panics
///
/// * when the input ends inside a string literal,
/// * when a `"` appears in the middle of a word,
/// * when a word is not a known intrinsic (see `word_to_token`).
fn tokenize(text: &str) -> Vec<Token>
{
    let mut tokens: Vec<Token> = Vec::new();
    let mut line: i32 = 1;
    let mut col: i32 = 1;
    let mut state = TokenizerState::Whitespace;
    // Characters of the token currently being built; emptied each time a token is emitted.
    let mut word = String::new();
    for ch in text.chars()
    {
        match state
        {
            TokenizerState::Whitespace =>
            {
                if ch == '"'
                {
                    state = TokenizerState::Quote;
                }
                else if !ch.is_whitespace()
                {
                    state = TokenizerState::Rest;
                    word.push(ch);
                }
                // Whitespace between tokens is skipped.
            }
            TokenizerState::Quote =>
            {
                if ch == '"'
                {
                    state = TokenizerState::Whitespace;
                    // `take` hands the buffer to the token and leaves `word` empty.
                    tokens.push(Token::StringLit(std::mem::take(&mut word), line, col));
                }
                else
                {
                    word.push(ch);
                }
            }
            TokenizerState::Rest =>
            {
                if ch.is_whitespace()
                {
                    state = TokenizerState::Whitespace;
                    // The buffer must be emptied here, otherwise the next word
                    // would be appended to this one.
                    tokens.push(word_to_token(std::mem::take(&mut word), line, col));
                }
                else if ch == '"'
                {
                    panic!("Having '\"' in the middle of a word is not allowed");
                }
                else
                {
                    word.push(ch);
                }
            }
        }
        // Advance the position only after the character has been handled, so
        // tokens emitted above see the position of `ch` itself.
        col += 1;
        if ch == '\n'
        {
            col = 1;
            line += 1;
        }
    }
    // Flush whatever was still being built when the input ended.
    match state
    {
        TokenizerState::Quote => panic!("Encountered EOF before closing string"),
        TokenizerState::Whitespace => {}
        TokenizerState::Rest => tokens.push(word_to_token(word, line, col)),
    }
    tokens
}

/// What the tokenizer is currently reading.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TokenizerState
{
    /// Between tokens.
    Whitespace,
    /// Inside a `"`-delimited string literal.
    Quote,
    /// Inside any other word.
    Rest,
}