Compare commits

...

3 Commits

Author SHA1 Message Date
0x4261756D
1e8322cc7c Merge branch 'main' of https://gittea.dev/0x4261756D/kurz 2022-12-14 01:48:14 +01:00
0x4261756D
6a2bc25eaf Implement function extraction and their removal from the token stream 2022-12-05 00:38:20 +01:00
0x4261756D
9976ef9fe9 Initial commit, barebones tokenizer working 2022-11-29 02:04:01 +01:00
5 changed files with 332 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

7
Cargo.lock generated Normal file
View File

@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "kurz"
version = "0.1.0"

8
Cargo.toml Normal file
View File

@ -0,0 +1,8 @@
[package]
name = "kurz"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

306
src/main.rs Normal file
View File

@ -0,0 +1,306 @@
use core::panic;
use std::env;
use std::fs;
use std::process::exit;
/// A lexical token produced by `tokenize`.
///
/// Every variant carries a 1-based (line, column) source position.
/// NOTE(review): the stored column appears to be the column just *after*
/// the token's last character, not its start — confirm before relying on it.
#[derive(Debug, Clone, PartialEq)]
enum Token
{
/// A double-quoted string literal; quotes are stripped and no escape
/// sequences are processed (characters are stored verbatim).
StringLit(String, i32, i32),
/// A whitespace-delimited word that parses as an `i64`.
IntLit(i64, i32, i32),
/// Any other whitespace-delimited word (identifiers, operators, `{`, `}`, ...).
Keyword(String, i32, i32),
}
/// State of the character-level loop in `tokenize`.
enum TokenizerState
{
/// Between tokens, skipping whitespace.
Whitespace,
/// Inside a double-quoted string literal.
Quote,
/// Accumulating a non-string word (keyword or integer literal).
Keyword,
/// Inside a `//` line comment; input is discarded until the next newline.
Comment,
}
/// A datatype usable in a function signature.
/// Spelled `int`, `str`, `ptr` and `any` in source (see `extract_functions`).
#[derive(Debug,Clone,Copy)]
enum Datatype
{
Int,
String,
Pointer,
Any,
}
/// A user-defined function extracted from the token stream by
/// `extract_functions`.
#[derive(Debug)]
struct Function
{
/// Function name; uniqueness is enforced during extraction.
name: String,
/// Input datatypes (listed before `=>` in the declaration).
ins: Vec<Datatype>,
/// Output datatypes (listed after `=>`).
outs: Vec<Datatype>,
/// Body tokens between `{` and `}`, braces excluded.
content: Vec<Token>
}
/// Entry point.
///
/// Usage: `kurz -c path/to/file` (or `--compile`). Tokenizes the given
/// source file, extracts function definitions from the token stream, and
/// prints the intermediate results for inspection.
fn main()
{
    let args: Vec<String> = env::args().collect();
    if args.len() < 2
    {
        usage()
    }
    match args[1].as_str()
    {
        "-c" | "--compile" =>
        {
            // `-c` requires a file path; previously `args[2]` was indexed
            // unchecked and panicked with an index-out-of-bounds error when
            // the path was missing.
            if args.len() < 3
            {
                usage()
            }
            let file_content = fs::read_to_string(&args[2]).expect("Could not read the source file");
            let mut tokens: Vec<Token> = tokenize(&file_content);
            println!("{:?}", tokens);
            // Removes function-definition tokens from `tokens` as a side effect.
            let functions: Vec<Function> = extract_functions(&mut tokens);
            println!("{:?}", tokens);
            println!("{:?}", functions);
        }
        _ => panic!("Unknown option {}", args[1])
    }
}
/// Scan `tokens` for declarations of the form
/// `function <ins...> => <outs...> <name> { <body...> }`,
/// collect them into `Function` values, and remove every token belonging
/// to a declaration from the stream in place.
///
/// Panics on malformed declarations and on duplicate function names.
/// NOTE(review): removal is done index-by-index with `Vec::remove`, which
/// is O(n²) in the worst case — `retain` with an index set would be linear.
fn extract_functions(tokens: &mut Vec<Token>) -> Vec<Function>
{
let mut functions: Vec<Function> = Vec::new();
let mut state = FunctionExtractionState::Outside;
// Accumulators for the declaration currently being parsed; cleared after
// each completed function so they can be reused.
let mut ins: Vec<Datatype> = Vec::new();
let mut outs: Vec<Datatype> = Vec::new();
let mut function_name = String::from("");
let mut content: Vec<Token> = Vec::new();
// Indices of tokens that belong to function declarations, collected so
// they can be removed from the stream after iteration.
let mut indices_to_remove: Vec<usize> = Vec::new();
for (i, token) in tokens.iter().enumerate()
{
match state
{
FunctionExtractionState::Outside =>
{
// Only the `function` keyword starts a declaration; everything
// else is left untouched in the stream.
if let Token::Keyword(name, _, _) = token
{
if name == &String::from("function")
{
state = FunctionExtractionState::Ins;
}
}
}
FunctionExtractionState::Ins =>
{
// Collect input datatypes until the `=>` separator.
match token
{
Token::Keyword(name, line, col) =>
{
match name.as_str()
{
"int" => ins.push(Datatype::Int),
"str" => ins.push(Datatype::String),
"ptr" => ins.push(Datatype::Pointer),
"any" => ins.push(Datatype::Any),
"=>" => state = FunctionExtractionState::Outs,
_ => panic!("Unknown datatype '{}' at {}:{}", name, line, col)
}
},
Token::StringLit(_, line, col) | Token::IntLit(_, line, col) => panic!("Expected datatype for function declaration at {}:{}", line, col),
}
}
FunctionExtractionState::Outs =>
{
// Collect output datatypes; the first keyword that is not a
// datatype is taken as the function's name.
match token
{
Token::Keyword(name, _, _) =>
{
match name.as_str()
{
"int" => outs.push(Datatype::Int),
"str" => outs.push(Datatype::String),
"ptr" => outs.push(Datatype::Pointer),
"any" => outs.push(Datatype::Any),
_ =>
{
// NOTE(review): this inner `if let` always matches — we are
// already inside the `Token::Keyword` arm — so the `else`
// branch below is unreachable dead code.
if let Token::Keyword(name, _, _) = token
{
if functions.iter().any(|x| &x.name == name)
{
panic!("A function with name {} already exists", name);
}
function_name = name.clone();
}
else
{
panic!("Expected a function name") // TODO: Add location
}
state =FunctionExtractionState::OpenCurly;
}
}
},
Token::StringLit(_, line, col) | Token::IntLit(_, line, col) => panic!("Expected datatype for function declaration at {}:{}", line, col),
}
}
FunctionExtractionState::OpenCurly =>
{
// Exactly one `{` keyword must follow the function name.
if let Token::Keyword(name, line, col) = token
{
if name == "{"
{
state = FunctionExtractionState::Body
}
else
{
panic!("Expected '{{' to open the function's body at {}:{}", line, col)
}
}
else
{
panic!("Expected '{{' to open the function's body") // TODO: Add location
}
}
FunctionExtractionState::Body =>
{
// Body tokens are buffered until the closing `}`; nested braces
// are not supported (the first `}` always closes the function).
if let Token::Keyword(name, _, _) = token
{
if name == "}"
{
state = FunctionExtractionState::Outside;
functions.push(Function { name: function_name.clone(), ins: ins.clone() , outs: outs.clone(), content: content.clone()});
function_name.clear();
ins.clear();
outs.clear();
content.clear();
// `continue` skips the trailing state check below, so the `}`
// index is pushed here exactly once.
indices_to_remove.push(i);
continue;
}
}
content.push(token.clone());
}
}
// Any token consumed while inside a declaration (including the
// `function` keyword itself, which just changed the state) is marked
// for removal from the stream.
if state != FunctionExtractionState::Outside
{
indices_to_remove.push(i);
}
}
// Remove from the back so earlier indices stay valid.
indices_to_remove.reverse();
for i in indices_to_remove
{
tokens.remove(i);
}
return functions;
}
/// States of the declaration-parsing machine in `extract_functions`.
#[derive(Debug, PartialEq)]
enum FunctionExtractionState
{
/// Not inside a function declaration.
Outside,
/// Reading input datatypes (after `function`, before `=>`).
Ins,
/// Reading output datatypes, then the function name (after `=>`).
Outs,
/// Expecting the `{` that opens the function body.
OpenCurly,
/// Buffering body tokens until the closing `}`.
Body,
}
/// Print the invocation help and terminate the process.
///
/// This is only reached on an invalid invocation, so the message goes to
/// stderr and the exit status is non-zero (previously it printed to stdout
/// and exited 0, which made a bad invocation look like success to shell
/// scripts and pipelines).
fn usage() -> !
{
    eprintln!("Usage: kurz -c path/to/file");
    exit(1);
}
/// Split `text` into a flat list of tokens.
///
/// Rules:
/// - Tokens are separated by whitespace.
/// - `"..."` yields a `StringLit`; quotes are stripped, escape sequences
///   are NOT processed, and the string may span multiple lines.
/// - Any other word becomes an `IntLit` if it parses as `i64`, otherwise a
///   `Keyword`.
/// - `//` starts a line comment running to end of line — but only outside
///   string literals (fix: previously `//` inside a string, e.g. in a URL,
///   incorrectly started a comment and corrupted the literal).
///
/// # Panics
/// Panics on a `"` in the middle of a word and on EOF inside an unclosed
/// string literal.
fn tokenize(text: &str) -> Vec<Token>
{
    let mut tokens: Vec<Token> = Vec::new();
    let mut line = 1;
    let mut col = 1;
    let mut state = TokenizerState::Whitespace;
    let mut word = String::new();
    let mut iter = text.chars().peekable();
    while let Some(ch) = iter.next()
    {
        // A `//` sequence starts a comment only when we are not inside a
        // string literal.
        if ch == '/' && iter.peek() == Some(&'/') && !matches!(state, TokenizerState::Quote)
        {
            // Fix: flush a word in progress before swallowing the comment,
            // so `foo// bar` yields the token `foo` instead of leaking the
            // partial word into the token after the comment.
            if matches!(state, TokenizerState::Keyword)
            {
                if let Ok(number) = word.parse::<i64>()
                {
                    tokens.push(Token::IntLit(number, line, col));
                }
                else
                {
                    tokens.push(Token::Keyword(word.clone(), line, col));
                }
                word.clear();
            }
            state = TokenizerState::Comment;
        }
        match state
        {
            TokenizerState::Comment =>
            {
                // Discard everything up to (but not including) the newline.
                if ch == '\n'
                {
                    state = TokenizerState::Whitespace;
                }
            }
            TokenizerState::Whitespace =>
            {
                // Whitespace between tokens is skipped; any other character
                // starts either a string literal or a word.
                if !ch.is_whitespace()
                {
                    match ch
                    {
                        '"' =>
                        {
                            state = TokenizerState::Quote;
                        }
                        _ =>
                        {
                            state = TokenizerState::Keyword;
                            word.push(ch);
                        }
                    }
                }
            }
            TokenizerState::Quote =>
            {
                // Characters are stored verbatim; no escape processing.
                if ch == '"'
                {
                    state = TokenizerState::Whitespace;
                    tokens.push(Token::StringLit(word.clone(), line, col));
                    word.clear();
                }
                else
                {
                    word.push(ch);
                }
            }
            TokenizerState::Keyword =>
            {
                if ch.is_whitespace()
                {
                    state = TokenizerState::Whitespace;
                    // A word that parses as i64 is an integer literal.
                    if let Ok(number) = word.parse::<i64>()
                    {
                        tokens.push(Token::IntLit(number, line, col));
                    }
                    else
                    {
                        tokens.push(Token::Keyword(word.clone(), line, col));
                    }
                    word.clear();
                }
                else
                {
                    match ch
                    {
                        '"' => panic!("Having '\"' in the middle of a word is not allowed"),
                        _ =>
                        {
                            word.push(ch);
                        }
                    }
                }
            }
        }
        // Track the 1-based position of the next character.
        col += 1;
        if ch == '\n'
        {
            col = 1;
            line += 1;
        }
    }
    // EOF: reject an unterminated string, flush a trailing word.
    match state
    {
        TokenizerState::Quote =>
        {
            panic!("Encountered EOF before closing string");
        }
        TokenizerState::Whitespace | TokenizerState::Comment => {},
        TokenizerState::Keyword =>
        {
            // Fix: a trailing word at EOF was always pushed as a Keyword,
            // even when it parsed as an integer — inconsistent with the
            // mid-stream handling above.
            if let Ok(number) = word.parse::<i64>()
            {
                tokens.push(Token::IntLit(number, line, col));
            }
            else
            {
                tokens.push(Token::Keyword(word.clone(), line, col));
            }
        }
    }
    tokens
}

10
test.qbl Normal file
View File

@ -0,0 +1,10 @@
"Hello, World!\n" print 43 foo foo deq
// Dequeues, enqueues 42 and 17, prints the head
function any => int foo
{
deq 42 17 print
}
"test2" print