2022-12-05 00:38:20 +01:00
|
|
|
use core::panic;
|
2022-12-14 04:12:03 +01:00
|
|
|
use std::collections::HashMap;
|
2022-11-29 02:04:01 +01:00
|
|
|
use std::env;
|
|
|
|
use std::fs;
|
2022-12-05 00:38:20 +01:00
|
|
|
use std::process::exit;
|
2022-11-29 02:04:01 +01:00
|
|
|
|
2022-12-05 00:38:20 +01:00
|
|
|
/// A lexical token produced by `tokenize`.
///
/// Every variant carries its payload followed by the line and column that the
/// tokenizer recorded when the token was emitted.
#[derive(Debug, Clone, PartialEq)]
enum Token {
    /// A string literal; the payload is the text between the double quotes.
    StringLit(String, i32, i32),
    /// A word that parsed successfully as an `i64`.
    IntLit(i64, i32, i32),
    /// Any other whitespace-delimited word.
    Keyword(String, i32, i32),
}
|
2022-12-14 04:12:03 +01:00
|
|
|
|
2022-12-05 00:38:20 +01:00
|
|
|
/// The state of the character-level state machine used by `tokenize`.
enum TokenizerState {
    /// Between tokens.
    Whitespace,
    /// Inside a double-quoted string literal.
    Quote,
    /// Inside a whitespace-delimited word.
    Keyword,
    /// Inside a `//` comment, which lasts until the next newline.
    Comment,
}
|
|
|
|
|
2022-12-14 04:12:03 +01:00
|
|
|
/// The value types tracked by the queue type checker.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Datatype {
    /// Spelled `int` in function signatures.
    Int,
    /// Spelled `str` in function signatures.
    String,
    /// Spelled `ptr` in function signatures.
    Pointer,
    /// Spelled `any` in function signatures; the type checker accepts it in
    /// place of every other type.
    Any,
}
|
|
|
|
|
|
|
|
#[derive(Debug)]
|
|
|
|
struct Function
|
|
|
|
{
|
|
|
|
name: String,
|
|
|
|
ins: Vec<Datatype>,
|
|
|
|
outs: Vec<Datatype>,
|
|
|
|
content: Vec<Token>
|
2022-11-29 02:04:01 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
fn main()
|
|
|
|
{
|
2022-12-14 04:12:03 +01:00
|
|
|
let intrinsics: HashMap<&str, (Vec<Datatype>, Vec<Datatype>)> = HashMap::from(
|
|
|
|
[
|
|
|
|
("print", (Vec::from([Datatype::Any]), Vec::new())),
|
|
|
|
("deq", (Vec::from([Datatype::Any]), Vec::new())),
|
|
|
|
]);
|
2022-11-29 02:04:01 +01:00
|
|
|
let args: Vec<String> = env::args().collect();
|
2022-12-05 00:38:20 +01:00
|
|
|
if args.len() < 2
|
|
|
|
{
|
|
|
|
usage()
|
|
|
|
}
|
2022-11-29 02:04:01 +01:00
|
|
|
match args[1].as_str()
|
|
|
|
{
|
|
|
|
"-c" | "--compile" =>
|
|
|
|
{
|
|
|
|
let file_content = fs::read_to_string(&args[2]).expect("Could not read the source file");
|
2022-12-05 00:38:20 +01:00
|
|
|
let mut tokens: Vec<Token> = tokenize(&file_content);
|
2022-11-29 02:04:01 +01:00
|
|
|
println!("{:?}", tokens);
|
2022-12-14 04:12:03 +01:00
|
|
|
let functions: Vec<Function> = extract_functions(&mut tokens, &intrinsics);
|
2022-12-05 00:38:20 +01:00
|
|
|
println!("{:?}", functions);
|
2022-12-14 04:12:03 +01:00
|
|
|
validate(&tokens, &functions, &intrinsics);
|
2022-11-29 02:04:01 +01:00
|
|
|
}
|
2022-12-05 00:38:20 +01:00
|
|
|
_ => panic!("Unknown option {}", args[1])
|
2022-11-29 02:04:01 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-12-14 04:12:03 +01:00
|
|
|
fn validate(tokens: &Vec<Token>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>)
|
|
|
|
{
|
|
|
|
validate_tokens(tokens, functions, intrinsics);
|
|
|
|
println!("Validating main scope succeeded");
|
|
|
|
for function in functions
|
|
|
|
{
|
|
|
|
validate_tokens(&function.content, functions, intrinsics);
|
|
|
|
println!("Validating function {} succeeded", function.name);
|
|
|
|
}
|
|
|
|
validate_queue_integrity(tokens, &Vec::new(), &Vec::new(), functions, intrinsics);
|
|
|
|
println!("Validating queue integrity for main scope succeeded");
|
|
|
|
for function in functions
|
|
|
|
{
|
|
|
|
validate_queue_integrity(&function.content, &function.ins, &function.outs, functions, intrinsics);
|
|
|
|
println!("Validating queue integrity for function {} succeeded", function.name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Keywords that introduce a control structure; `validate_tokens` only accepts
/// a `{` directly after one of these.
const CONTROL_STRUCTURE_NAMES: [&str; 3] = ["if", "else", "while"];
|
|
|
|
|
|
|
|
fn validate_tokens(tokens: &Vec<Token>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>)
|
|
|
|
{
|
|
|
|
let mut depth = 0;
|
|
|
|
let mut starting_control_structure = false;
|
|
|
|
for token in tokens
|
|
|
|
{
|
|
|
|
match token
|
|
|
|
{
|
|
|
|
Token::Keyword(name, line, col) =>
|
|
|
|
{
|
|
|
|
match name.as_str()
|
|
|
|
{
|
|
|
|
"{" =>
|
|
|
|
{
|
|
|
|
if !starting_control_structure
|
|
|
|
{
|
|
|
|
panic!("Control structure without '{{' at {}:{}", line, col);
|
|
|
|
}
|
|
|
|
depth += 1;
|
|
|
|
}
|
|
|
|
"}" =>
|
|
|
|
{
|
|
|
|
depth -= 1;
|
|
|
|
if depth < 0
|
|
|
|
{
|
|
|
|
panic!("Encountered '}}' without matching '{{' at {}:{}", line, col);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
_ =>
|
|
|
|
{
|
|
|
|
if !CONTROL_STRUCTURE_NAMES.contains(&name.as_str()) && !functions.iter().any(|x| &x.name == name) && !intrinsics.contains_key(name.as_str())
|
|
|
|
{
|
|
|
|
panic!("Unknown word {name} at {}:{}", line, col);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
starting_control_structure = CONTROL_STRUCTURE_NAMES.contains(&name.as_str());
|
|
|
|
}
|
|
|
|
_ => {}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn validate_queue_integrity(tokens: &Vec<Token>, ins: &Vec<Datatype>, outs: &Vec<Datatype>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>)
|
|
|
|
{
|
|
|
|
let mut type_queue: Vec<Datatype> = Vec::new();
|
|
|
|
type_queue.extend_from_slice(ins);
|
|
|
|
for token in tokens
|
|
|
|
{
|
|
|
|
match token
|
|
|
|
{
|
|
|
|
Token::IntLit(_, _, _) => type_queue.push(Datatype::Int),
|
|
|
|
Token::StringLit(_, _, _) => type_queue.push(Datatype::String),
|
|
|
|
Token::Keyword(name, line, col) =>
|
|
|
|
{
|
|
|
|
if intrinsics.contains_key(name.as_str())
|
|
|
|
{
|
|
|
|
for req_type in &intrinsics.get(name.as_str()).unwrap().0
|
|
|
|
{
|
|
|
|
let found_type = type_queue.remove(0);
|
|
|
|
if req_type != &Datatype::Any && found_type != Datatype::Any && req_type != &found_type
|
|
|
|
{
|
|
|
|
panic!("Expected {:?} but got {:?} for {} at {}:{}", req_type, found_type, name, line, col);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for out_type in &intrinsics.get(name.as_str()).unwrap().1
|
|
|
|
{
|
|
|
|
type_queue.push(out_type.clone());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if let Some(function) = functions.iter().find(|x| &x.name == name)
|
|
|
|
{
|
|
|
|
for req_type in &function.ins
|
|
|
|
{
|
|
|
|
let found_type = type_queue.remove(0);
|
|
|
|
if req_type != &Datatype::Any && found_type != Datatype::Any && req_type != &found_type
|
|
|
|
{
|
|
|
|
panic!("Expected {:?} but got {:?} for {} at {}:{}", req_type, found_type, name, line, col);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for out_type in &function.outs
|
|
|
|
{
|
|
|
|
type_queue.push(out_type.clone());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if CONTROL_STRUCTURE_NAMES.contains(&name.as_str())
|
|
|
|
{
|
|
|
|
match name.as_str()
|
|
|
|
{
|
|
|
|
"if" =>
|
|
|
|
{
|
|
|
|
let found_type = type_queue.remove(0) ;
|
|
|
|
if found_type != Datatype::Any && found_type != Datatype::Int
|
|
|
|
{
|
|
|
|
panic!("Wrong type {:?} for if at {}:{}", found_type, line, col);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
_ => todo!()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
panic!("Unrecognized keyword {} at {}:{}", name, line, col);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn extract_functions(tokens: &mut Vec<Token>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>) -> Vec<Function>
|
2022-12-05 00:38:20 +01:00
|
|
|
{
|
|
|
|
let mut functions: Vec<Function> = Vec::new();
|
|
|
|
let mut state = FunctionExtractionState::Outside;
|
|
|
|
let mut ins: Vec<Datatype> = Vec::new();
|
|
|
|
let mut outs: Vec<Datatype> = Vec::new();
|
|
|
|
let mut function_name = String::from("");
|
|
|
|
let mut content: Vec<Token> = Vec::new();
|
|
|
|
let mut indices_to_remove: Vec<usize> = Vec::new();
|
2022-12-14 04:12:03 +01:00
|
|
|
let mut depth = 0;
|
2022-12-05 00:38:20 +01:00
|
|
|
for (i, token) in tokens.iter().enumerate()
|
|
|
|
{
|
|
|
|
match state
|
|
|
|
{
|
|
|
|
FunctionExtractionState::Outside =>
|
|
|
|
{
|
|
|
|
if let Token::Keyword(name, _, _) = token
|
|
|
|
{
|
|
|
|
if name == &String::from("function")
|
|
|
|
{
|
|
|
|
state = FunctionExtractionState::Ins;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
FunctionExtractionState::Ins =>
|
|
|
|
{
|
|
|
|
match token
|
|
|
|
{
|
|
|
|
Token::Keyword(name, line, col) =>
|
|
|
|
{
|
|
|
|
match name.as_str()
|
|
|
|
{
|
|
|
|
"int" => ins.push(Datatype::Int),
|
|
|
|
"str" => ins.push(Datatype::String),
|
|
|
|
"ptr" => ins.push(Datatype::Pointer),
|
|
|
|
"any" => ins.push(Datatype::Any),
|
|
|
|
"=>" => state = FunctionExtractionState::Outs,
|
|
|
|
_ => panic!("Unknown datatype '{}' at {}:{}", name, line, col)
|
|
|
|
}
|
|
|
|
},
|
|
|
|
Token::StringLit(_, line, col) | Token::IntLit(_, line, col) => panic!("Expected datatype for function declaration at {}:{}", line, col),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
FunctionExtractionState::Outs =>
|
|
|
|
{
|
|
|
|
match token
|
|
|
|
{
|
|
|
|
Token::Keyword(name, _, _) =>
|
|
|
|
{
|
|
|
|
match name.as_str()
|
|
|
|
{
|
|
|
|
"int" => outs.push(Datatype::Int),
|
|
|
|
"str" => outs.push(Datatype::String),
|
|
|
|
"ptr" => outs.push(Datatype::Pointer),
|
|
|
|
"any" => outs.push(Datatype::Any),
|
|
|
|
_ =>
|
|
|
|
{
|
|
|
|
if let Token::Keyword(name, _, _) = token
|
|
|
|
{
|
|
|
|
if functions.iter().any(|x| &x.name == name)
|
|
|
|
{
|
|
|
|
panic!("A function with name {} already exists", name);
|
|
|
|
}
|
2022-12-14 04:12:03 +01:00
|
|
|
if intrinsics.contains_key(name.as_str())
|
|
|
|
{
|
|
|
|
panic!("Function names cannot have the name of intrinsics: {}", name);
|
|
|
|
}
|
2022-12-05 00:38:20 +01:00
|
|
|
function_name = name.clone();
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
panic!("Expected a function name") // TODO: Add location
|
|
|
|
}
|
|
|
|
state =FunctionExtractionState::OpenCurly;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
Token::StringLit(_, line, col) | Token::IntLit(_, line, col) => panic!("Expected datatype for function declaration at {}:{}", line, col),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
FunctionExtractionState::OpenCurly =>
|
|
|
|
{
|
|
|
|
if let Token::Keyword(name, line, col) = token
|
|
|
|
{
|
|
|
|
if name == "{"
|
|
|
|
{
|
2022-12-14 04:12:03 +01:00
|
|
|
depth += 1;
|
2022-12-05 00:38:20 +01:00
|
|
|
state = FunctionExtractionState::Body
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
panic!("Expected '{{' to open the function's body at {}:{}", line, col)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
panic!("Expected '{{' to open the function's body") // TODO: Add location
|
|
|
|
}
|
|
|
|
}
|
|
|
|
FunctionExtractionState::Body =>
|
|
|
|
{
|
|
|
|
if let Token::Keyword(name, _, _) = token
|
|
|
|
{
|
2022-12-14 04:12:03 +01:00
|
|
|
match name.as_str()
|
2022-12-05 00:38:20 +01:00
|
|
|
{
|
2022-12-14 04:12:03 +01:00
|
|
|
"{" =>
|
|
|
|
{
|
|
|
|
depth += 1;
|
|
|
|
}
|
|
|
|
"}" =>
|
|
|
|
{
|
|
|
|
depth -= 1;
|
|
|
|
if depth == 0
|
|
|
|
{
|
|
|
|
state = FunctionExtractionState::Outside;
|
|
|
|
functions.push(Function { name: function_name.clone(), ins: ins.clone() , outs: outs.clone(), content: content.clone()});
|
|
|
|
function_name.clear();
|
|
|
|
ins.clear();
|
|
|
|
outs.clear();
|
|
|
|
content.clear();
|
|
|
|
indices_to_remove.push(i);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
_ => {}
|
2022-12-05 00:38:20 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
content.push(token.clone());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if state != FunctionExtractionState::Outside
|
|
|
|
{
|
|
|
|
indices_to_remove.push(i);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
indices_to_remove.reverse();
|
|
|
|
for i in indices_to_remove
|
|
|
|
{
|
|
|
|
tokens.remove(i);
|
|
|
|
}
|
|
|
|
return functions;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// The position of `extract_functions` relative to a function declaration.
#[derive(Debug, PartialEq)]
enum FunctionExtractionState {
    /// Not inside any declaration.
    Outside,
    /// After `function`, reading input types until `=>`.
    Ins,
    /// After `=>`, reading output types until the function name.
    Outs,
    /// After the function name, expecting `{`.
    OpenCurly,
    /// Inside the function body.
    Body,
}
|
|
|
|
|
|
|
|
/// Prints the command line synopsis to standard output and terminates the
/// process with exit status 0.
fn usage() {
    const SYNOPSIS: &str = "Usage: kurz -c path/to/file";
    println!("{}", SYNOPSIS);
    exit(0);
}
|
|
|
|
|
2022-11-29 02:04:01 +01:00
|
|
|
fn tokenize(text: &str) -> Vec<Token>
|
|
|
|
{
|
|
|
|
let mut tokens: Vec<Token> = Vec::new();
|
|
|
|
let mut line = 1;
|
|
|
|
let mut col = 1;
|
|
|
|
let mut state = TokenizerState::Whitespace;
|
|
|
|
let mut word = String::new();
|
2022-12-05 00:38:20 +01:00
|
|
|
let mut iter = text.chars().peekable();
|
|
|
|
while let Some(ch) = iter.next()
|
2022-11-29 02:04:01 +01:00
|
|
|
{
|
2022-12-05 00:38:20 +01:00
|
|
|
if ch == '/' && iter.peek() == Some(&'/')
|
|
|
|
{
|
|
|
|
state = TokenizerState::Comment;
|
|
|
|
}
|
2022-11-29 02:04:01 +01:00
|
|
|
match state
|
|
|
|
{
|
2022-12-05 00:38:20 +01:00
|
|
|
TokenizerState::Comment =>
|
|
|
|
{
|
|
|
|
if ch == '\n'
|
|
|
|
{
|
|
|
|
state = TokenizerState::Whitespace;
|
|
|
|
}
|
|
|
|
}
|
2022-11-29 02:04:01 +01:00
|
|
|
TokenizerState::Whitespace =>
|
|
|
|
{
|
|
|
|
// If ch is whitespace, do nothing
|
|
|
|
if !ch.is_whitespace()
|
|
|
|
{
|
|
|
|
match ch
|
|
|
|
{
|
|
|
|
'"' =>
|
|
|
|
{
|
|
|
|
state = TokenizerState::Quote;
|
|
|
|
}
|
|
|
|
_ =>
|
|
|
|
{
|
2022-12-05 00:38:20 +01:00
|
|
|
state = TokenizerState::Keyword;
|
2022-11-29 02:04:01 +01:00
|
|
|
word.push(ch);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
TokenizerState::Quote =>
|
|
|
|
{
|
|
|
|
if ch == '"'
|
|
|
|
{
|
|
|
|
state = TokenizerState::Whitespace;
|
|
|
|
tokens.push(Token::StringLit(word.clone(), line, col));
|
|
|
|
word.clear();
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
word.push(ch);
|
|
|
|
}
|
|
|
|
}
|
2022-12-05 00:38:20 +01:00
|
|
|
TokenizerState::Keyword =>
|
2022-11-29 02:04:01 +01:00
|
|
|
{
|
|
|
|
if ch.is_whitespace()
|
|
|
|
{
|
|
|
|
state = TokenizerState::Whitespace;
|
2022-12-05 00:38:20 +01:00
|
|
|
if let Ok(number) = word.parse::<i64>()
|
|
|
|
{
|
|
|
|
tokens.push(Token::IntLit(number, line, col));
|
|
|
|
}
|
|
|
|
else
|
2022-11-29 02:04:01 +01:00
|
|
|
{
|
2022-12-05 00:38:20 +01:00
|
|
|
tokens.push(Token::Keyword(word.clone(), line, col));
|
|
|
|
}
|
|
|
|
word.clear();
|
2022-11-29 02:04:01 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
match ch
|
|
|
|
{
|
|
|
|
'"' => panic!("Having '\"' in the middle of a word is not allowed"),
|
|
|
|
_ =>
|
|
|
|
{
|
|
|
|
word.push(ch);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
col += 1;
|
|
|
|
if ch == '\n'
|
|
|
|
{
|
|
|
|
col = 1;
|
|
|
|
line += 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
match state
|
|
|
|
{
|
|
|
|
TokenizerState::Quote =>
|
|
|
|
{
|
|
|
|
panic!("Encountered EOF before closing string");
|
|
|
|
}
|
2022-12-05 00:38:20 +01:00
|
|
|
TokenizerState::Whitespace | TokenizerState::Comment => {},
|
|
|
|
TokenizerState::Keyword =>
|
2022-11-29 02:04:01 +01:00
|
|
|
{
|
2022-12-05 00:38:20 +01:00
|
|
|
tokens.push(Token::Keyword(word.clone(), line, col));
|
2022-11-29 02:04:01 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
tokens
|
|
|
|
}
|