Implemented function validation and type checking

This commit is contained in:
0x4261756D 2022-12-14 07:57:51 +01:00
parent ac6a835a64
commit d9e7f18049
2 changed files with 420 additions and 224 deletions

View File

@ -2,13 +2,14 @@ use core::panic;
use std::collections::HashMap; use std::collections::HashMap;
use std::env; use std::env;
use std::fs; use std::fs;
use std::iter::Peekable;
use std::process::exit; use std::process::exit;
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
enum Token enum Token
{ {
StringLit(String, i32, i32), StringLit(String, i32, i32),
IntLit(i64, i32, i32), IntLit(String, i32, i32),
Keyword(String, i32, i32), Keyword(String, i32, i32),
} }
@ -20,22 +21,43 @@ enum TokenizerState
Comment, Comment,
} }
#[derive(Debug,Clone,Copy,PartialEq)] #[derive(Debug,Clone,Copy)]
enum Datatype enum Datatype
{ {
Int, Int,
String, String,
Pointer, //Pointer,
Any, Any,
} }
// Loose equality for `Datatype`: `Any` is treated as a wildcard that compares
// equal to every variant; otherwise two values are equal exactly when they are
// the same variant.
// NOTE(review): because of the wildcard this relation is not transitive
// (Int == Any and Any == String, yet Int != String).
impl PartialEq for Datatype
{
    fn eq(&self, other: &Self) -> bool
    {
        match (self, other)
        {
            // Either side being the wildcard is enough for a match.
            (Datatype::Any, _) | (_, Datatype::Any) => true,
            // Otherwise only the variant itself is compared.
            (lhs, rhs) => core::mem::discriminant(lhs) == core::mem::discriminant(rhs),
        }
    }
}
#[derive(Debug)] #[derive(Debug)]
struct Function struct Function
{ {
name: String, name: String,
ins: Vec<Datatype>, ins: Vec<Datatype>,
outs: Vec<Datatype>, outs: Vec<Datatype>,
content: Vec<Token> content: Vec<Operation>,
}
// A parsed program step. Every variant ends with the (line, col) pair taken
// from the token it was built from; the typechecker uses it in panic messages.
// NOTE(review): the doubled `} }` on the closing line looks like an artifact of
// a side-by-side diff rendering — confirm before compiling.
#[derive(Debug)]
enum Operation
{
// Push a literal: its datatype and the literal's text as it appeared in the token.
Enqueue(Datatype, String, i32, i32),
// Remove the element at the front of the queue (the `deq` keyword).
Dequeue(i32, i32),
// Invocation of a built-in, identified by its key in the intrinsics table.
Intrinsic(String, i32, i32),
// Invocation of a user-declared function, identified by name.
FunctionCall(String, i32, i32),
// `if` block and an optional `else` block.
If(Vec<Operation>, Option<Vec<Operation>>, i32, i32),
// Body of a `while` loop.
While(Vec<Operation>, i32, i32),
} }
fn main() fn main()
@ -43,309 +65,472 @@ fn main()
let intrinsics: HashMap<&str, (Vec<Datatype>, Vec<Datatype>)> = HashMap::from( let intrinsics: HashMap<&str, (Vec<Datatype>, Vec<Datatype>)> = HashMap::from(
[ [
("print", (Vec::from([Datatype::Any]), Vec::new())), ("print", (Vec::from([Datatype::Any]), Vec::new())),
("deq", (Vec::from([Datatype::Any]), Vec::new())), ("-", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Int]))),
("req", (Vec::from([Datatype::Any]), Vec::from([Datatype::Any]))),
]); ]);
let args: Vec<String> = env::args().collect(); let args: Vec<String> = env::args().collect();
if args.len() < 2 if args.len() < 2
{ {
usage() usage()
} }
let mut debug = false;
for arg in &args[3..]
{
match arg.as_str()
{
"-d" | "--debug" => debug = true,
_ => panic!("Unknown option {}", arg),
}
}
match args[1].as_str() match args[1].as_str()
{ {
"-c" | "--compile" => "-c" | "--compile" =>
{ {
let file_content = fs::read_to_string(&args[2]).expect("Could not read the source file"); let file_content = fs::read_to_string(&args[2]).expect("Could not read the source file");
let mut tokens: Vec<Token> = tokenize(&file_content); let mut tokens: Vec<Token> = tokenize(&file_content);
println!("{:?}", tokens); println!("---Done tokenizing, got {} tokens---", tokens.len());
let functions: Vec<Function> = extract_functions(&mut tokens, &intrinsics); let functions: Vec<Function> = extract_functions(&mut tokens, &intrinsics, debug);
println!("{:?}", functions); println!("---Done extracting functions, got {} functions and reduced the token count to {}---", functions.len(), tokens.len());
validate(&tokens, &functions, &intrinsics); let operations = parse_until_delimiter(&mut tokens.iter().peekable(), &intrinsics, None, debug);
println!("---Done parsing tokens into {} operations---", operations.len());
validate_function_calls(&operations, &functions, debug);
println!("---Done validating function calls---");
typecheck(&operations, &functions, &intrinsics, debug);
println!("---Done typechecking---");
} }
_ => panic!("Unknown option {}", args[1]) _ => panic!("Unknown option {}", args[1])
} }
} }
fn validate(tokens: &Vec<Token>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>) fn typecheck(operations: &Vec<Operation>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, debug: bool)
{ {
validate_tokens(tokens, functions, intrinsics);
println!("Validating main scope succeeded");
for function in functions for function in functions
{ {
validate_tokens(&function.content, functions, intrinsics); if debug
println!("Validating function {} succeeded", function.name); {
println!("Now typechecking function '{}'", function.name);
} }
validate_queue_integrity(tokens, &Vec::new(), &Vec::new(), functions, intrinsics); typecheck_block(&function.content, &function.ins, &function.outs, functions, intrinsics, debug);
println!("Validating queue integrity for main scope succeeded"); if debug
for function in functions
{ {
validate_queue_integrity(&function.content, &function.ins, &function.outs, functions, intrinsics); println!("Successfully typechecked function '{}'", function.name);
println!("Validating queue integrity for function {} succeeded", function.name); }
}
if debug
{
println!("Now typechecking main operations");
}
typecheck_block(operations, &Vec::new(), &Vec::new(), functions, intrinsics, debug);
if debug
{
println!("Successfully typechecked main operations");
} }
} }
const CONTROL_STRUCTURE_NAMES: [&'static str; 3] = ["if", "else", "while"]; fn typecheck_block(operations: &Vec<Operation>, ins: &Vec<Datatype>, outs: &Vec<Datatype>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, debug: bool)
fn validate_tokens(tokens: &Vec<Token>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>)
{ {
let mut depth = 0; let actual_outs = get_return_type(operations, ins, functions, intrinsics, debug);
let mut starting_control_structure = false; if &actual_outs != outs
for token in tokens
{ {
match token let (line, col) = match operations.last()
{ {
Token::Keyword(name, line, col) => Some(operation) =>
{ {
match name.as_str() match operation
{ {
"{" => Operation::Enqueue(_, _, line, col) |
{ Operation::FunctionCall(_, line, col) |
if !starting_control_structure Operation::If(_, _, line, col) |
{ Operation::Intrinsic(_, line, col) |
panic!("Control structure without '{{' at {}:{}", line, col); Operation::While(_, line, col) |
} Operation::Dequeue(line, col) => (*line, *col),
depth += 1;
}
"}" =>
{
depth -= 1;
if depth < 0
{
panic!("Encountered '}}' without matching '{{' at {}:{}", line, col);
} }
} }
_ => None => (-1, -1)
{ };
if !CONTROL_STRUCTURE_NAMES.contains(&name.as_str()) && !functions.iter().any(|x| &x.name == name) && !intrinsics.contains_key(name.as_str()) panic!("Wrong queue state at the end of a block, expected {:?} but got {:?} at {}:{}", outs, actual_outs, line, col);
{
panic!("Unknown word {name} at {}:{}", line, col);
}
}
}
starting_control_structure = CONTROL_STRUCTURE_NAMES.contains(&name.as_str());
}
_ => {}
}
} }
} }
fn validate_queue_integrity(tokens: &Vec<Token>, ins: &Vec<Datatype>, outs: &Vec<Datatype>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>) fn get_return_type(operations: &Vec<Operation>, ins: &Vec<Datatype>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, debug: bool) -> Vec<Datatype>
{ {
let mut type_queue: Vec<Datatype> = Vec::new(); let type_queue: &mut Vec<Datatype> = &mut Vec::new();
type_queue.extend_from_slice(ins); type_queue.extend_from_slice(ins);
for token in tokens let mut debug_string = String::from("");
for operation in operations
{ {
match token if debug
{ {
Token::IntLit(_, _, _) => type_queue.push(Datatype::Int), debug_string = format!("operation: {:?}: {:?}", operation, type_queue);
Token::StringLit(_, _, _) => type_queue.push(Datatype::String), }
Token::Keyword(name, line, col) => match operation
{ {
if intrinsics.contains_key(name.as_str()) Operation::Dequeue(line, col) =>
{ {
for req_type in &intrinsics.get(name.as_str()).unwrap().0 if type_queue.is_empty()
{ {
let found_type = type_queue.remove(0); panic!("Attempted to dequeue an element while the queue was empty at {}:{}", line, col);
if req_type != &Datatype::Any && found_type != Datatype::Any && req_type != &found_type }
type_queue.remove(0);
}
Operation::Enqueue(datatype, _, _, _) =>
{ {
panic!("Expected {:?} but got {:?} for {} at {}:{}", req_type, found_type, name, line, col); type_queue.push(*datatype);
}
Operation::FunctionCall(function_name, line, col) =>
{
let function = functions.iter().find(|x| &x.name == function_name).unwrap();
if function.ins.len() > type_queue.len()
{
panic!("Attempted to call function '{}' at {}:{}, with insufficient elements in the queue, expected {:?} but got {:?}", function.name, line, col, function.ins, type_queue);
}
for in_type in &function.ins
{
let actual_type = type_queue.remove(0);
if in_type != &actual_type
{
panic!("Attempted to call function '{}' at {}:{} with a wrong parameter, expected {:?} but got {:?}", function.name, line, col, in_type, actual_type);
} }
} }
for out_type in &intrinsics.get(name.as_str()).unwrap().1 type_queue.extend_from_slice(&function.outs);
{
type_queue.push(out_type.clone());
} }
} Operation::If(if_block, maybe_else_block, line, col) =>
else if let Some(function) = functions.iter().find(|x| &x.name == name)
{ {
for req_type in &function.ins if type_queue.is_empty()
{ {
let found_type = type_queue.remove(0); panic!("Encountered if block with an empty queue at {}:{}", line, col);
if req_type != &Datatype::Any && found_type != Datatype::Any && req_type != &found_type }
let comparison_type = type_queue.remove(0);
if comparison_type != Datatype::Int
{ {
panic!("Expected {:?} but got {:?} for {} at {}:{}", req_type, found_type, name, line, col); panic!("Expected an int as an if condition but got {:?} instead at {}:{}", comparison_type, line, col);
} }
} if debug
for out_type in &function.outs
{ {
type_queue.push(out_type.clone()); println!("Starting to typecheck if block");
} }
} let if_ret = get_return_type(if_block, &type_queue, functions, intrinsics, debug);
else if CONTROL_STRUCTURE_NAMES.contains(&name.as_str()) let else_ret =
if let Some(else_block) = maybe_else_block
{ {
match name.as_str() if debug
{ {
"if" => println!("Starting to typecheck else block");
{
let found_type = type_queue.remove(0) ;
if found_type != Datatype::Any && found_type != Datatype::Int
{
panic!("Wrong type {:?} for if at {}:{}", found_type, line, col);
}
}
_ => todo!()
} }
get_return_type(else_block, &type_queue, functions, intrinsics, debug)
} }
else else
{ {
panic!("Unrecognized keyword {} at {}:{}", name, line, col); type_queue.clone()
};
if if_ret != else_ret
{
panic!("Incompatible queue states after if/else construction, expected {:?} but got {:?}", if_ret, else_ret);
} }
type_queue.clear();
type_queue.extend_from_slice(&if_ret);
}
Operation::Intrinsic(intrinsic_name, line, col) =>
{
let io = intrinsics.get(intrinsic_name.as_str()).unwrap();
if io.0.len() > type_queue.len()
{
panic!("Attempted to call intrinsic '{}' at {}:{}, with insufficient elements in the queue, expected {:?} but got {:?}", intrinsic_name, line, col, io.0, type_queue);
}
for in_type in &io.0
{
let actual_type = type_queue.remove(0);
if in_type != &actual_type
{
panic!("Attempted to call intrinsic '{}' at {}:{} with a wrong parameter, expected {:?} but got {:?}", intrinsic_name, line, col, in_type, actual_type);
}
}
type_queue.extend_from_slice(&io.1);
}
Operation::While(while_block, line, col) =>
{
if type_queue.is_empty()
{
panic!("Encountered while block with an empty queue at {}:{}", line, col);
}
let &comparison_type = type_queue.get(0).unwrap();
if comparison_type != Datatype::Int
{
panic!("Expected an int as a while condition but got {:?} instead at {}:{}", comparison_type, line, col);
}
if debug
{
println!("Starting to typecheck while block");
}
typecheck_block(while_block, type_queue, type_queue, functions, intrinsics, debug);
}
}
if debug
{
println!("{} => {:?}", debug_string, type_queue);
}
}
return type_queue.clone();
}
// Verifies that every function call in the program targets a declared function.
// The body of each entry in `functions` is checked first, then the top-level
// `operations`. The per-block check (which panics on an unknown name) lives in
// `validate_function_calls_in_block`. With `debug` set, a progress line is
// printed after each body has been checked.
fn validate_function_calls(operations: &Vec<Operation>, functions: &Vec<Function>, debug: bool)
{
    functions.iter().for_each(|declared|
    {
        validate_function_calls_in_block(&declared.content, functions, debug);
        if debug
        {
            println!("Successfully validated function calls in function '{}'", declared.name);
        }
    });

    validate_function_calls_in_block(operations, functions, debug);
    if !debug
    {
        return;
    }
    println!("Successfully validated function calls in main operations");
}
// Walks `block` recursively and panics on the first `FunctionCall` whose name
// does not match the `name` of any entry in `functions`.
// `debug` is not read here; it is only passed along to the recursive calls.
// NOTE(review): the doubled `} }` on the four closing lines looks like an
// artifact of a side-by-side diff rendering — confirm before compiling.
fn validate_function_calls_in_block(block: &Vec<Operation>, functions: &Vec<Function>, debug: bool)
{
for operation in block
{
match operation
{
// These operations never reference a user-declared function.
Operation::Intrinsic(_, _, _) | Operation::Enqueue(_, _, _, _) | Operation::Dequeue(_, _) => {},
Operation::FunctionCall(function_name, line, col) =>
{
if !functions.iter().any(|x| &x.name == function_name)
{
panic!("Call to unknown function {} at {}:{}", function_name, line, col);
}
}
// Nested blocks are checked with the same rules.
Operation::If(if_block, maybe_else_block, _, _) =>
{
validate_function_calls_in_block(if_block, functions, debug);
if let Some(else_block) = maybe_else_block
{
validate_function_calls_in_block(else_block, functions, debug);
}
}
Operation::While(while_block, _, _) =>
{
validate_function_calls_in_block(while_block, functions, debug);
}
} }
} }
} }
} }
fn extract_functions(tokens: &mut Vec<Token>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>) -> Vec<Function> fn extract_functions(tokens: &mut Vec<Token>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, debug: bool) -> Vec<Function>
{ {
let mut tokens_iter = tokens.iter().peekable();
let mut functions: Vec<Function> = Vec::new(); let mut functions: Vec<Function> = Vec::new();
let mut state = FunctionExtractionState::Outside; let mut new_tokens: Vec<Token> = Vec::new();
while let Some(token) = tokens_iter.next()
{
if let Token::Keyword(word, line, col) = token
{
if word == "function"
{
if debug
{
print!("Found a function at {}:{}", line, col);
}
let mut ins: Vec<Datatype> = Vec::new(); let mut ins: Vec<Datatype> = Vec::new();
let mut outs: Vec<Datatype> = Vec::new(); loop
let mut function_name = String::from("");
let mut content: Vec<Token> = Vec::new();
let mut indices_to_remove: Vec<usize> = Vec::new();
let mut depth = 0;
for (i, token) in tokens.iter().enumerate()
{ {
match state let maybe_token = tokens_iter.next();
match maybe_token
{ {
FunctionExtractionState::Outside => Some(token) =>
{
if let Token::Keyword(name, _, _) = token
{
if name == &String::from("function")
{
state = FunctionExtractionState::Ins;
}
}
}
FunctionExtractionState::Ins =>
{ {
match token match token
{ {
Token::Keyword(name, line, col) => Token::IntLit(_, line, col) | Token::StringLit(_, line, col) =>
{ {
match name.as_str() panic!("Expected input parameters for a function but got {:?} instead at {}:{}", token, line, col);
}
Token::Keyword(word, line, col) =>
{
if word == "=>"
{
break;
}
match word.as_str()
{ {
"int" => ins.push(Datatype::Int),
"str" => ins.push(Datatype::String),
"ptr" => ins.push(Datatype::Pointer),
"any" => ins.push(Datatype::Any), "any" => ins.push(Datatype::Any),
"=>" => state = FunctionExtractionState::Outs, "str" => ins.push(Datatype::String),
_ => panic!("Unknown datatype '{}' at {}:{}", name, line, col) "int" => ins.push(Datatype::Int),
} _ => panic!("Expected input parameters for a function but got {} instead at {}:{}", word, line, col)
},
Token::StringLit(_, line, col) | Token::IntLit(_, line, col) => panic!("Expected datatype for function declaration at {}:{}", line, col),
} }
} }
FunctionExtractionState::Outs => }
}
None => panic!("Unexpected end of file while extracting a function")
}
}
if debug
{
println!("ins: {:?}", ins);
}
let mut outs: Vec<Datatype> = Vec::new();
loop
{
let maybe_token = tokens_iter.next();
match maybe_token
{
Some(token) =>
{ {
match token match token
{ {
Token::Keyword(name, _, _) => Token::IntLit(_, line, col) | Token::StringLit(_, line, col) =>
{ {
match name.as_str() panic!("Expected input parameters for a function but got {:?} instead at {}:{}", token, line, col);
}
Token::Keyword(word, line, col) =>
{
match word.as_str()
{ {
"int" => outs.push(Datatype::Int),
"str" => outs.push(Datatype::String),
"ptr" => outs.push(Datatype::Pointer),
"any" => outs.push(Datatype::Any), "any" => outs.push(Datatype::Any),
"str" => outs.push(Datatype::String),
"int" => outs.push(Datatype::Int),
"{" | "}" => panic!("Expected function name but got {} at {}:{}", word, line, col),
_ => _ =>
{ {
if let Token::Keyword(name, _, _) = token if functions.iter().any(|x| &x.name == word)
{ {
if functions.iter().any(|x| &x.name == name) panic!("Redeclaration of function '{}' at {}:{}", word, line, col);
}
if debug
{ {
panic!("A function with name {} already exists", name); println!("outs: {:?}", outs);
} }
if intrinsics.contains_key(name.as_str()) let block = parse_block(&mut tokens_iter, intrinsics, debug);
{ functions.push(Function {name: word.clone(), ins, outs, content: block});
panic!("Function names cannot have the name of intrinsics: {}", name); break;
}
function_name = name.clone();
}
else
{
panic!("Expected a function name") // TODO: Add location
}
state =FunctionExtractionState::OpenCurly;
} }
} }
},
Token::StringLit(_, line, col) | Token::IntLit(_, line, col) => panic!("Expected datatype for function declaration at {}:{}", line, col),
} }
} }
FunctionExtractionState::OpenCurly =>
{
if let Token::Keyword(name, line, col) = token
{
if name == "{"
{
depth += 1;
state = FunctionExtractionState::Body
} }
else None => panic!("Unexpected end of file while extracting a function")
{ }
panic!("Expected '{{' to open the function's body at {}:{}", line, col)
} }
} }
else else
{ {
panic!("Expected '{{' to open the function's body") // TODO: Add location new_tokens.push(token.clone());
} }
} }
FunctionExtractionState::Body => else
{ {
if let Token::Keyword(name, _, _) = token new_tokens.push(token.clone());
{
match name.as_str()
{
"{" =>
{
depth += 1;
}
"}" =>
{
depth -= 1;
if depth == 0
{
state = FunctionExtractionState::Outside;
functions.push(Function { name: function_name.clone(), ins: ins.clone() , outs: outs.clone(), content: content.clone()});
function_name.clear();
ins.clear();
outs.clear();
content.clear();
indices_to_remove.push(i);
continue;
} }
} }
_ => {} tokens.clear();
} tokens.extend_from_slice(&new_tokens);
}
content.push(token.clone());
}
}
if state != FunctionExtractionState::Outside
{
indices_to_remove.push(i);
}
}
indices_to_remove.reverse();
for i in indices_to_remove
{
tokens.remove(i);
}
return functions; return functions;
} }
#[derive(Debug, PartialEq)] fn parse_block(tokens_iter: &mut Peekable<std::slice::Iter<Token>>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, debug: bool) -> Vec<Operation>
enum FunctionExtractionState
{ {
Outside, if let Some(Token::Keyword(word, line, col)) = tokens_iter.next()
Ins, {
Outs, if word != "{"
OpenCurly, {
Body, panic!("Expected '{{' to open a block but got {} at {}:{}", word, line, col);
}
}
else
{
panic!("Expected '{{' to open a block");
}
return parse_until_delimiter(tokens_iter, intrinsics, Some("}"), debug);
}
// Converts tokens from `tokens_iter` into a list of `Operation`s.
//
// Parsing stops in one of two ways:
//   * a keyword equal to `delimiter` is consumed -> the operations so far are returned;
//   * the iterator is exhausted -> returned normally when `delimiter` is `None`,
//     otherwise a panic (the block was never closed).
//
// Keywords are classified in this order: intrinsic name, `if`, `while`, `deq`,
// the delimiter, the rejected keywords `{` / `function`, and finally anything
// else is recorded as a `FunctionCall` without checking that such a function
// exists (that check is done by `validate_function_calls_in_block`).
// `debug` is not read here; it is only forwarded to `parse_block`.
// NOTE(review): with `delimiter == None` a stray `}` falls through to the
// `FunctionCall` branch rather than being rejected here — confirm this is intended.
// NOTE(review): the doubled `} }` on the final line looks like an artifact of a
// side-by-side diff rendering — confirm before compiling.
fn parse_until_delimiter(tokens_iter: &mut Peekable<std::slice::Iter<Token>>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, delimiter: Option<&str>, debug: bool) -> Vec<Operation>
{
let mut operations: Vec<Operation> = Vec::new();
loop
{
let maybe_token = tokens_iter.next();
match maybe_token
{
Some(token) =>
{
match token
{
// Literals become Enqueue operations carrying the literal text.
Token::IntLit(value, line, col) =>
{
operations.push(Operation::Enqueue(Datatype::Int, value.clone(), *line, *col));
}
Token::StringLit(value, line, col) =>
{
operations.push(Operation::Enqueue(Datatype::String, value.clone(), *line, *col));
}
Token::Keyword(word, line, col) =>
{
// Intrinsics are tested first, so an intrinsic name wins over every later branch.
if intrinsics.contains_key(word.as_str())
{
operations.push(Operation::Intrinsic(word.clone(), *line, *col));
}
else if word == "if"
{
let block = parse_block(tokens_iter, intrinsics, debug);
// Only peek: a following token that is not `else` must stay in the
// iterator so the next loop iteration handles it.
let else_block =
if let Some(Token::Keyword(maybe_else, _, _)) = tokens_iter.peek()
{
if maybe_else == "else"
{
// Consume the `else` keyword itself before parsing its block.
tokens_iter.next();
Some(parse_block(tokens_iter, intrinsics, debug))
}
else
{
None
}
}
else
{
None
};
operations.push(Operation::If(block, else_block, *line, *col));
}
else if word == "while"
{
operations.push(Operation::While(parse_block(tokens_iter, intrinsics, debug), *line, *col));
}
else if word == "deq"
{
operations.push(Operation::Dequeue(*line, *col));
}
// The delimiter token is consumed and ends this block.
else if Some(word.as_str()) == delimiter
{
return operations;
}
else if word == "{" || word == "function"
{
panic!("Unexpected keyword {} at {}:{}", word, line, col);
}
// Any remaining word is treated as a call to a user-declared function.
else
{
operations.push(Operation::FunctionCall(word.clone(), *line, *col));
}
}
}
}
None =>
{
// Running out of tokens is only acceptable when no closing delimiter was expected.
if delimiter.is_some()
{
panic!("Reached the end of the file while parsing a block")
}
else
{
return operations;
}
}
}
}
} }
fn usage() fn usage()
@ -414,9 +599,9 @@ fn tokenize(text: &str) -> Vec<Token>
if ch.is_whitespace() if ch.is_whitespace()
{ {
state = TokenizerState::Whitespace; state = TokenizerState::Whitespace;
if let Ok(number) = word.parse::<i64>() if let Ok(_) = word.parse::<i64>()
{ {
tokens.push(Token::IntLit(number, line, col)); tokens.push(Token::IntLit(word.clone(), line, col));
} }
else else
{ {

View File

@ -23,3 +23,14 @@ function int => str check
"False" "False"
} }
} }
function int => whileFunction
{
while
{
1 - "test" req print
}
deq
}
42 whileFunction