Began validation

This commit is contained in:
0x4261756D 2022-12-14 04:12:03 +01:00
parent 1e8322cc7c
commit ac6a835a64
2 changed files with 183 additions and 14 deletions

View File

@ -1,4 +1,5 @@
use core::panic; use core::panic;
use std::collections::HashMap;
use std::env; use std::env;
use std::fs; use std::fs;
use std::process::exit; use std::process::exit;
@ -10,6 +11,7 @@ enum Token
IntLit(i64, i32, i32), IntLit(i64, i32, i32),
Keyword(String, i32, i32), Keyword(String, i32, i32),
} }
enum TokenizerState enum TokenizerState
{ {
Whitespace, Whitespace,
@ -18,7 +20,7 @@ enum TokenizerState
Comment, Comment,
} }
#[derive(Debug,Clone,Copy)] #[derive(Debug,Clone,Copy,PartialEq)]
enum Datatype enum Datatype
{ {
Int, Int,
@ -38,6 +40,11 @@ struct Function
fn main() fn main()
{ {
let intrinsics: HashMap<&str, (Vec<Datatype>, Vec<Datatype>)> = HashMap::from(
[
("print", (Vec::from([Datatype::Any]), Vec::new())),
("deq", (Vec::from([Datatype::Any]), Vec::new())),
]);
let args: Vec<String> = env::args().collect(); let args: Vec<String> = env::args().collect();
if args.len() < 2 if args.len() < 2
{ {
@ -50,15 +57,144 @@ fn main()
let file_content = fs::read_to_string(&args[2]).expect("Could not read the source file"); let file_content = fs::read_to_string(&args[2]).expect("Could not read the source file");
let mut tokens: Vec<Token> = tokenize(&file_content); let mut tokens: Vec<Token> = tokenize(&file_content);
println!("{:?}", tokens); println!("{:?}", tokens);
let functions: Vec<Function> = extract_functions(&mut tokens); let functions: Vec<Function> = extract_functions(&mut tokens, &intrinsics);
println!("{:?}", tokens);
println!("{:?}", functions); println!("{:?}", functions);
validate(&tokens, &functions, &intrinsics);
} }
_ => panic!("Unknown option {}", args[1]) _ => panic!("Unknown option {}", args[1])
} }
} }
// Runs every validation pass over the main scope and over each extracted function.
//
// Pass 1 (`validate_tokens`) is run on the main token stream first and then on the
// `content` of every entry in `functions`.
// Pass 2 (`validate_queue_integrity`) is run in the same order; the main scope is
// checked with empty input and output type lists, each function with its own
// `ins` and `outs`.
// A progress line is printed after each step that returns.
//
// NOTE(review): the signature line below also carries the old `extract_functions`
// signature in front of `fn validate` - presumably residue of the side-by-side diff
// rendering; confirm against the real file.
fn extract_functions(tokens: &mut Vec<Token>) -> Vec<Function> fn validate(tokens: &Vec<Token>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>)
{
// Structural pass: main scope first, then every function body.
validate_tokens(tokens, functions, intrinsics);
println!("Validating main scope succeeded");
for function in functions
{
validate_tokens(&function.content, functions, intrinsics);
println!("Validating function {} succeeded", function.name);
}
// Type-queue pass: the main scope is given no input types and no output types.
validate_queue_integrity(tokens, &Vec::new(), &Vec::new(), functions, intrinsics);
println!("Validating queue integrity for main scope succeeded");
for function in functions
{
// Each function is checked against its own declared input and output types.
validate_queue_integrity(&function.content, &function.ins, &function.outs, functions, intrinsics);
println!("Validating queue integrity for function {} succeeded", function.name);
}
}
/// Keywords that introduce a control structure.
///
/// The validators treat these as known words, and `validate_tokens` uses the list to
/// decide whether a `{` is allowed at a given position.
const CONTROL_STRUCTURE_NAMES: [&str; 3] = ["if", "else", "while"];
/// Checks the structure of a token stream.
///
/// The following rules are enforced:
/// * every control-structure keyword (see `CONTROL_STRUCTURE_NAMES`) is directly
///   followed by `{`,
/// * every `{` directly follows a control-structure keyword,
/// * every `}` closes a previously opened `{`, and every `{` is closed again,
/// * every other keyword is a control structure, the name of an entry in
///   `functions`, or a key of `intrinsics`.
///
/// Non-keyword tokens (literals) are only relevant in so far as they may not stand
/// between a control-structure keyword and its `{`.
///
/// # Panics
/// Panics on the first violated rule; the message names the line and column of the
/// offending token.
fn validate_tokens(tokens: &Vec<Token>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>)
{
    // Positions of all '{' that are still open; doubles as the nesting depth.
    let mut open_braces: Vec<(i32, i32)> = Vec::new();
    // Position of a control-structure keyword that still needs its '{'.
    let mut pending_control: Option<(i32, i32)> = None;
    for token in tokens
    {
        // Taking the value resets the state for every token, literals included.
        let awaiting_brace = pending_control.take();
        let keyword = match token
        {
            Token::Keyword(name, line, col) => Some((name.as_str(), *line, *col)),
            _ => None,
        };
        if let Some((line, col)) = awaiting_brace
        {
            if !matches!(keyword, Some(("{", _, _)))
            {
                panic!("Control structure without '{{' at {}:{}", line, col);
            }
        }
        if let Some((name, line, col)) = keyword
        {
            match name
            {
                "{" =>
                {
                    if awaiting_brace.is_none()
                    {
                        panic!("Encountered '{{' without a preceding control structure at {}:{}", line, col);
                    }
                    open_braces.push((line, col));
                }
                "}" =>
                {
                    if open_braces.pop().is_none()
                    {
                        panic!("Encountered '}}' without matching '{{' at {}:{}", line, col);
                    }
                }
                _ =>
                {
                    let is_control_structure = CONTROL_STRUCTURE_NAMES.contains(&name);
                    if !is_control_structure && !functions.iter().any(|x| x.name == name) && !intrinsics.contains_key(name)
                    {
                        panic!("Unknown word {} at {}:{}", name, line, col);
                    }
                    if is_control_structure
                    {
                        pending_control = Some((line, col));
                    }
                }
            }
        }
    }
    // A control keyword as the very last token never got its '{'.
    if let Some((line, col)) = pending_control
    {
        panic!("Control structure without '{{' at {}:{}", line, col);
    }
    // Report the innermost block that was never closed.
    if let Some((line, col)) = open_braces.pop()
    {
        panic!("Encountered '{{' without matching '}}' at {}:{}", line, col);
    }
}
/// Simulates the type queue of a token stream and checks every consumer against it.
///
/// The queue starts out with `ins`. Integer and string literals append their type.
/// A keyword naming an intrinsic or a function removes one entry from the front of
/// the queue per declared input type, checks it, and appends the declared output
/// types. `if` removes one entry, which has to be `Int` or `Any`.
///
/// NOTE(review): `outs` is accepted but not compared against the final queue, the
/// control structures other than `if` hit `todo!()`, and `{` / `}` are reported as
/// unrecognized keywords - this pass looks unfinished; confirm the intended semantics
/// before relying on it.
///
/// # Panics
/// Panics when a consumer finds the queue empty, when a found type does not match
/// the required one (`Any` matches everything), or when a keyword is neither an
/// intrinsic, a function nor a control structure. Every message carries the line and
/// column of the keyword.
fn validate_queue_integrity(tokens: &Vec<Token>, ins: &Vec<Datatype>, outs: &Vec<Datatype>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>)
{
    // A deque gives cheap removal at the front, which is where consumers read.
    let mut type_queue: std::collections::VecDeque<Datatype> = ins.iter().copied().collect();
    for token in tokens
    {
        match token
        {
            Token::IntLit(_, _, _) => type_queue.push_back(Datatype::Int),
            Token::StringLit(_, _, _) => type_queue.push_back(Datatype::String),
            Token::Keyword(name, line, col) =>
            {
                // Intrinsics take precedence over functions, as before.
                if let Some((required, produced)) = intrinsics.get(name.as_str())
                {
                    consume_and_produce_types(&mut type_queue, required, produced, name, *line, *col);
                }
                else if let Some(function) = functions.iter().find(|x| &x.name == name)
                {
                    consume_and_produce_types(&mut type_queue, &function.ins, &function.outs, name, *line, *col);
                }
                else if CONTROL_STRUCTURE_NAMES.contains(&name.as_str())
                {
                    match name.as_str()
                    {
                        "if" =>
                        {
                            let found_type = match type_queue.pop_front()
                            {
                                Some(found_type) => found_type,
                                None => panic!("Expected a condition for if at {}:{} but the queue is empty", line, col),
                            };
                            if found_type != Datatype::Any && found_type != Datatype::Int
                            {
                                panic!("Wrong type {:?} for if at {}:{}", found_type, line, col);
                            }
                        }
                        _ => todo!()
                    }
                }
                else
                {
                    panic!("Unrecognized keyword {} at {}:{}", name, line, col);
                }
            }
        }
    }
}

/// Removes one queue entry per type in `required`, checks it against that type
/// (`Any` on either side always matches), then appends all of `produced`.
fn consume_and_produce_types(type_queue: &mut std::collections::VecDeque<Datatype>, required: &[Datatype], produced: &[Datatype], name: &str, line: i32, col: i32)
{
    for req_type in required
    {
        let found_type = match type_queue.pop_front()
        {
            Some(found_type) => found_type,
            None => panic!("Expected {:?} for {} at {}:{} but the queue is empty", req_type, name, line, col),
        };
        if *req_type != Datatype::Any && found_type != Datatype::Any && *req_type != found_type
        {
            panic!("Expected {:?} but got {:?} for {} at {}:{}", req_type, found_type, name, line, col);
        }
    }
    type_queue.extend(produced.iter().copied());
}
fn extract_functions(tokens: &mut Vec<Token>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>) -> Vec<Function>
{ {
let mut functions: Vec<Function> = Vec::new(); let mut functions: Vec<Function> = Vec::new();
let mut state = FunctionExtractionState::Outside; let mut state = FunctionExtractionState::Outside;
@ -67,6 +203,7 @@ fn extract_functions(tokens: &mut Vec<Token>) -> Vec<Function>
let mut function_name = String::from(""); let mut function_name = String::from("");
let mut content: Vec<Token> = Vec::new(); let mut content: Vec<Token> = Vec::new();
let mut indices_to_remove: Vec<usize> = Vec::new(); let mut indices_to_remove: Vec<usize> = Vec::new();
let mut depth = 0;
for (i, token) in tokens.iter().enumerate() for (i, token) in tokens.iter().enumerate()
{ {
match state match state
@ -120,6 +257,10 @@ fn extract_functions(tokens: &mut Vec<Token>) -> Vec<Function>
{ {
panic!("A function with name {} already exists", name); panic!("A function with name {} already exists", name);
} }
if intrinsics.contains_key(name.as_str())
{
panic!("Function names cannot have the name of intrinsics: {}", name);
}
function_name = name.clone(); function_name = name.clone();
} }
else else
@ -139,6 +280,7 @@ fn extract_functions(tokens: &mut Vec<Token>) -> Vec<Function>
{ {
if name == "{" if name == "{"
{ {
depth += 1;
state = FunctionExtractionState::Body state = FunctionExtractionState::Body
} }
else else
@ -155,16 +297,28 @@ fn extract_functions(tokens: &mut Vec<Token>) -> Vec<Function>
{ {
if let Token::Keyword(name, _, _) = token if let Token::Keyword(name, _, _) = token
{ {
if name == "}" match name.as_str()
{ {
state = FunctionExtractionState::Outside; "{" =>
functions.push(Function { name: function_name.clone(), ins: ins.clone() , outs: outs.clone(), content: content.clone()}); {
function_name.clear(); depth += 1;
ins.clear(); }
outs.clear(); "}" =>
content.clear(); {
indices_to_remove.push(i); depth -= 1;
continue; if depth == 0
{
state = FunctionExtractionState::Outside;
functions.push(Function { name: function_name.clone(), ins: ins.clone() , outs: outs.clone(), content: content.clone()});
function_name.clear();
ins.clear();
outs.clear();
content.clear();
indices_to_remove.push(i);
continue;
}
}
_ => {}
} }
} }
content.push(token.clone()); content.push(token.clone());

View File

@ -7,4 +7,19 @@ function any => int foo
deq 42 17 print deq 42 17 print
} }
"test2" print "test2" print 1
check
print
function int => str check
{
if
{
"True"
}
else
{
"False"
}
}