From ac6a835a64a8d67315dad5ad8eeeea25669782c1 Mon Sep 17 00:00:00 2001 From: 0x4261756D <–38735823+0x4261756D@users.noreply.github.com> Date: Wed, 14 Dec 2022 04:12:03 +0100 Subject: [PATCH] Began validation --- src/main.rs | 180 ++++++++++++++++++++++++++++++++++++++++++++++++---- test.qbl | 17 ++++- 2 files changed, 183 insertions(+), 14 deletions(-) diff --git a/src/main.rs b/src/main.rs index 191b54b..2c1ae17 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ use core::panic; +use std::collections::HashMap; use std::env; use std::fs; use std::process::exit; @@ -10,6 +11,7 @@ enum Token IntLit(i64, i32, i32), Keyword(String, i32, i32), } + enum TokenizerState { Whitespace, @@ -18,7 +20,7 @@ enum TokenizerState Comment, } -#[derive(Debug,Clone,Copy)] +#[derive(Debug,Clone,Copy,PartialEq)] enum Datatype { Int, @@ -38,6 +40,11 @@ struct Function fn main() { + let intrinsics: HashMap<&str, (Vec, Vec)> = HashMap::from( + [ + ("print", (Vec::from([Datatype::Any]), Vec::new())), + ("deq", (Vec::from([Datatype::Any]), Vec::new())), + ]); let args: Vec = env::args().collect(); if args.len() < 2 { @@ -50,15 +57,144 @@ fn main() let file_content = fs::read_to_string(&args[2]).expect("Could not read the source file"); let mut tokens: Vec = tokenize(&file_content); println!("{:?}", tokens); - let functions: Vec = extract_functions(&mut tokens); - println!("{:?}", tokens); + let functions: Vec = extract_functions(&mut tokens, &intrinsics); println!("{:?}", functions); + validate(&tokens, &functions, &intrinsics); } _ => panic!("Unknown option {}", args[1]) } } -fn extract_functions(tokens: &mut Vec) -> Vec +fn validate(tokens: &Vec, functions: &Vec, intrinsics: &HashMap<&str, (Vec, Vec)>) +{ + validate_tokens(tokens, functions, intrinsics); + println!("Validating main scope succeeded"); + for function in functions + { + validate_tokens(&function.content, functions, intrinsics); + println!("Validating function {} succeeded", function.name); + } + validate_queue_integrity(tokens, &Vec::new(), &Vec::new(), functions, intrinsics); + println!("Validating queue integrity for main scope succeeded"); + for function in functions + { + validate_queue_integrity(&function.content, &function.ins, &function.outs, functions, intrinsics); + println!("Validating queue integrity for function {} succeeded", function.name); + } +} + +const CONTROL_STRUCTURE_NAMES: [&'static str; 3] = ["if", "else", "while"]; + +fn validate_tokens(tokens: &Vec, functions: &Vec, intrinsics: &HashMap<&str, (Vec, Vec)>) +{ + let mut depth = 0; + let mut starting_control_structure = false; + for token in tokens + { + match token + { + Token::Keyword(name, line, col) => + { + match name.as_str() + { + "{" => + { + if !starting_control_structure + { + panic!("Control structure without '{{' at {}:{}", line, col); + } + depth += 1; + } + "}" => + { + depth -= 1; + if depth < 0 + { + panic!("Encountered '}}' without matching '{{' at {}:{}", line, col); + } + } + _ => + { + if !CONTROL_STRUCTURE_NAMES.contains(&name.as_str()) && !functions.iter().any(|x| &x.name == name) && !intrinsics.contains_key(name.as_str()) + { + panic!("Unknown word {name} at {}:{}", line, col); + } + } + } + starting_control_structure = CONTROL_STRUCTURE_NAMES.contains(&name.as_str()); + } + _ => {} + } + } +} + +fn validate_queue_integrity(tokens: &Vec, ins: &Vec, outs: &Vec, functions: &Vec, intrinsics: &HashMap<&str, (Vec, Vec)>) +{ + let mut type_queue: Vec = Vec::new(); + type_queue.extend_from_slice(ins); + for token in tokens + { + match token + { + Token::IntLit(_, _, _) => type_queue.push(Datatype::Int), + Token::StringLit(_, _, _) => type_queue.push(Datatype::String), + Token::Keyword(name, line, col) => + { + if intrinsics.contains_key(name.as_str()) + { + for req_type in &intrinsics.get(name.as_str()).unwrap().0 + { + let found_type = type_queue.remove(0); + if req_type != &Datatype::Any && found_type != Datatype::Any && req_type != &found_type + { + panic!("Expected {:?} but got {:?} for {} at {}:{}", req_type, found_type, name, line, col); + } + } + for out_type in &intrinsics.get(name.as_str()).unwrap().1 + { + type_queue.push(out_type.clone()); + } + } + else if let Some(function) = functions.iter().find(|x| &x.name == name) + { + for req_type in &function.ins + { + let found_type = type_queue.remove(0); + if req_type != &Datatype::Any && found_type != Datatype::Any && req_type != &found_type + { + panic!("Expected {:?} but got {:?} for {} at {}:{}", req_type, found_type, name, line, col); + } + } + for out_type in &function.outs + { + type_queue.push(out_type.clone()); + } + } + else if CONTROL_STRUCTURE_NAMES.contains(&name.as_str()) + { + match name.as_str() + { + "if" => + { + let found_type = type_queue.remove(0) ; + if found_type != Datatype::Any && found_type != Datatype::Int + { + panic!("Wrong type {:?} for if at {}:{}", found_type, line, col); + } + } + _ => todo!() + } + } + else + { + panic!("Unrecognized keyword {} at {}:{}", name, line, col); + } + } + } + } +} + +fn extract_functions(tokens: &mut Vec, intrinsics: &HashMap<&str, (Vec, Vec)>) -> Vec { let mut functions: Vec = Vec::new(); let mut state = FunctionExtractionState::Outside; @@ -67,6 +203,7 @@ fn extract_functions(tokens: &mut Vec) -> Vec let mut function_name = String::from(""); let mut content: Vec = Vec::new(); let mut indices_to_remove: Vec = Vec::new(); + let mut depth = 0; for (i, token) in tokens.iter().enumerate() { match state @@ -120,6 +257,10 @@ fn extract_functions(tokens: &mut Vec) -> Vec { panic!("A function with name {} already exists", name); } + if intrinsics.contains_key(name.as_str()) + { + panic!("Function names cannot have the name of intrinsics: {}", name); + } function_name = name.clone(); } else @@ -139,6 +280,7 @@ fn extract_functions(tokens: &mut Vec) -> Vec { if name == "{" { + depth += 1; state = FunctionExtractionState::Body } else @@ -155,16 +297,28 @@ fn extract_functions(tokens: &mut Vec) -> Vec { if let Token::Keyword(name, _, _) = token { - if name == "}" + match name.as_str() { - state = FunctionExtractionState::Outside; - functions.push(Function { name: function_name.clone(), ins: ins.clone() , outs: outs.clone(), content: content.clone()}); - function_name.clear(); - ins.clear(); - outs.clear(); - content.clear(); - indices_to_remove.push(i); - continue; + "{" => + { + depth += 1; + } + "}" => + { + depth -= 1; + if depth == 0 + { + state = FunctionExtractionState::Outside; + functions.push(Function { name: function_name.clone(), ins: ins.clone() , outs: outs.clone(), content: content.clone()}); + function_name.clear(); + ins.clear(); + outs.clear(); + content.clear(); + indices_to_remove.push(i); + continue; + } + } + _ => {} } } content.push(token.clone()); diff --git a/test.qbl b/test.qbl index 682c18c..2e2b3e0 100644 --- a/test.qbl +++ b/test.qbl @@ -7,4 +7,19 @@ function any => int foo deq 42 17 print } -"test2" print \ No newline at end of file +"test2" print 1 +check +print + + +function int => str check +{ + if + { + "True" + } + else + { + "False" + } +} \ No newline at end of file