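// kurz/src/main.rs (1214 lines, 34 KiB, Rust)
// Note: this copy was taken from a web "blame" view; the viewer controls
// ("Raw", "Normal View", "History") and per-line commit timestamps are page residue, not source.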

use core::panic;
// 2022-12-14 04:12:03 +01:00 (blame timestamp left over from the web view)
use std::collections::HashMap;
use std::env;
use std::fs;
use std::iter::Peekable;
use std::process::exit;
/// A lexical token produced by `tokenize`.
///
/// In every variant the last two fields are the line and column counters of the
/// tokenizer at the moment the token was emitted.
#[derive(Debug, Clone, PartialEq)]
enum Token {
    /// A string literal: the text between the quotes.
    StringLit(String, i32, i32),
    /// An integer literal, kept as its source text.
    IntLit(String, i32, i32),
    /// A boolean literal, kept as its source text (`true` or `false`).
    BoolLit(String, i32, i32),
    /// Any other word.
    Keyword(String, i32, i32),
    /// A dotted word `name.word`, stored as `(name, word, line, col)`.
    Apply(String, String, i32, i32),
}
// 2022-12-14 04:12:03 +01:00 (blame timestamp left over from the web view)
/// The state of the character-level state machine in `tokenize`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TokenizerState {
    /// Between tokens.
    Whitespace,
    /// Inside a string literal, after the opening `"`.
    Quote,
    /// Inside a word (keyword, literal or application).
    Keyword,
    /// Inside a `//` comment, which lasts until the end of the line.
    Comment,
}
/// The static type of a queue element.
#[derive(Debug, Clone, Copy)]
enum Datatype {
    Int,
    String,
    Bool,
    // Pointer,
    /// Wildcard type: compares equal to every other datatype (see the `PartialEq` impl).
    Any,
}

impl PartialEq for Datatype {
    /// Two datatypes are equal when they are the same variant or when either
    /// side is [`Datatype::Any`].
    ///
    /// Because of the wildcard this relation is deliberately not transitive
    /// (`Int == Any` and `Any == String`, but `Int != String`), so `Eq` and
    /// `Hash` must not be derived for this type.
    fn eq(&self, other: &Self) -> bool {
        matches!(self, Datatype::Any)
            || matches!(other, Datatype::Any)
            || core::mem::discriminant(self) == core::mem::discriminant(other)
    }
}
/// A user-defined function collected by `extract_functions`.
#[derive(Debug)]
struct Function {
    /// The name used to call the function.
    name: String,
    /// Types taken from the front of the caller's queue, in order.
    ins: Vec<Datatype>,
    /// Types the function body must leave in its queue.
    outs: Vec<Datatype>,
    /// The parsed operations of the function body.
    content: Vec<Operation>,
}
/// A fixed-size global array collected by `extract_arrays`.
#[derive(Debug)]
struct Arr {
    /// The name used in `name.word` applications.
    name: String,
    /// The element type given in the declaration.
    datatype: Datatype,
    /// The declared number of elements.
    length: i64,
    /// The element values, stored as strings like queue values.
    data: Vec<String>,
}
#[derive(Debug)]
enum Operation
{
Enqueue(Datatype, String, i32, i32),
Dequeue(i32, i32),
2022-12-14 11:46:39 +01:00
// TODO: req can be implemented in terms of dup and dequeue
Requeue(i32, i32),
2022-12-14 11:46:39 +01:00
Swap(i32, i32),
Dup(i32, i32),
Intrinsic(String, i32, i32),
FunctionCall(String, i32, i32),
If(Vec<Operation>, Option<Vec<Operation>>, i32, i32),
While(Vec<Operation>, i32, i32),
Apply(String, String, i32, i32),
2022-12-15 20:34:56 +01:00
Depth(i32, i32),
QueueDiagnostic(i32, i32),
}
fn main()
{
2022-12-14 04:12:03 +01:00
let intrinsics: HashMap<&str, (Vec<Datatype>, Vec<Datatype>)> = HashMap::from(
[
("print", (Vec::from([Datatype::Any]), Vec::new())),
2022-12-15 20:34:56 +01:00
("println", (Vec::from([Datatype::Any]), Vec::new())),
("-", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Int]))),
2022-12-14 11:46:39 +01:00
("+", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Int]))),
2022-12-14 20:39:51 +01:00
("<", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Bool]))),
(">", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Bool]))),
("==", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Bool]))),
("!=", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Bool]))),
2022-12-15 20:34:56 +01:00
("decrease", (Vec::from([Datatype::Int]), Vec::from([Datatype::Int]))),
2022-12-14 04:12:03 +01:00
]);
let args: Vec<String> = env::args().collect();
if args.len() < 2
{
usage()
}
let mut debug = false;
2022-12-14 09:34:41 +01:00
let mut interpret = false;
for arg in &args[3..]
{
match arg.as_str()
{
"-d" | "--debug" => debug = true,
2022-12-14 09:34:41 +01:00
"-i" | "--interpret" => interpret = true,
_ => panic!("Unknown option {}", arg),
}
}
match args[1].as_str()
{
"-t" | "--test" =>
{
2022-12-18 04:05:50 +01:00
let mut count = 0;
for f in fs::read_dir(&args[2]).unwrap()
{
let f = f.unwrap();
let file_content = fs::read_to_string(f.path()).unwrap();
2022-12-19 12:13:40 +01:00
println!("========NOW TESTING {:?}========", f.path());
2022-12-18 04:05:50 +01:00
match compile(&file_content, &intrinsics, interpret, debug)
{
2022-12-18 04:05:50 +01:00
Ok(maybe_msg) =>
{
2022-12-19 12:13:40 +01:00
println!("---Successfully parsed {:?}---", f.path());
2022-12-18 04:05:50 +01:00
if let Some(msg) = &maybe_msg
{
print!("---Output---\n'{}'\n", msg);
}
let expected = &format!("//valid,{}:END:", maybe_msg.unwrap_or(String::new()).replace("\n", "\n//"));
if file_content.starts_with(expected)
{
println!("===PASSED===");
count += 1;
}
else if let Some(index) = file_content.find(":END:")
2022-12-18 04:05:50 +01:00
{
let expected_output = file_content[8..index].replace("\n//", "\n");
println!("\n===FAILED===\nExpected the output to be\n'{}'\n({})", expected_output, expected);
}
else
{
panic!("Could not find an ending marker (:END:) for the expected output in {:?}", f.file_name());
}
}
Err(msg) =>
{
println!("ERROR: {}", msg);
if file_content.starts_with(&format!("//invalid,{}:END:", msg.replace("\n", "\n//")))
{
println!("===PASSED===");
count += 1;
}
else if file_content.starts_with("//invalid,")
2022-12-18 04:05:50 +01:00
{
if let Some(index) = file_content.find(":END:")
{
let expected_output = file_content[10..index].replace("\n//", "\n");
println!("\n\n===FAILED===\nExpected the output to be {}", expected_output);
}
else
{
panic!("Could not find an ending marker (:END:) for the expected output in {:?}", f.file_name());
}
2022-12-18 04:05:50 +01:00
}
else
{
println!("Unexpected error");
2022-12-18 04:05:50 +01:00
}
}
}
}
2022-12-18 04:05:50 +01:00
println!("\n\n=========RESULT=========\n{}/{}", count, fs::read_dir(&args[2]).unwrap().count());
}
"-c" | "--compile" =>
{
let file_content = fs::read_to_string(&args[2]).expect("Could not read the source file");
2022-12-18 04:05:50 +01:00
match compile(&file_content, &intrinsics, interpret, debug)
{
2022-12-18 04:05:50 +01:00
Ok(maybe_msg) =>
{
if let Some(msg) = maybe_msg
{
print!("---Output---\n\n{}", msg);
}
}
Err(msg) => println!("ERROR: {}", msg),
2022-12-14 09:34:41 +01:00
}
}
_ => panic!("Unknown option {}", args[1])
}
}
// 2022-12-18 04:05:50 +01:00 (blame timestamp left over from the web view)
fn compile(file_content: &String, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, interpret: bool, debug: bool) -> Result<Option<String>, String>
{
let mut tokens: Vec<Token> = tokenize(&file_content)?;
println!("---Done tokenizing, got {} tokens---", tokens.len());
let functions: Vec<Function> = extract_functions(&mut tokens, &intrinsics, debug)?;
println!("---Done extracting functions, got {} functions and reduced the token count to {}---", functions.len(), tokens.len());
let mut arrays: Vec<Arr> = extract_arrays(&mut tokens, &intrinsics, &functions, debug)?;
println!("---Done extracting arrays, got {} arrays and reduced the token count to {}---", arrays.len(), tokens.len());
let operations = parse_until_delimiter(&mut tokens.iter().peekable(), &intrinsics, None, debug)?;
println!("---Done parsing tokens into {} operations---", operations.len());
validate_function_calls(&operations, &functions, &arrays, debug)?;
println!("---Done validating function calls---");
typecheck(&operations, &functions, &intrinsics, &arrays, debug)?;
println!("---Done typechecking---");
2022-12-18 04:05:50 +01:00
let output = if interpret
{
2022-12-18 04:05:50 +01:00
println!("---Starting to interpret the program---");
Some(interpret_program(&operations, &mut Vec::new(), &functions, &mut arrays, &intrinsics, debug)?)
}
2022-12-18 04:05:50 +01:00
else
{
None
};
return Ok(output);
}
fn interpret_program(operations: &Vec<Operation>, queue: &mut Vec<String>, functions: &Vec<Function>, arrays: &mut Vec<Arr>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, debug: bool) -> Result<String,String>
2022-12-14 09:34:41 +01:00
{
2022-12-18 04:05:50 +01:00
let mut output = String::new();
2022-12-14 09:34:41 +01:00
for operation in operations
{
if debug
{
2022-12-18 04:05:50 +01:00
println!("before: {:?}: {:?}, '{}'", operation, queue, output);
2022-12-14 09:34:41 +01:00
}
match operation
{
Operation::Dequeue(_, _) =>
{
queue.remove(0);
}
Operation::Enqueue(_, value, _, _) =>
{
queue.push(value.clone());
}
Operation::Requeue(_, _) =>
{
let val = queue.remove(0);
queue.push(val);
}
2022-12-14 11:46:39 +01:00
Operation::Dup(_, _) =>
{
let val = queue.get(0).unwrap();
queue.push(val.clone());
}
Operation::Swap(_, _) =>
{
let first = queue.remove(0);
let second = queue.remove(0);
queue.push(second);
queue.push(first);
}
2022-12-14 09:34:41 +01:00
Operation::FunctionCall(function_name, _, _) =>
{
2022-12-15 20:34:56 +01:00
let function = functions.iter().find(|x| &x.name == function_name).unwrap();
let function_context: &mut Vec<String> = &mut Vec::new();
for _ in 0..function.ins.len()
{
let val = queue.remove(0);
function_context.push(val);
}
output += interpret_program(&function.content, function_context, functions, arrays, intrinsics, debug)?.as_str();
2022-12-15 20:34:56 +01:00
for val in function_context
{
queue.push(val.to_string());
}
2022-12-14 09:34:41 +01:00
}
Operation::If(if_block, maybe_else_block, _, _) =>
{
let val = queue.remove(0);
2022-12-14 20:39:51 +01:00
if val == "true"
2022-12-14 09:34:41 +01:00
{
output += interpret_program(if_block, queue, functions, arrays, intrinsics, debug)?.as_str();
2022-12-14 09:34:41 +01:00
}
else if let Some(else_block) = maybe_else_block
{
output += interpret_program(else_block, queue, functions, arrays, intrinsics, debug)?.as_str();
2022-12-14 09:34:41 +01:00
}
}
Operation::Intrinsic(intrinsic_name, line, col) =>
{
match intrinsic_name.as_str()
{
"print" =>
{
2022-12-18 04:05:50 +01:00
output += format!("{}", queue.remove(0)).as_str();
2022-12-14 09:34:41 +01:00
}
"-" =>
{
let minuend = queue.remove(0).parse::<i64>().unwrap();
let subtrahend = queue.remove(0).parse::<i64>().unwrap();
queue.push((minuend - subtrahend).to_string());
}
2022-12-14 11:46:39 +01:00
"+" =>
{
let addend1 = queue.remove(0).parse::<i64>().unwrap();
let addend2 = queue.remove(0).parse::<i64>().unwrap();
queue.push((addend1 + addend2).to_string());
}
2022-12-14 20:39:51 +01:00
">" =>
{
let first = queue.remove(0).parse::<i64>().unwrap();
let second = queue.remove(0).parse::<i64>().unwrap();
queue.push((first > second).to_string());
}
"<" =>
{
let first = queue.remove(0).parse::<i64>().unwrap();
let second = queue.remove(0).parse::<i64>().unwrap();
queue.push((first < second).to_string());
}
"==" =>
{
let first = queue.remove(0).parse::<i64>().unwrap();
let second = queue.remove(0).parse::<i64>().unwrap();
queue.push((first == second).to_string());
}
"!=" =>
{
let first = queue.remove(0).parse::<i64>().unwrap();
let second = queue.remove(0).parse::<i64>().unwrap();
queue.push((first != second).to_string());
}
2022-12-15 20:34:56 +01:00
"decrease" =>
{
let val = queue.remove(0).parse::<i64>().unwrap();
queue.push((val - 1).to_string());
}
"println" =>
{
2022-12-18 04:05:50 +01:00
output += format!("{}\n", queue.remove(0)).as_str();
2022-12-15 20:34:56 +01:00
}
2022-12-14 09:34:41 +01:00
_ =>
{
return Err(format!("Unexpected intrinsic '{}' at {}:{}", intrinsic_name, line, col));
2022-12-14 09:34:41 +01:00
}
}
}
Operation::Apply(name, word, line, col) =>
{
let arr: &mut Arr = arrays.iter_mut().find(|x| &x.name == name).unwrap();
match word.as_str()
{
"write" =>
{
let position: i64 = queue.remove(0).parse::<i64>().unwrap();
if position >= arr.length
{
return Err(format!("Attempted an out of bounds write for array {} ({} >= {}) at {}:{}", arr.name, position, arr.length, line, col));
}
if position < 0
{
return Err(format!("Attempted an out of bounds write for array {} ({} < 0) at {}:{}", arr.name, position, line, col));
}
arr.data[position as usize] = queue.remove(0);
}
"read" =>
{
let position: i64 = queue.remove(0).parse::<i64>().unwrap();
if position >= arr.length
{
return Err(format!("Attempted an out of bounds read for array {} ({} >= {}) at {}:{}", arr.name, position, arr.length, line, col));
}
if position < 0
{
return Err(format!("Attempted an out of bounds read for array {} ({} < 0) at {}:{}", arr.name, position, line, col));
}
queue.push(arr.data[position as usize].clone());
}
"length" =>
{
queue.push(arr.length.to_string());
}
_ => return Err(format!("Unexpected application '{}' at {}:{}", word, line, col))
}
}
2022-12-14 09:34:41 +01:00
Operation::While(while_block, _, _) =>
{
loop
{
2022-12-14 11:46:39 +01:00
let val = queue.remove(0);
2022-12-14 20:39:51 +01:00
if val == "false"
2022-12-14 09:34:41 +01:00
{
break;
}
output += interpret_program(while_block, queue, functions, arrays, intrinsics, debug)?.as_str();
2022-12-14 09:34:41 +01:00
}
}
2022-12-15 20:34:56 +01:00
Operation::Depth(_, _) =>
{
let depth = queue.len();
queue.push(depth.to_string());
}
Operation::QueueDiagnostic(line, col) =>
{
println!("---Queue state at {}:{}---\nlength: {}\n{:?}\n------------------------------", line, col, queue.len(), queue);
}
2022-12-14 09:34:41 +01:00
}
if debug
{
2022-12-18 04:05:50 +01:00
println!("after: {:?}: {:?}, '{}'", operation, queue, output);
2022-12-14 09:34:41 +01:00
}
}
return Ok(output);
2022-12-14 09:34:41 +01:00
}
fn typecheck(operations: &Vec<Operation>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, arrays: &Vec<Arr>, debug: bool) -> Result<(), String>
2022-12-14 04:12:03 +01:00
{
for function in functions
{
if debug
{
println!("Now typechecking function '{}'", function.name);
}
typecheck_block(&function.content, &function.ins, &function.outs, functions, intrinsics, arrays, debug)?;
if debug
{
println!("Successfully typechecked function '{}'", function.name);
}
2022-12-14 04:12:03 +01:00
}
if debug
2022-12-14 04:12:03 +01:00
{
println!("Now typechecking main operations");
}
typecheck_block(operations, &Vec::new(), &Vec::new(), functions, intrinsics, arrays, debug)?;
if debug
{
println!("Successfully typechecked main operations");
2022-12-14 04:12:03 +01:00
}
return Ok(());
2022-12-14 04:12:03 +01:00
}
fn typecheck_block(operations: &Vec<Operation>, ins: &Vec<Datatype>, outs: &Vec<Datatype>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, arrays: &Vec<Arr>, debug: bool) -> Result<(), String>
2022-12-14 04:12:03 +01:00
{
let actual_outs = get_return_type(operations, ins, functions, intrinsics, arrays, debug)?;
if &actual_outs != outs
2022-12-14 04:12:03 +01:00
{
let (line, col) = match operations.last()
2022-12-14 04:12:03 +01:00
{
Some(operation) =>
2022-12-14 04:12:03 +01:00
{
match operation
2022-12-14 04:12:03 +01:00
{
Operation::Enqueue(_, _, line, col) |
2022-12-15 20:34:56 +01:00
Operation::Dequeue(line, col) |
Operation::Requeue(line, col) |
2022-12-14 11:46:39 +01:00
Operation::Dup(line, col) |
Operation::Swap(line, col) |
Operation::FunctionCall(_, line, col) |
Operation::If(_, _, line, col) |
Operation::Intrinsic(_, line, col) |
Operation::While(_, line, col) |
2022-12-15 20:34:56 +01:00
Operation::QueueDiagnostic(line, col) |
Operation::Apply(_, _, line, col) |
2022-12-15 20:34:56 +01:00
Operation::Depth(line, col) => (*line, *col),
2022-12-14 04:12:03 +01:00
}
}
None => (-1, -1)
};
return Err(format!("Wrong queue state at the end of a block, expected {:?} but got {:?} at {}:{}", outs, actual_outs, line, col));
2022-12-14 04:12:03 +01:00
}
return Ok(());
2022-12-14 04:12:03 +01:00
}
fn get_return_type(operations: &Vec<Operation>, ins: &Vec<Datatype>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, arrays: &Vec<Arr>, debug: bool) -> Result<Vec<Datatype>, String>
2022-12-14 04:12:03 +01:00
{
let type_queue: &mut Vec<Datatype> = &mut Vec::new();
2022-12-14 04:12:03 +01:00
type_queue.extend_from_slice(ins);
let mut debug_string = String::from("");
for operation in operations
2022-12-14 04:12:03 +01:00
{
if debug
{
debug_string = format!("operation: {:?}: {:?}", operation, type_queue);
}
match operation
2022-12-14 04:12:03 +01:00
{
Operation::Dequeue(line, col) =>
2022-12-14 04:12:03 +01:00
{
if type_queue.is_empty()
2022-12-14 04:12:03 +01:00
{
return Err(format!("Attempted to dequeue an element while the queue was empty at {}:{}", line, col));
}
type_queue.remove(0);
}
Operation::Enqueue(datatype, _, _, _) =>
{
type_queue.push(*datatype);
}
2022-12-14 11:46:39 +01:00
Operation::Dup(line, col) =>
{
if let Some(typ) = type_queue.get(0)
{
type_queue.push(typ.clone());
}
else
{
return Err(format!("Attempted to dup an element while the queue was empty at {}:{}", line, col));
2022-12-14 11:46:39 +01:00
}
}
Operation::Requeue(line, col) =>
{
if type_queue.is_empty()
{
return Err(format!("Attempted to requeue an element while the queue was empty at {}:{}", line, col));
}
let typ = type_queue.remove(0);
type_queue.push(typ);
}
2022-12-14 11:46:39 +01:00
Operation::Swap(line, col) =>
{
if type_queue.is_empty()
{
return Err(format!("Attempted to get the first element for a swap while the queue was empty at {}:{}", line, col));
2022-12-14 11:46:39 +01:00
}
let first_typ = type_queue.remove(0);
if type_queue.is_empty()
{
return Err(format!("Attempted to get the second element for a swap while the queue was empty at {}:{}", line, col));
2022-12-14 11:46:39 +01:00
}
let second_typ = type_queue.remove(0);
type_queue.push(second_typ);
type_queue.push(first_typ);
}
Operation::FunctionCall(function_name, line, col) =>
{
let function = functions.iter().find(|x| &x.name == function_name).unwrap();
if function.ins.len() > type_queue.len()
{
return Err(format!("Attempted to call function '{}' at {}:{}, with insufficient elements in the queue, expected {:?} but got {:?}", function.name, line, col, function.ins, type_queue));
2022-12-14 04:12:03 +01:00
}
for in_type in &function.ins
2022-12-14 04:12:03 +01:00
{
let actual_type = type_queue.remove(0);
if in_type != &actual_type
2022-12-14 04:12:03 +01:00
{
return Err(format!("Attempted to call function '{}' at {}:{} with a wrong parameter, expected {:?} but got {:?}", function.name, line, col, in_type, actual_type));
2022-12-14 04:12:03 +01:00
}
}
type_queue.extend_from_slice(&function.outs);
}
Operation::If(if_block, maybe_else_block, line, col) =>
{
if type_queue.is_empty()
{
return Err(format!("Encountered if block with an empty queue at {}:{}", line, col));
}
let comparison_type = type_queue.remove(0);
2022-12-14 20:39:51 +01:00
if comparison_type != Datatype::Bool
{
return Err(format!("Expected a Bool as an if condition but got {:?} instead at {}:{}", comparison_type, line, col));
}
if debug
{
println!("Starting to typecheck if block");
}
let if_ret = get_return_type(if_block, &type_queue, functions, intrinsics, arrays, debug)?;
let else_ret =
if let Some(else_block) = maybe_else_block
{
if debug
2022-12-14 04:12:03 +01:00
{
println!("Starting to typecheck else block");
2022-12-14 04:12:03 +01:00
}
get_return_type(else_block, &type_queue, functions, intrinsics, arrays, debug)?
2022-12-14 04:12:03 +01:00
}
else
{
type_queue.clone()
};
if if_ret != else_ret
{
return Err(format!("Incompatible queue states after if/else construction, expected {:?} but got {:?}", if_ret, else_ret));
}
type_queue.clear();
type_queue.extend_from_slice(&if_ret);
}
Operation::Intrinsic(intrinsic_name, line, col) =>
{
let io = intrinsics.get(intrinsic_name.as_str()).unwrap();
if io.0.len() > type_queue.len()
2022-12-14 04:12:03 +01:00
{
return Err(format!("Attempted to call intrinsic '{}' at {}:{}, with insufficient elements in the queue, expected {:?} but got {:?}", intrinsic_name, line, col, io.0, type_queue));
}
for in_type in &io.0
{
let actual_type = type_queue.remove(0);
if in_type != &actual_type
2022-12-14 04:12:03 +01:00
{
return Err(format!("Attempted to call intrinsic '{}' at {}:{} with a wrong parameter, expected {:?} but got {:?}", intrinsic_name, line, col, in_type, actual_type));
2022-12-14 04:12:03 +01:00
}
}
type_queue.extend_from_slice(&io.1);
}
Operation::While(while_block, line, col) =>
{
if type_queue.is_empty()
{
return Err(format!("Encountered while block with an empty queue at {}:{}", line, col));
}
2022-12-14 11:46:39 +01:00
let comparison_type = type_queue.remove(0);
2022-12-14 20:39:51 +01:00
if comparison_type != Datatype::Bool
{
return Err(format!("Expected a Bool as a while condition but got {:?} instead at {}:{}", comparison_type, line, col));
}
if debug
2022-12-14 04:12:03 +01:00
{
println!("Starting to typecheck while block");
2022-12-14 04:12:03 +01:00
}
2022-12-14 11:46:39 +01:00
let mut outs = type_queue.clone();
2022-12-14 20:39:51 +01:00
outs.insert(0, Datatype::Bool);
typecheck_block(while_block, type_queue, &outs, functions, intrinsics, arrays, debug)?;
2022-12-14 04:12:03 +01:00
}
2022-12-15 20:34:56 +01:00
Operation::Depth(_, _) =>
{
type_queue.push(Datatype::Int);
}
Operation::QueueDiagnostic(line, col) =>
{
println!("---Type queue state at {}:{}---\nlength: {}\n{:?}\n------------------------------", line, col, type_queue.len(), type_queue);
}
Operation::Apply(name, word, line, col) =>
{
match word.as_str()
{
"write" =>
{
if type_queue.is_empty() || type_queue.remove(0) != Datatype::Int
{
return Err(format!("Expected a position for a write application at {}:{}", line, col));
}
let expected_type = arrays.iter().find(|x| &x.name == name).unwrap().datatype;
if type_queue.is_empty()
{
return Err(format!("Expected data for a write application at {}:{}", line, col));
}
let actual_type = type_queue.remove(0);
if actual_type != expected_type
{
return Err(format!("Expected a {:?} value but got a {:?} value at {}:{}", expected_type, actual_type, line, col));
}
}
"read" =>
{
if type_queue.is_empty() || type_queue.remove(0) != Datatype::Int
{
return Err(format!("Expected a position for a read application at {}:{}", line, col));
}
let typ = arrays.iter().find(|x| &x.name == name).unwrap().datatype;
type_queue.push(typ);
}
"length" =>
{
type_queue.push(Datatype::Int);
}
_ => return Err(format!("Encountered unknown application '{}' at {}:{}", word, line, col))
}
}
2022-12-14 04:12:03 +01:00
}
if debug
{
println!("{} => {:?}", debug_string, type_queue);
}
2022-12-14 04:12:03 +01:00
}
return Ok(type_queue.clone());
2022-12-14 04:12:03 +01:00
}
/// Validates the calls inside every function body first and inside the main
/// program afterwards, stopping at the first error.
///
/// With `debug` set, a confirmation line is printed after each validated unit.
fn validate_function_calls(operations: &Vec<Operation>, functions: &Vec<Function>, arrays: &Vec<Arr>, debug: bool) -> Result<(), String> {
    functions.iter().try_for_each(|function| {
        validate_function_calls_in_block(&function.content, functions, arrays, debug)?;
        if debug {
            println!("Successfully validated function calls in function '{}'", function.name);
        }
        Ok::<(), String>(())
    })?;
    validate_function_calls_in_block(operations, functions, arrays, debug).map(|()| {
        if debug {
            println!("Successfully validated function calls in main operations");
        }
    })
}
fn validate_function_calls_in_block(block: &Vec<Operation>, functions: &Vec<Function>, arrays: &Vec<Arr>, debug: bool) -> Result<(), String>
{
for operation in block
{
match operation
{
2022-12-15 20:34:56 +01:00
Operation::Depth(_, _) | Operation::QueueDiagnostic(_, _) | Operation::Intrinsic(_, _, _) | Operation::Enqueue(_, _, _, _) | Operation::Dequeue(_, _) |
Operation::Requeue(_, _) | Operation::Dup(_, _) | Operation::Swap(_, _) | Operation::Apply(_, _, _, _) => {},
Operation::FunctionCall(function_name, line, col) =>
{
if !functions.iter().any(|x| &x.name == function_name)
{
return Err(format!("Call to unknown function '{}' at {}:{}", function_name, line, col));
}
}
Operation::If(if_block, maybe_else_block, _, _) =>
{
validate_function_calls_in_block(if_block, functions, arrays, debug)?;
if let Some(else_block) = maybe_else_block
{
validate_function_calls_in_block(else_block, functions, arrays, debug)?;
}
}
Operation::While(while_block, _, _) =>
{
validate_function_calls_in_block(while_block, functions, arrays, debug)?;
}
}
}
return Ok(());
}
/// Removes every array declaration `arr NAME { TYPE SIZE }` from `tokens` and
/// returns the declared arrays; all other tokens are kept in order.
///
/// Every element starts out as `"0"` (int), `"false"` (bool) or the empty
/// string (str and any). With `debug` set, each declaration found is printed.
///
/// # Errors
/// Returns a message positioned at the `arr` keyword when the declaration is
/// malformed (missing name, braces, type or size, or a negative size) or when
/// the name is already used by a function, another array or an intrinsic.
fn extract_arrays(tokens: &mut Vec<Token>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, functions: &Vec<Function>, debug: bool) -> Result<Vec<Arr>, String> {
    let mut arrays: Vec<Arr> = Vec::new();
    let mut new_tokens: Vec<Token> = Vec::new();
    let mut tokens_iter = tokens.iter();
    while let Some(token) = tokens_iter.next() {
        let (line, col) = match token {
            Token::Keyword(word, line, col) if word == "arr" => (*line, *col),
            _ => {
                new_tokens.push(token.clone());
                continue;
            }
        };
        if debug {
            println!("Found an array at {}:{}", line, col);
        }
        let name = match tokens_iter.next() {
            Some(Token::Keyword(name, _, _)) => name,
            _ => return Err(format!("Expected array name, at {}:{}", line, col)),
        };
        if functions.iter().any(|x| &x.name == name) {
            return Err(format!("Cannot redeclare an array with the same name as a function {}:{}", line, col));
        }
        if arrays.iter().any(|x| &x.name == name) {
            return Err(format!("Cannot redeclare an array with the same name as an array {}:{}", line, col));
        }
        if intrinsics.contains_key(name.as_str()) {
            return Err(format!("An array cannot have the same name as an intrinsic ({}) at {}:{}", name, line, col));
        }
        match tokens_iter.next() {
            Some(Token::Keyword(open_curly, _, _)) => {
                if open_curly != "{" {
                    return Err(format!("Expected '{{' in array declaration at {}:{}", line, col));
                }
            }
            _ => return Err(format!("Reached the end of the file while parsing an array at {}:{}", line, col)),
        }
        let datatype = match tokens_iter.next() {
            Some(Token::Keyword(typ, _, _)) => str_to_datatype(typ, line, col)?,
            _ => return Err(format!("Reached the end of the file while parsing an array at {}:{}", line, col)),
        };
        // A declaration without a size literal used to be dropped silently,
        // leaving its closing brace behind as a stray token.
        let size = match tokens_iter.next() {
            Some(Token::IntLit(size_str, _, _)) => size_str
                .parse::<i64>()
                .map_err(|_| format!("Expected an array size but got {} at {}:{}", size_str, line, col))?,
            _ => return Err(format!("Expected an array size in array declaration at {}:{}", line, col)),
        };
        if size < 0 {
            return Err(format!("Expected a non-negative array size but got {} at {}:{}", size, line, col));
        }
        match tokens_iter.next() {
            Some(Token::Keyword(close_curly, _, _)) => {
                if close_curly != "}" {
                    return Err(format!("Expected '}}' in array declaration at {}:{}", line, col));
                }
            }
            _ => return Err(format!("Reached the end of the file while parsing an array at {}:{}", line, col)),
        }
        let default_val = match datatype {
            Datatype::Any | Datatype::String => String::new(),
            Datatype::Bool => String::from("false"),
            Datatype::Int => String::from("0"),
        };
        let data: Vec<String> = (0..size).map(|_| default_val.clone()).collect();
        arrays.push(Arr { name: name.clone(), datatype, length: size, data });
    }
    *tokens = new_tokens;
    Ok(arrays)
}
/// Maps a type name from an array declaration (`any`, `bool`, `int`, `str`)
/// to its `Datatype`.
///
/// # Errors
/// Any other name yields an error message that mentions the given position.
fn str_to_datatype(s: &str, line: i32, col: i32) -> Result<Datatype, String> {
    let datatype = match s {
        "any" => Datatype::Any,
        "bool" => Datatype::Bool,
        "int" => Datatype::Int,
        "str" => Datatype::String,
        unknown => {
            return Err(format!("Expected a datatype for the array, got {} instead at {}:{}", unknown, line, col));
        }
    };
    Ok(datatype)
}
fn extract_functions(tokens: &mut Vec<Token>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, debug: bool) -> Result<Vec<Function>, String>
{
let mut tokens_iter = tokens.iter().peekable();
let mut functions: Vec<Function> = Vec::new();
let mut new_tokens: Vec<Token> = Vec::new();
while let Some(token) = tokens_iter.next()
{
if let Token::Keyword(word, line, col) = token
{
if word == "function"
{
if debug
{
2022-12-15 20:34:56 +01:00
println!("Found a function at {}:{}", line, col);
}
let mut ins: Vec<Datatype> = Vec::new();
loop
{
let maybe_token = tokens_iter.next();
match maybe_token
{
Some(token) =>
{
match token
{
Token::IntLit(_, line, col) | Token::StringLit(_, line, col) | Token::BoolLit(_, line, col) |
Token::Apply(_, _, line, col) =>
{
return Err(format!("Expected input parameters for a function but got {:?} instead at {}:{}", token, line, col));
}
Token::Keyword(word, line, col) =>
{
if word == "=>"
{
break;
}
match word.as_str()
2022-12-14 04:12:03 +01:00
{
"any" => ins.push(Datatype::Any),
"str" => ins.push(Datatype::String),
"int" => ins.push(Datatype::Int),
2022-12-14 20:39:51 +01:00
"bool" => ins.push(Datatype::Bool),
_ => return Err(format!("Expected input parameters for a function but got {} instead at {}:{}", word, line, col))
2022-12-14 04:12:03 +01:00
}
}
}
}
None => return Err(format!("Unexpected end of file while extracting a function"))
}
}
if debug
{
println!("ins: {:?}", ins);
}
let mut outs: Vec<Datatype> = Vec::new();
loop
{
let maybe_token = tokens_iter.next();
match maybe_token
{
Some(token) =>
2022-12-14 04:12:03 +01:00
{
match token
2022-12-14 04:12:03 +01:00
{
Token::IntLit(_, line, col) | Token::StringLit(_, line, col) | Token::BoolLit(_, line, col) |
Token::Apply(_, _, line, col) =>
{
return Err(format!("Expected input parameters for a function but got {:?} instead at {}:{}", token, line, col));
}
Token::Keyword(word, line, col) =>
{
match word.as_str()
{
"any" => outs.push(Datatype::Any),
"str" => outs.push(Datatype::String),
"int" => outs.push(Datatype::Int),
2022-12-15 20:34:56 +01:00
"bool" => outs.push(Datatype::Bool),
"{" | "}" | "deq" | "req" | "dup" | "swp" | "true" | "false" | "depth" | "???" => return Err(format!("Expected function name but got {} at {}:{}", word, line, col)),
_ =>
{
if functions.iter().any(|x| &x.name == word)
{
return Err(format!("Redeclaration of function '{}' at {}:{}", word, line, col));
}
2022-12-14 09:34:41 +01:00
if intrinsics.contains_key(word.as_str())
{
return Err(format!("Function name {} at {}:{} is already an intrinsic", word, line, col));
2022-12-14 09:34:41 +01:00
}
if debug
{
println!("outs: {:?}", outs);
}
let block = parse_block(&mut tokens_iter, intrinsics, debug)?;
functions.push(Function {name: word.clone(), ins, outs, content: block});
break;
}
}
}
2022-12-14 04:12:03 +01:00
}
}
None => return Err(format!("Unexpected end of file while extracting a function"))
}
}
}
else
{
new_tokens.push(token.clone());
}
}
else
{
new_tokens.push(token.clone());
}
}
tokens.clear();
tokens.extend_from_slice(&new_tokens);
return Ok(functions);
}
/// Parses a brace-delimited block: consumes the opening `{` and then
/// everything up to and including the matching `}`.
///
/// # Errors
/// Fails when the next token is missing or is not the keyword `{`, or when
/// the block content fails to parse.
fn parse_block(tokens_iter: &mut Peekable<std::slice::Iter<Token>>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, debug: bool) -> Result<Vec<Operation>, String> {
    match tokens_iter.next() {
        Some(Token::Keyword(opening, line, col)) => {
            if opening != "{" {
                return Err(format!("Expected '{{' to open a block but got {} at {}:{}", opening, line, col));
            }
        }
        _ => return Err(String::from("Expected '{' to open a block")),
    }
    parse_until_delimiter(tokens_iter, intrinsics, Some("}"), debug)
}
fn parse_until_delimiter(tokens_iter: &mut Peekable<std::slice::Iter<Token>>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, delimiter: Option<&str>, debug: bool) -> Result<Vec<Operation>, String>
{
let mut operations: Vec<Operation> = Vec::new();
loop
{
let maybe_token = tokens_iter.next();
match maybe_token
{
Some(token) =>
{
match token
{
Token::IntLit(value, line, col) =>
{
operations.push(Operation::Enqueue(Datatype::Int, value.clone(), *line, *col));
}
Token::StringLit(value, line, col) =>
{
operations.push(Operation::Enqueue(Datatype::String, value.clone(), *line, *col));
}
2022-12-14 20:39:51 +01:00
Token::BoolLit(value, line, col) =>
{
operations.push(Operation::Enqueue(Datatype::Bool, value.clone(), *line, *col));
}
Token::Apply(name, word, line, col) =>
{
operations.push(Operation::Apply(name.clone(), word.clone(), *line, *col));
}
Token::Keyword(word, line, col) =>
{
if intrinsics.contains_key(word.as_str())
{
operations.push(Operation::Intrinsic(word.clone(), *line, *col));
}
else if word == "if"
{
let block = parse_block(tokens_iter, intrinsics, debug)?;
let else_block =
if let Some(Token::Keyword(maybe_else, _, _)) = tokens_iter.peek()
{
if maybe_else == "else"
{
tokens_iter.next();
Some(parse_block(tokens_iter, intrinsics, debug)?)
}
else
{
None
}
}
else
{
None
};
operations.push(Operation::If(block, else_block, *line, *col));
}
else if word == "while"
{
operations.push(Operation::While(parse_block(tokens_iter, intrinsics, debug)?, *line, *col));
}
else if word == "deq"
{
operations.push(Operation::Dequeue(*line, *col));
}
else if word == "req"
{
operations.push(Operation::Requeue(*line, *col));
}
2022-12-14 11:46:39 +01:00
else if word == "dup"
{
operations.push(Operation::Dup(*line, *col));
}
else if word == "swp"
{
operations.push(Operation::Swap(*line, *col));
}
2022-12-15 20:34:56 +01:00
else if word == "depth"
{
operations.push(Operation::Depth(*line, *col));
}
else if word == "???"
{
operations.push(Operation::QueueDiagnostic(*line, *col));
}
else if Some(word.as_str()) == delimiter
{
return Ok(operations);
}
else if word == "{" || word == "function"
{
return Err(format!("Unexpected keyword {} at {}:{}", word, line, col));
}
else
{
operations.push(Operation::FunctionCall(word.clone(), *line, *col));
}
}
}
}
None =>
{
if delimiter.is_some()
{
return Err(format!("Reached the end of the file while parsing a block"));
}
else
{
return Ok(operations);
}
}
}
}
}
/// Prints the command line synopsis to stdout and terminates the process
/// with exit code 0.
fn usage() {
    let synopsis = "Usage: kurz -c path/to/file";
    println!("{}", synopsis);
    exit(0);
}
/// Turns the pending word into a token and resets `word` and
/// `application_name`.
///
/// A word with a pending application name becomes `Token::Apply`; otherwise
/// it is classified as an integer literal, a boolean literal or a keyword.
fn tokenize_finish_word(word: &mut String, application_name: &mut String, line: i32, col: i32) -> Token {
    let text = std::mem::take(word);
    if !application_name.is_empty() {
        return Token::Apply(std::mem::take(application_name), text, line, col);
    }
    if text.parse::<i64>().is_ok() {
        Token::IntLit(text, line, col)
    } else if text == "true" || text == "false" {
        Token::BoolLit(text, line, col)
    } else {
        Token::Keyword(text, line, col)
    }
}

/// Splits `text` into tokens.
///
/// Words are separated by whitespace. `"..."` is a string literal in which
/// the two characters `\n` are replaced by a newline. `//` starts a comment
/// that runs to the end of the line, except inside a string literal. A word
/// of the form `name.word` becomes an application token.
///
/// The position stored in a token is the line and column of the character
/// that ended it (the whitespace, closing quote or comment start after it, or
/// one past the last character at the end of the input).
///
/// # Errors
/// Fails when a `"` appears in the middle of a word or when a string literal
/// is still open at the end of the input.
fn tokenize(text: &str) -> Result<Vec<Token>, String> {
    let mut tokens: Vec<Token> = Vec::new();
    let mut line = 1;
    let mut col = 1;
    let mut state = TokenizerState::Whitespace;
    let mut word = String::new();
    let mut application_name = String::new();
    let mut iter = text.chars().peekable();
    while let Some(ch) = iter.next() {
        let starts_comment = ch == '/' && iter.peek() == Some(&'/');
        // `//` inside a string literal is part of the string, not a comment.
        if starts_comment && !matches!(state, TokenizerState::Quote) {
            // A comment directly after a word ends that word; without this
            // the word would leak into the first token after the comment.
            if matches!(state, TokenizerState::Keyword) {
                tokens.push(tokenize_finish_word(&mut word, &mut application_name, line, col));
            }
            state = TokenizerState::Comment;
        }
        match state {
            TokenizerState::Comment => {
                if ch == '\n' {
                    state = TokenizerState::Whitespace;
                }
            }
            TokenizerState::Whitespace => {
                if ch == '"' {
                    state = TokenizerState::Quote;
                } else if !ch.is_whitespace() {
                    state = TokenizerState::Keyword;
                    word.push(ch);
                }
            }
            TokenizerState::Quote => {
                if ch == '"' {
                    state = TokenizerState::Whitespace;
                    tokens.push(Token::StringLit(word.replace("\\n", "\n"), line, col));
                    word.clear();
                } else {
                    word.push(ch);
                }
            }
            TokenizerState::Keyword => {
                if ch.is_whitespace() {
                    state = TokenizerState::Whitespace;
                    tokens.push(tokenize_finish_word(&mut word, &mut application_name, line, col));
                } else {
                    match ch {
                        '"' => return Err(String::from("Having '\"' in the middle of a word is not allowed")),
                        // Everything before the dot is the application name.
                        '.' => application_name = std::mem::take(&mut word),
                        _ => word.push(ch),
                    }
                }
            }
        }
        col += 1;
        if ch == '\n' {
            col = 1;
            line += 1;
        }
    }
    match state {
        TokenizerState::Quote => {
            return Err(String::from("Encountered EOF before closing string"));
        }
        TokenizerState::Whitespace | TokenizerState::Comment => {}
        TokenizerState::Keyword => {
            // The last word gets the same classification as any other word,
            // so input that ends in a literal without a trailing newline works.
            tokens.push(tokenize_finish_word(&mut word, &mut application_name, line, col));
        }
    }
    Ok(tokens)
}