Merge branch 'main' of https://gittea.dev/0x4261756D/kurz

Implement function extraction and their removal from the token stream
Initial commit, barebones tokenizer working
2022-12-14 01:48:14 +01:00 · 2022-12-05 00:38:20 +01:00 · 2022-11-29 02:04:01 +01:00
5 changed files with 332 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
 /target
--- a/Cargo.lock
+++ b/Cargo.lock
@ -0,0 +1,7 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
 version = 3
 [[package]]
 name = "kurz"
 version = "0.1.0"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -0,0 +1,8 @@
 [package]
 name = "kurz"
 version = "0.1.0"
 edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 [dependencies]
--- a/src/main.rs
+++ b/src/main.rs
@ -0,0 +1,306 @@
 use core::panic;
 use std::env;
 use std::fs;
 use std::process::exit;
 /// A lexical token. The two `i32` fields of every variant are the line and
 /// column values the tokenizer recorded for the token.
 #[derive(Clone, Debug, PartialEq)]
 enum Token
 {
 	/// Text that was enclosed in double quotes; the quotes are not stored.
 	StringLit(String, i32, i32),
 	/// A word that parsed as an `i64`.
 	IntLit(i64, i32, i32),
 	/// Any other word.
 	Keyword(String, i32, i32),
 }
 /// The state of the tokenizer's character-by-character state machine.
 enum TokenizerState
 {
 	/// Between tokens; whitespace is skipped.
 	Whitespace,
 	/// Inside a string literal, after the opening `"`.
 	Quote,
 	/// Inside a word (keyword or integer literal).
 	Keyword,
 	/// Inside a `//` comment, which lasts until the next newline.
 	Comment,
 }
 /// A datatype that can appear in a function signature.
 #[derive(Clone, Copy, Debug)]
 enum Datatype
 {
 	/// Written `int` in source.
 	Int,
 	/// Written `str` in source.
 	String,
 	/// Written `ptr` in source.
 	Pointer,
 	/// Written `any` in source.
 	Any,
 }
 /// A function definition that was extracted from the token stream.
 #[derive(Debug)]
 struct Function
 {
 	/// The name following the output datatypes.
 	name: String,
 	/// Datatypes listed before `=>`.
 	ins: Vec<Datatype>,
 	/// Datatypes listed after `=>`.
 	outs: Vec<Datatype>,
 	/// The tokens between the opening `{` and the closing `}`.
 	content: Vec<Token>,
 }
 /// Entry point.
 ///
 /// `kurz -c <file>` (or `--compile`) reads the file, tokenizes it, extracts the
 /// function definitions and prints the token stream before and after the
 /// extraction, followed by the extracted functions. When the option or the file
 /// path is missing the usage text is shown instead.
 ///
 /// # Panics
 ///
 /// Panics on an unknown option, when the file cannot be read, and on any error
 /// raised while tokenizing or extracting functions.
 fn main()
 {
 	let args: Vec<String> = env::args().collect();
 	if args.len() < 2
 	{
 		usage();
 		return;
 	}
 	match args[1].as_str()
 	{
 		"-c" | "--compile" =>
 		{
 			// Indexing `args[2]` directly would abort with an index-out-of-bounds
 			// panic when the path is missing; show the usage text instead.
 			let path = match args.get(2)
 			{
 				Some(path) => path,
 				None =>
 				{
 					usage();
 					return;
 				}
 			};
 			let file_content = fs::read_to_string(path).expect("Could not read the source file");
 			let mut tokens: Vec<Token> = tokenize(&file_content);
 			println!("{:?}", tokens);
 			let functions: Vec<Function> = extract_functions(&mut tokens);
 			println!("{:?}", tokens);
 			println!("{:?}", functions);
 		}
 		_ => panic!("Unknown option {}", args[1])
 	}
 }
 /// Pulls every function definition out of `tokens` and returns them.
 ///
 /// A definition has the shape `function <ins...> => <outs...> <name> { <body...> }`,
 /// where ins and outs are the datatype keywords `int`, `str`, `ptr` and `any`. The
 /// first keyword after `=>` that is not a datatype is taken as the function name,
 /// and the body ends at the first `}` keyword.
 ///
 /// Every token that belongs to a definition (from `function` up to and including
 /// the closing `}`) is removed from `tokens`; the other tokens keep their order.
 ///
 /// # Panics
 ///
 /// Panics when a signature contains an unknown datatype or a literal, when a
 /// function name is defined twice, when the name is not followed by `{`, and when
 /// the tokens end in the middle of a definition.
 fn extract_functions(tokens: &mut Vec<Token>) -> Vec<Function>
 {
 	let mut functions: Vec<Function> = Vec::new();
 	let mut state = FunctionExtractionState::Outside;
 	let mut ins: Vec<Datatype> = Vec::new();
 	let mut outs: Vec<Datatype> = Vec::new();
 	let mut function_name = String::new();
 	let mut content: Vec<Token> = Vec::new();
 	// Take ownership of all tokens and push the ones that stay back into `tokens`.
 	// This is a single pass without per-index removal and without cloning tokens.
 	let all_tokens = std::mem::take(tokens);
 	for token in all_tokens
 	{
 		match state
 		{
 			FunctionExtractionState::Outside =>
 			{
 				let starts_function = matches!(&token, Token::Keyword(name, _, _) if name == "function");
 				if starts_function
 				{
 					state = FunctionExtractionState::Ins;
 				}
 				else
 				{
 					tokens.push(token);
 				}
 			}
 			FunctionExtractionState::Ins => match &token
 			{
 				Token::Keyword(name, line, col) => match name.as_str()
 				{
 					"=>" => state = FunctionExtractionState::Outs,
 					other => match datatype_from_keyword(other)
 					{
 						Some(datatype) => ins.push(datatype),
 						None => panic!("Unknown datatype '{}' at {}:{}", name, line, col),
 					},
 				},
 				Token::StringLit(_, line, col) | Token::IntLit(_, line, col) =>
 				{
 					panic!("Expected datatype for function declaration at {}:{}", line, col)
 				}
 			},
 			FunctionExtractionState::Outs => match token
 			{
 				Token::Keyword(name, _, _) => match datatype_from_keyword(&name)
 				{
 					Some(datatype) => outs.push(datatype),
 					// The first keyword that is not a datatype names the function.
 					None =>
 					{
 						if functions.iter().any(|function| function.name == name)
 						{
 							panic!("A function with name {} already exists", name);
 						}
 						function_name = name;
 						state = FunctionExtractionState::OpenCurly;
 					}
 				},
 				Token::StringLit(_, line, col) | Token::IntLit(_, line, col) =>
 				{
 					panic!("Expected datatype for function declaration at {}:{}", line, col)
 				}
 			},
 			FunctionExtractionState::OpenCurly =>
 			{
 				let (is_open_curly, line, col) = match &token
 				{
 					Token::Keyword(name, line, col) => (name == "{", *line, *col),
 					Token::StringLit(_, line, col) | Token::IntLit(_, line, col) => (false, *line, *col),
 				};
 				if !is_open_curly
 				{
 					panic!("Expected '{{' to open the function's body at {}:{}", line, col);
 				}
 				state = FunctionExtractionState::Body;
 			}
 			FunctionExtractionState::Body =>
 			{
 				let closes_body = matches!(&token, Token::Keyword(name, _, _) if name == "}");
 				if closes_body
 				{
 					// `take` hands the collected parts to the function and leaves
 					// empty buffers behind for the next definition.
 					functions.push(Function
 					{
 						name: std::mem::take(&mut function_name),
 						ins: std::mem::take(&mut ins),
 						outs: std::mem::take(&mut outs),
 						content: std::mem::take(&mut content),
 					});
 					state = FunctionExtractionState::Outside;
 				}
 				else
 				{
 					content.push(token);
 				}
 			}
 		}
 	}
 	// A definition that is still open here would otherwise vanish silently: its
 	// tokens are gone from `tokens` but no `Function` was recorded for it.
 	if state != FunctionExtractionState::Outside
 	{
 		panic!("Encountered EOF before the end of a function definition");
 	}
 	functions
 }
 /// Maps a datatype keyword (`int`, `str`, `ptr`, `any`) to its `Datatype`;
 /// returns `None` for every other word.
 fn datatype_from_keyword(word: &str) -> Option<Datatype>
 {
 	match word
 	{
 		"int" => Some(Datatype::Int),
 		"str" => Some(Datatype::String),
 		"ptr" => Some(Datatype::Pointer),
 		"any" => Some(Datatype::Any),
 		_ => None,
 	}
 }
 /// Where function extraction currently is relative to a function definition.
 #[derive(PartialEq, Debug)]
 enum FunctionExtractionState
 {
 	/// Not inside a definition; waiting for the `function` keyword.
 	Outside,
 	/// After `function`: reading input datatypes until `=>`.
 	Ins,
 	/// After `=>`: reading output datatypes until the function name.
 	Outs,
 	/// After the function name: the next token has to be `{`.
 	OpenCurly,
 	/// After `{`: collecting body tokens until `}`.
 	Body,
 }
 /// Prints the command line usage to stderr and terminates the process.
 ///
 /// This is only called for an invalid command line, so the process exits with
 /// status 1 rather than reporting success.
 fn usage()
 {
 	eprintln!("Usage: kurz -c path/to/file");
 	exit(1);
 }
 /// Splits `text` into tokens.
 ///
 /// Words are separated by whitespace. A word that parses as an `i64` becomes a
 /// `Token::IntLit`, every other word a `Token::Keyword`. Text between double
 /// quotes becomes a `Token::StringLit`; it is stored verbatim, escape sequences
 /// are not interpreted. `//` starts a comment that runs to the end of the line,
 /// except inside a string literal, where it is ordinary text.
 ///
 /// The position stored in a token is the line and column of the character that
 /// ended it: the closing quote of a string, or the whitespace or `/` following a
 /// word. For a word that ends with the input it is one column past the last
 /// character.
 ///
 /// # Panics
 ///
 /// Panics if a `"` appears in the middle of a word or if the input ends inside a
 /// string literal.
 fn tokenize(text: &str) -> Vec<Token>
 {
 	let mut tokens: Vec<Token> = Vec::new();
 	let mut line = 1;
 	let mut col = 1;
 	let mut state = TokenizerState::Whitespace;
 	let mut word = String::new();
 	let mut iter = text.chars().peekable();
 	while let Some(ch) = iter.next()
 	{
 		// Evaluated per state below: inside a string literal `//` is plain text,
 		// and after a word the pending word has to be emitted first.
 		let starts_comment = ch == '/' && iter.peek() == Some(&'/');
 		match state
 		{
 			TokenizerState::Comment =>
 			{
 				if ch == '\n'
 				{
 					state = TokenizerState::Whitespace;
 				}
 			}
 			TokenizerState::Whitespace =>
 			{
 				if starts_comment
 				{
 					state = TokenizerState::Comment;
 				}
 				else if ch == '"'
 				{
 					state = TokenizerState::Quote;
 				}
 				else if !ch.is_whitespace()
 				{
 					state = TokenizerState::Keyword;
 					word.push(ch);
 				}
 			}
 			TokenizerState::Quote =>
 			{
 				if ch == '"'
 				{
 					state = TokenizerState::Whitespace;
 					tokens.push(Token::StringLit(std::mem::take(&mut word), line, col));
 				}
 				else
 				{
 					word.push(ch);
 				}
 			}
 			TokenizerState::Keyword =>
 			{
 				if ch.is_whitespace() || starts_comment
 				{
 					tokens.push(token_from_word(&word, line, col));
 					word.clear();
 					state = if starts_comment
 					{
 						TokenizerState::Comment
 					}
 					else
 					{
 						TokenizerState::Whitespace
 					};
 				}
 				else if ch == '"'
 				{
 					panic!("Having '\"' in the middle of a word is not allowed");
 				}
 				else
 				{
 					word.push(ch);
 				}
 			}
 		}
 		col += 1;
 		if ch == '\n'
 		{
 			col = 1;
 			line += 1;
 		}
 	}
 	match state
 	{
 		TokenizerState::Quote => panic!("Encountered EOF before closing string"),
 		TokenizerState::Whitespace | TokenizerState::Comment => {}
 		// A word that ends with the input is classified like any other word, so a
 		// trailing number is an integer literal as well.
 		TokenizerState::Keyword => tokens.push(token_from_word(&word, line, col)),
 	}
 	tokens
 }
 /// Builds the token for a finished word: an `IntLit` when the word parses as an
 /// `i64`, a `Keyword` otherwise.
 fn token_from_word(word: &str, line: i32, col: i32) -> Token
 {
 	match word.parse::<i64>()
 	{
 		Ok(number) => Token::IntLit(number, line, col),
 		Err(_) => Token::Keyword(word.to_string(), line, col),
 	}
 }
--- a/test.qbl
+++ b/test.qbl
@ -0,0 +1,10 @@
 "Hello, World!\n" print 43 foo foo deq
 // Dequeues, enqueues 42 and 17, prints the head
 function any => int foo
 {
 	deq 42 17 print
 }
 "test2" print
Author	SHA1	Message	Date
0x4261756D	1e8322cc7c	Merge branch 'main' of https://gittea.dev/0x4261756D/kurz	2022-12-14 01:48:14 +01:00
0x4261756D	6a2bc25eaf	Implement function extraction and their removal from the token stream	2022-12-05 00:38:20 +01:00
0x4261756D	9976ef9fe9	Initial commit, barebones tokenizer working	2022-11-29 02:04:01 +01:00