// kurz/src/main.rs

use core::panic;
use std::env;
use std::fs;
use std::process::exit;
/// A lexical token produced by `tokenize`.
///
/// Each variant carries its payload plus a (line, column) pair, 1-based.
/// NOTE(review): the position is recorded where the token is *finished*
/// (closing quote / trailing whitespace), not where it starts — confirm
/// that is intended before using it in diagnostics.
#[derive(Debug, Clone, PartialEq)]
enum Token
{
// Contents of a `"`-delimited literal, quotes stripped.
StringLit(String, i32, i32),
// A whitespace-delimited word that parses as `i64`.
IntLit(i64, i32, i32),
// Any other whitespace-delimited word (including punctuation like `{`, `}`, `=>`).
Keyword(String, i32, i32),
}
/// States of the `tokenize` character-level state machine.
enum TokenizerState
{
// Between tokens, skipping whitespace.
Whitespace,
// Inside a `"`-delimited string literal.
Quote,
// Accumulating a non-string word (keyword or integer literal).
Keyword,
// Inside a `//` line comment, until the next newline.
Comment,
}
/// Datatypes usable in a function signature
/// (see `extract_functions`, which maps the keywords to variants).
#[derive(Debug,Clone,Copy)]
enum Datatype
{
// The `int` keyword.
Int,
// The `str` keyword.
String,
// The `ptr` keyword.
Pointer,
// The `any` keyword.
Any,
}
/// A function definition extracted from the token stream by `extract_functions`.
#[derive(Debug)]
struct Function
{
// Function name; `extract_functions` rejects duplicates.
name: String,
// Input datatypes (listed before `=>` in the declaration).
ins: Vec<Datatype>,
// Output datatypes (listed after `=>`).
outs: Vec<Datatype>,
// Body tokens between the `{` and `}` (braces excluded).
content: Vec<Token>
}
/// Entry point: parses CLI arguments and dispatches.
///
/// `kurz -c <file>` (or `--compile`) tokenizes the given source file,
/// extracts its function definitions, and prints the intermediate results.
/// Any other option panics; missing arguments print usage and exit.
fn main()
{
    let args: Vec<String> = env::args().collect();
    if args.len() < 2
    {
        usage()
    }
    match args[1].as_str()
    {
        "-c" | "--compile" =>
        {
            // A file path must follow the flag. The previous code indexed
            // args[2] unconditionally, panicking with an opaque
            // index-out-of-bounds error when the path was missing.
            if args.len() < 3
            {
                usage()
            }
            let file_content = fs::read_to_string(&args[2]).expect("Could not read the source file");
            let mut tokens: Vec<Token> = tokenize(&file_content);
            println!("{:?}", tokens);
            let functions: Vec<Function> = extract_functions(&mut tokens);
            println!("{:?}", tokens);
            println!("{:?}", functions);
        }
        _ => panic!("Unknown option {}", args[1])
    }
}
/// Splits function definitions out of the token stream.
///
/// Scans `tokens` for the pattern
/// `function <ins...> => <outs...> <name> { <body...> }`,
/// builds one `Function` per definition, and removes every token that
/// belonged to a definition from `tokens`, leaving only top-level tokens.
///
/// Panics on malformed declarations: unknown datatype keyword, a literal
/// where a datatype is expected, a duplicate function name, or a missing
/// `{`, reporting `line:col` where available.
fn extract_functions(tokens: &mut Vec<Token>) -> Vec<Function>
{
    let mut functions: Vec<Function> = Vec::new();
    let mut state = FunctionExtractionState::Outside;
    let mut ins: Vec<Datatype> = Vec::new();
    let mut outs: Vec<Datatype> = Vec::new();
    let mut function_name = String::new();
    let mut content: Vec<Token> = Vec::new();
    // Indices of tokens consumed by a definition; pushed in strictly
    // ascending order (at most one push per loop iteration).
    let mut indices_to_remove: Vec<usize> = Vec::new();
    for (i, token) in tokens.iter().enumerate()
    {
        match state
        {
            FunctionExtractionState::Outside =>
            {
                // Wait for the `function` keyword; everything else stays top-level.
                // (Compare against a &str directly — the old
                // `&String::from("function")` allocated on every token.)
                if let Token::Keyword(name, _, _) = token
                {
                    if name == "function"
                    {
                        state = FunctionExtractionState::Ins;
                    }
                }
            }
            FunctionExtractionState::Ins =>
            {
                match token
                {
                    Token::Keyword(name, line, col) =>
                    {
                        match name.as_str()
                        {
                            "int" => ins.push(Datatype::Int),
                            "str" => ins.push(Datatype::String),
                            "ptr" => ins.push(Datatype::Pointer),
                            "any" => ins.push(Datatype::Any),
                            "=>" => state = FunctionExtractionState::Outs,
                            _ => panic!("Unknown datatype '{}' at {}:{}", name, line, col)
                        }
                    },
                    Token::StringLit(_, line, col) | Token::IntLit(_, line, col) => panic!("Expected datatype for function declaration at {}:{}", line, col),
                }
            }
            FunctionExtractionState::Outs =>
            {
                match token
                {
                    Token::Keyword(name, _, _) =>
                    {
                        match name.as_str()
                        {
                            "int" => outs.push(Datatype::Int),
                            "str" => outs.push(Datatype::String),
                            "ptr" => outs.push(Datatype::Pointer),
                            "any" => outs.push(Datatype::Any),
                            // The first non-datatype keyword is the function's
                            // name. (The outer match already proved this token
                            // is a Keyword; the old nested `if let` with its
                            // unreachable "Expected a function name" panic
                            // was dead code.)
                            _ =>
                            {
                                if functions.iter().any(|x| &x.name == name)
                                {
                                    panic!("A function with name {} already exists", name);
                                }
                                function_name = name.clone();
                                state = FunctionExtractionState::OpenCurly;
                            }
                        }
                    },
                    Token::StringLit(_, line, col) | Token::IntLit(_, line, col) => panic!("Expected datatype for function declaration at {}:{}", line, col),
                }
            }
            FunctionExtractionState::OpenCurly =>
            {
                if let Token::Keyword(name, line, col) = token
                {
                    if name == "{"
                    {
                        state = FunctionExtractionState::Body
                    }
                    else
                    {
                        panic!("Expected '{{' to open the function's body at {}:{}", line, col)
                    }
                }
                else
                {
                    panic!("Expected '{{' to open the function's body") // TODO: Add location
                }
            }
            FunctionExtractionState::Body =>
            {
                if let Token::Keyword(name, _, _) = token
                {
                    if name == "}"
                    {
                        // End of body: record the finished function and reset
                        // the accumulators for the next definition.
                        state = FunctionExtractionState::Outside;
                        functions.push(Function { name: function_name.clone(), ins: ins.clone(), outs: outs.clone(), content: content.clone() });
                        function_name.clear();
                        ins.clear();
                        outs.clear();
                        content.clear();
                        indices_to_remove.push(i);
                        continue;
                    }
                }
                content.push(token.clone());
            }
        }
        // Every token examined while inside a definition is consumed.
        if state != FunctionExtractionState::Outside
        {
            indices_to_remove.push(i);
        }
    }
    // Drop the consumed tokens in one O(n) pass. (The old reversed
    // `Vec::remove` loop shifted the tail on every removal: O(n^2).)
    let mut pending = indices_to_remove.into_iter().peekable();
    let mut idx = 0usize;
    tokens.retain(|_|
    {
        let drop = pending.peek() == Some(&idx);
        if drop
        {
            pending.next();
        }
        idx += 1;
        !drop
    });
    functions
}
/// Parser states for `extract_functions`, following the declaration shape
/// `function <ins...> => <outs...> <name> { <body...> }`.
#[derive(Debug, PartialEq)]
enum FunctionExtractionState
{
// Not inside a declaration; waiting for the `function` keyword.
Outside,
// Reading input datatypes until `=>`.
Ins,
// Reading output datatypes until a non-datatype keyword (the name).
Outs,
// Expecting the `{` that opens the body.
OpenCurly,
// Collecting body tokens until `}`.
Body,
}
/// Print the command-line usage string and terminate the process
/// with exit code 0. Never returns.
fn usage()
{
    let message = "Usage: kurz -c path/to/file";
    println!("{}", message);
    exit(0);
}
/// Splits `text` into a flat list of tokens.
///
/// Words are whitespace-delimited; `"` delimits string literals (which may
/// contain whitespace); a word that parses as `i64` becomes an `IntLit`,
/// anything else a `Keyword`; `//` starts a line comment running to the
/// end of the line. Positions are 1-based and recorded where the token is
/// finished.
///
/// Panics on an unterminated string literal (EOF inside quotes) and on a
/// `"` appearing in the middle of a word.
fn tokenize(text: &str) -> Vec<Token>
{
    let mut tokens: Vec<Token> = Vec::new();
    let mut line = 1;
    let mut col = 1;
    let mut state = TokenizerState::Whitespace;
    let mut word = String::new();
    let mut iter = text.chars().peekable();
    while let Some(ch) = iter.next()
    {
        // `//` opens a comment only outside string literals — the old check
        // fired in every state, so a literal like "a//b" was destroyed.
        if ch == '/' && iter.peek() == Some(&'/') && !matches!(state, TokenizerState::Quote | TokenizerState::Comment)
        {
            // Flush a word in progress so `foo// comment` still yields `foo`;
            // previously the leftover buffer leaked into the next word,
            // fusing e.g. `foo// c\nbar` into a single `foobar` token.
            if matches!(state, TokenizerState::Keyword) && !word.is_empty()
            {
                if let Ok(number) = word.parse::<i64>()
                {
                    tokens.push(Token::IntLit(number, line, col));
                }
                else
                {
                    tokens.push(Token::Keyword(word.clone(), line, col));
                }
                word.clear();
            }
            state = TokenizerState::Comment;
        }
        match state
        {
            TokenizerState::Comment =>
            {
                // Skip everything until the end of the line.
                if ch == '\n'
                {
                    state = TokenizerState::Whitespace;
                }
            }
            TokenizerState::Whitespace =>
            {
                // If ch is whitespace, do nothing.
                if !ch.is_whitespace()
                {
                    match ch
                    {
                        '"' =>
                        {
                            // Opening quote: start a string literal.
                            state = TokenizerState::Quote;
                        }
                        _ =>
                        {
                            // Any other character starts a word.
                            state = TokenizerState::Keyword;
                            word.push(ch);
                        }
                    }
                }
            }
            TokenizerState::Quote =>
            {
                if ch == '"'
                {
                    // Closing quote: emit the literal (quotes stripped).
                    state = TokenizerState::Whitespace;
                    tokens.push(Token::StringLit(word.clone(), line, col));
                    word.clear();
                }
                else
                {
                    word.push(ch);
                }
            }
            TokenizerState::Keyword =>
            {
                if ch.is_whitespace()
                {
                    // Word finished: integers become IntLit, the rest Keyword.
                    state = TokenizerState::Whitespace;
                    if let Ok(number) = word.parse::<i64>()
                    {
                        tokens.push(Token::IntLit(number, line, col));
                    }
                    else
                    {
                        tokens.push(Token::Keyword(word.clone(), line, col));
                    }
                    word.clear();
                }
                else
                {
                    match ch
                    {
                        '"' => panic!("Having '\"' in the middle of a word is not allowed"),
                        _ =>
                        {
                            word.push(ch);
                        }
                    }
                }
            }
        }
        // Track the source position of the character just consumed.
        col += 1;
        if ch == '\n'
        {
            col = 1;
            line += 1;
        }
    }
    // Flush whatever state the input ended in.
    match state
    {
        TokenizerState::Quote =>
        {
            panic!("Encountered EOF before closing string");
        }
        TokenizerState::Whitespace | TokenizerState::Comment => {},
        TokenizerState::Keyword =>
        {
            // Trailing word with no terminating whitespace.
            // NOTE(review): unlike the in-loop flush this never produces an
            // IntLit — a file ending in `42` without a newline yields
            // Keyword("42"). Confirm whether that asymmetry is intended.
            tokens.push(Token::Keyword(word.clone(), line, col));
        }
    }
    tokens
}