// luaaaaah/Tokenizer.cs
//
// 2843 lines
// 58 KiB
// C#

using System;
using System.Collections.Generic;
using System.Text;
namespace luaaaaah;
class Tokenizer
{
// Tokens emitted so far, in source order; Tokenize returns a copy of this list.
private readonly List<Token> tokens = [];
// Current state of the character-driven state machine in TokenizeChar.
private State state = State.Start;
// Rewind point: the Backtrack* helpers reset `index` to this value. Null when no rewind point is recorded.
int? lastIndex;
// Position in the content string of the character currently being processed.
int index;
// Count of `=` characters seen in the opening long bracket of a long string / long comment.
int openingLongBracketLevel;
// Count of `=` characters seen so far in a candidate closing long bracket.
int closingLongBracketLevel;
// Token under construction, or null when no token is in progress.
Token? currentToken;
// Line/column of the character being processed; both start at 0.
CodeLocation currentLocation = new(line: 0, col: 0);
// Accumulates the numeric value of a \x or \u{...} escape sequence while its hex digits are read.
long escapeSequenceNumber;
/// <summary>
/// Feeds <paramref name="content"/> through the state machine one character at a time
/// and returns the tokens that were produced.
/// </summary>
/// <param name="content">Source text to tokenize.</param>
/// <returns>A new array containing the tokens emitted so far by this instance.</returns>
/// <remarks>
/// If the text starts with '#', everything up to (but not including) the first line break
/// is discarded before tokenizing, e.g. a Unix shebang line.
/// </remarks>
public Token[] Tokenize(string content)
{
    if(content.StartsWith('#'))
    {
        int firstNewline = content.IndexOf('\n');
        // IndexOf returns -1 when the '#' line is the whole input; slicing from -1 would throw,
        // so treat that case as empty input. Otherwise keep the '\n' itself so the line counter
        // below still advances past the skipped line.
        content = firstNewline < 0 ? "" : content[firstNewline..];
    }
    while(index < content.Length)
    {
        TokenizeChar(content[index]);
        // The Backtrack* helpers may have rewound `index` and `currentLocation`, so the
        // location is advanced based on the character at the (possibly rewound) index.
        if(content[index] == '\n')
        {
            currentLocation.line += 1;
            currentLocation.col = 0;
        }
        else
        {
            currentLocation.col += 1;
        }
        index += 1;
    }
    // Feed one trailing line break so a token still in progress at end of input gets flushed.
    TokenizeChar('\n');
    return [.. tokens];
}
/// <summary>
/// Appends <paramref name="ch"/> to the string payload of the token under construction
/// (creating the payload if the token has none yet) and moves the token's end position
/// to the current location.
/// </summary>
/// <param name="ch">Character to append.</param>
private void AppendDataChar(char ch)
{
    var token = currentToken!;
    // Unconditional cast: a non-null payload that is not string data is an error here.
    var text = (Token.StringData?)token.data;
    if(text != null)
    {
        text.data += ch;
    }
    else
    {
        token.data = new Token.StringData($"{ch}");
    }
    token.region.end = new(currentLocation);
}
/// <summary>
/// Appends the decimal digit <paramref name="ch"/> to the integer payload of the token
/// under construction (value = value * 10 + digit), creating the payload if the token has
/// none yet, and moves the token's end position to the current location.
/// </summary>
/// <param name="ch">Digit character; its value is computed as <c>ch - '0'</c>.</param>
private void AppendDataInt(char ch)
{
    var token = currentToken!;
    var numeralData = (Token.NumeralData?)token.data;
    if(numeralData != null)
    {
        // The existing numeral must be an integer; anything else fails the cast.
        var integer = (INumeral.Integer)numeralData.numeral;
        integer.value *= 10;
        integer.value += ch - '0';
    }
    else
    {
        token.data = new Token.NumeralData(new INumeral.Integer(ch - '0'));
    }
    token.region.end = new(currentLocation);
}
/// <summary>
/// Appends the hexadecimal digit <paramref name="ch"/> to the integer payload of the token
/// under construction (value = value * 16 + digit), creating the payload if the token has
/// none yet, and moves the token's end position to the current location.
/// </summary>
/// <param name="ch">Hex digit character; letters are lower-cased before conversion.</param>
private void AppendDataIntHex(char ch)
{
    int digitValue;
    if(char.IsAsciiDigit(ch))
    {
        digitValue = ch - '0';
    }
    else
    {
        digitValue = 10 + char.ToLower(ch) - 'a';
    }

    var token = currentToken!;
    var numeralData = (Token.NumeralData?)token.data;
    if(numeralData != null)
    {
        // The existing numeral must be an integer; anything else fails the cast.
        var integer = (INumeral.Integer)numeralData.numeral;
        integer.value *= 16;
        integer.value += digitValue;
    }
    else
    {
        token.data = new Token.NumeralData(new INumeral.Integer(digitValue));
    }
    token.region.end = new(currentLocation);
}
/// <summary>
/// Starts a new payload-less token of the given type at the current location, records the
/// current index as the rewind point and switches the state machine to
/// <paramref name="newState"/>.
/// </summary>
/// <param name="newState">State to enter.</param>
/// <param name="type">Type of the token being started.</param>
private void TokenizeTerminal(State newState, TokenType type)
{
    (lastIndex, state) = (index, newState);
    // The new token's region starts and ends at the current location.
    currentToken = new(
        region: new(
            start: new(currentLocation),
            end: new(currentLocation)
        ),
        type: type
    );
}
/// <summary>
/// Starts a new <c>Name</c> token at the current location whose string payload is the single
/// character <paramref name="ch"/>, records the current index as the rewind point and
/// switches the state machine to <paramref name="newState"/>.
/// </summary>
/// <param name="newState">State to enter.</param>
/// <param name="ch">First character of the name.</param>
private void TokenizeTerminalName(State newState, char ch)
{
    (lastIndex, state) = (index, newState);
    // The new token's region starts and ends at the current location.
    currentToken = new(
        region: new(
            start: new(currentLocation),
            end: new(currentLocation)
        ),
        type: TokenType.Name,
        data: new Token.StringData($"{ch}")
    );
}
/// <summary>
/// Finishes the token under construction as <paramref name="newType"/>, discarding its
/// payload, emits it, and rewinds the tokenizer to the recorded rewind point in the
/// <c>Start</c> state.
/// </summary>
/// <param name="newType">Type to stamp on the emitted token.</param>
/// <exception cref="Exception">No token is in progress, or its type is null.</exception>
private void Backtrack(TokenType newType)
{
    if(currentToken is not { type: not null } finished)
    {
        throw new Exception($"Lexer error at {currentLocation}");
    }

    finished.type = newType;
    finished.data = null;

    // Resume from the end of the emitted token.
    currentLocation = new(finished.region.end);
    tokens.Add(finished);
    currentToken = null;

    index = lastIndex!.Value;
    lastIndex = null;
    state = State.Start;
}
/// <summary>
/// Finishes the token under construction as <paramref name="newType"/> while keeping its
/// payload, emits it, and rewinds the tokenizer to the recorded rewind point in the
/// <c>Start</c> state.
/// </summary>
/// <param name="newType">Type to stamp on the emitted token.</param>
/// <exception cref="Exception">No token is in progress, or its type is null.</exception>
private void BacktrackNoClear(TokenType newType)
{
    var pending = currentToken;
    if(pending is null || pending.type is null)
    {
        throw new Exception($"Lexer error at {currentLocation}");
    }

    pending.type = newType;

    // Resume from the end of the emitted token.
    currentLocation = new(pending.region.end);
    tokens.Add(pending);
    currentToken = null;

    index = lastIndex!.Value;
    lastIndex = null;
    state = State.Start;
}
/// <summary>
/// Emits the token under construction unchanged (type and payload kept) and rewinds the
/// tokenizer to the recorded rewind point in the <c>Start</c> state.
/// </summary>
/// <exception cref="Exception">No token is in progress, or its type is null.</exception>
private void BacktrackNoTypeChange()
{
    if(currentToken is { type: not null } done)
    {
        // Resume from the end of the emitted token.
        currentLocation = new(done.region.end);
        tokens.Add(done);
        currentToken = null;

        index = lastIndex!.Value;
        lastIndex = null;
        state = State.Start;
        return;
    }

    throw new Exception($"Lexer error at {currentLocation}");
}
private void TokenizeChar(char ch)
{
switch(state)
{
case State.Start:
{
switch(ch)
{
case '-':
TokenizeTerminal(State.Minus, TokenType.Minus);
break;
case ',':
TokenizeTerminal(State.Comma, TokenType.Comma);
break;
case '=':
TokenizeTerminal(State.Equals, TokenType.Equals);
break;
case '(':
TokenizeTerminal(State.RoundOpen, TokenType.RoundOpen);
break;
case ')':
TokenizeTerminal(State.RoundClosed, TokenType.RoundClosed);
break;
case '.':
TokenizeTerminal(State.Dot, TokenType.Dot);
break;
case ':':
TokenizeTerminal(State.Colon, TokenType.Colon);
break;
case '{':
TokenizeTerminal(State.CurlyOpen, TokenType.CurlyOpen);
break;
case '}':
TokenizeTerminal(State.CurlyClosed, TokenType.CurlyClosed);
break;
case '[':
TokenizeTerminal(State.SquareOpen, TokenType.SquareOpen);
break;
case ']':
TokenizeTerminal(State.SquareClosed, TokenType.SquareClosed);
break;
case '+':
TokenizeTerminal(State.Plus, TokenType.Plus);
break;
case '~':
TokenizeTerminal(State.Tilde, TokenType.Tilde);
break;
case '>':
TokenizeTerminal(State.Gt, TokenType.Gt);
break;
case '<':
TokenizeTerminal(State.Lt, TokenType.Lt);
break;
case '#':
TokenizeTerminal(State.Hash, TokenType.Hash);
break;
case '|':
TokenizeTerminal(State.Pipe, TokenType.Pipe);
break;
case '&':
TokenizeTerminal(State.Ampersand, TokenType.Ampersand);
break;
case '%':
TokenizeTerminal(State.Percent, TokenType.Percent);
break;
case '*':
TokenizeTerminal(State.Star, TokenType.Star);
break;
case '/':
TokenizeTerminal(State.Slash, TokenType.Slash);
break;
case ';':
TokenizeTerminal(State.Semicolon, TokenType.Semicolon);
break;
case '^':
TokenizeTerminal(State.Caret, TokenType.Caret);
break;
case 'a':
TokenizeTerminalName(State.A, ch);
break;
case 'b':
TokenizeTerminalName(State.B, ch);
break;
case 'd':
TokenizeTerminalName(State.D, ch);
break;
case 'e':
TokenizeTerminalName(State.E, ch);
break;
case 'f':
TokenizeTerminalName(State.F, ch);
break;
case 'i':
TokenizeTerminalName(State.I, ch);
break;
case 'g':
TokenizeTerminalName(State.G, ch);
break;
case 'l':
TokenizeTerminalName(State.L, ch);
break;
case 'n':
TokenizeTerminalName(State.N, ch);
break;
case 'o':
TokenizeTerminalName(State.O, ch);
break;
case 'r':
TokenizeTerminalName(State.R, ch);
break;
case 't':
TokenizeTerminalName(State.T, ch);
break;
case 'u':
TokenizeTerminalName(State.U, ch);
break;
case 'w':
TokenizeTerminalName(State.W, ch);
break;
case '0':
{
lastIndex = index;
state = State.Zero;
currentToken = new(region: new(start: new(currentLocation), end: new(currentLocation)), type: TokenType.Numeral, data: new Token.NumeralData(new INumeral.Integer(0)));
} /* tokenizeTerminalIntNum(TokenType.Numeral, TokenizerState.Zero, tokenNumeral, ch); */
break;
case '"':
{
state = State.Quote;
currentToken = new(region: new(start: new(currentLocation), end: new(currentLocation)), type: TokenType.StringLiteral);
}
break;
case '\'':
{
state = State.SingleQuote;
currentToken = new(region: new(start: new(currentLocation), end: new(currentLocation)), type: TokenType.StringLiteral);
}
break;
default:
{
if(char.IsWhiteSpace(ch)) { }
else if(char.IsAsciiLetter(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken = new(region: new(start: new(currentLocation), end: new(currentLocation)), type: TokenType.Name, data: new Token.StringData($"{ch}"));
}
else if(char.IsDigit(ch))
{
lastIndex = index;
state = State.Integer;
currentToken = new(region: new(start: new(currentLocation), end: new(currentLocation)), type: TokenType.Numeral, data: new Token.NumeralData(new INumeral.Integer(ch - '0')));
}
else
{
throw new NotImplementedException($"{ch} at {currentLocation}");
}
}
break;
}
}
break;
case State.Quote:
{
if(ch == '\\')
{
state = State.QuoteBackslash;
}
else if(ch == '"')
{
lastIndex = index;
state = State.String;
if(currentToken == null || currentToken.type == null)
{
currentToken = new(region: new(start: new(currentLocation), end: new(currentLocation)), type: TokenType.StringLiteral);
}
else
{
currentToken.type = TokenType.StringLiteral;
currentToken.region.end = new(currentLocation);
currentToken.data ??= new Token.StringData("");
}
}
else
{
AppendDataChar(ch);
}
}
break;
case State.QuoteBackslash:
{
switch(ch)
{
case 'a':
{
AppendDataChar('\u0007');
state = State.Quote;
}
break;
case 'b':
{
AppendDataChar('\u0008');
state = State.Quote;
}
break;
case 't':
{
AppendDataChar('\t');
state = State.Quote;
}
break;
case 'n':
case '\n':
{
AppendDataChar('\n');
state = State.Quote;
}
break;
case 'v':
{
AppendDataChar('\u000b');
state = State.Quote;
}
break;
case 'f':
{
AppendDataChar('\u000c');
state = State.Quote;
}
break;
case 'r':
{
AppendDataChar('\r');
state = State.Quote;
}
break;
case '\\':
{
AppendDataChar('\\');
state = State.Quote;
}
break;
case '"':
{
AppendDataChar('"');
state = State.Quote;
}
break;
case '\'':
{
AppendDataChar('\'');
state = State.Quote;
}
break;
case 'z':
{
state = State.QuoteBackslashZ;
}
break;
case 'x':
{
state = State.QuoteBackslashX;
throw new NotImplementedException($"\\x escape sequences are broken right now");
}
case 'u':
{
state = State.QuoteBackslashU;
throw new NotImplementedException($"\\u escape sequences are broken right now");
}
default: throw new Exception($"Unknown escape sequence: \\{ch} at {currentLocation}");
}
}
break;
case State.QuoteBackslashU:
{
if(ch == '{')
{
state = State.QuoteBackslashUBracket;
}
else
{
throw new Exception($"Expected `{{` to continue \\u escape sequence at {currentLocation}, got {ch}");
}
}
break;
case State.QuoteBackslashUBracket:
{
if(char.IsAsciiHexDigit(ch))
{
state = State.QuoteBackslashUBracketHex;
escapeSequenceNumber = char.IsAsciiDigit(ch) ? ch - '0' : 10 + char.ToLower(ch) - 'a';
}
else
{
throw new Exception($"Expected hex digit to continue \\u escape sequence at {currentLocation}, got {ch}");
}
}
break;
case State.QuoteBackslashUBracketHex:
{
if(char.IsAsciiHexDigit(ch))
{
escapeSequenceNumber = (escapeSequenceNumber * 16) + (char.IsAsciiDigit(ch) ? ch - '0' : 10 + char.ToLower(ch) - 'a');
if(escapeSequenceNumber > uint.MaxValue)
{
throw new Exception($"{currentLocation}: \\u escape sequence has a value > 2^31 which is not permitted");
}
}
else if(ch == '}')
{
state = State.Quote;
// TODO: THIS IS WRONG, there is zero padding due to the fixed size array
char[] chars = Encoding.UTF8.GetChars(BitConverter.GetBytes((uint)escapeSequenceNumber));
for(int i = 0; i < chars.Length; i++)
{
AppendDataChar(chars[i]);
}
escapeSequenceNumber = 0;
}
else
{
throw new Exception($"Expected second hex digit to continue \\u escape sequence at {currentLocation}, got {ch}");
}
}
break;
case State.QuoteBackslashZ:
{
if(ch == '\\')
{
state = State.QuoteBackslash;
}
else if(ch == '"')
{
lastIndex = index;
state = State.String;
if(currentToken == null || currentToken.type == null)
{
currentToken = new(region: new(start: new(currentLocation), end: new(currentLocation)), type: TokenType.StringLiteral);
}
else
{
currentToken.type = TokenType.StringLiteral;
currentToken.region.end = new(currentLocation);
currentToken.data = new Token.StringData("");
}
}
else if(!char.IsWhiteSpace(ch))
{
AppendDataChar(ch);
state = State.Quote;
}
else
{
// Noop, https://www.lua.org/manual/5.4/manual.html#3.1:
// "The escape sequence '\z' skips the following span of whitespace characters, including line breaks;"
}
}
break;
case State.SingleQuote:
{
if(ch == '\\')
{
state = State.SingleQuoteBackslash;
}
else if(ch == '\'')
{
lastIndex = index;
state = State.String;
if(currentToken == null || currentToken.type == null)
{
currentToken = new(region: new(start: new(currentLocation), end: new(currentLocation)), type: TokenType.StringLiteral);
}
else
{
currentToken.type = TokenType.StringLiteral;
currentToken.region.end = new(currentLocation);
currentToken.data ??= new Token.StringData("");
}
}
else
{
AppendDataChar(ch);
}
}
break;
case State.SingleQuoteBackslash:
{
switch(ch)
{
case 'a':
{
AppendDataChar('\u0007');
state = State.SingleQuote;
}
break;
case 'b':
{
AppendDataChar('\u0008');
state = State.SingleQuote;
}
break;
case 't':
{
AppendDataChar('\t');
state = State.SingleQuote;
}
break;
case 'n':
case '\n':
{
AppendDataChar('\n');
state = State.SingleQuote;
}
break;
case 'v':
{
AppendDataChar('\u000b');
state = State.SingleQuote;
}
break;
case 'f':
{
AppendDataChar('\u000c');
state = State.SingleQuote;
}
break;
case 'r':
{
AppendDataChar('\r');
state = State.SingleQuote;
}
break;
case '\\':
{
AppendDataChar('\\');
state = State.SingleQuote;
}
break;
case '"':
{
AppendDataChar('"');
state = State.SingleQuote;
}
break;
case '\'':
{
AppendDataChar('\'');
state = State.SingleQuote;
}
break;
case 'z':
state = State.SingleQuoteBackslashZ;
break;
case 'x':
state = State.SingleQuoteBackslashX;
break;
case 'u':
state = State.SingleQuoteBackslashU;
break;
default: throw new Exception($"Unknown escape sequence: \\{ch}");
}
}
break;
case State.SingleQuoteBackslashU:
state = ch == '{'
? State.SingleQuoteBackslashUBracket
: throw new Exception($"Expected `{{` to continue \\u escape sequence at {currentLocation}, got {ch}");
break;
case State.SingleQuoteBackslashUBracket:
{
if(char.IsAsciiHexDigit(ch))
{
state = State.SingleQuoteBackslashUBracketHex;
escapeSequenceNumber = char.IsAsciiDigit(ch) ? ch - '0' : 10 + char.ToLower(ch) - 'a';
}
else
{
throw new Exception($"Expected hex digit to continue \\u escape sequence at {currentLocation}, got {ch}");
}
}
break;
case State.SingleQuoteBackslashUBracketHex:
{
if(char.IsAsciiHexDigit(ch))
{
escapeSequenceNumber = (escapeSequenceNumber * 16) + (char.IsAsciiDigit(ch) ? ch - '0' : 10 + char.ToLower(ch) - 'a');
if(escapeSequenceNumber > uint.MaxValue)
{
throw new Exception($"{currentLocation}: \\u escape sequence has a value > 2^31 which is not permitted");
}
}
else if(ch == '}')
{
state = State.SingleQuote;
// TODO: THIS IS WRONG, there is zero padding due to the fixed size array
char[] chars = Encoding.UTF8.GetChars(BitConverter.GetBytes((uint)escapeSequenceNumber));
for(int i = 0; i < chars.Length; i++)
{
AppendDataChar(chars[i]);
}
escapeSequenceNumber = 0;
}
else
{
throw new Exception($"Expected second hex digit to continue \\u escape sequence at {currentLocation}, got {ch}");
}
}
break;
case State.SingleQuoteBackslashZ:
{
if(ch == '\\')
{
state = State.SingleQuoteBackslash;
}
else if(ch == '\'')
{
lastIndex = index;
state = State.String;
if(currentToken == null || currentToken.type == null)
{
currentToken = new(region: new(start: new(currentLocation), end: new(currentLocation)), type: TokenType.StringLiteral);
}
else
{
currentToken.type = TokenType.StringLiteral;
currentToken.region.end = new(currentLocation);
}
}
else if(!char.IsWhiteSpace(ch))
{
AppendDataChar(ch);
state = State.SingleQuote;
}
else
{
// Noop, https://www.lua.org/manual/5.4/manual.html#3.1:
// "The escape sequence '\z' skips the following span of whitespace characters, including line breaks;"
}
}
break;
case State.SingleQuoteBackslashX:
{
if(char.IsAsciiHexDigit(ch))
{
state = State.SingleQuoteBackslashXHex;
escapeSequenceNumber = char.IsAsciiDigit(ch) ? ch - '0' : 10 + char.ToLower(ch) - 'a';
}
else
{
throw new Exception($"{currentLocation}: Expected hex digit in \\x escape sequence, got {ch}");
}
}
break;
case State.SingleQuoteBackslashXHex:
{
if(char.IsAsciiHexDigit(ch))
{
state = State.SingleQuote;
escapeSequenceNumber = (escapeSequenceNumber * 16) + (char.IsAsciiDigit(ch) ? ch - '0' : 10 + char.ToLower(ch) - 'a');
// TODO: THIS IS WRONG, there is zero padding due to the fixed size array
foreach(char c in Encoding.UTF8.GetChars(BitConverter.GetBytes(escapeSequenceNumber)))
{
AppendDataChar(c);
}
escapeSequenceNumber = 0;
}
else
{
throw new Exception($"{currentLocation}: Expected second hex digit in \\x escape sequence, got {ch}");
}
}
break;
case State.QuoteBackslashX:
{
if(char.IsAsciiHexDigit(ch))
{
state = State.QuoteBackslashXHex;
escapeSequenceNumber = char.IsAsciiDigit(ch) ? ch - '0' : 10 + char.ToLower(ch) - 'a';
}
else
{
throw new Exception($"{currentLocation}: Expected hex digit in \\x escape sequence, got {ch}");
}
}
break;
case State.QuoteBackslashXHex:
{
if(char.IsAsciiHexDigit(ch))
{
state = State.Quote;
escapeSequenceNumber = (escapeSequenceNumber * 16) + (char.IsAsciiDigit(ch) ? ch - '0' : 10 + char.ToLower(ch) - 'a');
// TODO: THIS IS WRONG, there is zero padding due to the fixed size array
foreach(char c in Encoding.UTF8.GetChars(BitConverter.GetBytes(escapeSequenceNumber)))
{
AppendDataChar(c);
}
escapeSequenceNumber = 0;
}
else
{
throw new Exception($"{currentLocation}: Expected second hex digit in \\x escape sequence, got {ch}");
}
}
break;
case State.String:
{
BacktrackNoClear(TokenType.StringLiteral);
}
break;
case State.Name:
{
if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Zero:
{
if(ch is 'x' or 'X')
{
currentToken!.type = null;
state = State.HexNumberX;
}
else if(ch == '.')
{
state = State.Float;
currentToken!.type = null;
currentToken!.data = null;
AppendDataChar('0');
AppendDataChar('.');
}
else if(char.IsAsciiDigit(ch))
{
lastIndex = index;
AppendDataInt(ch);
}
else
{
BacktrackNoTypeChange();
}
}
break;
case State.Float:
{
if(char.IsAsciiDigit(ch))
{
lastIndex = index;
AppendDataChar(ch);
}
else
{
if(currentToken == null)
{
throw new Exception($"Lexer error at {currentLocation}");
}
currentLocation = new(currentToken.region.end);
currentToken.type = TokenType.Numeral;
currentToken.data = new Token.NumeralData(new INumeral.Float(float.Parse(((Token.StringData)currentToken.data!).data)));
tokens.Add(currentToken);
currentToken = null;
index = lastIndex!.Value;
lastIndex = null;
state = State.Start;
}
}
break;
case State.HexNumberX:
{
if(char.IsAsciiHexDigit(ch))
{
lastIndex = index;
currentToken!.type = TokenType.Numeral;
AppendDataIntHex(ch);
state = State.HexNumber;
}
else if(ch == '.')
{
throw new NotImplementedException($"{currentLocation}: Hex floats at are not implemented");
}
else
{
BacktrackNoTypeChange();
}
}
break;
case State.HexNumber:
{
if(ch == 'p')
{
currentToken!.type = null;
state = State.HexExpNumber;
}
else if(char.IsAsciiHexDigit(ch))
{
lastIndex = index;
currentToken!.type = TokenType.Numeral;
AppendDataIntHex(ch);
}
else if(ch == '.')
{
throw new NotImplementedException($"{currentLocation}: Hex floats at are not implemented");
}
else
{
BacktrackNoTypeChange();
}
}
break;
case State.Integer:
{
if(ch == 'e')
{
currentToken!.type = null;
state = State.ExpNumber;
}
else if(ch == '.')
{
currentToken!.type = null;
currentToken.data = new Token.StringData($"{((INumeral.Integer)((Token.NumeralData)currentToken!.data!).numeral).value}.");
state = State.Float;
}
else if(char.IsAsciiDigit(ch))
{
lastIndex = index;
currentToken!.type = TokenType.Numeral;
AppendDataInt(ch);
}
else
{
BacktrackNoTypeChange();
}
}
break;
case State.SquareOpen:
{
if(ch == '[')
{
currentToken = new Token(region: new(start: new(currentLocation), end: new(currentLocation)), type: TokenType.StringLiteral);
state = State.StringWithLongBracket;
}
else if(ch == '=')
{
openingLongBracketLevel = 1;
state = State.StringStartLongBracket;
}
else
{
BacktrackNoTypeChange();
}
}
break;
case State.Comma:
case State.RoundOpen:
case State.RoundClosed:
case State.CurlyOpen:
case State.CurlyClosed:
case State.Plus:
case State.TildeEquals:
case State.EqualsEquals:
case State.Hash:
case State.GtEquals:
case State.LtEquals:
case State.SquareClosed:
case State.Pipe:
case State.Ampersand:
case State.Percent:
case State.Star:
case State.Semicolon:
case State.Caret:
case State.DotDotDot:
case State.GtGt:
case State.LtLt:
case State.ColonColon:
case State.SlashSlash:
{
BacktrackNoTypeChange();
}
break;
case State.Tilde:
{
if(ch == '=')
{
lastIndex = index;
state = State.TildeEquals;
currentToken!.type = TokenType.TildeEquals;
}
else
{
BacktrackNoTypeChange();
}
}
break;
case State.Gt:
{
if(ch == '=')
{
lastIndex = index;
state = State.GtEquals;
currentToken!.type = TokenType.GtEquals;
}
else if(ch == '>')
{
lastIndex = index;
state = State.GtGt;
currentToken!.type = TokenType.GtGt;
}
else
{
BacktrackNoTypeChange();
}
}
break;
case State.Lt:
{
if(ch == '=')
{
lastIndex = index;
state = State.LtEquals;
currentToken!.type = TokenType.LtEquals;
}
else if(ch == '<')
{
lastIndex = index;
state = State.LtLt;
currentToken!.type = TokenType.LtLt;
}
else
{
BacktrackNoTypeChange();
}
}
break;
case State.Slash:
{
if(ch == '/')
{
lastIndex = index;
state = State.SlashSlash;
currentToken!.type = TokenType.SlashSlash;
}
else
{
BacktrackNoTypeChange();
}
}
break;
case State.Dot:
{
if(ch == '.')
{
lastIndex = index;
state = State.DotDot;
currentToken!.type = TokenType.DotDot;
}
else
{
BacktrackNoTypeChange();
}
}
break;
case State.DotDot:
{
if(ch == '.')
{
lastIndex = index;
state = State.DotDotDot;
currentToken!.type = TokenType.DotDotDot;
}
else
{
BacktrackNoTypeChange();
}
}
break;
case State.Colon:
{
if(ch == ':')
{
lastIndex = index;
state = State.ColonColon;
currentToken!.type = TokenType.ColonColon;
}
else
{
BacktrackNoTypeChange();
}
}
break;
case State.Equals:
{
if(ch == '=')
{
lastIndex = index;
state = State.EqualsEquals;
currentToken!.type = TokenType.EqualsEquals;
}
else
{
BacktrackNoTypeChange();
}
}
break;
case State.Minus:
{
if(ch == '-')
{
lastIndex = index;
state = State.SmallCommentStart;
currentToken = null;
}
else
{
BacktrackNoTypeChange();
}
}
break;
case State.SmallCommentStart:
{
if(ch == '[')
{
state = State.BigCommentStartLongBracket;
}
else if(ch == '\n')
{
state = State.Start;
lastIndex = null;
}
else
{
state = State.SmallComment;
}
}
break;
case State.SmallComment:
{
if(ch == '\n')
{
state = State.Start;
lastIndex = null;
}
}
break;
case State.BigCommentStartLongBracket:
{
if(ch == '=')
{
openingLongBracketLevel += 1;
}
else if(ch == '[')
{
state = State.BigComment;
}
else if(ch == '\n')
{
state = State.Start;
}
else
{
state = State.SmallComment;
}
}
break;
case State.BigComment:
{
if(ch == ']')
{
state = State.BigCommentEndLongBracket;
closingLongBracketLevel = 0;
}
}
break;
case State.BigCommentEndLongBracket:
{
if(ch == '=')
{
closingLongBracketLevel += 1;
if(openingLongBracketLevel < closingLongBracketLevel)
{
state = State.BigComment;
}
}
else if(ch == ']' && openingLongBracketLevel == closingLongBracketLevel)
{
state = State.Start;
openingLongBracketLevel = 0;
closingLongBracketLevel = 0;
}
else
{
closingLongBracketLevel = 0;
state = State.BigComment;
}
}
break;
case State.StringStartLongBracket:
{
if(ch == '=')
{
openingLongBracketLevel += 1;
}
else if(ch == '[')
{
state = State.StringWithLongBracket;
}
else
{
BacktrackNoTypeChange();
}
}
break;
case State.StringWithLongBracket:
{
if(ch == ']')
{
state = State.StringEndLongBracket;
closingLongBracketLevel = 0;
}
else
{
AppendDataChar(ch);
}
}
break;
case State.StringEndLongBracket:
{
if(ch == '=')
{
closingLongBracketLevel += 1;
if(openingLongBracketLevel < closingLongBracketLevel)
{
state = State.StringWithLongBracket;
}
AppendDataChar(ch);
}
else if(ch == ']' && openingLongBracketLevel == closingLongBracketLevel)
{
if(currentToken == null || currentToken.type == null)
{
throw new Exception($"Lexer error at {currentLocation}");
}
if((Token.StringData?)currentToken.data == null)
{
currentToken.data = new Token.StringData("");
}
currentToken.type = TokenType.StringLiteral;
((Token.StringData)currentToken.data).data = ((Token.StringData)currentToken.data).data.Remove(((Token.StringData)currentToken.data).data.Length - closingLongBracketLevel);
currentLocation = new(currentToken.region.end);
tokens.Add(currentToken);
currentToken = null;
lastIndex = null;
state = State.Start;
openingLongBracketLevel = 0;
closingLongBracketLevel = 0;
}
else
{
closingLongBracketLevel = 0;
AppendDataChar(ch);
state = State.StringWithLongBracket;
}
}
break;
case State.A:
{
if(ch == 'n')
{
lastIndex = index;
state = State.An;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.An:
{
if(ch == 'd')
{
lastIndex = index;
state = State.And;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.And:
{
if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
Backtrack(TokenType.And);
}
}
break;
case State.W:
{
if(ch == 'h')
{
lastIndex = index;
state = State.Wh;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Wh:
{
if(ch == 'i')
{
lastIndex = index;
state = State.Whi;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Whi:
{
if(ch == 'l')
{
lastIndex = index;
state = State.Whil;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Whil:
{
if(ch == 'e')
{
lastIndex = index;
state = State.While;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.While:
{
if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
Backtrack(TokenType.While);
}
}
break;
case State.B:
{
if(ch == 'r')
{
lastIndex = index;
state = State.Br;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Br:
{
if(ch == 'e')
{
lastIndex = index;
state = State.Bre;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Bre:
{
if(ch == 'a')
{
lastIndex = index;
state = State.Brea;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Brea:
{
if(ch == 'k')
{
lastIndex = index;
state = State.Break;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Break:
{
if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
Backtrack(TokenType.Break);
}
}
break;
case State.G:
{
if(ch == 'o')
{
lastIndex = index;
state = State.Go;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Go:
{
if(ch == 't')
{
lastIndex = index;
state = State.Got;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Got:
{
if(ch == 'o')
{
lastIndex = index;
state = State.Goto;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Goto:
{
if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
Backtrack(TokenType.Goto);
}
}
break;
case State.R:
{
if(ch == 'e')
{
lastIndex = index;
state = State.Re;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Re:
{
if(ch == 't')
{
lastIndex = index;
state = State.Ret;
AppendDataChar(ch);
}
else if(ch == 'p')
{
lastIndex = index;
state = State.Rep;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Ret:
{
if(ch == 'u')
{
lastIndex = index;
state = State.Retu;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Retu:
{
if(ch == 'r')
{
lastIndex = index;
state = State.Retur;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Retur:
{
if(ch == 'n')
{
lastIndex = index;
state = State.Return;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Return:
{
if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
Backtrack(TokenType.Return);
}
}
break;
case State.Rep:
{
if(ch == 'e')
{
lastIndex = index;
state = State.Repe;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Repe:
{
if(ch == 'a')
{
lastIndex = index;
state = State.Repea;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Repea:
{
if(ch == 't')
{
lastIndex = index;
state = State.Repeat;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Repeat:
{
if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
Backtrack(TokenType.Repeat);
}
}
break;
case State.N:
{
if(ch == 'i')
{
lastIndex = index;
state = State.Ni;
AppendDataChar(ch);
}
else if(ch == 'o')
{
lastIndex = index;
state = State.No;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Ni:
{
if(ch == 'l')
{
lastIndex = index;
state = State.Nil;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Nil:
{
if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
Backtrack(TokenType.Nil);
}
}
break;
case State.No:
{
if(ch == 't')
{
lastIndex = index;
state = State.Not;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Not:
{
if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
Backtrack(TokenType.Not);
}
}
break;
case State.T:
{
if(ch == 'h')
{
lastIndex = index;
state = State.Th;
AppendDataChar(ch);
}
else if(ch == 'r')
{
lastIndex = index;
state = State.Tr;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Th:
{
if(ch == 'e')
{
lastIndex = index;
state = State.The;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.The:
{
if(ch == 'n')
{
lastIndex = index;
state = State.Then;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Then:
{
if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
Backtrack(TokenType.Then);
}
}
break;
case State.Tr:
{
if(ch == 'u')
{
lastIndex = index;
state = State.Tru;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Tru:
{
if(ch == 'e')
{
lastIndex = index;
state = State.True;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.True:
{
if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
Backtrack(TokenType.True);
}
}
break;
case State.E:
{
if(ch == 'l')
{
lastIndex = index;
state = State.El;
AppendDataChar(ch);
}
else if(ch == 'n')
{
lastIndex = index;
state = State.En;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.El:
{
if(ch == 's')
{
lastIndex = index;
state = State.Els;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Els:
{
if(ch == 'e')
{
lastIndex = index;
state = State.Else;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Else:
{
if(ch == 'i')
{
lastIndex = index;
state = State.Elsei;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
Backtrack(TokenType.Else);
}
}
break;
case State.Elsei:
{
if(ch == 'f')
{
lastIndex = index;
state = State.Elseif;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Elseif:
{
if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
Backtrack(TokenType.Elseif);
}
}
break;
case State.En:
{
if(ch == 'd')
{
lastIndex = index;
state = State.End;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.End:
{
if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
Backtrack(TokenType.End);
}
}
break;
case State.O:
{
if(ch == 'r')
{
lastIndex = index;
state = State.Or;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Or:
{
if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
Backtrack(TokenType.Or);
}
}
break;
case State.D:
{
if(ch == 'o')
{
lastIndex = index;
state = State.Do;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Do:
{
if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
Backtrack(TokenType.Do);
}
}
break;
case State.I:
{
if(ch == 'f')
{
lastIndex = index;
state = State.If;
AppendDataChar(ch);
}
else if(ch == 'n')
{
lastIndex = index;
state = State.In;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.In:
{
if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
Backtrack(TokenType.In);
}
}
break;
case State.If:
{
if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
Backtrack(TokenType.If);
}
}
break;
case State.F:
{
if(ch == 'u')
{
lastIndex = index;
state = State.Fu;
AppendDataChar(ch);
}
else if(ch == 'a')
{
lastIndex = index;
state = State.Fa;
AppendDataChar(ch);
}
else if(ch == 'o')
{
lastIndex = index;
state = State.Fo;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Fu:
{
if(ch == 'n')
{
lastIndex = index;
state = State.Fun;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Fun:
{
if(ch == 'c')
{
lastIndex = index;
state = State.Func;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Func:
{
if(ch == 't')
{
lastIndex = index;
state = State.Funct;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Funct:
{
if(ch == 'i')
{
lastIndex = index;
state = State.Functi;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Functi:
{
if(ch == 'o')
{
lastIndex = index;
state = State.Functio;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Functio:
{
if(ch == 'n')
{
lastIndex = index;
state = State.Function;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Function:
{
if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
Backtrack(TokenType.Function);
}
}
break;
case State.Fa:
{
if(ch == 'l')
{
lastIndex = index;
state = State.Fal;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Fal:
{
if(ch == 's')
{
lastIndex = index;
state = State.Fals;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Fals:
{
if(ch == 'e')
{
lastIndex = index;
state = State.False;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.False:
{
if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
Backtrack(TokenType.False);
}
}
break;
case State.Fo:
{
if(ch == 'r')
{
lastIndex = index;
state = State.For;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.For:
{
if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
Backtrack(TokenType.For);
}
}
break;
case State.L:
{
if(ch == 'o')
{
lastIndex = index;
state = State.Lo;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Lo:
{
if(ch == 'c')
{
lastIndex = index;
state = State.Loc;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Loc:
{
if(ch == 'a')
{
lastIndex = index;
state = State.Loca;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Loca:
{
if(ch == 'l')
{
lastIndex = index;
state = State.Local;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Local:
{
if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
Backtrack(TokenType.Local);
}
}
break;
case State.U:
{
if(ch == 'n')
{
lastIndex = index;
state = State.Un;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Un:
{
if(ch == 't')
{
lastIndex = index;
state = State.Unt;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Unt:
{
if(ch == 'i')
{
lastIndex = index;
state = State.Unti;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Unti:
{
if(ch == 'l')
{
lastIndex = index;
state = State.Until;
AppendDataChar(ch);
}
else if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
BacktrackNoClear(TokenType.Name);
}
}
break;
case State.Until:
{
if(char.IsAsciiLetterOrDigit(ch) || ch == '_')
{
lastIndex = index;
state = State.Name;
currentToken!.type = TokenType.Name;
AppendDataChar(ch);
}
else
{
Backtrack(TokenType.Until);
}
}
break;
default:
throw new NotImplementedException(state.ToString());
}
}
// States of the character-driven tokenizer state machine.
// The `state` field is initialised to Start, and the big switch over State
// throws NotImplementedException for any state that has no case.
//
// Keyword recognition uses one state per matched prefix: the state name is the
// text consumed so far (e.g. Repea moves to Repeat on 't'). When the next
// character is another letter, digit or '_', a prefix state switches to Name.
// When the character cannot continue an identifier, a state named after a
// complete keyword (e.g. Do, Nil, Repeat) calls Backtrack with that keyword's
// TokenType, while a partial prefix calls BacktrackNoClear(TokenType.Name).
//
// NOTE(review): no explicit values are assigned, so the declaration order
// determines the underlying integers. Members are grouped roughly by name
// length, not by keyword — confirm nothing depends on the numeric values
// before regrouping.
private enum State
{
// Initial state of the `state` field.
Start,
// Name is the generic identifier state that every keyword-prefix state falls back to.
// The remaining names suggest string and number literal states (verify against their cases).
Quote, SingleQuote, Name, Integer, Float, Zero,
// One-letter keyword prefixes (e.g. N leads to Ni or No, F to Fu, Fa or Fo).
A, B, D, E, F, G, I, L, N, O, R, T, U, W,
// By name: single-character operators, brackets and long-bracket string states
// (verify against their cases in the switch).
Plus, Minus, Star, Slash, Percent, Caret, Hash,
Ampersand, Tilde, Pipe, Lt, Gt, Equals, RoundOpen, RoundClosed, CurlyOpen, CurlyClosed, SquareOpen, SquareClosed, StringStartLongBracket, StringWithLongBracket, StringEndLongBracket,
Colon, Semicolon, Comma, Dot,
// Two-letter keyword prefixes; Do, If, In and Or are already complete keywords.
An, Br, Do, El, En, Fa, Fo, Fu, Go, If, In, Lo, Ni, No, Or, Re, Th, Tr, Un, Wh,
// By name: two-character operators.
LtLt, GtGt, SlashSlash, EqualsEquals, TildeEquals, LtEquals, GtEquals, ColonColon, DotDot,
// By name: comment start, string escape and number-format helper states.
SmallCommentStart, QuoteBackslash, SingleQuoteBackslash, String, HexNumberX, ExpNumber,
// Three-letter keyword prefixes; End, For, Nil and Not are complete keywords here.
And, Bre, Els, End, Fal, For, Fun, Got, Loc, Nil, Not, Rep, Ret, The, Tru, Unt, Whi,
// By name: "..." operator, hex numbers, and \z, \x and \u{...} escape states for both quote kinds.
DotDotDot, HexNumber, QuoteBackslashZ, SingleQuoteBackslashZ, QuoteBackslashX, SingleQuoteBackslashX, QuoteBackslashXHex, SingleQuoteBackslashXHex,
SingleQuoteBackslashU, SingleQuoteBackslashUBracket, SingleQuoteBackslashUBracketHex,
QuoteBackslashU, QuoteBackslashUBracket, QuoteBackslashUBracketHex,
// By name: line comment and long-bracket comment states.
SmallComment, BigComment, BigCommentStartLongBracket, BigCommentEndLongBracket,
// Four-letter keyword prefixes (Else, Then and True are complete keywords), plus HexExpNumber.
Brea, Else, Fals, Func, Goto, Loca, Repe, Retu, Then, True, Unti, Whil, HexExpNumber,
// Five-letter keyword prefixes; False, Local and Until are complete keywords.
Break, Elsei, False, Funct, Local, Repea, Retur, Until, While,
// Six-letter keyword prefixes; Elseif and Repeat are complete keywords.
Elseif, Functi, Repeat, Return,
// Remaining prefixes of "function".
Functio,
Function,
}
}
/// <summary>
/// A single lexical token: the source region it spans, its kind, and an
/// optional payload. All three are plain mutable fields because the tokenizer
/// builds a token incrementally, extending the region end and the payload one
/// character at a time.
/// </summary>
/// <param name="region">Source region covered by the token.</param>
/// <param name="type">Kind of token; defaults to <c>null</c> when not supplied.</param>
/// <param name="data">Optional payload (string or numeral); defaults to <c>null</c>.</param>
internal class Token(CodeRegion region, TokenType? type = null, Token.IData? data = null)
{
    /// <summary>Marker interface implemented by every payload a token can carry.</summary>
    public interface IData { }

    /// <summary>Payload wrapping a numeral value.</summary>
    public class NumeralData(INumeral numeral) : IData
    {
        /// <summary>The wrapped numeral; mutable so it can be updated in place.</summary>
        public INumeral numeral = numeral;

        /// <summary>Renders the payload as <c>NumeralData</c> followed by the numeral.</summary>
        public override string ToString() => $"NumeralData {numeral}";
    }

    /// <summary>Payload wrapping text, e.g. the characters collected for a name.</summary>
    public class StringData(string data) : IData
    {
        /// <summary>The collected text; mutable so characters can be appended.</summary>
        public string data = data;

        /// <summary>Renders the payload as <c>StringData</c> followed by the text in double quotes.</summary>
        public override string ToString() => $"StringData \"{data}\"";
    }

    /// <summary>Source region covered by this token.</summary>
    public CodeRegion region = region;

    /// <summary>Payload of this token, or <c>null</c> when it carries none.</summary>
    public IData? data = data;

    /// <summary>Kind of this token, or <c>null</c> when not set.</summary>
    public TokenType? type = type;
}
/// <summary>
/// Kinds of tokens the tokenizer can produce. Members carry no explicit
/// values, so their underlying integers follow the declaration order below.
/// </summary>
public enum TokenType
{
    // Identifier.
    Name,

    // Keywords.
    And,
    Break,
    Do,
    Else,
    Elseif,
    End,
    False,
    For,
    Function,
    Goto,
    If,
    In,
    Local,
    Nil,
    Not,
    Or,
    Repeat,
    Return,
    Then,
    True,
    Until,
    While,

    // Arithmetic operators and length operator.
    Plus,
    Minus,
    Star,
    Slash,
    Percent,
    Caret,
    Hash,

    // Bitwise operators and floor division.
    Ampersand,
    Tilde,
    Pipe,
    LtLt,
    GtGt,
    SlashSlash,

    // Comparison and assignment.
    EqualsEquals,
    TildeEquals,
    LtEquals,
    GtEquals,
    Lt,
    Gt,
    Equals,

    // Brackets and label delimiter.
    RoundOpen,
    RoundClosed,
    CurlyOpen,
    CurlyClosed,
    SquareOpen,
    SquareClosed,
    ColonColon,

    // Separators and dot operators.
    Semicolon,
    Colon,
    Comma,
    Dot,
    DotDot,
    DotDotDot,

    // Literals.
    Numeral,
    StringLiteral,
}