// luaaaaah/src/tokenizer.zig
const std = @import("std");
const types = @import("types.zig");
const CodeRegion = types.CodeRegion;
const CodeLocation = types.CodeLocation;
pub const TokenType = enum
{
Name,
And, Break, Do, Else, Elseif, End,
False, For, Function, Goto, If, In,
Local, Nil, Not, Or, Repeat, Return,
Then, True, Until, While,
Plus, Minus, Star, Slash, Percent, Caret, Hash,
Ampersand, Tilde, Pipe, LtLt, GtGt, SlashSlash,
EqualsEquals, TildeEquals, LtEquals, GtEquals, Lt, Gt, Equals,
RoundOpen, RoundClosed, CurlyOpen, CurlyClosed, SquareOpen, SquareClosed, ColonColon,
Semicolon, Colon, Comma, Dot, DotDot, DotDotDot,
Numeral,
StringLiteral,
};
const TokenData = union(enum)
{
string: []u8,
numeral: types.Numeral,
none,
};
pub const Token = struct
{
tokenType: TokenType,
tokenData: TokenData,
region: CodeRegion,
};
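// The tokenizer is a hand-written state machine. Besides states for
// operators, strings, numbers, and comments, it has one state per keyword
// prefix ("A", "An", "And", ...), so keywords are recognized one character
// at a time and fall back to a plain Name as soon as a prefix diverges.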
const TokenizerState = enum
{
Start,
Quote, SingleQuote, Name, Number, Zero,
A, B, D, E, F, G, I, L, N, O, R, T, U, W,
Plus, Minus, Star, Slash, Percent, Caret, Hash,
Ampersand, Tilde, Pipe, Lt, Gt, Equals, RoundOpen, RoundClosed, CurlyOpen, CurlyClosed, SquareOpen, SquareClosed,
Colon, Semicolon, Comma, Dot,
An, Br, Do, El, En, Fa, Fo, Fu, Go, If, In, Lo, Ni, No, Or, Re, Th, Tr, Un, Wh,
LtLt, GtGt, SlashSlash, EqualsEquals, TildeEquals, LtEquals, GtEquals, ColonColon, DotDot,
SmallCommentStart, QuoteBackslash, SingleQuoteBackslash, String, HexNumberX, ExpNumber,
And, Bre, Els, End, Fal, For, Fun, Got, Loc, Nil, Not, Rep, Ret, The, Tru, Unt, Whi,
DotDotDot, HexNumber, QuoteBackslashZ, SingleQuoteBackslashZ,
BigCommentLongBracketStart, SmallComment,
Brea, Else, Fals, Func, Goto, Loca, Repe, Retu, Then, True, Unti, Whil, HexExpNumber,
BigComment, BigCommentLongBracketEnd,
Break, Elsei, False, Funct, Local, Repea, Retur, Until, While,
Elseif, Functi, Repeat, Return,
Functio,
Function,
};
fn tokenizeUpdateIndexAndState(lastIndex: *?usize, index: ?usize, state: *TokenizerState, newState: TokenizerState, region: *CodeRegion) void
{
lastIndex.* = index;
state.* = newState;
if(index == null)
{
region.*.start = null;
region.*.length = 0;
}
else
{
if(region.*.start == null)
{
// TODO: There is no line/col info here and plumbing it to here would be pain.
region.*.start = CodeLocation { .col = 0, .line = 0 };
}
region.*.length += 1;
}
}
fn tokenizeTerminalBase(lastIndex: *?usize, index: ?usize, tokenType: *?TokenType, state: *TokenizerState, newTokenType: ?TokenType, newState: TokenizerState, region: *CodeRegion) void
{
tokenizeUpdateIndexAndState(lastIndex, index, state, newState, region);
tokenType.* = newTokenType;
}
fn tokenizeTerminalStr(lastIndex: *?usize, index: usize, tokenType: *?TokenType, state: *TokenizerState, newTokenType: ?TokenType, newState: TokenizerState, tokenStr: *std.ArrayList(u8), ch: u8, region: *CodeRegion) !void
{
tokenizeTerminalBase(lastIndex, index, tokenType, state, newTokenType, newState, region);
try tokenStr.append(ch);
}
fn tokenizeTerminalIntNum(lastIndex: *?usize, index: usize, tokenType: *?TokenType, state: *TokenizerState, newTokenType: TokenType, newState: TokenizerState, tokenNumeral: *?types.Numeral, ch: u8, region: *CodeRegion) !void
{
tokenizeTerminalBase(lastIndex, index, tokenType, state, newTokenType, newState, region);
if(!std.ascii.isDigit(ch))
{
return error.NoDigit;
}
const digitValue = @as(i64, ch - '0');
if(tokenNumeral.* == null)
{
tokenNumeral.* = types.Numeral { .Integer = digitValue };
}
else
{
switch(tokenNumeral.*.?)
{
.Integer => |*n| n.* = n.* * 10 + digitValue,
.Float => return error.ExpectedIntGotFloat
}
}
}
fn tokenizeTerminalNoToken(lastIndex: *?usize, index: usize, state: *TokenizerState, newState: TokenizerState, tokenStr: *std.ArrayList(u8), ch: u8, region: *CodeRegion) !void
{
tokenizeUpdateIndexAndState(lastIndex, index, state, newState, region);
try tokenStr.*.append(ch);
}
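// Backtracking implements maximal munch: when the current character cannot
// extend the token in progress, the longest token recognized so far (ending
// at lastIndex) is emitted and scanning resumes right after it.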
fn tokenizeBacktrack(lastIndex: *?usize, index: *usize, tokens: *std.ArrayList(Token), tokenType: *?TokenType, tokenStr: *std.ArrayList(u8), tokenNumeral: *?types.Numeral, state: *TokenizerState, allocator: std.mem.Allocator, region: *CodeRegion) !void
{
try tokenizeBacktrackCustomToken(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, tokenType.*.?, allocator, region);
}
fn tokenizeBacktrackCustomToken(lastIndex: *?usize, index: *usize, tokens: *std.ArrayList(Token), tokenType: *?TokenType, tokenStr: *std.ArrayList(u8), tokenNumeral: *?types.Numeral, state: *TokenizerState, newTokenType: TokenType, allocator: std.mem.Allocator, region: *CodeRegion) !void
{
if(lastIndex.* == null or tokenType.* == null)
{
return error.LexError;
}
if(newTokenType == TokenType.StringLiteral or newTokenType == TokenType.Name)
{
const content = try allocator.dupe(u8, tokenStr.*.items);
try tokens.append(Token { .tokenType = newTokenType, .tokenData = TokenData { .string = content }, .region = region.* });
}
else
{
const data = if(tokenType.*.? == TokenType.Numeral) TokenData { .numeral = tokenNumeral.*.? } else TokenData.none;
try tokens.append(Token { .tokenType = newTokenType, .tokenData = data, .region = region.* });
}
tokenNumeral.* = null;
index.* = lastIndex.*.?;
tokenStr.*.clearAndFree();
// region is reset in tokenizeTerminalBase since null is passed as index
tokenizeTerminalBase(lastIndex, null, tokenType, state, null, TokenizerState.Start, region);
}
fn tokenizeAlphanumericNonstart(lastIndex: *?usize, index: *usize, tokens: *std.ArrayList(Token), tokenType: *?TokenType, tokenStr: *std.ArrayList(u8), tokenNumeral: *?types.Numeral, state: *TokenizerState, ch: u8, newTokenType: TokenType, allocator: std.mem.Allocator, region: *CodeRegion) !void
{
if(std.ascii.isAlphanumeric(ch) or ch == '_')
{
try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.Name, tokenStr, ch, region);
}
else
{
try tokenizeBacktrackCustomToken(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, newTokenType, allocator, region);
}
}
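// Advances the state machine by exactly one character: it either extends the
// token in progress, emits the finished token and backtracks, or fails with
// a lex error.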
fn tokenizeChar(state: *TokenizerState, ch: u8, lastIndex: *?usize, index: *usize, tokenType: *?TokenType, tokenStr: *std.ArrayList(u8), tokenNumeral: *?types.Numeral, tokens: *std.ArrayList(Token), longBracketLevel: *u32, region: *CodeRegion, allocator: std.mem.Allocator) !void
{
switch(state.*)
{
TokenizerState.Start =>
{
switch(ch)
{
'-' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Minus, TokenizerState.Minus, region),
',' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Comma, TokenizerState.Comma, region),
'=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Equals, TokenizerState.Equals, region),
'(' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.RoundOpen, TokenizerState.RoundOpen, region),
')' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.RoundClosed, TokenizerState.RoundClosed, region),
'.' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Dot, TokenizerState.Dot, region),
':' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Colon, TokenizerState.Colon, region),
'{' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.CurlyOpen, TokenizerState.CurlyOpen, region),
'}' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.CurlyClosed, TokenizerState.CurlyClosed, region),
'[' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.SquareOpen, TokenizerState.SquareOpen, region),
']' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.SquareClosed, TokenizerState.SquareClosed, region),
'+' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Plus, TokenizerState.Plus, region),
'~' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Tilde, TokenizerState.Tilde, region),
'>' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Gt, TokenizerState.Gt, region),
'<' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Lt, TokenizerState.Lt, region),
'#' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Hash, TokenizerState.Hash, region),
'|' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Pipe, TokenizerState.Pipe, region),
'&' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Ampersand, TokenizerState.Ampersand, region),
'%' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Percent, TokenizerState.Percent, region),
'*' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Star, TokenizerState.Star, region),
'/' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Slash, TokenizerState.Slash, region),
';' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Semicolon, TokenizerState.Semicolon, region),
'^' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Caret, TokenizerState.Caret, region),
'a' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.A, tokenStr, ch, region),
'b' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.B, tokenStr, ch, region),
'd' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.D, tokenStr, ch, region),
'e' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.E, tokenStr, ch, region),
'f' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.F, tokenStr, ch, region),
'i' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.I, tokenStr, ch, region),
'g' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.G, tokenStr, ch, region),
'l' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.L, tokenStr, ch, region),
'n' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.N, tokenStr, ch, region),
'o' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.O, tokenStr, ch, region),
'r' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.R, tokenStr, ch, region),
't' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.T, tokenStr, ch, region),
'u' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.U, tokenStr, ch, region),
'w' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.W, tokenStr, ch, region),
'0' => try tokenizeTerminalIntNum(lastIndex, index.*, tokenType, state, TokenType.Numeral, TokenizerState.Zero, tokenNumeral, ch, region),
'"' =>
{
tokenType.* = null;
state.* = TokenizerState.Quote;
},
'\'' =>
{
tokenType.* = null;
state.* = TokenizerState.SingleQuote;
},
else =>
{
if(std.ascii.isWhitespace(ch))
{
// Whitespace between tokens carries no information and is skipped.
}
else if(std.ascii.isAlphabetic(ch) or ch == '_')
{
try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.Name, tokenStr, ch, region);
}
else if(std.ascii.isDigit(ch))
{
try tokenizeTerminalIntNum(lastIndex, index.*, tokenType, state, TokenType.Numeral, TokenizerState.Number, tokenNumeral, ch, region);
}
else
{
std.debug.print("{}: {c}\n", .{state.*, ch});
return error.NotImplemented;
}
}
}
},
TokenizerState.Quote =>
{
switch(ch)
{
'\\' => state.* = TokenizerState.QuoteBackslash,
'"' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.StringLiteral, TokenizerState.String, region),
else => try tokenStr.*.append(ch),
}
},
TokenizerState.QuoteBackslash =>
{
switch(ch)
{
'a' =>
{
try tokenStr.append('\u{0007}');
state.* = TokenizerState.Quote;
},
'b' =>
{
try tokenStr.append('\u{0008}');
state.* = TokenizerState.Quote;
},
't' =>
{
try tokenStr.append('\t');
state.* = TokenizerState.Quote;
},
'n', '\n' =>
{
try tokenStr.append('\n');
state.* = TokenizerState.Quote;
},
'v' =>
{
try tokenStr.append('\u{000b}');
state.* = TokenizerState.Quote;
},
'f' =>
{
try tokenStr.append('\u{000c}');
state.* = TokenizerState.Quote;
},
'r' =>
{
try tokenStr.append('\r');
state.* = TokenizerState.Quote;
},
'\\' =>
{
try tokenStr.append('\\');
state.* = TokenizerState.Quote;
},
'"' =>
{
try tokenStr.append('\"');
state.* = TokenizerState.Quote;
},
'\'' =>
{
try tokenStr.append('\'');
state.* = TokenizerState.Quote;
},
'z' =>
{
state.* = TokenizerState.QuoteBackslashZ;
},
else => return error.UnknownEscapeSequence,
}
},
TokenizerState.QuoteBackslashZ =>
{
switch(ch)
{
'\\' => state.* = TokenizerState.QuoteBackslash,
'"' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.StringLiteral, TokenizerState.String, region),
else =>
{
if(!std.ascii.isWhitespace(ch))
{
try tokenStr.append(ch);
state.* = TokenizerState.Quote;
}
else
{
// Noop, https://www.lua.org/manual/5.4/manual.html#3.1:
// "The escape sequence '\z' skips the following span of whitespace characters, including line breaks;"
}
}
}
},
TokenizerState.SingleQuote =>
{
switch(ch)
{
'\\' => state.* = TokenizerState.SingleQuoteBackslash,
'\'' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.StringLiteral, TokenizerState.String, region),
else => try tokenStr.append(ch),
}
},
TokenizerState.SingleQuoteBackslash =>
{
switch(ch)
{
'a' =>
{
try tokenStr.append('\u{0007}');
state.* = TokenizerState.SingleQuote;
},
'b' =>
{
try tokenStr.append('\u{0008}');
state.* = TokenizerState.SingleQuote;
},
't' =>
{
try tokenStr.append('\t');
state.* = TokenizerState.SingleQuote;
},
'n', '\n' =>
{
try tokenStr.append('\n');
state.* = TokenizerState.SingleQuote;
},
'v' =>
{
try tokenStr.append('\u{000b}');
state.* = TokenizerState.SingleQuote;
},
'f' =>
{
try tokenStr.append('\u{000c}');
state.* = TokenizerState.SingleQuote;
},
'r' =>
{
try tokenStr.append('\r');
state.* = TokenizerState.SingleQuote;
},
'\\' =>
{
try tokenStr.append('\\');
state.* = TokenizerState.SingleQuote;
},
'"' =>
{
try tokenStr.append('\"');
state.* = TokenizerState.SingleQuote;
},
'\'' =>
{
try tokenStr.append('\'');
state.* = TokenizerState.SingleQuote;
},
'z' =>
{
state.* = TokenizerState.SingleQuoteBackslashZ;
},
else => return error.UnknownEscapeSequence,
}
},
TokenizerState.SingleQuoteBackslashZ =>
{
switch(ch)
{
'\\' => state.* = TokenizerState.SingleQuoteBackslash,
'\'' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.StringLiteral, TokenizerState.String, region),
else =>
{
if(!std.ascii.isWhitespace(ch))
{
try tokenStr.append(ch);
state.* = TokenizerState.SingleQuote;
}
else
{
// Noop, https://www.lua.org/manual/5.4/manual.html#3.1:
// "The escape sequence '\z' skips the following span of whitespace characters, including line breaks;"
}
}
}
},
TokenizerState.String => try tokenizeBacktrackCustomToken(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, TokenType.StringLiteral, allocator, region),
TokenizerState.Name => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
TokenizerState.Zero =>
{
switch(ch)
{
'x' =>
{
try tokenStr.*.append(ch);
tokenType.* = null;
state.* = TokenizerState.HexNumberX;
},
'.' => return error.NotImplemented,
else =>
{
if(std.ascii.isDigit(ch))
{
const digitValue = @as(i64, ch - '0');
lastIndex.* = index.*;
if(tokenNumeral.* == null)
{
tokenNumeral.* = types.Numeral { .Integer = digitValue };
tokenType.* = TokenType.Numeral;
}
else
{
tokenNumeral.*.?.Integer = tokenNumeral.*.?.Integer * 10 + digitValue;
}
}
else
{
try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region);
}
}
}
},
TokenizerState.HexNumberX =>
{
if(std.ascii.isHex(ch))
{
lastIndex.* = index.*;
tokenType.* = TokenType.Numeral;
// Map '0'-'9' to 0-9 and 'a'-'f' (either case) to 10-15.
const digitValue = @as(i64, if(std.ascii.isDigit(ch)) ch - '0' else std.ascii.toLower(ch) - 'a' + 10);
tokenNumeral.* = types.Numeral { .Integer = digitValue };
state.* = TokenizerState.HexNumber;
}
else
{
try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region);
}
},
TokenizerState.HexNumber =>
{
switch(ch)
{
'p' =>
{
try tokenStr.*.append(ch);
tokenType.* = null;
state.* = TokenizerState.HexExpNumber;
},
else =>
{
if(std.ascii.isHex(ch))
{
lastIndex.* = index.*;
tokenType.* = TokenType.Numeral;
const digitValue = @as(i64, if(std.ascii.isDigit(ch)) ch - '0' else std.ascii.toLower(ch) - 'a' + 10);
// Accumulate base-16 digits instead of overwriting the previous value.
tokenNumeral.*.?.Integer = tokenNumeral.*.?.Integer * 16 + digitValue;
}
else
{
try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region);
}
}
}
},
TokenizerState.Number =>
{
switch(ch)
{
'e' =>
{
try tokenStr.*.append(ch);
tokenType.* = null;
state.* = TokenizerState.ExpNumber;
},
'.' => return error.NotImplemented,
else =>
{
if(std.ascii.isDigit(ch))
{
const digitValue = @as(i64, ch - '0');
lastIndex.* = index.*;
if(tokenNumeral.* == null)
{
tokenNumeral.* = types.Numeral { .Integer = digitValue };
tokenType.* = TokenType.Numeral;
}
else
{
tokenNumeral.*.?.Integer = tokenNumeral.*.?.Integer * 10 + digitValue;
}
}
else
{
try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region);
}
}
}
},
TokenizerState.Comma, TokenizerState.RoundOpen, TokenizerState.RoundClosed,
TokenizerState.CurlyOpen, TokenizerState.CurlyClosed, TokenizerState.Plus,
TokenizerState.TildeEquals, TokenizerState.EqualsEquals, TokenizerState.Hash,
TokenizerState.GtEquals, TokenizerState.LtEquals, TokenizerState.SquareOpen,
TokenizerState.SquareClosed, TokenizerState.Pipe, TokenizerState.Ampersand,
TokenizerState.Percent, TokenizerState.Star, TokenizerState.Semicolon,
TokenizerState.Caret, TokenizerState.DotDotDot, TokenizerState.GtGt,
TokenizerState.LtLt, TokenizerState.SlashSlash => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region),
TokenizerState.Tilde =>
{
switch(ch)
{
'=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.TildeEquals, TokenizerState.TildeEquals, region),
else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region),
}
},
TokenizerState.Gt =>
{
switch (ch)
{
'>' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.GtGt, TokenizerState.GtGt, region),
'=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.GtEquals, TokenizerState.GtEquals, region),
else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region),
}
},
TokenizerState.Lt =>
{
switch(ch)
{
'<' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.LtLt, TokenizerState.LtLt, region),
'=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.LtEquals, TokenizerState.LtEquals, region),
else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region),
}
},
TokenizerState.Slash =>
{
switch(ch)
{
'/' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.SlashSlash, TokenizerState.SlashSlash, region),
else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region),
}
},
TokenizerState.Dot =>
{
switch(ch)
{
'.' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.DotDot, TokenizerState.DotDot, region),
else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region),
}
},
TokenizerState.DotDot =>
{
switch(ch)
{
'.' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.DotDotDot, TokenizerState.DotDotDot, region),
else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region),
}
},
TokenizerState.Colon =>
{
switch(ch)
{
':' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.ColonColon, TokenizerState.ColonColon, region),
else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region),
}
},
TokenizerState.Equals =>
{
switch(ch)
{
'=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.EqualsEquals, TokenizerState.EqualsEquals, region),
else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region),
}
},
TokenizerState.Minus =>
{
switch(ch)
{
'-' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, null, TokenizerState.SmallCommentStart, region),
else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region),
}
},
TokenizerState.SmallCommentStart =>
{
switch(ch)
{
'[' =>
{
tokenType.* = null;
state.* = TokenizerState.BigCommentLongBracketStart;
},
'\n' =>
{
state.* = TokenizerState.Start;
lastIndex.* = null;
},
else =>
{
state.* = TokenizerState.SmallComment;
},
}
},
TokenizerState.SmallComment =>
{
switch(ch)
{
'\n' =>
{
state.* = TokenizerState.Start;
lastIndex.* = null;
},
else => { }
}
},
TokenizerState.BigCommentLongBracketStart =>
{
switch(ch)
{
'=' =>
{
longBracketLevel.* += 1;
},
'[' =>
{
state.* = TokenizerState.BigComment;
},
else => return error.LongBracketMalformedStartBigComment,
}
},
TokenizerState.BigComment =>
{
switch(ch)
{
']' =>
{
state.* = TokenizerState.BigCommentLongBracketEnd;
},
else => { },
}
},
TokenizerState.BigCommentLongBracketEnd =>
{
switch(ch)
{
'=' =>
{
if(longBracketLevel.* == 0)
{
return error.LongBracketLevelTooBigEndBigComment;
}
longBracketLevel.* -= 1;
},
']' =>
{
if(longBracketLevel.* != 0)
{
return error.LongBracketLevelTooSmallEndBigComment;
}
state.* = TokenizerState.Start;
},
else => return error.LongBracketMalformedSmallEndBigComment,
}
},
TokenizerState.A =>
{
switch(ch)
{
'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.An, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.An =>
{
switch(ch)
{
'd' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.And, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.And => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.And, allocator, region),
TokenizerState.W =>
{
switch(ch)
{
'h' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Wh, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Wh =>
{
switch(ch)
{
'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Whi, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Whi =>
{
switch(ch)
{
'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Whil, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Whil =>
{
switch(ch)
{
'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.While, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.While => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.While, allocator, region),
TokenizerState.B =>
{
switch(ch)
{
'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Br, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Br =>
{
switch(ch)
{
'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Bre, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Bre =>
{
switch(ch)
{
'a' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Brea, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Brea =>
{
switch(ch)
{
'k' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Break, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Break => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Break, allocator, region),
TokenizerState.G =>
{
switch(ch)
{
'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Go, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Go =>
{
switch(ch)
{
't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Got, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Got =>
{
switch(ch)
{
'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Goto, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Goto => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Goto, allocator, region),
TokenizerState.R =>
{
switch(ch)
{
'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Re, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Re =>
{
switch(ch)
{
't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Ret, tokenStr, ch, region),
'p' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Rep, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Ret =>
{
switch(ch)
{
'u' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Retu, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Retu =>
{
switch(ch)
{
'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Retur, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Retur =>
{
switch(ch)
{
'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Return, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Return => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Return, allocator, region),
TokenizerState.Rep =>
{
switch(ch)
{
'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Repe, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Repe =>
{
switch(ch)
{
'a' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Repea, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Repea =>
{
switch(ch)
{
't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Repeat, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Repeat => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Repeat, allocator, region),
TokenizerState.N =>
{
switch(ch)
{
'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Ni, tokenStr, ch, region),
'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.No, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.No =>
{
switch(ch)
{
't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Not, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Not => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Not, allocator, region),
TokenizerState.Ni =>
{
switch(ch)
{
'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Nil, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Nil => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Nil, allocator, region),
TokenizerState.T =>
{
switch(ch)
{
'h' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Th, tokenStr, ch, region),
'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Tr, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Th =>
{
switch(ch)
{
'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.The, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.The =>
{
switch(ch)
{
'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Then, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Then => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Then, allocator, region),
TokenizerState.Tr =>
{
switch(ch)
{
'u' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Tru, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Tru =>
{
switch(ch)
{
'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.True, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.True => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.True, allocator, region),
TokenizerState.E =>
{
switch(ch)
{
'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.El, tokenStr, ch, region),
'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.En, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.En =>
{
switch(ch)
{
'd' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.End, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.End => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.End, allocator, region),
TokenizerState.El =>
{
switch(ch)
{
's' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Els, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Els =>
{
switch(ch)
{
'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Else, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Else =>
{
switch(ch)
{
'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Elsei, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Else, allocator, region),
}
},
TokenizerState.Elsei =>
{
switch(ch)
{
'f' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Elseif, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Elseif => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Elseif, allocator, region),
TokenizerState.O =>
{
switch(ch)
{
'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Or, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Or => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Or, allocator, region),
TokenizerState.D =>
{
switch(ch)
{
'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Do, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Do => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Do, allocator, region),
TokenizerState.I =>
{
switch(ch)
{
'f' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.If, tokenStr, ch, region),
'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.In, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.In => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.In, allocator, region),
TokenizerState.If => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.If, allocator, region),
TokenizerState.F =>
{
switch(ch)
{
'a' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fa, tokenStr, ch, region),
'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fo, tokenStr, ch, region),
'u' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fu, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Fu =>
{
switch(ch)
{
'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fun, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Fun =>
{
switch(ch)
{
'c' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Func, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Func =>
{
switch(ch)
{
't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Funct, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Funct =>
{
switch(ch)
{
'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Functi, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Functi =>
{
switch(ch)
{
'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Functio, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Functio =>
{
switch(ch)
{
'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Function, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Function => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Function, allocator, region),
TokenizerState.Fa =>
{
switch(ch)
{
'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fal, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Fal =>
{
switch(ch)
{
's' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fals, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Fals =>
{
switch(ch)
{
'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.False, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.False => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.False, allocator, region),
TokenizerState.Fo =>
{
switch(ch)
{
'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.For, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.For => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.For, allocator, region),
TokenizerState.L =>
{
switch(ch)
{
'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Lo, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Lo =>
{
switch(ch)
{
'c' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Loc, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Loc =>
{
switch(ch)
{
'a' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Loca, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Loca =>
{
switch(ch)
{
'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Local, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Local => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Local, allocator, region),
TokenizerState.U =>
{
switch(ch)
{
'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Un, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Un =>
{
switch(ch)
{
't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Unt, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Unt =>
{
switch(ch)
{
'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Unti, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Unti =>
{
switch(ch)
{
'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Until, tokenStr, ch, region),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
}
},
TokenizerState.Until => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Until, allocator, region),
else =>
{
std.debug.print("{}\n", . {state.*});
return error.NotImplemented;
}
}
}
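// Tokenizes the whole file content into an owned slice of tokens. A trailing
// '\n' is fed through the state machine after the loop so that a token still
// in progress at end of file gets flushed.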
pub fn tokenize(fileContent: []u8, allocator: std.mem.Allocator) ![]Token
{
var tokens = std.ArrayList(Token).init(allocator);
var state: TokenizerState = TokenizerState.Start;
var lastIndex: ?usize = null;
var index: usize = 0;
var tokenType: ?TokenType = null;
var tokenStr = std.ArrayList(u8).init(allocator);
defer tokenStr.deinit();
var tokenNumeral: ?types.Numeral = null;
var longBracketLevel: u32 = 0;
var region = CodeRegion { .start = null, .length = 0 };
while(index < fileContent.len)
{
const ch = fileContent[index];
try tokenizeChar(&state, ch, &lastIndex, &index, &tokenType, &tokenStr, &tokenNumeral, &tokens, &longBracketLevel, &region, allocator);
if(region.start != null and region.start.?.col == 0 and region.start.?.line == 0)
{
region.start = calculatePoint(fileContent, index);
}
index += 1;
}
if(longBracketLevel != 0)
{
return error.UnbalancedLongBracketLevel;
}
try tokenizeChar(&state, '\n', &lastIndex, &index, &tokenType, &tokenStr, &tokenNumeral, &tokens, &longBracketLevel, &region, allocator);
if(region.start != null and region.start.?.col == 0 and region.start.?.line == 0)
{
region.start = calculatePoint(fileContent, index);
}
return tokens.toOwnedSlice();
}
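// Converts a byte offset into a 1-based line/column location by rescanning
// the file content from the start; only invoked while a region start still
// holds the 0/0 placeholder set in tokenizeUpdateIndexAndState.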
fn calculatePoint(fileContent: []u8, index: usize) CodeLocation
{
var ret = CodeLocation { .col = 1, .line = 1 };
for(0..index) |i|
{
ret.col += 1;
if(fileContent[i] == '\n')
{
ret.line += 1;
ret.col = 1;
}
}
return ret;
}
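// A minimal smoke test for the happy path, added as a sketch: it assumes
// types.Numeral is a union with an Integer variant holding an i64, as the
// usage above suggests, and that tests are run via `zig test`.
test "tokenize a local assignment"
{
const allocator = std.testing.allocator;
var source = "local x = 42".*;
const tokens = try tokenize(&source, allocator);
defer
{
// Name and StringLiteral tokens own heap-allocated string data.
for(tokens) |token|
{
switch(token.tokenData)
{
.string => |s| allocator.free(s),
else => {},
}
}
allocator.free(tokens);
}
try std.testing.expectEqual(@as(usize, 4), tokens.len);
try std.testing.expectEqual(TokenType.Local, tokens[0].tokenType);
try std.testing.expectEqual(TokenType.Name, tokens[1].tokenType);
try std.testing.expectEqualStrings("x", tokens[1].tokenData.string);
try std.testing.expectEqual(TokenType.Equals, tokens[2].tokenType);
try std.testing.expectEqual(TokenType.Numeral, tokens[3].tokenType);
try std.testing.expectEqual(@as(i64, 42), tokens[3].tokenData.numeral.Integer);
}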