//! A state-machine tokenizer for Lua 5.4 source code.

const std = @import("std");
const types = @import("types.zig");
const CodeRegion = types.CodeRegion;
const CodeLocation = types.CodeLocation;

pub const TokenType = enum
{
    Name,
    And, Break, Do, Else, Elseif, End,
    False, For, Function, Goto, If, In,
    Local, Nil, Not, Or, Repeat, Return,
    Then, True, Until, While,
    Plus, Minus, Star, Slash, Percent, Caret, Hash,
    Ampersand, Tilde, Pipe, LtLt, GtGt, SlashSlash,
    EqualsEquals, TildeEquals, LtEquals, GtEquals, Lt, Gt, Equals,
    RoundOpen, RoundClosed, CurlyOpen, CurlyClosed, SquareOpen, SquareClosed, ColonColon,
    Semicolon, Colon, Comma, Dot, DotDot, DotDotDot,
    Numeral,
    StringLiteral,
};

const TokenData = union(enum)
{
    string: []u8,
    numeral: types.Numeral,
    none,
};

pub const Token = struct
{
    tokenType: TokenType,
    tokenData: TokenData,
    region: CodeRegion,
};

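// A minimal construction sketch (assuming types.Numeral is a union(enum) with
// an `Integer: i64` member, which is how the tokenizer code below uses it):
test "Token construction"
{
    const token = Token
    {
        .tokenType = TokenType.Numeral,
        .tokenData = TokenData { .numeral = types.Numeral { .Integer = 42 } },
        .region = CodeRegion { .start = null, .length = 0 },
    };
    try std.testing.expect(token.tokenType == TokenType.Numeral);
    try std.testing.expect(token.tokenData.numeral.Integer == 42);
}
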
const TokenizerState = enum
{
    Start,
    Quote, SingleQuote, Name, Number, Zero,
    A, B, D, E, F, G, I, L, N, O, R, T, U, W,
    Plus, Minus, Star, Slash, Percent, Caret, Hash,
    Ampersand, Tilde, Pipe, Lt, Gt, Equals, RoundOpen, RoundClosed, CurlyOpen, CurlyClosed, SquareOpen, SquareClosed,
    Colon, Semicolon, Comma, Dot,

    An, Br, Do, El, En, Fa, Fo, Fu, Go, If, In, Lo, Ni, No, Or, Re, Th, Tr, Un, Wh,
    LtLt, GtGt, SlashSlash, EqualsEquals, TildeEquals, LtEquals, GtEquals, ColonColon, DotDot,
    SmallCommentStart, QuoteBackslash, SingleQuoteBackslash, String, HexNumberX, ExpNumber,

    And, Bre, Els, End, Fal, For, Fun, Got, Loc, Nil, Not, Rep, Ret, The, Tru, Unt, Whi,
    DotDotDot, HexNumber, QuoteBackslashZ, SingleQuoteBackslashZ,
    BigCommentLongBracketStart, SmallComment,

    Brea, Else, Fals, Func, Goto, Loca, Repe, Retu, Then, True, Unti, Whil, HexExpNumber,
    BigComment, BigCommentLongBracketEnd,

    Break, Elsei, False, Funct, Local, Repea, Retur, Until, While,

    Elseif, Functi, Repeat, Return,

    Functio,

    Function,
};

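// tokenizeChar below implements a hand-rolled DFA: keywords are recognized by
// walking a prefix trie of states (Start -> A -> An -> And for "and"), and as
// soon as a character no longer fits a keyword, the partial match degrades to
// a plain Name. Multi-character operators work the same way (Lt -> LtLt for
// "<<"). When a character cannot extend the current token, tokenizeBacktrack
// emits the longest valid token and rescans from the character after it.
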
// Advances the bookkeeping shared by all terminal handlers: remembers the
// index of the last accepted character, switches the DFA state, and grows
// (or resets, when index is null) the region of the token being built.
fn tokenizeUpdateIndexAndState(lastIndex: *?usize, index: ?usize, state: *TokenizerState, newState: TokenizerState, region: *CodeRegion) void
{
    lastIndex.* = index;
    state.* = newState;
    if(index == null)
    {
        region.*.start = null;
        region.*.length = 0;
    }
    else
    {
        if(region.*.start == null)
        {
            // TODO: There is no line/col info here and plumbing it to here would be a pain.
            region.*.start = CodeLocation { .col = 0, .line = 0 };
        }
        region.*.length += 1;
    }
}

fn tokenizeTerminalBase(lastIndex: *?usize, index: ?usize, tokenType: *?TokenType, state: *TokenizerState, newTokenType: ?TokenType, newState: TokenizerState, region: *CodeRegion) void
{
    tokenizeUpdateIndexAndState(lastIndex, index, state, newState, region);
    tokenType.* = newTokenType;
}

fn tokenizeTerminalStr(lastIndex: *?usize, index: usize, tokenType: *?TokenType, state: *TokenizerState, newTokenType: ?TokenType, newState: TokenizerState, tokenStr: *std.ArrayList(u8), ch: u8, region: *CodeRegion) !void
{
    tokenizeTerminalBase(lastIndex, index, tokenType, state, newTokenType, newState, region);
    try tokenStr.append(ch);
}

fn tokenizeTerminalIntNum(lastIndex: *?usize, index: usize, tokenType: *?TokenType, state: *TokenizerState, newTokenType: TokenType, newState: TokenizerState, tokenNumeral: *?types.Numeral, ch: u8, region: *CodeRegion) !void
{
    tokenizeTerminalBase(lastIndex, index, tokenType, state, newTokenType, newState, region);
    if(!std.ascii.isDigit(ch))
    {
        return error.NoDigit;
    }
    const digitValue = @as(i64, ch - '0');
    if(tokenNumeral.* == null)
    {
        tokenNumeral.* = types.Numeral { .Integer = digitValue };
    }
    else
    {
        switch(tokenNumeral.*.?)
        {
            .Integer => |*n| n.* = n.* * 10 + digitValue,
            .Float => return error.ExpectedIntGotFloat,
        }
    }
}

fn tokenizeTerminalNoToken(lastIndex: *?usize, index: usize, state: *TokenizerState, newState: TokenizerState, tokenStr: *std.ArrayList(u8), ch: u8, region: *CodeRegion) !void
{
    tokenizeUpdateIndexAndState(lastIndex, index, state, newState, region);
    try tokenStr.append(ch);
}

fn tokenizeBacktrack(lastIndex: *?usize, index: *usize, tokens: *std.ArrayList(Token), tokenType: *?TokenType, tokenStr: *std.ArrayList(u8), tokenNumeral: *?types.Numeral, state: *TokenizerState, allocator: std.mem.Allocator, region: *CodeRegion) !void
{
    try tokenizeBacktrackCustomToken(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, tokenType.*.?, allocator, region);
}

// Emits the token that was valid up to `lastIndex`, rewinds `index` so that
// scanning resumes right after the last accepted character, and resets the
// per-token buffers.
fn tokenizeBacktrackCustomToken(lastIndex: *?usize, index: *usize, tokens: *std.ArrayList(Token), tokenType: *?TokenType, tokenStr: *std.ArrayList(u8), tokenNumeral: *?types.Numeral, state: *TokenizerState, newTokenType: TokenType, allocator: std.mem.Allocator, region: *CodeRegion) !void
{
    if(lastIndex.* == null or tokenType.* == null)
    {
        return error.LexError;
    }
    if(newTokenType == TokenType.StringLiteral or newTokenType == TokenType.Name)
    {
        const content = try allocator.alloc(u8, tokenStr.items.len);
        @memcpy(content, tokenStr.items);
        try tokens.append(Token { .tokenType = newTokenType, .tokenData = TokenData { .string = content }, .region = region.* });
    }
    else
    {
        try tokens.append(Token
        {
            .tokenType = newTokenType,
            .region = region.*,
            .tokenData = if(tokenType.*.? == TokenType.Numeral) TokenData { .numeral = tokenNumeral.*.? } else TokenData.none,
        });
    }
    tokenNumeral.* = null;
    index.* = lastIndex.*.?;
    tokenStr.clearAndFree();
    // region is reset in tokenizeTerminalBase since null is passed as index
    tokenizeTerminalBase(lastIndex, null, tokenType, state, null, TokenizerState.Start, region);
}

fn tokenizeAlphanumericNonstart(lastIndex: *?usize, index: *usize, tokens: *std.ArrayList(Token), tokenType: *?TokenType, tokenStr: *std.ArrayList(u8), tokenNumeral: *?types.Numeral, state: *TokenizerState, ch: u8, newTokenType: TokenType, allocator: std.mem.Allocator, region: *CodeRegion) !void
{
    if(std.ascii.isAlphanumeric(ch) or ch == '_')
    {
        try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.Name, tokenStr, ch, region);
    }
    else
    {
        try tokenizeBacktrackCustomToken(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, newTokenType, allocator, region);
    }
}

fn tokenizeChar(state: *TokenizerState, ch: u8, lastIndex: *?usize, index: *usize, tokenType: *?TokenType, tokenStr: *std.ArrayList(u8), tokenNumeral: *?types.Numeral, tokens: *std.ArrayList(Token), longBracketLevel: *u32, region: *CodeRegion, allocator: std.mem.Allocator) !void
{
    switch(state.*)
    {
        TokenizerState.Start =>
        {
            switch(ch)
            {
                '-' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Minus, TokenizerState.Minus, region),
                ',' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Comma, TokenizerState.Comma, region),
                '=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Equals, TokenizerState.Equals, region),
                '(' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.RoundOpen, TokenizerState.RoundOpen, region),
                ')' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.RoundClosed, TokenizerState.RoundClosed, region),
                '.' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Dot, TokenizerState.Dot, region),
                ':' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Colon, TokenizerState.Colon, region),
                '{' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.CurlyOpen, TokenizerState.CurlyOpen, region),
                '}' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.CurlyClosed, TokenizerState.CurlyClosed, region),
                '[' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.SquareOpen, TokenizerState.SquareOpen, region),
                ']' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.SquareClosed, TokenizerState.SquareClosed, region),
                '+' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Plus, TokenizerState.Plus, region),
                '~' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Tilde, TokenizerState.Tilde, region),
                '>' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Gt, TokenizerState.Gt, region),
                '<' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Lt, TokenizerState.Lt, region),
                '#' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Hash, TokenizerState.Hash, region),
                '|' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Pipe, TokenizerState.Pipe, region),
                '&' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Ampersand, TokenizerState.Ampersand, region),
                '%' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Percent, TokenizerState.Percent, region),
                '*' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Star, TokenizerState.Star, region),
                '/' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Slash, TokenizerState.Slash, region),
                ';' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Semicolon, TokenizerState.Semicolon, region),
                '^' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Caret, TokenizerState.Caret, region),
                'a' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.A, tokenStr, ch, region),
                'b' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.B, tokenStr, ch, region),
                'd' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.D, tokenStr, ch, region),
                'e' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.E, tokenStr, ch, region),
                'f' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.F, tokenStr, ch, region),
                'i' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.I, tokenStr, ch, region),
                'g' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.G, tokenStr, ch, region),
                'l' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.L, tokenStr, ch, region),
                'n' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.N, tokenStr, ch, region),
                'o' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.O, tokenStr, ch, region),
                'r' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.R, tokenStr, ch, region),
                't' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.T, tokenStr, ch, region),
                'u' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.U, tokenStr, ch, region),
                'w' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.W, tokenStr, ch, region),
                '0' => try tokenizeTerminalIntNum(lastIndex, index.*, tokenType, state, TokenType.Numeral, TokenizerState.Zero, tokenNumeral, ch, region),
                '"' =>
                {
                    tokenType.* = null;
                    state.* = TokenizerState.Quote;
                },
                '\'' =>
                {
                    tokenType.* = null;
                    state.* = TokenizerState.SingleQuote;
                },
                else =>
                {
                    if(std.ascii.isWhitespace(ch))
                    {
                        // Whitespace between tokens is skipped.
                    }
                    else if(std.ascii.isAlphabetic(ch) or ch == '_')
                    {
                        try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.Name, tokenStr, ch, region);
                    }
                    else if(std.ascii.isDigit(ch))
                    {
                        try tokenizeTerminalIntNum(lastIndex, index.*, tokenType, state, TokenType.Numeral, TokenizerState.Number, tokenNumeral, ch, region);
                    }
                    else
                    {
                        std.debug.print("{}: {c}\n", .{state.*, ch});
                        return error.NotImplemented;
                    }
                }
            }
        },
        TokenizerState.Quote =>
        {
            switch(ch)
            {
                '\\' => state.* = TokenizerState.QuoteBackslash,
                '"' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.StringLiteral, TokenizerState.String, region),
                else => try tokenStr.append(ch),
            }
        },
        TokenizerState.QuoteBackslash =>
        {
            switch(ch)
            {
                'a' =>
                {
                    try tokenStr.append('\u{0007}');
                    state.* = TokenizerState.Quote;
                },
                'b' =>
                {
                    try tokenStr.append('\u{0008}');
                    state.* = TokenizerState.Quote;
                },
                't' =>
                {
                    try tokenStr.append('\t');
                    state.* = TokenizerState.Quote;
                },
                // Both '\n' (escaped 'n') and a backslash followed by an
                // actual line break produce a newline (Lua 5.4 manual §3.1).
                'n', '\n' =>
                {
                    try tokenStr.append('\n');
                    state.* = TokenizerState.Quote;
                },
                'v' =>
                {
                    try tokenStr.append('\u{000b}');
                    state.* = TokenizerState.Quote;
                },
                'f' =>
                {
                    try tokenStr.append('\u{000c}');
                    state.* = TokenizerState.Quote;
                },
                'r' =>
                {
                    try tokenStr.append('\r');
                    state.* = TokenizerState.Quote;
                },
                '\\' =>
                {
                    try tokenStr.append('\\');
                    state.* = TokenizerState.Quote;
                },
                '"' =>
                {
                    try tokenStr.append('\"');
                    state.* = TokenizerState.Quote;
                },
                '\'' =>
                {
                    try tokenStr.append('\'');
                    state.* = TokenizerState.Quote;
                },
                'z' =>
                {
                    state.* = TokenizerState.QuoteBackslashZ;
                },
                else => return error.UnknownEscapeSequence,
            }
        },
        TokenizerState.QuoteBackslashZ =>
        {
            switch(ch)
            {
                '\\' => state.* = TokenizerState.QuoteBackslash,
                '"' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.StringLiteral, TokenizerState.String, region),
                else =>
                {
                    if(!std.ascii.isWhitespace(ch))
                    {
                        try tokenStr.append(ch);
                        state.* = TokenizerState.Quote;
                    }
                    else
                    {
                        // Noop, https://www.lua.org/manual/5.4/manual.html#3.1:
                        // "The escape sequence '\z' skips the following span of whitespace characters, including line breaks;"
                    }
                }
            }
        },
        TokenizerState.SingleQuote =>
        {
            switch(ch)
            {
                '\\' => state.* = TokenizerState.SingleQuoteBackslash,
                '\'' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.StringLiteral, TokenizerState.String, region),
                else => try tokenStr.append(ch),
            }
        },
        TokenizerState.SingleQuoteBackslash =>
        {
            switch(ch)
            {
                'a' =>
                {
                    try tokenStr.append('\u{0007}');
                    state.* = TokenizerState.SingleQuote;
                },
                'b' =>
                {
                    try tokenStr.append('\u{0008}');
                    state.* = TokenizerState.SingleQuote;
                },
                't' =>
                {
                    try tokenStr.append('\t');
                    state.* = TokenizerState.SingleQuote;
                },
                // As above: escaped 'n' and an escaped literal line break both
                // yield a newline.
                'n', '\n' =>
                {
                    try tokenStr.append('\n');
                    state.* = TokenizerState.SingleQuote;
                },
                'v' =>
                {
                    try tokenStr.append('\u{000b}');
                    state.* = TokenizerState.SingleQuote;
                },
                'f' =>
                {
                    try tokenStr.append('\u{000c}');
                    state.* = TokenizerState.SingleQuote;
                },
                'r' =>
                {
                    try tokenStr.append('\r');
                    state.* = TokenizerState.SingleQuote;
                },
                '\\' =>
                {
                    try tokenStr.append('\\');
                    state.* = TokenizerState.SingleQuote;
                },
                '"' =>
                {
                    try tokenStr.append('\"');
                    state.* = TokenizerState.SingleQuote;
                },
                '\'' =>
                {
                    try tokenStr.append('\'');
                    state.* = TokenizerState.SingleQuote;
                },
                'z' =>
                {
                    state.* = TokenizerState.SingleQuoteBackslashZ;
                },
                else => return error.UnknownEscapeSequence,
            }
        },
        TokenizerState.SingleQuoteBackslashZ =>
        {
            switch(ch)
            {
                '\\' => state.* = TokenizerState.SingleQuoteBackslash,
                '\'' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.StringLiteral, TokenizerState.String, region),
                else =>
                {
                    if(!std.ascii.isWhitespace(ch))
                    {
                        try tokenStr.append(ch);
                        state.* = TokenizerState.SingleQuote;
                    }
                    else
                    {
                        // Noop, https://www.lua.org/manual/5.4/manual.html#3.1:
                        // "The escape sequence '\z' skips the following span of whitespace characters, including line breaks;"
                    }
                }
            }
        },
        TokenizerState.String => try tokenizeBacktrackCustomToken(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, TokenType.StringLiteral, allocator, region),
        TokenizerState.Name => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
        TokenizerState.Zero =>
        {
            switch(ch)
            {
                'x' =>
                {
                    try tokenStr.append(ch);
                    tokenType.* = null;
                    state.* = TokenizerState.HexNumberX;
                },
                '.' => return error.NotImplemented,
                else =>
                {
                    if(std.ascii.isDigit(ch))
                    {
                        const digitValue = @as(i64, ch - '0');
                        lastIndex.* = index.*;
                        if(tokenNumeral.* == null)
                        {
                            tokenNumeral.* = types.Numeral { .Integer = digitValue };
                            tokenType.* = TokenType.Numeral;
                        }
                        else
                        {
                            tokenNumeral.*.?.Integer = tokenNumeral.*.?.Integer * 10 + digitValue;
                        }
                    }
                    else
                    {
                        try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region);
                    }
                }
            }
        },
        TokenizerState.HexNumberX =>
        {
            if(std.ascii.isHex(ch))
            {
                lastIndex.* = index.*;
                tokenType.* = TokenType.Numeral;
                // Hex letters carry the values 10 through 15.
                if(std.ascii.isDigit(ch))
                {
                    tokenNumeral.* = types.Numeral { .Integer = @as(i64, ch - '0') };
                }
                else
                {
                    tokenNumeral.* = types.Numeral { .Integer = @as(i64, std.ascii.toLower(ch) - 'a') + 10 };
                }
                state.* = TokenizerState.HexNumber;
            }
            else
            {
                try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region);
            }
        },
        TokenizerState.HexNumber =>
        {
            switch(ch)
            {
                'p' =>
                {
                    try tokenStr.append(ch);
                    tokenType.* = null;
                    state.* = TokenizerState.HexExpNumber;
                },
                else =>
                {
                    if(std.ascii.isHex(ch))
                    {
                        lastIndex.* = index.*;
                        tokenType.* = TokenType.Numeral;
                        // Accumulate base-16 digits; letters start at value 10.
                        const digitValue = @as(i64, if(std.ascii.isDigit(ch)) ch - '0' else std.ascii.toLower(ch) - 'a' + 10);
                        if(tokenNumeral.* == null)
                        {
                            tokenNumeral.* = types.Numeral { .Integer = digitValue };
                        }
                        else
                        {
                            tokenNumeral.*.?.Integer = tokenNumeral.*.?.Integer * 16 + digitValue;
                        }
                    }
                    else
                    {
                        try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region);
                    }
                }
            }
        },
        TokenizerState.Number =>
        {
            switch(ch)
            {
                'e' =>
                {
                    try tokenStr.append(ch);
                    tokenType.* = null;
                    state.* = TokenizerState.ExpNumber;
                },
                '.' => return error.NotImplemented,
                else =>
                {
                    if(std.ascii.isDigit(ch))
                    {
                        const digitValue = @as(i64, ch - '0');
                        lastIndex.* = index.*;
                        if(tokenNumeral.* == null)
                        {
                            tokenNumeral.* = types.Numeral { .Integer = digitValue };
                            tokenType.* = TokenType.Numeral;
                        }
                        else
                        {
                            tokenNumeral.*.?.Integer = tokenNumeral.*.?.Integer * 10 + digitValue;
                        }
                    }
                    else
                    {
                        try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region);
                    }
                }
            }
        },
        TokenizerState.Comma, TokenizerState.RoundOpen, TokenizerState.RoundClosed,
        TokenizerState.CurlyOpen, TokenizerState.CurlyClosed, TokenizerState.Plus,
        TokenizerState.TildeEquals, TokenizerState.EqualsEquals, TokenizerState.Hash,
        TokenizerState.GtEquals, TokenizerState.LtEquals, TokenizerState.SquareOpen,
        TokenizerState.SquareClosed, TokenizerState.Pipe, TokenizerState.Ampersand,
        TokenizerState.Percent, TokenizerState.Star, TokenizerState.Semicolon,
        TokenizerState.Caret, TokenizerState.DotDotDot, TokenizerState.GtGt,
        TokenizerState.LtLt, TokenizerState.SlashSlash => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region),
        TokenizerState.Tilde =>
        {
            switch(ch)
            {
                '=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.TildeEquals, TokenizerState.TildeEquals, region),
                else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region),
            }
        },
        TokenizerState.Gt =>
        {
            switch(ch)
            {
                '>' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.GtGt, TokenizerState.GtGt, region),
                '=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.GtEquals, TokenizerState.GtEquals, region),
                else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region),
            }
        },
        TokenizerState.Lt =>
        {
            switch(ch)
            {
                '<' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.LtLt, TokenizerState.LtLt, region),
                '=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.LtEquals, TokenizerState.LtEquals, region),
                else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region),
            }
        },
        TokenizerState.Slash =>
        {
            switch(ch)
            {
                '/' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.SlashSlash, TokenizerState.SlashSlash, region),
                else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region),
            }
        },
        TokenizerState.Dot =>
        {
            switch(ch)
            {
                '.' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.DotDot, TokenizerState.DotDot, region),
                else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region),
            }
        },
        TokenizerState.DotDot =>
        {
            switch(ch)
            {
                '.' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.DotDotDot, TokenizerState.DotDotDot, region),
                else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region),
            }
        },
        TokenizerState.Colon =>
        {
            switch(ch)
            {
                ':' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.ColonColon, TokenizerState.ColonColon, region),
                else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region),
            }
        },
        TokenizerState.Equals =>
        {
            switch(ch)
            {
                '=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.EqualsEquals, TokenizerState.EqualsEquals, region),
                else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region),
            }
        },
        TokenizerState.Minus =>
        {
            switch(ch)
            {
                '-' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, null, TokenizerState.SmallCommentStart, region),
                else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator, region),
            }
        },
        TokenizerState.SmallCommentStart =>
        {
            switch(ch)
            {
                '[' =>
                {
                    tokenType.* = null;
                    state.* = TokenizerState.BigCommentLongBracketStart;
                },
                '\n' =>
                {
                    state.* = TokenizerState.Start;
                    lastIndex.* = null;
                },
                else =>
                {
                    state.* = TokenizerState.SmallComment;
                },
            }
        },
        TokenizerState.SmallComment =>
        {
            switch(ch)
            {
                '\n' =>
                {
                    state.* = TokenizerState.Start;
                    lastIndex.* = null;
                },
                else => { },
            }
        },
        TokenizerState.BigCommentLongBracketStart =>
        {
            switch(ch)
            {
                '=' =>
                {
                    longBracketLevel.* += 1;
                },
                '[' =>
                {
                    state.* = TokenizerState.BigComment;
                },
                else => return error.LongBracketMalformedStartBigComment,
            }
        },
        TokenizerState.BigComment =>
        {
            switch(ch)
            {
                ']' =>
                {
                    state.* = TokenizerState.BigCommentLongBracketEnd;
                },
                else => { },
            }
        },
        TokenizerState.BigCommentLongBracketEnd =>
        {
            switch(ch)
            {
                '=' =>
                {
                    if(longBracketLevel.* == 0)
                    {
                        return error.LongBracketLevelTooBigEndBigComment;
                    }
                    longBracketLevel.* -= 1;
                },
                ']' =>
                {
                    if(longBracketLevel.* != 0)
                    {
                        return error.LongBracketLevelTooSmallEndBigComment;
                    }
                    state.* = TokenizerState.Start;
                },
                else => return error.LongBracketMalformedSmallEndBigComment,
            }
        },
        TokenizerState.A =>
        {
            switch(ch)
            {
                'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.An, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.An =>
        {
            switch(ch)
            {
                'd' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.And, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.And => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.And, allocator, region),
        TokenizerState.W =>
        {
            switch(ch)
            {
                'h' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Wh, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Wh =>
        {
            switch(ch)
            {
                'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Whi, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Whi =>
        {
            switch(ch)
            {
                'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Whil, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Whil =>
        {
            switch(ch)
            {
                'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.While, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.While => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.While, allocator, region),
        TokenizerState.B =>
        {
            switch(ch)
            {
                'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Br, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Br =>
        {
            switch(ch)
            {
                'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Bre, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Bre =>
        {
            switch(ch)
            {
                'a' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Brea, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Brea =>
        {
            switch(ch)
            {
                'k' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Break, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Break => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Break, allocator, region),
        TokenizerState.G =>
        {
            switch(ch)
            {
                'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Go, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Go =>
        {
            switch(ch)
            {
                't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Got, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Got =>
        {
            switch(ch)
            {
                'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Goto, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Goto => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Goto, allocator, region),
        TokenizerState.R =>
        {
            switch(ch)
            {
                'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Re, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Re =>
        {
            switch(ch)
            {
                't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Ret, tokenStr, ch, region),
                'p' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Rep, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Ret =>
        {
            switch(ch)
            {
                'u' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Retu, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Retu =>
        {
            switch(ch)
            {
                'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Retur, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Retur =>
        {
            switch(ch)
            {
                'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Return, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Return => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Return, allocator, region),
        TokenizerState.Rep =>
        {
            switch(ch)
            {
                'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Repe, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Repe =>
        {
            switch(ch)
            {
                'a' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Repea, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Repea =>
        {
            switch(ch)
            {
                't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Repeat, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Repeat => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Repeat, allocator, region),
        TokenizerState.N =>
        {
            switch(ch)
            {
                'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Ni, tokenStr, ch, region),
                'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.No, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.No =>
        {
            switch(ch)
            {
                't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Not, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Not => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Not, allocator, region),
        TokenizerState.Ni =>
        {
            switch(ch)
            {
                'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Nil, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Nil => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Nil, allocator, region),
        TokenizerState.T =>
        {
            switch(ch)
            {
                'h' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Th, tokenStr, ch, region),
                'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Tr, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Th =>
        {
            switch(ch)
            {
                'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.The, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.The =>
        {
            switch(ch)
            {
                'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Then, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Then => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Then, allocator, region),
        TokenizerState.Tr =>
        {
            switch(ch)
            {
                'u' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Tru, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Tru =>
        {
            switch(ch)
            {
                'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.True, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.True => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.True, allocator, region),
        TokenizerState.E =>
        {
            switch(ch)
            {
                'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.El, tokenStr, ch, region),
                'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.En, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.En =>
        {
            switch(ch)
            {
                'd' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.End, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.End => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.End, allocator, region),
        TokenizerState.El =>
        {
            switch(ch)
            {
                's' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Els, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Els =>
        {
            switch(ch)
            {
                'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Else, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Else =>
        {
            switch(ch)
            {
                'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Elsei, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Else, allocator, region),
            }
        },
        TokenizerState.Elsei =>
        {
            switch(ch)
            {
                'f' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Elseif, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Elseif => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Elseif, allocator, region),
        TokenizerState.O =>
        {
            switch(ch)
            {
                'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Or, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Or => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Or, allocator, region),
        TokenizerState.D =>
        {
            switch(ch)
            {
                'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Do, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Do => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Do, allocator, region),
        TokenizerState.I =>
        {
            switch(ch)
            {
                'f' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.If, tokenStr, ch, region),
                'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.In, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.In => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.In, allocator, region),
        TokenizerState.If => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.If, allocator, region),
        TokenizerState.F =>
        {
            switch(ch)
            {
                'a' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fa, tokenStr, ch, region),
                'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fo, tokenStr, ch, region),
                'u' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fu, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Fu =>
        {
            switch(ch)
            {
                'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fun, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Fun =>
        {
            switch(ch)
            {
                'c' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Func, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Func =>
        {
            switch(ch)
            {
                't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Funct, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Funct =>
        {
            switch(ch)
            {
                'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Functi, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Functi =>
        {
            switch(ch)
            {
                'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Functio, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Functio =>
        {
            switch(ch)
            {
                'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Function, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Function => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Function, allocator, region),
        TokenizerState.Fa =>
        {
            switch(ch)
            {
                'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fal, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Fal =>
        {
            switch(ch)
            {
                's' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fals, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Fals =>
        {
            switch(ch)
            {
                'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.False, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.False => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.False, allocator, region),
        TokenizerState.Fo =>
        {
            switch(ch)
            {
                'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.For, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.For => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.For, allocator, region),
        TokenizerState.L =>
        {
            switch(ch)
            {
                'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Lo, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Lo =>
        {
            switch(ch)
            {
                'c' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Loc, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Loc =>
        {
            switch(ch)
            {
                'a' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Loca, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Loca =>
        {
            switch(ch)
            {
                'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Local, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Local => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Local, allocator, region),
        TokenizerState.U =>
        {
            switch(ch)
            {
                'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Un, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Un =>
        {
            switch(ch)
            {
                't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Unt, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Unt =>
        {
            switch(ch)
            {
                'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Unti, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Unti =>
        {
            switch(ch)
            {
                'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Until, tokenStr, ch, region),
                else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator, region),
            }
        },
        TokenizerState.Until => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Until, allocator, region),
        else =>
        {
            std.debug.print("{}\n", .{state.*});
            return error.NotImplemented;
        }
    }
}

/// Turns Lua source text into a flat list of tokens. The returned slice is
/// owned by the caller, as are the string contents of Name and StringLiteral
/// tokens (all allocated with `allocator`).
pub fn tokenize(fileContent: []u8, allocator: std.mem.Allocator) ![]Token
{
    var tokens = std.ArrayList(Token).init(allocator);
    var state: TokenizerState = TokenizerState.Start;
    var lastIndex: ?usize = null;
    var index: usize = 0;
    var tokenType: ?TokenType = null;
    var tokenStr = std.ArrayList(u8).init(allocator);
    defer tokenStr.deinit();
    var tokenNumeral: ?types.Numeral = null;
    var longBracketLevel: u32 = 0;
    var region = CodeRegion { .start = null, .length = 0 };

    while(index < fileContent.len)
    {
        const ch = fileContent[index];
        try tokenizeChar(&state, ch, &lastIndex, &index, &tokenType, &tokenStr, &tokenNumeral, &tokens, &longBracketLevel, &region, allocator);
        if(region.start != null and region.start.?.col == 0 and region.start.?.line == 0)
        {
            // Resolve the placeholder location set in tokenizeUpdateIndexAndState
            // to a real line/column.
            region.start = calculatePoint(fileContent, index);
        }
        index += 1;
    }
    if(longBracketLevel != 0)
    {
        return error.UnbalancedLongBracketLevel;
    }
    // Feed one trailing newline through the state machine to flush the last token.
    try tokenizeChar(&state, '\n', &lastIndex, &index, &tokenType, &tokenStr, &tokenNumeral, &tokens, &longBracketLevel, &region, allocator);
    if(region.start != null and region.start.?.col == 0 and region.start.?.line == 0)
    {
        region.start = calculatePoint(fileContent, index);
    }
    return tokens.toOwnedSlice();
}

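// A usage sketch: run the tokenizer over a small Lua chunk. The caller owns
// the returned slice plus the heap-allocated contents of Name/StringLiteral
// tokens, so the test frees both.
test "tokenize a small chunk"
{
    var source = "local x = 42".*;
    const tokens = try tokenize(&source, std.testing.allocator);
    defer std.testing.allocator.free(tokens);
    defer
    {
        for(tokens) |token|
        {
            switch(token.tokenData)
            {
                .string => |content| std.testing.allocator.free(content),
                else => { },
            }
        }
    }
    try std.testing.expect(tokens.len == 4);
    try std.testing.expect(tokens[0].tokenType == TokenType.Local);
    try std.testing.expect(tokens[1].tokenType == TokenType.Name);
    try std.testing.expect(std.mem.eql(u8, tokens[1].tokenData.string, "x"));
    try std.testing.expect(tokens[2].tokenType == TokenType.Equals);
    try std.testing.expect(tokens[3].tokenType == TokenType.Numeral);
}
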
// Computes the 1-based line/column of `index` by scanning the file from the
// start; O(index) per call, invoked once per token start.
fn calculatePoint(fileContent: []u8, index: usize) CodeLocation
{
    var ret = CodeLocation { .col = 1, .line = 1 };
    for(0..index) |i|
    {
        ret.col += 1;
        if(fileContent[i] == '\n')
        {
            ret.line += 1;
            ret.col = 1;
        }
    }
    return ret;
}

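// A small sketch of the 1-based line/column convention: index 4 of "ab\ncd"
// is the 'd', i.e. the second column of the second line.
test "calculatePoint line and column"
{
    var content = "ab\ncd".*;
    const point = calculatePoint(&content, 4);
    try std.testing.expect(point.line == 2);
    try std.testing.expect(point.col == 2);
}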