Add an unfinished implementation of \u{XXX} escape sequences
Also add a TODO note to the \x escape sequence handling, since it is broken in the same way
This commit is contained in:
parent
ab4be05bf4
commit
8cbfb8b941
121
Tokenizer.cs
121
Tokenizer.cs
@ -14,7 +14,7 @@ class Tokenizer
|
||||
int closingLongBracketLevel;
|
||||
Token? currentToken;
|
||||
CodeLocation currentLocation = new(line: 0, col: 0);
|
||||
int escapeSequenceNumber;
|
||||
long escapeSequenceNumber;
|
||||
|
||||
public Token[] Tokenize(string content)
|
||||
{
|
||||
@ -495,10 +495,68 @@ class Tokenizer
|
||||
state = State.QuoteBackslashX;
|
||||
}
|
||||
break;
|
||||
case 'u':
|
||||
{
|
||||
state = State.QuoteBackslashU;
|
||||
}
|
||||
break;
|
||||
default: throw new Exception($"Unknown escape sequence: \\{ch} at {currentLocation}");
|
||||
}
|
||||
}
|
||||
break;
|
||||
case State.QuoteBackslashU:
|
||||
{
|
||||
if(ch == '{')
|
||||
{
|
||||
state = State.QuoteBackslashUBracket;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new Exception($"Expected `{{` to continue \\u escape sequence at {currentLocation}, got {ch}");
|
||||
}
|
||||
}
|
||||
break;
|
||||
case State.QuoteBackslashUBracket:
|
||||
{
|
||||
if(char.IsAsciiHexDigit(ch))
|
||||
{
|
||||
state = State.QuoteBackslashUBracketHex;
|
||||
escapeSequenceNumber = char.IsAsciiDigit(ch) ? ch - '0' : 10 + char.ToLower(ch) - 'a';
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new Exception($"Expected hex digit to continue \\u escape sequence at {currentLocation}, got {ch}");
|
||||
}
|
||||
}
|
||||
break;
|
||||
case State.QuoteBackslashUBracketHex:
|
||||
{
|
||||
if(char.IsAsciiHexDigit(ch))
|
||||
{
|
||||
escapeSequenceNumber = (escapeSequenceNumber * 16) + (char.IsAsciiDigit(ch) ? ch - '0' : 10 + char.ToLower(ch) - 'a');
|
||||
if(escapeSequenceNumber > uint.MaxValue)
|
||||
{
|
||||
throw new Exception($"{currentLocation}: \\u escape sequence has a value > 2^31 which is not permitted");
|
||||
}
|
||||
}
|
||||
else if(ch == '}')
|
||||
{
|
||||
state = State.Quote;
|
||||
// TODO: THIS IS WRONG: BitConverter.GetBytes always returns a fixed-size array, so the decoded chars include zero padding
|
||||
char[] chars = Encoding.UTF8.GetChars(BitConverter.GetBytes((uint)escapeSequenceNumber));
|
||||
for(int i = 0; i < chars.Length; i++)
|
||||
{
|
||||
AppendDataChar(chars[i]);
|
||||
}
|
||||
|
||||
escapeSequenceNumber = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new Exception($"Expected second hex digit to continue \\u escape sequence at {currentLocation}, got {ch}");
|
||||
}
|
||||
}
|
||||
break;
|
||||
case State.QuoteBackslashZ:
|
||||
{
|
||||
if(ch == '\\')
|
||||
@ -634,10 +692,67 @@ class Tokenizer
|
||||
state = State.SingleQuoteBackslashX;
|
||||
}
|
||||
break;
|
||||
case 'u':
|
||||
{
|
||||
state = State.SingleQuoteBackslashU;
|
||||
}
|
||||
break;
|
||||
default: throw new Exception($"Unknown escape sequence: \\{ch}");
|
||||
}
|
||||
}
|
||||
break;
|
||||
case State.SingleQuoteBackslashU:
|
||||
{
|
||||
if(ch == '{')
|
||||
{
|
||||
state = State.SingleQuoteBackslashUBracket;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new Exception($"Expected `{{` to continue \\u escape sequence at {currentLocation}, got {ch}");
|
||||
}
|
||||
}
|
||||
break;
|
||||
case State.SingleQuoteBackslashUBracket:
|
||||
{
|
||||
if(char.IsAsciiHexDigit(ch))
|
||||
{
|
||||
state = State.SingleQuoteBackslashUBracketHex;
|
||||
escapeSequenceNumber = char.IsAsciiDigit(ch) ? ch - '0' : 10 + char.ToLower(ch) - 'a';
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new Exception($"Expected hex digit to continue \\u escape sequence at {currentLocation}, got {ch}");
|
||||
}
|
||||
}
|
||||
break;
|
||||
case State.SingleQuoteBackslashUBracketHex:
|
||||
{
|
||||
if(char.IsAsciiHexDigit(ch))
|
||||
{
|
||||
escapeSequenceNumber = (escapeSequenceNumber * 16) + (char.IsAsciiDigit(ch) ? ch - '0' : 10 + char.ToLower(ch) - 'a');
|
||||
if(escapeSequenceNumber > uint.MaxValue)
|
||||
{
|
||||
throw new Exception($"{currentLocation}: \\u escape sequence has a value > 2^31 which is not permitted");
|
||||
}
|
||||
}
|
||||
else if(ch == '}')
|
||||
{
|
||||
state = State.SingleQuote;
|
||||
// TODO: THIS IS WRONG: BitConverter.GetBytes always returns a fixed-size array, so the decoded chars include zero padding
|
||||
char[] chars = Encoding.UTF8.GetChars(BitConverter.GetBytes((uint)escapeSequenceNumber));
|
||||
for(int i = 0; i < chars.Length; i++)
|
||||
{
|
||||
AppendDataChar(chars[i]);
|
||||
}
|
||||
escapeSequenceNumber = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new Exception($"Expected second hex digit to continue \\u escape sequence at {currentLocation}, got {ch}");
|
||||
}
|
||||
}
|
||||
break;
|
||||
case State.SingleQuoteBackslashZ:
|
||||
{
|
||||
if(ch == '\\')
|
||||
@ -689,6 +804,7 @@ class Tokenizer
|
||||
{
|
||||
state = State.SingleQuote;
|
||||
escapeSequenceNumber = (escapeSequenceNumber * 16) + (char.IsAsciiDigit(ch) ? ch - '0' : 10 + char.ToLower(ch) - 'a');
|
||||
// TODO: THIS IS WRONG: BitConverter.GetBytes always returns a fixed-size array, so the decoded chars include zero padding
|
||||
foreach(char c in Encoding.UTF8.GetChars(BitConverter.GetBytes(escapeSequenceNumber)))
|
||||
{
|
||||
AppendDataChar(c);
|
||||
@ -720,6 +836,7 @@ class Tokenizer
|
||||
{
|
||||
state = State.Quote;
|
||||
escapeSequenceNumber = (escapeSequenceNumber * 16) + (char.IsAsciiDigit(ch) ? ch - '0' : 10 + char.ToLower(ch) - 'a');
|
||||
// TODO: THIS IS WRONG: BitConverter.GetBytes always returns a fixed-size array, so the decoded chars include zero padding
|
||||
foreach(char c in Encoding.UTF8.GetChars(BitConverter.GetBytes(escapeSequenceNumber)))
|
||||
{
|
||||
AppendDataChar(c);
|
||||
@ -3735,6 +3852,8 @@ class Tokenizer
|
||||
|
||||
And, Bre, Els, End, Fal, For, Fun, Got, Loc, Nil, Not, Rep, Ret, The, Tru, Unt, Whi,
|
||||
DotDotDot, HexNumber, QuoteBackslashZ, SingleQuoteBackslashZ, QuoteBackslashX, SingleQuoteBackslashX, QuoteBackslashXHex, SingleQuoteBackslashXHex,
|
||||
SingleQuoteBackslashU, SingleQuoteBackslashUBracket, SingleQuoteBackslashUBracketHex,
|
||||
QuoteBackslashU, QuoteBackslashUBracket, QuoteBackslashUBracketHex,
|
||||
SmallComment, BigComment, BigCommentStartLongBracket, BigCommentEndLongBracket,
|
||||
|
||||
Brea, Else, Fals, Func, Goto, Loca, Repe, Retu, Then, True, Unti, Whil, HexExpNumber,
|
||||
|
Loading…
x
Reference in New Issue
Block a user