Implement \x escape sequences
This commit is contained in:
parent
ad3bb57dcc
commit
637638d889
80
Tokenizer.cs
80
Tokenizer.cs
@ -1,5 +1,6 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace luaaaaah;
|
||||
@ -13,6 +14,7 @@ class Tokenizer
|
||||
int closingLongBracketLevel;
|
||||
Token? currentToken;
|
||||
CodeLocation currentLocation = new(line: 0, col: 0);
|
||||
int escapeSequenceNumber;
|
||||
|
||||
public Token[] Tokenize(string content)
|
||||
{
|
||||
@ -385,7 +387,7 @@ class Tokenizer
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new NotImplementedException(ch.ToString());
|
||||
throw new NotImplementedException($"{ch} at {currentLocation}");
|
||||
}
|
||||
}
|
||||
break;
|
||||
@ -488,7 +490,12 @@ class Tokenizer
|
||||
state = State.QuoteBackslashZ;
|
||||
}
|
||||
break;
|
||||
default: throw new Exception($"Unknown escape sequence: \\{ch}");
|
||||
case 'x':
|
||||
{
|
||||
state = State.QuoteBackslashX;
|
||||
}
|
||||
break;
|
||||
default: throw new Exception($"Unknown escape sequence: \\{ch} at {currentLocation}");
|
||||
}
|
||||
}
|
||||
break;
|
||||
@ -622,6 +629,11 @@ class Tokenizer
|
||||
state = State.SingleQuoteBackslashZ;
|
||||
}
|
||||
break;
|
||||
case 'x':
|
||||
{
|
||||
state = State.SingleQuoteBackslashX;
|
||||
}
|
||||
break;
|
||||
default: throw new Exception($"Unknown escape sequence: \\{ch}");
|
||||
}
|
||||
}
|
||||
@ -658,6 +670,68 @@ class Tokenizer
|
||||
}
|
||||
}
|
||||
break;
|
||||
case State.SingleQuoteBackslashX:
|
||||
{
|
||||
if(char.IsAsciiHexDigit(ch))
|
||||
{
|
||||
state = State.SingleQuoteBackslashXHex;
|
||||
escapeSequenceNumber = char.IsAsciiDigit(ch) ? ch - '0' : 10 + char.ToLower(ch) - 'a';
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new Exception($"{currentLocation}: Expected hex digit in \\x escape sequence, got {ch}");
|
||||
}
|
||||
}
|
||||
break;
|
||||
case State.SingleQuoteBackslashXHex:
|
||||
{
|
||||
if(char.IsAsciiHexDigit(ch))
|
||||
{
|
||||
state = State.SingleQuote;
|
||||
escapeSequenceNumber = (escapeSequenceNumber * 16) + (char.IsAsciiDigit(ch) ? ch - '0' : 10 + char.ToLower(ch) - 'a');
|
||||
foreach(char c in Encoding.UTF8.GetChars(BitConverter.GetBytes(escapeSequenceNumber)))
|
||||
{
|
||||
AppendDataChar(c);
|
||||
}
|
||||
escapeSequenceNumber = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new Exception($"{currentLocation}: Expected second hex digit in \\x escape sequence, got {ch}");
|
||||
}
|
||||
}
|
||||
break;
|
||||
case State.QuoteBackslashX:
|
||||
{
|
||||
if(char.IsAsciiHexDigit(ch))
|
||||
{
|
||||
state = State.QuoteBackslashXHex;
|
||||
escapeSequenceNumber = char.IsAsciiDigit(ch) ? ch - '0' : 10 + char.ToLower(ch) - 'a';
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new Exception($"{currentLocation}: Expected hex digit in \\x escape sequence, got {ch}");
|
||||
}
|
||||
}
|
||||
break;
|
||||
case State.QuoteBackslashXHex:
|
||||
{
|
||||
if(char.IsAsciiHexDigit(ch))
|
||||
{
|
||||
state = State.Quote;
|
||||
escapeSequenceNumber = (escapeSequenceNumber * 16) + (char.IsAsciiDigit(ch) ? ch - '0' : 10 + char.ToLower(ch) - 'a');
|
||||
foreach(char c in Encoding.UTF8.GetChars(BitConverter.GetBytes(escapeSequenceNumber)))
|
||||
{
|
||||
AppendDataChar(c);
|
||||
}
|
||||
escapeSequenceNumber = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new Exception($"{currentLocation}: Expected second hex digit in \\x escape sequence, got {ch}");
|
||||
}
|
||||
}
|
||||
break;
|
||||
case State.String:
|
||||
{
|
||||
if(currentToken == null || currentToken.type == null)
|
||||
@ -3660,7 +3734,7 @@ class Tokenizer
|
||||
SmallCommentStart, QuoteBackslash, SingleQuoteBackslash, String, HexNumberX, ExpNumber,
|
||||
|
||||
And, Bre, Els, End, Fal, For, Fun, Got, Loc, Nil, Not, Rep, Ret, The, Tru, Unt, Whi,
|
||||
DotDotDot, HexNumber, QuoteBackslashZ, SingleQuoteBackslashZ,
|
||||
DotDotDot, HexNumber, QuoteBackslashZ, SingleQuoteBackslashZ, QuoteBackslashX, SingleQuoteBackslashX, QuoteBackslashXHex, SingleQuoteBackslashXHex,
|
||||
SmallComment, BigComment, BigCommentStartLongBracket, BigCommentEndLongBracket,
|
||||
|
||||
Brea, Else, Fals, Func, Goto, Loca, Repe, Retu, Then, True, Unti, Whil, HexExpNumber,
|
||||
|
@ -1,4 +1,4 @@
|
||||
"test" "\z
|
||||
"test" "\z
|
||||
|
||||
|
||||
|
||||
@ -7,4 +7,6 @@
|
||||
|
||||
|
||||
|
||||
abc" "123" "sdlfkgj<3" "asldkfj" zzz "" "" "" "" "" "fasd!" "afd" "" "as" zzzz
|
||||
abc" "123" "sdlfkgj<3" "asldkfj" zzz "" "" "" "" "" "fasd!" "afd" "" "as" zzzz
|
||||
|
||||
"\xf7\xAff\x43"
|
||||
|
Loading…
x
Reference in New Issue
Block a user