Implement \x escape sequences
This commit is contained in:
parent
ad3bb57dcc
commit
637638d889
80
Tokenizer.cs
80
Tokenizer.cs
@ -1,5 +1,6 @@
|
|||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
|
using System.Text;
|
||||||
using System.Text.Json.Serialization;
|
using System.Text.Json.Serialization;
|
||||||
|
|
||||||
namespace luaaaaah;
|
namespace luaaaaah;
|
||||||
@ -13,6 +14,7 @@ class Tokenizer
|
|||||||
int closingLongBracketLevel;
|
int closingLongBracketLevel;
|
||||||
Token? currentToken;
|
Token? currentToken;
|
||||||
CodeLocation currentLocation = new(line: 0, col: 0);
|
CodeLocation currentLocation = new(line: 0, col: 0);
|
||||||
|
int escapeSequenceNumber;
|
||||||
|
|
||||||
public Token[] Tokenize(string content)
|
public Token[] Tokenize(string content)
|
||||||
{
|
{
|
||||||
@ -385,7 +387,7 @@ class Tokenizer
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
throw new NotImplementedException(ch.ToString());
|
throw new NotImplementedException($"{ch} at {currentLocation}");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -488,7 +490,12 @@ class Tokenizer
|
|||||||
state = State.QuoteBackslashZ;
|
state = State.QuoteBackslashZ;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default: throw new Exception($"Unknown escape sequence: \\{ch}");
|
case 'x':
|
||||||
|
{
|
||||||
|
state = State.QuoteBackslashX;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default: throw new Exception($"Unknown escape sequence: \\{ch} at {currentLocation}");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -622,6 +629,11 @@ class Tokenizer
|
|||||||
state = State.SingleQuoteBackslashZ;
|
state = State.SingleQuoteBackslashZ;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case 'x':
|
||||||
|
{
|
||||||
|
state = State.SingleQuoteBackslashX;
|
||||||
|
}
|
||||||
|
break;
|
||||||
default: throw new Exception($"Unknown escape sequence: \\{ch}");
|
// Unknown escape inside a single-quoted string. Include the source location so this
// error matches the one raised for the same situation in a double-quoted string.
default: throw new Exception($"Unknown escape sequence: \\{ch} at {currentLocation}");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -658,6 +670,68 @@ class Tokenizer
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case State.SingleQuoteBackslashX:
{
    // First character after "\x" in a single-quoted string: it must be a hex digit.
    if(!char.IsAsciiHexDigit(ch))
    {
        throw new Exception($"{currentLocation}: Expected hex digit in \\x escape sequence, got {ch}");
    }

    state = State.SingleQuoteBackslashXHex;

    // Seed the escape value with this digit; the next state shifts it up and adds the second digit.
    if(char.IsAsciiDigit(ch))
    {
        escapeSequenceNumber = ch - '0';
    }
    else
    {
        // Letters a-f / A-F map to 10..15.
        escapeSequenceNumber = 10 + char.ToLower(ch) - 'a';
    }
}
break;
|
||||||
|
case State.SingleQuoteBackslashXHex:
{
    // Second hex digit of a "\xXX" escape in a single-quoted string.
    if(!char.IsAsciiHexDigit(ch))
    {
        throw new Exception($"{currentLocation}: Expected second hex digit in \\x escape sequence, got {ch}");
    }

    // The escape is complete after this digit, so go back to plain string content.
    state = State.SingleQuote;

    // Combine with the first digit stored by the previous state: result is in 0..255.
    escapeSequenceNumber = (escapeSequenceNumber * 16) + (char.IsAsciiDigit(ch) ? ch - '0' : 10 + char.ToLower(ch) - 'a');

    // Append exactly ONE char for the escape. The previous code decoded
    // BitConverter.GetBytes(int), which is four bytes long, so every escape was
    // followed by three stray '\0' chars (and any value >= 0x80 decoded to U+FFFD).
    // NOTE(review): the byte value is mapped 1:1 to the char with the same code
    // (0x00..0xFF) - confirm this matches how string data is consumed downstream.
    AppendDataChar((char)escapeSequenceNumber);

    escapeSequenceNumber = 0;
}
break;
|
||||||
|
case State.QuoteBackslashX:
{
    // First character after "\x" in a double-quoted string: it must be a hex digit.
    if(!char.IsAsciiHexDigit(ch))
    {
        throw new Exception($"{currentLocation}: Expected hex digit in \\x escape sequence, got {ch}");
    }

    state = State.QuoteBackslashXHex;

    // Seed the escape value with this digit; the next state shifts it up and adds the second digit.
    if(char.IsAsciiDigit(ch))
    {
        escapeSequenceNumber = ch - '0';
    }
    else
    {
        // Letters a-f / A-F map to 10..15.
        escapeSequenceNumber = 10 + char.ToLower(ch) - 'a';
    }
}
break;
|
||||||
|
case State.QuoteBackslashXHex:
{
    // Second hex digit of a "\xXX" escape in a double-quoted string.
    if(!char.IsAsciiHexDigit(ch))
    {
        throw new Exception($"{currentLocation}: Expected second hex digit in \\x escape sequence, got {ch}");
    }

    // The escape is complete after this digit, so go back to plain string content.
    state = State.Quote;

    // Combine with the first digit stored by the previous state: result is in 0..255.
    escapeSequenceNumber = (escapeSequenceNumber * 16) + (char.IsAsciiDigit(ch) ? ch - '0' : 10 + char.ToLower(ch) - 'a');

    // Append exactly ONE char for the escape. The previous code decoded
    // BitConverter.GetBytes(int), which is four bytes long, so every escape was
    // followed by three stray '\0' chars (and any value >= 0x80 decoded to U+FFFD).
    // NOTE(review): the byte value is mapped 1:1 to the char with the same code
    // (0x00..0xFF) - confirm this matches how string data is consumed downstream.
    AppendDataChar((char)escapeSequenceNumber);

    escapeSequenceNumber = 0;
}
break;
|
||||||
case State.String:
|
case State.String:
|
||||||
{
|
{
|
||||||
if(currentToken == null || currentToken.type == null)
|
if(currentToken == null || currentToken.type == null)
|
||||||
@ -3660,7 +3734,7 @@ class Tokenizer
|
|||||||
SmallCommentStart, QuoteBackslash, SingleQuoteBackslash, String, HexNumberX, ExpNumber,
|
SmallCommentStart, QuoteBackslash, SingleQuoteBackslash, String, HexNumberX, ExpNumber,
|
||||||
|
|
||||||
And, Bre, Els, End, Fal, For, Fun, Got, Loc, Nil, Not, Rep, Ret, The, Tru, Unt, Whi,
|
And, Bre, Els, End, Fal, For, Fun, Got, Loc, Nil, Not, Rep, Ret, The, Tru, Unt, Whi,
|
||||||
DotDotDot, HexNumber, QuoteBackslashZ, SingleQuoteBackslashZ,
|
DotDotDot, HexNumber, QuoteBackslashZ, SingleQuoteBackslashZ, QuoteBackslashX, SingleQuoteBackslashX, QuoteBackslashXHex, SingleQuoteBackslashXHex,
|
||||||
SmallComment, BigComment, BigCommentStartLongBracket, BigCommentEndLongBracket,
|
SmallComment, BigComment, BigCommentStartLongBracket, BigCommentEndLongBracket,
|
||||||
|
|
||||||
Brea, Else, Fals, Func, Goto, Loca, Repe, Retu, Then, True, Unti, Whil, HexExpNumber,
|
Brea, Else, Fals, Func, Goto, Loca, Repe, Retu, Then, True, Unti, Whil, HexExpNumber,
|
||||||
|
@ -8,3 +8,5 @@
|
|||||||
|
|
||||||
|
|
||||||
abc" "123" "sdlfkgj<3" "asldkfj" zzz "" "" "" "" "" "fasd!" "afd" "" "as" zzzz
|
abc" "123" "sdlfkgj<3" "asldkfj" zzz "" "" "" "" "" "fasd!" "afd" "" "as" zzzz
|
||||||
|
|
||||||
|
"\xf7\xAff\x43"
|
||||||
|
Reference in New Issue
Block a user